Skip to content

Commit f525f32

Browse files
committed
Remove local AI inference path and keep trained model remote-only
1 parent 1f68d0c commit f525f32

3 files changed

Lines changed: 12 additions & 120 deletions

File tree

config.yaml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,7 @@ ai_trading:
118118
inference_url_env: "TRAINED_MODEL_INFERENCE_URL"
119119
api_key_env: "TRAINED_MODEL_API_KEY"
120120
timeout_seconds: 60
121-
base_model: "Qwen/Qwen2.5-7B-Instruct"
122-
adapter_dir: "./models/lora_solid_adapter"
123-
max_new_tokens: 64
124-
temperature: 0.0
125-
cpu_threads: 4
121+
model_name: "quant-trained-trading-model"
126122

127123

128124

requirements-ai-local.txt

Lines changed: 0 additions & 5 deletions
This file was deleted.

trained_model_client.py

Lines changed: 11 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22
import logging
33
import os
44
import re
5-
import threading
6-
from typing import Any, Dict, Optional
5+
from typing import Optional
76

87
import requests
98

@@ -23,9 +22,6 @@
2322

2423

2524
class TrainedModelTradeClient:
26-
_runtime_lock = threading.Lock()
27-
_runtime_cache: Dict[str, Dict[str, Any]] = {}
28-
2925
def __init__(self, ai_cfg: Optional[dict] = None):
3026
ai_cfg = dict(ai_cfg or {})
3127
model_cfg = dict(ai_cfg.get("trained_model") or {})
@@ -37,40 +33,28 @@ def __init__(self, ai_cfg: Optional[dict] = None):
3733
self.api_key_env = str(model_cfg.get("api_key_env", "") or "").strip()
3834
self.api_key = os.getenv(self.api_key_env).strip() if self.api_key_env and os.getenv(self.api_key_env) else ""
3935
self.timeout_seconds = int(model_cfg.get("timeout_seconds", 60) or 60)
40-
self.base_model = str(model_cfg.get("base_model", "Qwen/Qwen2.5-7B-Instruct") or "Qwen/Qwen2.5-7B-Instruct").strip()
41-
self.adapter_dir = str(model_cfg.get("adapter_dir", "./models/lora_solid_adapter") or "./models/lora_solid_adapter").strip()
42-
self.max_new_tokens = int(model_cfg.get("max_new_tokens", 64) or 64)
43-
self.temperature = float(model_cfg.get("temperature", 0.0) or 0.0)
44-
self.cpu_threads = int(model_cfg.get("cpu_threads", 4) or 4)
36+
self.model_name = str(model_cfg.get("model_name", "trained-trading-model") or "trained-trading-model").strip()
4537
self.last_error = None
4638
self.last_model_used = None
4739

4840
@property
4941
def model_identifier(self) -> str:
50-
if self.backend == "http":
51-
return self.inference_url or "trained-model-http"
52-
adapter_name = os.path.basename(os.path.normpath(self.adapter_dir or "adapter")) or "adapter"
53-
return f"{self.base_model}+{adapter_name}"
42+
return self.model_name or self.inference_url or "trained-model-http"
5443

5544
def is_ready(self) -> bool:
56-
if self.backend == "http":
57-
if not self.inference_url:
58-
self.last_error = "trained_model.inference_url is not configured"
59-
return False
60-
return True
61-
if self.backend == "local":
62-
if not self.base_model or not self.adapter_dir:
63-
self.last_error = "trained_model.base_model or trained_model.adapter_dir is missing"
64-
return False
65-
return True
66-
self.last_error = f"Unsupported trained model backend: {self.backend}"
67-
return False
45+
if self.backend != "http":
46+
self.last_error = f"Unsupported trained model backend: {self.backend}. Use remote HTTP inference only."
47+
return False
48+
if not self.inference_url:
49+
self.last_error = "trained_model.inference_url is not configured"
50+
return False
51+
return True
6852

6953
def predict_candidate(self, candidate: dict) -> Optional[dict]:
7054
if not self.is_ready():
7155
return None
7256
try:
73-
raw = self._predict_http(candidate) if self.backend == "http" else self._predict_local(candidate)
57+
raw = self._predict_http(candidate)
7458
except Exception as exc:
7559
self.last_error = str(exc)
7660
logger.warning("Trained model inference failed for %s: %s", candidate.get("symbol"), exc)
@@ -81,8 +65,6 @@ def _predict_http(self, candidate: dict):
8165
payload = {
8266
"candidate": candidate,
8367
"task": "trade_signal_classification",
84-
"max_new_tokens": self.max_new_tokens,
85-
"temperature": self.temperature,
8668
}
8769
headers = {"Content-Type": "application/json", "Accept": "application/json"}
8870
if self.api_key:
@@ -95,87 +77,6 @@ def _predict_http(self, candidate: dict):
9577
return data["signal"]
9678
return data
9779

98-
def _predict_local(self, candidate: dict):
99-
runtime = self._ensure_local_runtime()
100-
tokenizer = runtime["tokenizer"]
101-
model = runtime["model"]
102-
torch = runtime["torch"]
103-
messages = self._build_messages(candidate)
104-
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
105-
encoded = tokenizer(prompt, return_tensors="pt")
106-
input_len = encoded["input_ids"].shape[-1]
107-
with torch.no_grad():
108-
generated = model.generate(
109-
**encoded,
110-
max_new_tokens=self.max_new_tokens,
111-
do_sample=bool(self.temperature and self.temperature > 0.0),
112-
temperature=max(self.temperature, 1e-5) if self.temperature and self.temperature > 0 else 1.0,
113-
pad_token_id=tokenizer.eos_token_id,
114-
)
115-
text = tokenizer.decode(generated[0][input_len:], skip_special_tokens=True).strip()
116-
self.last_model_used = self.model_identifier
117-
return text
118-
119-
def _ensure_local_runtime(self):
120-
cache_key = f"{self.base_model}|{self.adapter_dir}"
121-
with self._runtime_lock:
122-
cached = self._runtime_cache.get(cache_key)
123-
if cached is not None:
124-
return cached
125-
import torch
126-
from peft import PeftModel
127-
from transformers import AutoModelForCausalLM, AutoTokenizer
128-
129-
torch.set_num_threads(max(1, int(self.cpu_threads or 1)))
130-
tokenizer = AutoTokenizer.from_pretrained(self.base_model, trust_remote_code=True)
131-
if tokenizer.pad_token is None:
132-
tokenizer.pad_token = tokenizer.eos_token
133-
model = AutoModelForCausalLM.from_pretrained(
134-
self.base_model,
135-
trust_remote_code=True,
136-
low_cpu_mem_usage=True,
137-
)
138-
model = PeftModel.from_pretrained(model, self.adapter_dir, is_trainable=False)
139-
model.eval()
140-
runtime = {"tokenizer": tokenizer, "model": model, "torch": torch}
141-
self._runtime_cache[cache_key] = runtime
142-
return runtime
143-
144-
def _build_messages(self, candidate: dict):
145-
symbol = str(candidate.get("symbol") or "UNKNOWN").strip().upper()
146-
as_of_date = candidate.get("as_of_date") or candidate.get("last_date") or "UNKNOWN"
147-
lines = [
148-
f"TICKER: {symbol}",
149-
f"DATE: {as_of_date}",
150-
"PRICE_ACTION:",
151-
f"- last_close: {candidate.get('last_close')}",
152-
f"- closes_tail: {candidate.get('closes_tail')}",
153-
f"- volume_1d: {candidate.get('volume_1d')}",
154-
f"- volume_20d_avg: {candidate.get('volume_20d_avg')}",
155-
"INDICATORS:",
156-
f"- return_1d: {candidate.get('return_1d')}",
157-
f"- return_5d: {candidate.get('return_5d')}",
158-
f"- return_10d: {candidate.get('return_10d')}",
159-
f"- volatility_20d: {candidate.get('volatility_20d')}",
160-
f"- dist_ma_20: {candidate.get('dist_ma_20')}",
161-
f"- dist_ma_50: {candidate.get('dist_ma_50')}",
162-
f"- rsi_14: {candidate.get('rsi_14')}",
163-
f"- volume_ratio: {candidate.get('volume_ratio')}",
164-
"NEWS_CONTEXT:",
165-
f"- news_count_7d: {candidate.get('news_count_7d')}",
166-
f"- news_sentiment_7d: {candidate.get('news_sentiment_7d')}",
167-
"",
168-
"QUESTION: Classify the expected 5-day return as STRONG_BUY | BUY | NEUTRAL | SELL | STRONG_SELL.",
169-
"Return ONLY JSON using this schema:",
170-
'{"label":"BUY","confidence":0.63,"reason":"..."}',
171-
]
172-
system = (
173-
"You are the trained AI trading decision engine. "
174-
"Return only valid JSON with label, confidence, and reason. "
175-
"Use the provided market snapshot to classify the next 5-day return."
176-
)
177-
return [{"role": "system", "content": system}, {"role": "user", "content": "\n".join(lines)}]
178-
17980
def _normalize_prediction(self, raw) -> Optional[dict]:
18081
parsed = raw
18182
raw_text = None

0 commit comments

Comments (0)