lsdefine · valkryhx · May 13, 2026 · May 13, 2026 · May 13, 2026 · May 13, 2026
diff --git a/.gitignore b/.gitignore
@@ -94,6 +94,11 @@ memory/L4_raw_sessions/*
 # Memory management
 !memory/memory_cleanup_sop.md
 
+# Codex session distillation: track the tool script and its SOP docs (generated state under memory/codex_distill/ stays ignored)
+!memory/codex_session_distill.py
+!memory/codex_session_distill_sop.md
+!memory/codex_coding_sop.md
+
 # Visual Studio
 .vs/
 restore_commit.txt

diff --git a/AGENTS.md b/AGENTS.md
@@ -0,0 +1,32 @@
+# Repository Guidelines
+
+## 项目结构与模块组织
+
+GenericAgent 是一个紧凑的 Python 项目。核心运行时代码位于仓库根目录，包括 `agentmain.py`、`agent_loop.py`、`ga.py`、`llmcore.py` 和 `simphtml.py`。可安装的 CLI 包在 `ga_cli/`，`ga` 命令入口映射到 `ga_cli.cli:main`。各类界面和聊天/机器人适配器位于 `frontends/`；图片、皮肤和静态资源位于 `frontends/skins/` 与 `assets/`。长期记忆、SOP 和辅助工具位于 `memory/`，反射与自主运行辅助逻辑位于 `reflect/`，可选集成放在 `plugins/`。测试统一放在 `tests/`。
+
+## 构建、测试与本地运行
+
+- `python -m pip install -e .`：以 editable 模式安装核心包和 `ga` 命令。
+- `python -m pip install -e ".[ui]"`：安装核心依赖和桌面/TUI UI 依赖。
+- `python launch.pyw`：启动默认桌面界面。
+- `python frontends/tuiapp.py`：启动终端 UI。
+- `streamlit run frontends/stapp2.py`：启动 Streamlit 前端。
+- `python -m unittest discover -s tests`：运行当前测试套件。
+
+只安装正在修改的前端或机器人适配器所需的可选依赖。
+
+## 编码风格与命名规范
+
+使用 Python 3.10-3.13。代码应保持紧凑、可读，并贴合现有文件风格。优先使用自解释的函数和变量，少写解释性注释。避免过宽的 `try/except`，重要错误应清晰暴露。模块、函数和变量使用 `snake_case`，类名使用 `PascalCase`。新增模块应靠近功能边界，例如 UI 适配放在 `frontends/`，可选集成放在 `plugins/`。
+
+## 测试指南
+
+测试使用标准库 `unittest`。测试文件命名为 `test_*.py`，放在 `tests/`。新增前端或适配器行为时，应 stub 外部服务，避免依赖真实 API 凭据。提交前运行 `python -m unittest discover -s tests`，修复 bug 时补充聚焦的回归测试。
+
+## 提交与 PR 规范
+
+近期提交历史使用 Conventional Commits，例如 `feat(tui): ...`、`fix(tgapp): ...`、`docs: ...` 和 `refactor: ...`。提交应小而聚焦。PR 应说明背景、概述行为变化、列出验证命令；只有涉及可见 UI 变化时才附截图。避免不必要的新依赖和大范围重构。
+
+## 安全与配置提示
+
+不要提交真实 API key 或本地密钥。配置示例应维护在 `mykey_template.py`、`mykey_template_en.py` 或 `assets/configure_mykey.py`。本地生成的状态文件、日志和凭据应保持在版本控制之外。
diff --git a/agentmain.py b/agentmain.py
@@ -1,4 +1,4 @@
-import os, sys, threading, queue, time, json, re, random, locale
+import os, sys, threading, queue, time, json, re, random, locale, base64, mimetypes
 os.environ.setdefault('GA_LANG', 'zh' if any(k in (locale.getlocale()[0] or '').lower() for k in ('zh', 'chinese')) else 'en')
 if sys.stdout is None: sys.stdout = open(os.devnull, "w")
 elif hasattr(sys.stdout, 'reconfigure'): sys.stdout.reconfigure(errors='replace')
@@ -11,6 +11,50 @@
 from ga import GenericAgentHandler, smart_format, get_global_memory, format_error, consume_file
 
 script_dir = os.path.dirname(os.path.abspath(__file__))
+_IMAGE_EXTS = {'.png', '.jpg', '.jpeg', '.webp', '.gif', '.bmp'}
+
+
+def _extract_image_paths(text):
+    paths = []
+    for raw in re.findall(r'"([^"]+\.(?:png|jpe?g|webp|gif|bmp))"|\'([^\']+\.(?:png|jpe?g|webp|gif|bmp))\'|(\S+\.(?:png|jpe?g|webp|gif|bmp))', text or '', re.I):
+        p = next((x for x in raw if x), '')
+        if not p: continue
+        path = p if os.path.isabs(p) else os.path.join(script_dir, p)
+        if os.path.isfile(path) and path not in paths:
+            paths.append(path)
+    return paths
+
+
+def _image_block(path):
+    media_type = mimetypes.guess_type(path)[0] or 'image/png'
+    with open(path, 'rb') as f:
+        data = base64.b64encode(f.read()).decode('ascii')
+    return {"type": "image", "source": {"type": "base64", "media_type": media_type, "data": data}}
+
+
+def _build_user_content_with_images(text, images=None):
+    image_paths = []
+    for p in list(images or []) + _extract_image_paths(text):
+        path = p if os.path.isabs(str(p)) else os.path.join(script_dir, str(p))
+        if os.path.isfile(path) and os.path.splitext(path)[1].lower() in _IMAGE_EXTS and path not in image_paths:
+            image_paths.append(path)
+    if not image_paths:
+        return None
+    content = [{"type": "text", "text": text or ""}]
+    for path in image_paths:
+        try:
+            content.append(_image_block(path))
+        except Exception as e:
+            content[0]["text"] += f"\n[图片附件读取失败: {path}: {e}]"
+    return content
+
+
+def _native_image_input_enabled(llmclient):
+    backend = getattr(llmclient, 'backend', None)
+    backend = getattr(backend, 'primary', backend)
+    return isinstance(backend, NativeOAISession) and bool(getattr(backend, 'native_image_input', False))
+
+
 def load_tool_schema(suffix=''):
     global TOOLS_SCHEMA
     TS = open(os.path.join(script_dir, f'assets/tools_schema{suffix}.json'), 'r', encoding='utf-8').read()
@@ -64,14 +108,19 @@ def load_llm_sessions(self):
                 if 'mixin' in k: llm_sessions += [{'mixin_cfg': cfg}]
                 elif c := resolve_client(k): llm_sessions += [c]
             except: pass
+        resolved_sessions = []
         for i, s in enumerate(llm_sessions):
             if isinstance(s, dict) and 'mixin_cfg' in s:
                 try:
                     mixin = MixinSession(llm_sessions, s['mixin_cfg'])
-                    if isinstance(mixin._sessions[0], (NativeClaudeSession, NativeOAISession)): llm_sessions[i] = NativeToolClient(mixin)
-                    else: llm_sessions[i] = ToolClient(mixin)
+                    if isinstance(mixin._sessions[0], (NativeClaudeSession, NativeOAISession)): resolved_sessions.append(NativeToolClient(mixin))
+                    else: resolved_sessions.append(ToolClient(mixin))
                 except Exception as e: print(f'\n\n\n[ERROR] Failed to init MixinSession with cfg {s["mixin_cfg"]}: {e}!!!\n\n')
-        self.llmclients = llm_sessions
+            else:
+                resolved_sessions.append(s)
+        if not resolved_sessions: raise Exception('[ERROR] No available LLM sessions: Check your mykey.py')
+        self.llmclients = resolved_sessions
+        self.llm_no %= len(self.llmclients)
         self.llmclient = self.llmclients[self.llm_no%len(self.llmclients)]
         if oldhistory: self.llmclient.backend.history = oldhistory
 
@@ -125,7 +174,7 @@ def _handle_slash_cmd(self, raw_query, display_queue):
     def run(self):
         while True:
             task = self.task_queue.get()
-            raw_query, source, display_queue = task["query"], task["source"], task["output"]
+            raw_query, source, images, display_queue = task["query"], task["source"], task.get("images") or [], task["output"]
             raw_query = self._handle_slash_cmd(raw_query, display_queue)
             if raw_query is None:
                 self.task_queue.task_done(); continue
@@ -143,8 +192,10 @@ def run(self):
                 if ps > 0: handler.working['key_info'] += f'\n[SYSTEM] 此为 {ps} 个对话前设置的key_info，若已在新任务，先更新或清除工作记忆。\n'
             self.handler = handler  # although new handler, the **full** history is in llmclient, so it is full history!
             self.llmclient.log_path = self.log_path
-            gen = agent_runner_loop(self.llmclient, sys_prompt, raw_query, 
-                                handler, TOOLS_SCHEMA, max_turns=70, verbose=self.verbose)
+            initial_content = _build_user_content_with_images(raw_query, images) if _native_image_input_enabled(self.llmclient) else None
+            gen = agent_runner_loop(self.llmclient, sys_prompt, raw_query,
+                                handler, TOOLS_SCHEMA, max_turns=70, verbose=self.verbose,
+                                initial_user_content=initial_content)
             try:
                 full_resp = ""; last_pos = 0
                 for chunk in gen:

diff --git a/assets/global_mem_insight_template.txt b/assets/global_mem_insight_template.txt
@@ -2,14 +2,15 @@
 需要时read L2 或 ls ../memory/ 查L3
 L0(META-SOP): memory_management_sop
 L2: 现空
-L3: memory_cleanup_sop(记忆整理) | skill_search | ui_detect.py | ocr_utils.py | subagent | web_setup_sop | plan_sop 
+L3: memory_cleanup_sop(记忆整理) | skill_search | ui_detect.py | ocr_utils.py | subagent | web_setup_sop | plan_sop | codex_session_distill_sop
 | procmem_scanner | keychain | ljqCtrl_sop+.py | tmwebdriver_sop | autonomous_operation_sop | scheduled_task_sop | vision_sop | adb_ui.py
 L4: L4_raw_sessions/ 历史会话
 
 浏览器特殊操作: tmwebdriver_sop(文件上传/图搜/PDF blob/物理坐标/HttpOnly Cookie/autofill突破/跨域iframe/CDP/跨tab)
 键鼠: ljqCtrl_sop(禁pyautogui/先activate) 截图/视觉: ocr/vision_sop | 禁全屏截图，优先窗口
 定时:scheduled_task_sop | 自主:autonomous_operation_sop | watchdog/反射:agentmain --reflect
 手机:adb_ui.py
+编码经验:codex_coding_sop
 
 [RULES]
 1. 搜索先行: 搜文件名严禁不用es(禁PS递归/禁dir遍历), 搜索一定优先使用web工具的google(严禁duckduckgo等), 优先看cwd，禁猜路径

diff --git a/assets/tools_schema.json b/assets/tools_schema.json
@@ -66,9 +66,21 @@
       "question": {"type": "string", "description": "Question for the user"},
       "candidates": {"type": "array", "items": {"type": "string"}, "description": "Optional quick-select choices for the user"}}}
   }},
+  {"type": "function", "function": {
+    "name": "codex_lesson_update",
+    "description": "Record one LLM-proposed reusable coding lesson after reading a redacted Codex session packet. LLM analysis is required: do not use this for simple rule matches, raw logs, secrets, private paths, or project-specific facts. The tool validates, deduplicates, and stores candidates; independent source_hash values become promotion evidence.",
+    "parameters": {"type": "object", "properties": {
+      "title": {"type": "string", "description": "Short reusable lesson title"},
+      "guidance": {"type": "string", "description": "Concrete reusable guidance, not project-specific"},
+      "category": {"type": "string", "description": "workflow/debugging/testing/git/security/frontend/planning/communication/quality", "default": "workflow"},
+      "evidence": {"type": "array", "items": {"type": "string"}, "description": "Short evidence signals from the redacted packet timeline, verification, failure recovery, or seed observations"},
+      "source_hash": {"type": "string", "description": "Packet/session hash prefix from the packet; used as independent promotion evidence"},
+      "state_dir": {"type": "string", "description": "Optional isolated distill state directory for tests or explicit runs; omit for the default memory/codex_distill state"},
+      "confidence": {"type": "number", "description": "0.0-1.0 confidence", "default": 0.5}}}
+  }},
   {"type": "function", "function": {
     "name": "start_long_term_update",
     "description": "Start distilling long-term memory. Call when discovering info worth remembering (env facts/user prefs/lessons learned). Skip if memory already updated or in autonomous flow. Must call when a task that took 15+ turns is completed",
     "parameters": {"type": "object", "properties": {}}}
   }
-]
+]
diff --git a/assets/tools_schema_cn.json b/assets/tools_schema_cn.json
@@ -66,9 +66,21 @@
       "question": {"type": "string", "description": "向用户提出的明确问题"},
       "candidates": {"type": "array", "items": {"type": "string"}, "description": "提供给用户的可选快捷选项列表"}}}
   }},
+  {"type": "function", "function": {
+    "name": "codex_lesson_update",
+    "description": "在阅读脱敏 Codex session packet 后，记录一条由 LLM 分析提出的可复用编码经验。必须由 LLM 基于完整 packet 判断，不要用于简单规则匹配、原始日志、密钥、私有路径或项目特定事实。工具会校验、去重并写入候选库；独立 source_hash 会作为晋升证据。",
+    "parameters": {"type": "object", "properties": {
+      "title": {"type": "string", "description": "简短的可复用经验标题"},
+      "guidance": {"type": "string", "description": "具体可复用做法，不能是项目特定事实"},
+      "category": {"type": "string", "description": "workflow/debugging/testing/git/security/frontend/planning/communication/quality", "default": "workflow"},
+      "evidence": {"type": "array", "items": {"type": "string"}, "description": "来自脱敏 packet 的 timeline、验证命令、失败恢复或 seed observation 的短证据信号"},
+      "source_hash": {"type": "string", "description": "packet 中的 session hash 前缀；用于独立晋升证据计数"},
+      "state_dir": {"type": "string", "description": "可选的隔离蒸馏状态目录，用于测试或显式指定运行；不填则使用默认 memory/codex_distill"},
+      "confidence": {"type": "number", "description": "0.0-1.0置信度", "default": 0.5}}}
+  }},
   {"type": "function", "function": {
     "name": "start_long_term_update",
     "description": "准备开始提炼记忆。发现值得长期记忆的信息（环境事实/用户偏好/避坑经验）时调用此工具。已记忆更新或在自主流程内时无需调用。超15轮完成的任务必须调用以沉淀经验",
     "parameters": {"type": "object", "properties": {}}}
   }
-]
+]