Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ memory/L4_raw_sessions/*
# Memory management
!memory/memory_cleanup_sop.md

# Codex session distillation tool (state remains ignored under memory/codex_distill/)
!memory/codex_session_distill.py
!memory/codex_session_distill_sop.md
!memory/codex_coding_sop.md

# Visual Studio
.vs/
restore_commit.txt
Expand Down
32 changes: 32 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Repository Guidelines

## 项目结构与模块组织

GenericAgent 是一个紧凑的 Python 项目。核心运行时代码位于仓库根目录,包括 `agentmain.py`、`agent_loop.py`、`ga.py`、`llmcore.py` 和 `simphtml.py`。可安装的 CLI 包在 `ga_cli/`,`ga` 命令入口映射到 `ga_cli.cli:main`。各类界面和聊天/机器人适配器位于 `frontends/`;图片、皮肤和静态资源位于 `frontends/skins/` 与 `assets/`。长期记忆、SOP 和辅助工具位于 `memory/`,反射与自主运行辅助逻辑位于 `reflect/`,可选集成放在 `plugins/`。测试统一放在 `tests/`。

## 构建、测试与本地运行

- `python -m pip install -e .`:以 editable 模式安装核心包和 `ga` 命令。
- `python -m pip install -e ".[ui]"`:安装核心依赖和桌面/TUI UI 依赖。
- `python launch.pyw`:启动默认桌面界面。
- `python frontends/tuiapp.py`:启动终端 UI。
- `streamlit run frontends/stapp2.py`:启动 Streamlit 前端。
- `python -m unittest discover -s tests`:运行当前测试套件。

只安装正在修改的前端或机器人适配器所需的可选依赖。

## 编码风格与命名规范

使用 Python 3.10-3.13。代码应保持紧凑、可读,并贴合现有文件风格。优先使用自解释的函数和变量,少写解释性注释。避免过宽的 `try/except`,重要错误应清晰暴露。模块、函数和变量使用 `snake_case`,类名使用 `PascalCase`。新增模块应靠近功能边界,例如 UI 适配放在 `frontends/`,可选集成放在 `plugins/`。

## 测试指南

测试使用标准库 `unittest`。测试文件命名为 `test_*.py`,放在 `tests/`。新增前端或适配器行为时,应 stub 外部服务,避免依赖真实 API 凭据。提交前运行 `python -m unittest discover -s tests`,修复 bug 时补充聚焦的回归测试。

## 提交与 PR 规范

近期历史使用 Conventional Commits,例如 `feat(tui): ...`、`fix(tgapp): ...`、`docs: ...` 和 `refactor: ...`。提交应小而聚焦。PR 应说明背景、概述行为变化、列出验证命令;只有可见 UI 变化才附截图。避免不必要的新依赖和大范围重构。

## 安全与配置提示

不要提交真实 API key 或本地密钥。配置示例应维护在 `mykey_template.py`、`mykey_template_en.py` 或 `assets/configure_mykey.py`。本地生成状态、日志和凭据应保持在版本控制之外。
65 changes: 58 additions & 7 deletions agentmain.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os, sys, threading, queue, time, json, re, random, locale
import os, sys, threading, queue, time, json, re, random, locale, base64, mimetypes
os.environ.setdefault('GA_LANG', 'zh' if any(k in (locale.getlocale()[0] or '').lower() for k in ('zh', 'chinese')) else 'en')
if sys.stdout is None: sys.stdout = open(os.devnull, "w")
elif hasattr(sys.stdout, 'reconfigure'): sys.stdout.reconfigure(errors='replace')
Expand All @@ -11,6 +11,50 @@
from ga import GenericAgentHandler, smart_format, get_global_memory, format_error, consume_file

script_dir = os.path.dirname(os.path.abspath(__file__))
_IMAGE_EXTS = {'.png', '.jpg', '.jpeg', '.webp', '.gif', '.bmp'}


# Matches a "double-quoted" path, a 'single-quoted' path, or a bare
# whitespace-delimited token ending in a supported image extension.
# The trailing negative lookahead on the bare form rejects tokens where the
# extension is only a prefix of something longer (``shot.png.bak``,
# ``x.gifted``).  It is deliberately ASCII-only so that CJK text written
# directly after a path (no space) does not block the match.
_IMAGE_PATH_RE = re.compile(
    r'"([^"]+\.(?:png|jpe?g|webp|gif|bmp))"'
    r"|'([^']+\.(?:png|jpe?g|webp|gif|bmp))'"
    r'|(\S+\.(?:png|jpe?g|webp|gif|bmp))(?![A-Za-z0-9_]|\.[A-Za-z0-9])',
    re.I,
)


def _extract_image_paths(text):
    """Return existing image files referenced in *text*.

    Paths may be double-quoted, single-quoted, or bare tokens.  Relative
    paths are resolved against ``script_dir``.  Only paths that exist as
    regular files are returned, in first-seen order and without duplicates.

    Args:
        text: Free-form user text; ``None`` or empty yields an empty list.

    Returns:
        list[str]: Paths of the image files found.
    """
    paths = []
    for groups in _IMAGE_PATH_RE.findall(text or ''):
        # Exactly one of the three capture groups is non-empty per match.
        p = next((g for g in groups if g), '')
        if not p:
            continue
        path = p if os.path.isabs(p) else os.path.join(script_dir, p)
        if os.path.isfile(path) and path not in paths:
            paths.append(path)
    return paths


def _image_block(path):
    """Read *path* and wrap its bytes in a base64 image content block.

    The media type is guessed from the file name.  If the guess is missing
    or is not an ``image/*`` type, ``image/png`` is used so the block never
    advertises a non-image media type.

    Args:
        path: Path of the image file to read.

    Returns:
        dict: ``{"type": "image", "source": {"type": "base64", ...}}``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    guessed = mimetypes.guess_type(path)[0]
    media_type = guessed if guessed and guessed.startswith('image/') else 'image/png'
    with open(path, 'rb') as f:
        data = base64.b64encode(f.read()).decode('ascii')
    return {"type": "image", "source": {"type": "base64", "media_type": media_type, "data": data}}


def _build_user_content_with_images(text, images=None):
    """Build a multi-part user message from *text* plus image attachments.

    Candidates come from the explicit *images* list followed by any image
    paths found in *text*.  Relative candidates are resolved against
    ``script_dir``; only existing files with an extension in ``_IMAGE_EXTS``
    are kept, de-duplicated in first-seen order.

    Args:
        text: The user's message text (may be ``None``).
        images: Optional iterable of image paths (``str`` or path-like).

    Returns:
        list | None: ``None`` when no usable image was found; otherwise a
        list whose first item is the text part, followed by one image block
        per readable file.  Files that fail to load are reported by a note
        appended to the text part instead of raising.
    """
    image_paths = []
    for p in list(images or []) + _extract_image_paths(text):
        # Normalise to str once so a path-like entry and the same path
        # extracted from the text compare equal during de-duplication.
        candidate = str(p)
        path = candidate if os.path.isabs(candidate) else os.path.join(script_dir, candidate)
        if (os.path.isfile(path)
                and os.path.splitext(path)[1].lower() in _IMAGE_EXTS
                and path not in image_paths):
            image_paths.append(path)
    if not image_paths:
        return None
    content = [{"type": "text", "text": text or ""}]
    for path in image_paths:
        try:
            content.append(_image_block(path))
        except Exception as e:
            # Best effort: one unreadable attachment must not abort the task.
            content[0]["text"] += f"\n[图片附件读取失败: {path}: {e}]"
    return content


def _native_image_input_enabled(llmclient):
    """Tell whether *llmclient* should receive images as native content blocks.

    The client's ``backend`` attribute is inspected; if that backend has a
    ``primary`` attribute, the primary is inspected instead.  The result is
    true only when that session is a ``NativeOAISession`` whose
    ``native_image_input`` attribute is truthy.
    """
    session = getattr(llmclient, 'backend', None)
    # Use the backend's ``primary`` attribute when it has one.
    session = getattr(session, 'primary', session)
    if not isinstance(session, NativeOAISession):
        return False
    return bool(getattr(session, 'native_image_input', False))


def load_tool_schema(suffix=''):
global TOOLS_SCHEMA
TS = open(os.path.join(script_dir, f'assets/tools_schema{suffix}.json'), 'r', encoding='utf-8').read()
Expand Down Expand Up @@ -64,14 +108,19 @@ def load_llm_sessions(self):
if 'mixin' in k: llm_sessions += [{'mixin_cfg': cfg}]
elif c := resolve_client(k): llm_sessions += [c]
except: pass
resolved_sessions = []
for i, s in enumerate(llm_sessions):
if isinstance(s, dict) and 'mixin_cfg' in s:
try:
mixin = MixinSession(llm_sessions, s['mixin_cfg'])
if isinstance(mixin._sessions[0], (NativeClaudeSession, NativeOAISession)): llm_sessions[i] = NativeToolClient(mixin)
else: llm_sessions[i] = ToolClient(mixin)
if isinstance(mixin._sessions[0], (NativeClaudeSession, NativeOAISession)): resolved_sessions.append(NativeToolClient(mixin))
else: resolved_sessions.append(ToolClient(mixin))
except Exception as e: print(f'\n\n\n[ERROR] Failed to init MixinSession with cfg {s["mixin_cfg"]}: {e}!!!\n\n')
self.llmclients = llm_sessions
else:
resolved_sessions.append(s)
if not resolved_sessions: raise Exception('[ERROR] No available LLM sessions: Check your mykey.py')
self.llmclients = resolved_sessions
self.llm_no %= len(self.llmclients)
self.llmclient = self.llmclients[self.llm_no%len(self.llmclients)]
if oldhistory: self.llmclient.backend.history = oldhistory

Expand Down Expand Up @@ -125,7 +174,7 @@ def _handle_slash_cmd(self, raw_query, display_queue):
def run(self):
while True:
task = self.task_queue.get()
raw_query, source, display_queue = task["query"], task["source"], task["output"]
raw_query, source, images, display_queue = task["query"], task["source"], task.get("images") or [], task["output"]
raw_query = self._handle_slash_cmd(raw_query, display_queue)
if raw_query is None:
self.task_queue.task_done(); continue
Expand All @@ -143,8 +192,10 @@ def run(self):
if ps > 0: handler.working['key_info'] += f'\n[SYSTEM] 此为 {ps} 个对话前设置的key_info,若已在新任务,先更新或清除工作记忆。\n'
self.handler = handler # although new handler, the **full** history is in llmclient, so it is full history!
self.llmclient.log_path = self.log_path
gen = agent_runner_loop(self.llmclient, sys_prompt, raw_query,
handler, TOOLS_SCHEMA, max_turns=70, verbose=self.verbose)
initial_content = _build_user_content_with_images(raw_query, images) if _native_image_input_enabled(self.llmclient) else None
gen = agent_runner_loop(self.llmclient, sys_prompt, raw_query,
handler, TOOLS_SCHEMA, max_turns=70, verbose=self.verbose,
initial_user_content=initial_content)
try:
full_resp = ""; last_pos = 0
for chunk in gen:
Expand Down
3 changes: 2 additions & 1 deletion assets/global_mem_insight_template.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
需要时read L2 或 ls ../memory/ 查L3
L0(META-SOP): memory_management_sop
L2: 现空
L3: memory_cleanup_sop(记忆整理) | skill_search | ui_detect.py | ocr_utils.py | subagent | web_setup_sop | plan_sop
L3: memory_cleanup_sop(记忆整理) | skill_search | ui_detect.py | ocr_utils.py | subagent | web_setup_sop | plan_sop | codex_session_distill_sop
| procmem_scanner | keychain | ljqCtrl_sop+.py | tmwebdriver_sop | autonomous_operation_sop | scheduled_task_sop | vision_sop | adb_ui.py
L4: L4_raw_sessions/ 历史会话

浏览器特殊操作: tmwebdriver_sop(文件上传/图搜/PDF blob/物理坐标/HttpOnly Cookie/autofill突破/跨域iframe/CDP/跨tab)
键鼠: ljqCtrl_sop(禁pyautogui/先activate) 截图/视觉: ocr/vision_sop | 禁全屏截图,优先窗口
定时:scheduled_task_sop | 自主:autonomous_operation_sop | watchdog/反射:agentmain --reflect
手机:adb_ui.py
编码经验:codex_coding_sop

[RULES]
1. 搜索先行: 搜文件名严禁不用es(禁PS递归/禁dir遍历), 搜索一定优先使用web工具的google(严禁duckduckgo等), 优先看cwd,禁猜路径
Expand Down
14 changes: 13 additions & 1 deletion assets/tools_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,21 @@
"question": {"type": "string", "description": "Question for the user"},
"candidates": {"type": "array", "items": {"type": "string"}, "description": "Optional quick-select choices for the user"}}}
}},
{"type": "function", "function": {
"name": "codex_lesson_update",
"description": "Record one LLM-proposed reusable coding lesson after reading a redacted Codex session packet. LLM analysis is required: do not use this for simple rule matches, raw logs, secrets, private paths, or project-specific facts. The tool validates, deduplicates, and stores candidates; independent source_hash values become promotion evidence.",
"parameters": {"type": "object", "properties": {
"title": {"type": "string", "description": "Short reusable lesson title"},
"guidance": {"type": "string", "description": "Concrete reusable guidance, not project-specific"},
"category": {"type": "string", "description": "workflow/debugging/testing/git/security/frontend/planning/communication/quality", "default": "workflow"},
"evidence": {"type": "array", "items": {"type": "string"}, "description": "Short evidence signals from the redacted packet timeline, verification, failure recovery, or seed observations"},
"source_hash": {"type": "string", "description": "Packet/session hash prefix from the packet; used as independent promotion evidence"},
"state_dir": {"type": "string", "description": "Optional isolated distill state directory for tests or explicit runs; omit for the default memory/codex_distill state"},
"confidence": {"type": "number", "description": "0.0-1.0 confidence", "default": 0.5}}}
}},
{"type": "function", "function": {
"name": "start_long_term_update",
"description": "Start distilling long-term memory. Call when discovering info worth remembering (env facts/user prefs/lessons learned). Skip if memory already updated or in autonomous flow. Must call when a task that took 15+ turns is completed",
"parameters": {"type": "object", "properties": {}}}
}
]
]
14 changes: 13 additions & 1 deletion assets/tools_schema_cn.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,21 @@
"question": {"type": "string", "description": "向用户提出的明确问题"},
"candidates": {"type": "array", "items": {"type": "string"}, "description": "提供给用户的可选快捷选项列表"}}}
}},
{"type": "function", "function": {
"name": "codex_lesson_update",
"description": "在阅读脱敏 Codex session packet 后,记录一条由 LLM 分析提出的可复用编码经验。必须由 LLM 基于完整 packet 判断,不要用于简单规则匹配、原始日志、密钥、私有路径或项目特定事实。工具会校验、去重并写入候选库;独立 source_hash 会作为晋升证据。",
"parameters": {"type": "object", "properties": {
"title": {"type": "string", "description": "简短的可复用经验标题"},
"guidance": {"type": "string", "description": "具体可复用做法,不能是项目特定事实"},
"category": {"type": "string", "description": "workflow/debugging/testing/git/security/frontend/planning/communication/quality", "default": "workflow"},
"evidence": {"type": "array", "items": {"type": "string"}, "description": "来自脱敏 packet 的 timeline、验证命令、失败恢复或 seed observation 的短证据信号"},
"source_hash": {"type": "string", "description": "packet 中的 session hash 前缀;用于独立晋升证据计数"},
"state_dir": {"type": "string", "description": "可选的隔离蒸馏状态目录,用于测试或显式指定运行;不填则使用默认 memory/codex_distill"},
"confidence": {"type": "number", "description": "0.0-1.0置信度", "default": 0.5}}}
}},
{"type": "function", "function": {
"name": "start_long_term_update",
"description": "准备开始提炼记忆。发现值得长期记忆的信息(环境事实/用户偏好/避坑经验)时调用此工具。已记忆更新或在自主流程内时无需调用。超15轮完成的任务必须调用以沉淀经验",
"parameters": {"type": "object", "properties": {}}}
}
]
]
Loading