From 0f896e438e7af7cd9469508d9c1ddc97a4b505bc Mon Sep 17 00:00:00 2001 From: star-med Date: Tue, 30 Jun 2026 06:45:33 +0800 Subject: [PATCH 1/4] Cache invalid-workspace alias inference per storage fingerprint (#116) Compute invalid_workspace_aliases once per mtime-keyed fingerprint and reuse it in assemble_single_tab, tab summaries, and workspace listing instead of re-scanning all composerData rows on every request. --- services/summary_cache.py | 42 +++++++++ services/workspace_context.py | 105 ++++++++++++++++++++++- services/workspace_listing.py | 21 ++--- services/workspace_tabs.py | 64 ++++++-------- tests/test_summary_cache.py | 18 ++++ tests/test_workspace_context.py | 122 +++++++++++++++++++++++++++ tests/test_workspace_tabs_summary.py | 16 +++- 7 files changed, 333 insertions(+), 55 deletions(-) diff --git a/services/summary_cache.py b/services/summary_cache.py index ed87eb3..8690283 100644 --- a/services/summary_cache.py +++ b/services/summary_cache.py @@ -23,6 +23,7 @@ CACHE_DIR = Path.home() / ".cache" / "cursor-chat-browser" PROJECTS_CACHE_FILE = CACHE_DIR / "projects.json" COMPOSER_MAP_CACHE_FILE = CACHE_DIR / "composer-id-to-ws.json" +INVALID_WORKSPACE_ALIASES_CACHE_FILE = CACHE_DIR / "invalid-workspace-aliases.json" TAB_SUMMARIES_PREFIX = "tab-summaries-" @@ -238,6 +239,47 @@ def set_cached_composer_id_to_ws( ) +def get_cached_invalid_workspace_aliases( + fingerprint: dict[str, Any], +) -> dict[str, str] | None: + """Load cached invalid-workspace alias map when the fingerprint matches. + + Args: + fingerprint: Storage mtime/rules digest. + + Returns: + ``{invalid_id: replacement_id}`` on hit, else ``None``. + """ + data = _read_cache_file(INVALID_WORKSPACE_ALIASES_CACHE_FILE) + if not data: + return None + if not _fingerprint_equal(data.get("fingerprint"), fingerprint): + return None + aliases = data.get("invalid_workspace_aliases") + if not isinstance(aliases, dict): + return None + return {str(k): str(v) for k, v in aliases.items()} + + +def set_cached_invalid_workspace_aliases( + fingerprint: dict[str, Any], + aliases: dict[str, str], +) -> None: + """Persist invalid-workspace alias map under *fingerprint*. + + Args: + fingerprint: Invalidation fingerprint paired with *aliases*. + aliases: ``{invalid_id: replacement_id}`` from alias inference. + """ + _write_cache_file( + INVALID_WORKSPACE_ALIASES_CACHE_FILE, + { + "fingerprint": fingerprint, + "invalid_workspace_aliases": aliases, + }, + ) + + def _tab_summaries_path(workspace_id: str) -> Path: safe = hashlib.sha256(workspace_id.encode("utf-8")).hexdigest()[:16] return CACHE_DIR / f"{TAB_SUMMARIES_PREFIX}{safe}.json" diff --git a/services/workspace_context.py b/services/workspace_context.py index 477bf94..bd7f838 100644 --- a/services/workspace_context.py +++ b/services/workspace_context.py @@ -2,22 +2,27 @@ from __future__ import annotations +import os import sqlite3 -from dataclasses import dataclass, replace +from dataclasses import dataclass, field, replace from typing import Any from models import Bubble from services.workspace_db import ( + COMPOSER_ROWS_WITH_HEADERS_SQL, build_composer_id_to_workspace_id, build_composer_id_to_workspace_id_cached, collect_invalid_workspace_ids, collect_workspace_entries, + global_storage_db_path, load_bubble_map, load_project_layouts_map, + safe_fetchall, ) from services.workspace_resolver import ( create_project_name_to_workspace_id_map, create_workspace_path_to_id_map, + infer_invalid_workspace_aliases, ) @@ -32,6 +37,7 @@ class WorkspaceContext: workspace_path_to_id: dict[str, str] project_layouts_map: dict[str, list[str]] bubble_map: dict[str, Bubble] + invalid_workspace_aliases: dict[str, str] = field(default_factory=dict) def _entries( @@ -135,3 +141,100 @@ def enrich_workspace_context_from_global_db( if not updates: return ctx return replace(ctx, **updates) + + +def resolve_invalid_workspace_aliases_cached( + ctx: WorkspaceContext, + global_db: sqlite3.Connection, + workspace_path: str, + rules: list[Any], + *, + nocache: bool = False, + project_layouts_map: dict[str, list[str]] | None = None, +) -> dict[str, str]: + """Return invalid-workspace alias map, using the summary-cache fingerprint. + + Computes ``infer_invalid_workspace_aliases`` at most once per storage + fingerprint (same mtime key as composer-map / tab-summary caches). When + *ctx* already carries a populated ``invalid_workspace_aliases`` field, + that value is returned without touching disk or the global DB roster. + + Args: + ctx: Workspace maps from :func:`resolve_workspace_context_cached`. + global_db: Open global ``state.vscdb`` connection. + workspace_path: Cursor ``workspaceStorage`` root. + rules: Exclusion rule token lists (fingerprint input). + nocache: When ``True``, bypass disk cache reads and writes. + project_layouts_map: Pre-loaded layouts; loaded from *global_db* when + ``None``. + + Returns: + ``{invalid_id: replacement_id}``, or ``{}`` when every workspace is valid. + """ + if not ctx.invalid_workspace_ids: + return {} + + from services.summary_cache import ( + fingerprint_workspace_storage, + get_cached_invalid_workspace_aliases, + nocache_enabled, + set_cached_invalid_workspace_aliases, + ) + from utils.workspace_path import get_cli_chats_path + + gdb = global_storage_db_path(workspace_path) + cli_path = get_cli_chats_path() + fingerprint = fingerprint_workspace_storage( + workspace_path, + ctx.workspace_entries, + global_db_path=gdb if os.path.isfile(gdb) else None, + rules=rules, + cli_chats_path=cli_path if os.path.isdir(cli_path) else None, + ) + if not nocache_enabled(request_nocache=nocache): + cached = get_cached_invalid_workspace_aliases(fingerprint) + if cached is not None: + return cached + + layouts = ( + project_layouts_map + if project_layouts_map is not None + else load_project_layouts_map(global_db) + ) + composer_rows = safe_fetchall(global_db, COMPOSER_ROWS_WITH_HEADERS_SQL) + aliases = infer_invalid_workspace_aliases( + composer_rows=composer_rows, + project_layouts_map=layouts, + project_name_map=ctx.project_name_to_workspace_id, + workspace_path_map=ctx.workspace_path_to_id, + workspace_entries=ctx.workspace_entries, + bubble_map={}, + composer_id_to_ws=ctx.composer_id_to_workspace_id, + invalid_workspace_ids=ctx.invalid_workspace_ids, + ) + if not nocache_enabled(request_nocache=nocache): + set_cached_invalid_workspace_aliases(fingerprint, aliases) + return aliases + + +def with_invalid_workspace_aliases( + ctx: WorkspaceContext, + global_db: sqlite3.Connection, + workspace_path: str, + rules: list[Any], + *, + nocache: bool = False, + project_layouts_map: dict[str, list[str]] | None = None, +) -> WorkspaceContext: + """Return *ctx* with ``invalid_workspace_aliases`` populated from cache.""" + aliases = resolve_invalid_workspace_aliases_cached( + ctx, + global_db, + workspace_path, + rules, + nocache=nocache, + project_layouts_map=project_layouts_map, + ) + if aliases is ctx.invalid_workspace_aliases: + return ctx + return replace(ctx, invalid_workspace_aliases=aliases) diff --git a/services/workspace_listing.py b/services/workspace_listing.py index 0f96d90..7b1483a 100644 --- a/services/workspace_listing.py +++ b/services/workspace_listing.py @@ -30,6 +30,7 @@ nocache_enabled, set_cached_projects, ) +from services.workspace_context import resolve_invalid_workspace_aliases_cached from services.workspace_db import ( COMPOSER_ROWS_WITH_HEADERS_SQL, collect_workspace_entries, @@ -41,7 +42,6 @@ from utils.workspace_path import get_cli_chats_path from services.workspace_resolver import ( build_composer_ids_by_workspace, - infer_invalid_workspace_aliases, infer_workspace_name_from_layouts, lookup_workspace_display_name, ) @@ -124,18 +124,13 @@ def _build_workspace_projects_uncached( project_layouts_map = load_project_layouts_map(global_db) bubble_map: dict[str, Bubble] = {} - invalid_workspace_aliases: dict[str, str] = {} - if invalid_workspace_ids: - invalid_workspace_aliases = infer_invalid_workspace_aliases( - composer_rows=composer_rows, - project_layouts_map=project_layouts_map, - project_name_map=project_name_map, - workspace_path_map=workspace_path_map, - workspace_entries=workspace_entries, - bubble_map=bubble_map, - composer_id_to_ws=composer_id_to_ws, - invalid_workspace_ids=invalid_workspace_ids, - ) + invalid_workspace_aliases = resolve_invalid_workspace_aliases_cached( + ctx, + global_db, + workspace_path, + rules, + project_layouts_map=project_layouts_map, + ) for row in composer_rows: composer = parse_composer_data_row( diff --git a/services/workspace_tabs.py b/services/workspace_tabs.py index f61a1a4..4ced9f1 100644 --- a/services/workspace_tabs.py +++ b/services/workspace_tabs.py @@ -50,7 +50,10 @@ nocache_enabled, set_cached_tab_summaries, ) -from services.workspace_context import resolve_workspace_context_cached +from services.workspace_context import ( + resolve_invalid_workspace_aliases_cached, + resolve_workspace_context_cached, +) from services.workspace_db import ( COMPOSER_ROWS_WITH_HEADERS_SQL, collect_workspace_entries, @@ -67,7 +70,6 @@ ) from utils.workspace_path import get_cli_chats_path from services.workspace_resolver import ( - infer_invalid_workspace_aliases, lookup_workspace_display_name, matching_workspace_ids_for_folder, ) @@ -453,18 +455,14 @@ def _build_workspace_tab_summaries_uncached( composer_rows = safe_fetchall(global_db, COMPOSER_ROWS_WITH_HEADERS_SQL) - invalid_workspace_aliases: dict[str, str] = {} - if invalid_workspace_ids: - invalid_workspace_aliases = infer_invalid_workspace_aliases( - composer_rows=composer_rows, - project_layouts_map=project_layouts_map, - project_name_map=project_name_map, - workspace_path_map=workspace_path_map, - workspace_entries=workspace_entries, - bubble_map={}, - composer_id_to_ws=composer_id_to_ws, - invalid_workspace_ids=invalid_workspace_ids, - ) + invalid_workspace_aliases = resolve_invalid_workspace_aliases_cached( + ctx, + global_db, + workspace_path, + rules, + nocache=nocache, + project_layouts_map=project_layouts_map, + ) for row in composer_rows: composer = parse_composer_data_row( @@ -582,22 +580,15 @@ def assemble_single_tab( return {"error": "Conversation not found"}, 404 project_layouts_map: dict[str, list[str]] = {} - invalid_workspace_aliases: dict[str, str] = {} project_layouts_map[composer_id] = load_project_layouts_for_composer( global_db, composer_id, ) - if invalid_workspace_ids: - composer_rows_for_aliases = safe_fetchall(global_db, COMPOSER_ROWS_WITH_HEADERS_SQL) - invalid_workspace_aliases = infer_invalid_workspace_aliases( - composer_rows=composer_rows_for_aliases, - project_layouts_map=project_layouts_map, - project_name_map=project_name_map, - workspace_path_map=workspace_path_map, - workspace_entries=workspace_entries, - bubble_map={}, - composer_id_to_ws=composer_id_to_ws, - invalid_workspace_ids=invalid_workspace_ids, - ) + invalid_workspace_aliases = resolve_invalid_workspace_aliases_cached( + ctx, + global_db, + workspace_path, + rules, + ) bubble_map = load_bubbles_for_composer( global_db, composer_id, parse_warnings=parse_warnings, @@ -723,18 +714,13 @@ def assemble_workspace_tabs( # Get composer data entries with conversations composer_rows = safe_fetchall(global_db, COMPOSER_ROWS_WITH_HEADERS_SQL) - invalid_workspace_aliases: dict[str, str] = {} - if invalid_workspace_ids: - invalid_workspace_aliases = infer_invalid_workspace_aliases( - composer_rows=composer_rows, - project_layouts_map=project_layouts_map, - project_name_map=project_name_map, - workspace_path_map=workspace_path_map, - workspace_entries=workspace_entries, - bubble_map=bubble_map, - composer_id_to_ws=composer_id_to_ws, - invalid_workspace_ids=invalid_workspace_ids, - ) + invalid_workspace_aliases = resolve_invalid_workspace_aliases_cached( + ctx, + global_db, + workspace_path, + rules, + project_layouts_map=project_layouts_map, + ) for row in composer_rows: composer = parse_composer_data_row( diff --git a/tests/test_summary_cache.py b/tests/test_summary_cache.py index f5330c2..bcab593 100644 --- a/tests/test_summary_cache.py +++ b/tests/test_summary_cache.py @@ -18,7 +18,9 @@ from services.summary_cache import ( fingerprint_workspace_storage, + get_cached_invalid_workspace_aliases, get_cached_projects, + set_cached_invalid_workspace_aliases, set_cached_projects, ) @@ -29,6 +31,9 @@ def setUp(self): self.cache_patch = patch.object(summary_cache, "CACHE_DIR", self.tmp.name) self.cache_patch.start() summary_cache.PROJECTS_CACHE_FILE = Path(self.tmp.name) / "projects.json" + summary_cache.INVALID_WORKSPACE_ALIASES_CACHE_FILE = ( + Path(self.tmp.name) / "invalid-workspace-aliases.json" + ) def tearDown(self): self.cache_patch.stop() @@ -84,6 +89,19 @@ def test_workspace_files_fingerprint_round_trip(self): assert hit is not None self.assertEqual(hit[0], projects) + def test_invalid_workspace_aliases_cache_hit(self): + fp = {"version": 1, "workspace_path": "/ws", "global_db_mtime_ns": 100} + aliases = {"broken-ws": "good-ws"} + set_cached_invalid_workspace_aliases(fp, aliases) + hit = get_cached_invalid_workspace_aliases(fp) + self.assertEqual(hit, aliases) + + def test_invalid_workspace_aliases_cache_miss_on_fingerprint_change(self): + fp1 = {"version": 1, "workspace_path": "/ws", "global_db_mtime_ns": 100} + fp2 = {**fp1, "global_db_mtime_ns": 101} + set_cached_invalid_workspace_aliases(fp1, {"broken-ws": "good-ws"}) + self.assertIsNone(get_cached_invalid_workspace_aliases(fp2)) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_workspace_context.py b/tests/test_workspace_context.py index 07d20f5..857cd2b 100644 --- a/tests/test_workspace_context.py +++ b/tests/test_workspace_context.py @@ -11,9 +11,11 @@ from services.workspace_context import ( WorkspaceContext, enrich_workspace_context_from_global_db, + resolve_invalid_workspace_aliases_cached, resolve_workspace_context, resolve_workspace_context_cached, resolve_workspace_context_minimal, + with_invalid_workspace_aliases, ) @@ -47,6 +49,17 @@ def _open_global_db(tmp: str) -> sqlite3.Connection: return conn +def _open_workspace_global_db(ws_root: str) -> sqlite3.Connection: + """Open the global DB at the path ``open_global_db`` expects for *ws_root*.""" + global_dir = os.path.normpath(os.path.join(ws_root, "..", "globalStorage")) + os.makedirs(global_dir, exist_ok=True) + db_path = os.path.join(global_dir, "state.vscdb") + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + conn.execute("CREATE TABLE cursorDiskKV (key TEXT PRIMARY KEY, value TEXT)") + return conn + + def test_resolve_workspace_context_minimal(): with tempfile.TemporaryDirectory() as tmp: ws_root = _make_workspace_root(tmp) @@ -241,3 +254,112 @@ def test_enrich_with_no_flags_returns_unchanged_context(): finally: conn.close() assert result is ctx + + +def test_resolve_invalid_workspace_aliases_empty_when_all_workspaces_valid(): + with tempfile.TemporaryDirectory() as tmp: + ws_root = _make_workspace_root(tmp) + ctx = resolve_workspace_context(ws_root) + conn = _open_global_db(tmp) + conn.commit() + try: + aliases = resolve_invalid_workspace_aliases_cached( + ctx, conn, ws_root, [], + ) + finally: + conn.close() + assert aliases == {} + + +def test_resolve_invalid_workspace_aliases_cached_uses_disk_cache(): + from pathlib import Path + from services import summary_cache + + with tempfile.TemporaryDirectory() as cache_tmp: + with patch.object(summary_cache, "CACHE_DIR", cache_tmp): + summary_cache.INVALID_WORKSPACE_ALIASES_CACHE_FILE = ( + Path(cache_tmp) / "invalid-workspace-aliases.json" + ) + with tempfile.TemporaryDirectory() as tmp: + ws_root = _make_workspace_root(tmp) + _add_workspace_without_folders(ws_root, "invalidws") + ctx = resolve_workspace_context(ws_root) + conn = _open_workspace_global_db(ws_root) + conn.commit() + try: + with patch( + "services.workspace_context.infer_invalid_workspace_aliases", + return_value={"invalidws": "abc123workspace"}, + ) as mock_infer: + first = resolve_invalid_workspace_aliases_cached( + ctx, conn, ws_root, [], + ) + second = resolve_invalid_workspace_aliases_cached( + ctx, conn, ws_root, [], + ) + assert first == {"invalidws": "abc123workspace"} + assert second == first + mock_infer.assert_called_once() + finally: + conn.close() + + +def test_resolve_invalid_workspace_aliases_cache_miss_after_fingerprint_change(): + from pathlib import Path + from services import summary_cache + + with tempfile.TemporaryDirectory() as cache_tmp: + with patch.object(summary_cache, "CACHE_DIR", cache_tmp): + summary_cache.INVALID_WORKSPACE_ALIASES_CACHE_FILE = ( + Path(cache_tmp) / "invalid-workspace-aliases.json" + ) + with tempfile.TemporaryDirectory() as tmp: + ws_root = _make_workspace_root(tmp) + _add_workspace_without_folders(ws_root, "invalidws") + ctx = resolve_workspace_context(ws_root) + conn = _open_workspace_global_db(ws_root) + conn.commit() + global_db_path = os.path.normpath( + os.path.join(ws_root, "..", "globalStorage", "state.vscdb"), + ) + try: + with patch( + "services.workspace_context.infer_invalid_workspace_aliases", + return_value={"invalidws": "abc123workspace"}, + ) as mock_infer: + resolve_invalid_workspace_aliases_cached(ctx, conn, ws_root, []) + stat = os.stat(global_db_path) + os.utime(global_db_path, (stat.st_atime, stat.st_mtime + 2)) + resolve_invalid_workspace_aliases_cached(ctx, conn, ws_root, []) + assert mock_infer.call_count == 2 + finally: + conn.close() + + +def test_with_invalid_workspace_aliases_attaches_to_context(): + from pathlib import Path + from services import summary_cache + + with tempfile.TemporaryDirectory() as cache_tmp: + with patch.object(summary_cache, "CACHE_DIR", cache_tmp): + summary_cache.INVALID_WORKSPACE_ALIASES_CACHE_FILE = ( + Path(cache_tmp) / "invalid-workspace-aliases.json" + ) + with tempfile.TemporaryDirectory() as tmp: + ws_root = _make_workspace_root(tmp) + _add_workspace_without_folders(ws_root, "invalidws") + ctx = resolve_workspace_context(ws_root) + conn = _open_workspace_global_db(ws_root) + conn.commit() + try: + with patch( + "services.workspace_context.infer_invalid_workspace_aliases", + return_value={"invalidws": "abc123workspace"}, + ): + enriched = with_invalid_workspace_aliases(ctx, conn, ws_root, []) + finally: + conn.close() + assert enriched.invalid_workspace_aliases == { + "invalidws": "abc123workspace", + } + assert ctx.invalid_workspace_aliases == {} diff --git a/tests/test_workspace_tabs_summary.py b/tests/test_workspace_tabs_summary.py index 4c3d96e..a5081f7 100644 --- a/tests/test_workspace_tabs_summary.py +++ b/tests/test_workspace_tabs_summary.py @@ -229,8 +229,10 @@ def test_scoped_bubble_query_only(self): ) def test_scoped_mrc_load_with_invalid_workspaces(self): - """With invalid workspace folders, alias scan runs but MRC stays per-composer.""" + """With invalid workspaces, alias map is cached — no per-tab global composer/MRC scans.""" _add_invalid_workspace_and_mrc_rows(self.ws_path) + # Warm the alias disk cache (one global layout/composer pass per fingerprint). + assemble_single_tab("global", COMPOSER_ID, self.ws_path, rules=[]) (_, _), queries = _collect_queries( self.ws_path, lambda p: assemble_single_tab("global", COMPOSER_ID, p, rules=[]), @@ -244,7 +246,17 @@ def test_scoped_mrc_load_with_invalid_workspaces(self): self.assertEqual( mrc_scans, [], - msg=f"assemble_single_tab ran a global MRC scan:\n{mrc_scans}", + msg=f"assemble_single_tab ran a global MRC scan on cache hit:\n{mrc_scans}", + ) + composer_scans = [ + q for q in queries + if "composerData:%" in q + and f"composerData:{COMPOSER_ID}" not in q + ] + self.assertEqual( + composer_scans, + [], + msg=f"assemble_single_tab scanned all composers on cache hit:\n{composer_scans}", ) def test_scoped_mrc_load_no_invalid_workspaces(self): From ab8a3fafa2c848c2143e1ea709e74e6cb43d8225 Mon Sep 17 00:00:00 2001 From: star-med Date: Tue, 30 Jun 2026 07:04:29 +0800 Subject: [PATCH 2/4] fix(#125): harden alias cache validation and honor nocache fast path --- services/summary_cache.py | 7 +++++- services/workspace_context.py | 10 +++++--- services/workspace_listing.py | 5 +++- tests/test_summary_cache.py | 11 ++++++++ tests/test_workspace_context.py | 28 ++++++++++++++++++++- tests/test_workspace_listing_performance.py | 18 +++++++++++++ 6 files changed, 72 insertions(+), 7 deletions(-) diff --git a/services/summary_cache.py b/services/summary_cache.py index 8690283..39d0adc 100644 --- a/services/summary_cache.py +++ b/services/summary_cache.py @@ -258,7 +258,12 @@ def get_cached_invalid_workspace_aliases( aliases = data.get("invalid_workspace_aliases") if not isinstance(aliases, dict): return None - return {str(k): str(v) for k, v in aliases.items()} + validated: dict[str, str] = {} + for key, value in aliases.items(): + if not isinstance(key, str) or not isinstance(value, str): + return None + validated[key] = value + return validated def set_cached_invalid_workspace_aliases( diff --git a/services/workspace_context.py b/services/workspace_context.py index bd7f838..a2d6930 100644 --- a/services/workspace_context.py +++ b/services/workspace_context.py @@ -4,7 +4,7 @@ import os import sqlite3 -from dataclasses import dataclass, field, replace +from dataclasses import dataclass, replace from typing import Any from models import Bubble @@ -37,7 +37,7 @@ class WorkspaceContext: workspace_path_to_id: dict[str, str] project_layouts_map: dict[str, list[str]] bubble_map: dict[str, Bubble] - invalid_workspace_aliases: dict[str, str] = field(default_factory=dict) + invalid_workspace_aliases: dict[str, str] | None = None def _entries( @@ -171,6 +171,8 @@ def resolve_invalid_workspace_aliases_cached( Returns: ``{invalid_id: replacement_id}``, or ``{}`` when every workspace is valid. """ + if ctx.invalid_workspace_aliases is not None: + return ctx.invalid_workspace_aliases if not ctx.invalid_workspace_ids: return {} @@ -227,6 +229,8 @@ def with_invalid_workspace_aliases( project_layouts_map: dict[str, list[str]] | None = None, ) -> WorkspaceContext: """Return *ctx* with ``invalid_workspace_aliases`` populated from cache.""" + if ctx.invalid_workspace_aliases is not None: + return ctx aliases = resolve_invalid_workspace_aliases_cached( ctx, global_db, @@ -235,6 +239,4 @@ def with_invalid_workspace_aliases( nocache=nocache, project_layouts_map=project_layouts_map, ) - if aliases is ctx.invalid_workspace_aliases: - return ctx return replace(ctx, invalid_workspace_aliases=aliases) diff --git a/services/workspace_listing.py b/services/workspace_listing.py index 7b1483a..2a9bbd9 100644 --- a/services/workspace_listing.py +++ b/services/workspace_listing.py @@ -93,7 +93,7 @@ def list_workspace_projects( ) projects, warnings = _build_workspace_projects_uncached( - workspace_path, rules, orch, + workspace_path, rules, orch, nocache=effective_nocache, ) if not effective_nocache: set_cached_projects(orch.fingerprint, projects, warnings) @@ -104,6 +104,8 @@ def _build_workspace_projects_uncached( workspace_path: str, rules: list[Any], orch: WorkspaceOrchestration, + *, + nocache: bool = False, ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: parse_warnings = ParseWarningCollector() ctx = orch.ctx @@ -129,6 +131,7 @@ def _build_workspace_projects_uncached( global_db, workspace_path, rules, + nocache=nocache, project_layouts_map=project_layouts_map, ) diff --git a/tests/test_summary_cache.py b/tests/test_summary_cache.py index bcab593..7aea99e 100644 --- a/tests/test_summary_cache.py +++ b/tests/test_summary_cache.py @@ -102,6 +102,17 @@ def test_invalid_workspace_aliases_cache_miss_on_fingerprint_change(self): set_cached_invalid_workspace_aliases(fp1, {"broken-ws": "good-ws"}) self.assertIsNone(get_cached_invalid_workspace_aliases(fp2)) + def test_invalid_workspace_aliases_rejects_non_string_entries(self): + fp = {"version": 1, "workspace_path": "/ws", "global_db_mtime_ns": 100} + summary_cache._write_cache_file( + summary_cache.INVALID_WORKSPACE_ALIASES_CACHE_FILE, + { + "fingerprint": fp, + "invalid_workspace_aliases": {"broken-ws": 123}, + }, + ) + self.assertIsNone(get_cached_invalid_workspace_aliases(fp)) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_workspace_context.py b/tests/test_workspace_context.py index 857cd2b..fbf529b 100644 --- a/tests/test_workspace_context.py +++ b/tests/test_workspace_context.py @@ -362,4 +362,30 @@ def test_with_invalid_workspace_aliases_attaches_to_context(): assert enriched.invalid_workspace_aliases == { "invalidws": "abc123workspace", } - assert ctx.invalid_workspace_aliases == {} + assert ctx.invalid_workspace_aliases is None + + +def test_resolve_invalid_workspace_aliases_uses_ctx_fast_path(): + from dataclasses import replace + + with tempfile.TemporaryDirectory() as tmp: + ws_root = _make_workspace_root(tmp) + _add_workspace_without_folders(ws_root, "invalidws") + ctx = resolve_workspace_context(ws_root) + enriched = replace( + ctx, + invalid_workspace_aliases={"invalidws": "abc123workspace"}, + ) + conn = _open_workspace_global_db(ws_root) + conn.commit() + try: + with patch( + "services.workspace_context.infer_invalid_workspace_aliases", + ) as mock_infer: + aliases = resolve_invalid_workspace_aliases_cached( + enriched, conn, ws_root, [], + ) + assert aliases == {"invalidws": "abc123workspace"} + mock_infer.assert_not_called() + finally: + conn.close() diff --git a/tests/test_workspace_listing_performance.py b/tests/test_workspace_listing_performance.py index 5afb634..3db8ba9 100644 --- a/tests/test_workspace_listing_performance.py +++ b/tests/test_workspace_listing_performance.py @@ -65,6 +65,15 @@ def _make_fixture(base: str) -> str: return ws_path +def _make_fixture_with_invalid_workspace(base: str) -> str: + ws_path = _make_fixture(base) + invalid_dir = os.path.join(ws_path, "invalid-ws") + os.makedirs(invalid_dir) + with open(os.path.join(invalid_dir, "workspace.json"), "w", encoding="utf-8") as f: + json.dump({"folders": []}, f) + return ws_path + + class TestListWorkspaceProjectsNoBubbleScan(unittest.TestCase): """list_workspace_projects must not query bubbleId rows from global storage.""" @@ -126,6 +135,15 @@ def test_output_shape_preserved(self): self.assertIn("conversationCount", p) self.assertIn("lastModified", p) + def test_nocache_bypasses_alias_disk_cache(self): + ws_path = _make_fixture_with_invalid_workspace(self.tmp.name) + with patch( + "services.summary_cache.get_cached_invalid_workspace_aliases", + ) as mock_get: + mock_get.return_value = {"invalid-ws": "global"} + list_workspace_projects(ws_path, rules=[], nocache=True) + mock_get.assert_not_called() + if __name__ == "__main__": unittest.main() From f2c76636d2ac3df6a66f43a8c3acd9585bbde9ee Mon Sep 17 00:00:00 2001 From: chen Date: Tue, 30 Jun 2026 21:39:13 +0800 Subject: [PATCH 3/4] fix(#125): wire export alias cache and log corrupt cache rejects --- services/export_engine.py | 23 +++++++++++++---------- services/search.py | 3 +++ services/summary_cache.py | 8 ++++++++ tests/test_summary_cache.py | 7 ++++++- 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/services/export_engine.py b/services/export_engine.py index ef6ad63..46c1cf9 100644 --- a/services/export_engine.py +++ b/services/export_engine.py @@ -16,6 +16,7 @@ from services.workspace_context import ( WorkspaceContext, enrich_workspace_context_from_global_db, + resolve_invalid_workspace_aliases_cached, resolve_workspace_context_cached, ) from services.workspace_db import ( @@ -28,7 +29,6 @@ ) from services.workspace_resolver import ( determine_project_for_conversation, - infer_invalid_workspace_aliases, lookup_workspace_display_name, ) from utils.cli_chat_reader import ( @@ -169,6 +169,9 @@ def prepare_workspace_orchestration( def load_global_db_export_data( orch: WorkspaceOrchestration, + rules: list[Any], + *, + nocache: bool = False, ) -> GlobalDbExportData | None: """Load global DB maps needed for IDE composer export.""" ctx = orch.ctx @@ -197,15 +200,13 @@ def load_global_db_export_data( code_block_diff_map = load_code_block_diff_map(global_db) ide_composer_rows = safe_fetchall(global_db, COMPOSER_ROWS_WITH_HEADERS_SQL) - invalid_workspace_aliases = infer_invalid_workspace_aliases( - composer_rows=ide_composer_rows, + invalid_workspace_aliases = resolve_invalid_workspace_aliases_cached( + ctx, + global_db, + orch.workspace_path, + rules, + nocache=nocache, project_layouts_map=project_layouts_map, - project_name_map=ctx.project_name_to_workspace_id, - workspace_path_map=ctx.workspace_path_to_id, - workspace_entries=orch.workspace_entries, - bubble_map=bubble_map, - composer_id_to_ws=ctx.composer_id_to_workspace_id, - invalid_workspace_ids=ctx.invalid_workspace_ids, ) return GlobalDbExportData( @@ -503,7 +504,9 @@ def collect_export_entries( exported: list[CollectedExportEntry] = [] if include_composer: - db_data = load_global_db_export_data(orch) + db_data = load_global_db_export_data( + orch, exclusion_rules, nocache=effective_nocache, + ) if db_data is not None: exported.extend( _collect_ide_export_entries( diff --git a/services/search.py b/services/search.py index 1e81dea..400cc55 100644 --- a/services/search.py +++ b/services/search.py @@ -382,6 +382,9 @@ def _load_search_workspace_assigner( ) invalid_workspace_aliases: dict[str, str] = {} if ctx.invalid_workspace_ids: + # Issue #116 follow-up: search assigner still cold-scans composerData:* + # rows here; sharing resolve_invalid_workspace_aliases_cached is + # intentionally deferred (operator scope — see issue Out of scope). composer_rows = safe_fetchall(global_db, COMPOSER_ROWS_WITH_HEADERS_SQL) invalid_workspace_aliases = infer_invalid_workspace_aliases( composer_rows=composer_rows, diff --git a/services/summary_cache.py b/services/summary_cache.py index 39d0adc..2c0dbc5 100644 --- a/services/summary_cache.py +++ b/services/summary_cache.py @@ -257,10 +257,18 @@ def get_cached_invalid_workspace_aliases( return None aliases = data.get("invalid_workspace_aliases") if not isinstance(aliases, dict): + _logger.debug( + "Invalid workspace aliases cache rejected: invalid_workspace_aliases is not a dict", + ) return None validated: dict[str, str] = {} for key, value in aliases.items(): if not isinstance(key, str) or not isinstance(value, str): + _logger.debug( + "Invalid workspace aliases cache rejected: non-string entry (%r -> %r)", + key, + value, + ) return None validated[key] = value return validated diff --git a/tests/test_summary_cache.py b/tests/test_summary_cache.py index 7aea99e..1de95b0 100644 --- a/tests/test_summary_cache.py +++ b/tests/test_summary_cache.py @@ -111,7 +111,12 @@ def test_invalid_workspace_aliases_rejects_non_string_entries(self): "invalid_workspace_aliases": {"broken-ws": 123}, }, ) - self.assertIsNone(get_cached_invalid_workspace_aliases(fp)) + with self.assertLogs(summary_cache._logger, level="DEBUG") as logs: + self.assertIsNone(get_cached_invalid_workspace_aliases(fp)) + self.assertTrue( + any("non-string entry" in msg for msg in logs.output), + msg=f"expected debug log for corrupt cache entry, got: {logs.output}", + ) if __name__ == "__main__": From 378937542f7521f8de1b540644770aa755cce86b Mon Sep 17 00:00:00 2001 From: chen Date: Tue, 30 Jun 2026 21:47:51 +0800 Subject: [PATCH 4/4] fix(#125): address bradjin8 tab nocache, ctx enrichment, and tests --- api/workspaces.py | 16 ++++++++-- services/workspace_tabs.py | 33 ++++++++++++++------- tests/test_workspace_listing_performance.py | 12 ++++++-- 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/api/workspaces.py b/api/workspaces.py index 9e81a8b..5a716dd 100644 --- a/api/workspaces.py +++ b/api/workspaces.py @@ -167,7 +167,8 @@ def get_workspace_tabs(workspace_id: str) -> tuple[Response, int] | Response: workspace_id: Storage folder name, ``global`` for unassigned chats, or ``cli:``. summary: When ``1`` or ``true``, return lightweight tab headers only. - nocache: When ``1`` or ``true``, bypass cache on summary requests. + nocache: When ``1`` or ``true``, bypass cache on summary and full-tab + requests (alias disk cache on per-tab lazy load). Returns: Tabs payload from :func:`services.workspace_tabs` helpers (typically @@ -190,7 +191,9 @@ def get_workspace_tabs(workspace_id: str) -> tuple[Response, int] | Response: workspace_id, workspace_path, rules, nocache=_request_nocache(), ) else: - payload, status = assemble_workspace_tabs(workspace_id, workspace_path, rules) + payload, status = assemble_workspace_tabs( + workspace_id, workspace_path, rules, nocache=_request_nocache(), + ) return json_response(payload, status) except Exception: _logger.exception("Failed to get workspace tabs") @@ -209,6 +212,7 @@ def get_workspace_tab(workspace_id: str, composer_id: str) -> tuple[Response, in workspace_id: Storage folder name, ``global`` for unassigned chats, or ``cli:`` (CLI workspaces return 400). composer_id: Composer UUID to load. + nocache: When ``1`` or ``true``, bypass alias disk cache. Returns: Single-tab JSON from :func:`services.workspace_tabs.assemble_single_tab` @@ -221,7 +225,13 @@ def get_workspace_tab(workspace_id: str, composer_id: str) -> tuple[Response, in try: workspace_path = resolve_workspace_path() rules = exclusion_rules() - payload, status = assemble_single_tab(workspace_id, composer_id, workspace_path, rules) + payload, status = assemble_single_tab( + workspace_id, + composer_id, + workspace_path, + rules, + nocache=_request_nocache(), + ) return json_response(payload, status) except Exception: _logger.exception("Failed to get workspace tab") diff --git a/services/workspace_tabs.py b/services/workspace_tabs.py index 4ced9f1..9fddd30 100644 --- a/services/workspace_tabs.py +++ b/services/workspace_tabs.py @@ -51,8 +51,8 @@ set_cached_tab_summaries, ) from services.workspace_context import ( - resolve_invalid_workspace_aliases_cached, resolve_workspace_context_cached, + with_invalid_workspace_aliases, ) from services.workspace_db import ( COMPOSER_ROWS_WITH_HEADERS_SQL, @@ -453,9 +453,11 @@ def _build_workspace_tab_summaries_uncached( project_layouts_map = load_project_layouts_map(global_db) + # Full composerData roster still required for the summary tab loop; + # alias inference alone is fingerprint-cached across requests. composer_rows = safe_fetchall(global_db, COMPOSER_ROWS_WITH_HEADERS_SQL) - invalid_workspace_aliases = resolve_invalid_workspace_aliases_cached( + ctx = with_invalid_workspace_aliases( ctx, global_db, workspace_path, @@ -463,6 +465,7 @@ def _build_workspace_tab_summaries_uncached( nocache=nocache, project_layouts_map=project_layouts_map, ) + invalid_workspace_aliases = ctx.invalid_workspace_aliases or {} for row in composer_rows: composer = parse_composer_data_row( @@ -529,6 +532,8 @@ def assemble_single_tab( composer_id: str, workspace_path: str, rules: list[Any], + *, + nocache: bool = False, ) -> tuple[dict[str, Any], int]: """Assemble a single conversation tab for GET /api/workspaces//tabs/. @@ -541,6 +546,7 @@ def assemble_single_tab( composer_id: UUID of the composer / conversation to assemble. workspace_path: Cursor ``workspaceStorage`` root. rules: Exclusion rule token lists. + nocache: When ``True``, bypass alias disk cache reads and writes. Returns: ``(payload, status)``. On success (``200``), *payload* is @@ -550,7 +556,9 @@ def assemble_single_tab( """ parse_warnings = ParseWarningCollector() - ctx = resolve_workspace_context_cached(workspace_path, rules) + ctx = resolve_workspace_context_cached( + workspace_path, rules, nocache=nocache, + ) workspace_entries = ctx.workspace_entries invalid_workspace_ids = ctx.invalid_workspace_ids project_name_map = ctx.project_name_to_workspace_id @@ -583,12 +591,10 @@ def assemble_single_tab( project_layouts_map[composer_id] = load_project_layouts_for_composer( global_db, composer_id, ) - invalid_workspace_aliases = resolve_invalid_workspace_aliases_cached( - ctx, - global_db, - workspace_path, - rules, + ctx = with_invalid_workspace_aliases( + ctx, global_db, workspace_path, rules, nocache=nocache, ) + invalid_workspace_aliases = ctx.invalid_workspace_aliases or {} bubble_map = load_bubbles_for_composer( global_db, composer_id, parse_warnings=parse_warnings, @@ -633,6 +639,8 @@ def assemble_workspace_tabs( workspace_id: str, workspace_path: str, rules: list[Any], + *, + nocache: bool = False, ) -> tuple[dict[str, Any], int]: """Build tabs payload for GET /api/workspaces//tabs (IDE workspaces). @@ -640,6 +648,7 @@ def assemble_workspace_tabs( workspace_id: Workspace folder name, or ``"global"`` for unassigned chats. workspace_path: Cursor ``workspaceStorage`` root. rules: Exclusion rule token lists from :func:`utils.exclusion_rules.load_rules`. + nocache: When ``True``, bypass alias disk cache reads and writes. Returns: ``(payload, status)``. On success (``200``), *payload* contains ``tabs`` @@ -651,7 +660,9 @@ def assemble_workspace_tabs( parse_warnings = ParseWarningCollector() response: dict[str, Any] = {"tabs": []} - ctx = resolve_workspace_context_cached(workspace_path, rules) + ctx = resolve_workspace_context_cached( + workspace_path, rules, nocache=nocache, + ) workspace_entries = ctx.workspace_entries invalid_workspace_ids = ctx.invalid_workspace_ids project_name_map = ctx.project_name_to_workspace_id @@ -714,13 +725,15 @@ def assemble_workspace_tabs( # Get composer data entries with conversations composer_rows = safe_fetchall(global_db, COMPOSER_ROWS_WITH_HEADERS_SQL) - invalid_workspace_aliases = resolve_invalid_workspace_aliases_cached( + ctx = with_invalid_workspace_aliases( ctx, global_db, workspace_path, rules, + nocache=nocache, project_layouts_map=project_layouts_map, ) + invalid_workspace_aliases = ctx.invalid_workspace_aliases or {} for row in composer_rows: composer = parse_composer_data_row( diff --git a/tests/test_workspace_listing_performance.py b/tests/test_workspace_listing_performance.py index 3db8ba9..e8eba5f 100644 --- a/tests/test_workspace_listing_performance.py +++ b/tests/test_workspace_listing_performance.py @@ -137,12 +137,18 @@ def test_output_shape_preserved(self): def test_nocache_bypasses_alias_disk_cache(self): ws_path = _make_fixture_with_invalid_workspace(self.tmp.name) - with patch( - "services.summary_cache.get_cached_invalid_workspace_aliases", - ) as mock_get: + with ( + patch( + "services.summary_cache.get_cached_invalid_workspace_aliases", + ) as mock_get, + patch( + "services.summary_cache.set_cached_invalid_workspace_aliases", + ) as mock_set, + ): mock_get.return_value = {"invalid-ws": "global"} list_workspace_projects(ws_path, rules=[], nocache=True) mock_get.assert_not_called() + mock_set.assert_not_called() if __name__ == "__main__":