Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 103 additions & 12 deletions api/search.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
"""
API route for search — mirrors src/app/api/search/route.ts
GET /api/search?q=...&type=all|chat|composer&all_history=1
GET /api/search?q=...&type=all|chat|composer&all_history=1&workspace=<id>
"""

from __future__ import annotations

import logging
import os
import re
import sqlite3
from typing import Any

from flask import Blueprint, Response, current_app, request
Expand All @@ -19,12 +24,16 @@
search_global_storage,
search_legacy_workspaces,
)
from utils.cli_chat_reader import list_cli_projects
from utils.workspace_path import get_cli_chats_path, resolve_workspace_path

bp = Blueprint("search", __name__)
_logger = logging.getLogger(__name__)

_MAX_SEARCH_SINCE_DAYS = 36_500 # ~100 years; avoids timedelta overflow on bad input
_MAX_SEARCH_QUERY_LEN = 500
_VALID_SEARCH_TYPES = frozenset({"all", "chat", "composer"})
_SAFE_WORKSPACE_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")


def _parse_since_days_param(raw: str | None) -> int | None:
Expand All @@ -39,31 +48,101 @@ def _parse_since_days_param(raw: str | None) -> int | None:
return days


def _search_error(
message: str,
code: str,
status: int,
) -> tuple[Response, int]:
return json_response({"error": message, "code": code}, status)


def _is_safe_workspace_folder_id(workspace_id: str) -> bool:
"""Return whether *workspace_id* is a safe Cursor workspace folder name."""
if not workspace_id or workspace_id in {".", ".."}:
return False
if (
os.path.isabs(workspace_id)
or ".." in workspace_id
or "/" in workspace_id
or "\\" in workspace_id
):
return False
return _SAFE_WORKSPACE_ID_RE.fullmatch(workspace_id) is not None


def _workspace_exists(workspace_id: str, workspace_path: str) -> bool:
if workspace_id == "global":
return True
if workspace_id.startswith("cli:"):
project_id = workspace_id[4:]
if not _is_safe_workspace_folder_id(project_id):
return False
return any(
cp.get("project_id") == project_id
for cp in list_cli_projects(get_cli_chats_path())
)
if not _is_safe_workspace_folder_id(workspace_id):
return False
candidate = os.path.join(workspace_path, workspace_id)
root = os.path.normpath(workspace_path)
joined = os.path.normpath(candidate)
if os.path.commonpath([root, joined]) != root:
return False
return os.path.isdir(joined)


def _filter_results_by_workspace(
results: list[SearchResult],
workspace_id: str,
) -> list[SearchResult]:
return [r for r in results if r.get("workspaceId") == workspace_id]


@bp.route("/api/search")
def search() -> tuple[Response, int] | Response:
"""Search chats, composers, and CLI sessions across Cursor storage.

Args:
q: Search query string (required; 400 when empty).
type: Filter scope — ``all`` (default), ``chat``, or ``composer``.
workspace: Optional workspace folder hash; 404 when unknown.

Returns:
JSON ``{"results": [...]}`` with optional ``warnings``. 400 when ``q`` is
empty; 500 with ``{"error": ..., "results": []}`` on unexpected failure.
JSON ``{"results": [...]}`` with optional ``warnings``. Structured
``{"error", "code"}`` bodies for 400/404/503/500 failures.
"""
query = request.args.get("q", "").strip()
if not query:
return _search_error("No search query provided", "empty_query", 400)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Old 400/500 bodies included no code / 500 included "results": []. New contract is cleaner; note breaking change for any external client that expected results on error.

if len(query) > _MAX_SEARCH_QUERY_LEN:
return _search_error("Search query is too long", "query_too_long", 400)

search_type = request.args.get("type", "all")
if search_type not in _VALID_SEARCH_TYPES:
Comment on lines +114 to +121

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test_workspace_path_resolution_failure_returns_structured_500 documents OSError → 500. Issue #117 suggested 404/503 for storage unavailability — consider 503 + storage_unavailable if you want clients to retry config/discovery failures differently from bugs. Current choice is defensible; note in PR description.

return _search_error("Invalid search type", "invalid_type", 400)

since_days_raw = request.args.get("since_days")
if (
since_days_raw is not None
and str(since_days_raw).strip()
and _parse_since_days_param(since_days_raw) is None
):
return _search_error("Invalid since_days parameter", "invalid_since_days", 400)

workspace_filter = request.args.get("workspace", "").strip() or None

try:
query = request.args.get("q", "").strip()
search_type = request.args.get("type", "all")
workspace_path = resolve_workspace_path()
if workspace_filter and not _workspace_exists(workspace_filter, workspace_path):
return _search_error("Workspace not found", "workspace_not_found", 404)

rules = current_app.config.get("EXCLUSION_RULES") or []
all_history = request.args.get("all_history") in ("1", "true")
since_ms = resolve_search_since_ms(
all_history=all_history,
since_days=_parse_since_days_param(request.args.get("since_days")),
since_days=_parse_since_days_param(since_days_raw),
)

if not query:
return json_response({"error": "No search query provided"}, 400)
workspace_path = resolve_workspace_path()
parse_warnings = ParseWarningCollector()
query_lower = query.lower()

Expand Down Expand Up @@ -101,18 +180,30 @@ def search() -> tuple[Response, int] | Response:
)
)

ranked = rank_results(results)
if workspace_filter:
ranked = _filter_results_by_workspace(ranked, workspace_filter)
Comment on lines 132 to +185

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

workspace query param + post-filter is useful but outside #117 scope (API-only; UI does not expose it). Call out in PR as bonus scope or split to a follow-up if you want strict issue alignment.


payload: dict[str, Any] = {
"results": rank_results(results),
"results": ranked,
"allHistory": since_ms is None,
"searchWindowDays": (
None if since_ms is None else (
_parse_since_days_param(request.args.get("since_days"))
_parse_since_days_param(since_days_raw)
or DEFAULT_SEARCH_WINDOW_DAYS
)
),
}
return json_response(parse_warnings.attach_to(payload))

except sqlite3.OperationalError:
_logger.exception("Search index unavailable")
return _search_error(
"Search index is temporarily unavailable",
"search_index_unavailable",
503,
)
except Exception:
_logger.exception("Search failed")
return json_response({"error": "Search failed", "results": []}, 500)
return _search_error("Search failed", "internal_error", 500)
Comment on lines +199 to +208

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only sqlite3.OperationalError maps to 503. Other DB errors (e.g. sqlite3.DatabaseError) fall through to 500. Fine for now; widen only if search layer raises them in practice.


51 changes: 50 additions & 1 deletion static/css/style.css
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,58 @@ h3 { font-size: 1.15rem; font-weight: 600; }
}
.input:focus { border-color: var(--blue); box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.2); }

.search-bar { display: flex; gap: 0.5rem; }
.search-bar { display: flex; gap: 0.5rem; align-items: center; }
.search-bar .input { flex: 1; }

.search-info-tooltip {
position: relative;
display: inline-flex;
align-items: center;
justify-content: center;
flex-shrink: 0;
border: none;
background: none;
padding: 0;
font: inherit;
cursor: help;
}
.search-info-icon {
display: inline-flex;
align-items: center;
justify-content: center;
width: 1.25rem;
height: 1.25rem;
border-radius: 50%;
border: 1px solid var(--border);
color: var(--text-muted);
font-size: 0.75rem;
font-weight: 700;
font-style: italic;
cursor: help;
user-select: none;
}
.search-info-tooltip-text {
display: none;
position: absolute;
top: calc(100% + 0.5rem);
right: 0;
z-index: 60;
width: min(20rem, 70vw);
padding: 0.75rem 0.875rem;
border: 1px solid var(--info-border);
border-radius: 0.5rem;
background: var(--info-bg);
color: var(--info-text);
font-size: 0.8125rem;
line-height: 1.45;
box-shadow: 0 4px 12px var(--shadow);
}
.search-info-tooltip:hover .search-info-tooltip-text,
.search-info-tooltip:focus .search-info-tooltip-text,
.search-info-tooltip:focus-within .search-info-tooltip-text {
display: block;
}

/* ---------- Alerts ---------- */
.alert {
padding: 0.75rem 1rem;
Expand Down
18 changes: 16 additions & 2 deletions templates/search.html
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,19 @@ <h1>Search</h1>
<div class="form-group" style="margin-bottom:1.5rem">
<div class="search-bar">
<input type="text" id="search-input" class="input" placeholder="Search across all logs..." autofocus>
<button
type="button"
class="search-info-tooltip"
id="search-window-help"
aria-describedby="search-window-help-text"
>
Comment on lines +17 to +22

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Second commit dropped aria-label="Search window help" when switching span → button. aria-describedby alone may not give the control an accessible name in all AT. Restore aria-label on the button.

<span class="search-info-icon" aria-hidden="true">i</span>
<span class="search-info-tooltip-text" id="search-window-help-text" role="tooltip">
By default, search covers the most recent 30-day indexed window.
Chats without a parseable date are always included.
Check &ldquo;Include chats older than 30 days&rdquo; to search full history.
</span>
</button>
<button class="btn btn-primary" onclick="doSearch()">Search</button>
</div>
<label class="text-sm" style="display:flex;align-items:center;gap:0.5rem;margin-top:0.75rem;cursor:pointer">
Expand Down Expand Up @@ -94,13 +107,14 @@ <h1>Search</h1>
const params = new URLSearchParams({ q: query, type: type });
if (allHistory) params.set('all_history', '1');
const res = await fetch(`/api/search?${params.toString()}`);
const data = await res.json().catch(() => ({}));
if (!res.ok) {
showIncompleteResultsBanner('parse-warnings-host', []);
loading.style.display = 'none';
container.innerHTML = '<p class="text-danger">Search failed.</p>';
const message = typeof data.error === 'string' ? data.error : 'Search failed.';
container.innerHTML = `<p class="text-danger">${escapeHtml(message)}</p>`;
return;
}
Comment on lines 111 to 117

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Frontend shows data.error on failure — good. Consider surfacing data.code in a data-* attribute or dev-only suffix for operators debugging 503 vs 500.

const data = await res.json();
const results = data.results || [];
showIncompleteResultsBanner('parse-warnings-host', data.warnings);

Expand Down
Loading
Loading