Skip to content

Commit f866937

Browse files
committed
Remove seed tracking, have snapshot evaluator own tracker instance
1 parent cc80ba5 commit f866937

6 files changed

Lines changed: 27 additions & 32 deletions

File tree

sqlmesh/core/console.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4308,7 +4308,7 @@ def _calculate_annotation_str_len(
43084308
def _format_bytes(num_bytes: t.Optional[int]) -> str:
43094309
if num_bytes and num_bytes > 0:
43104310
if num_bytes < 1024:
4311-
return f"{num_bytes} Bytes"
4311+
return f"{num_bytes} bytes"
43124312

43134313
num_bytes_float = float(num_bytes) / 1024.0
43144314
for unit in ["KiB", "MiB", "GiB", "TiB", "PiB"]:

sqlmesh/core/execution_tracker.py

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import time
44
import typing as t
55
from contextlib import contextmanager
6-
from threading import local
6+
from threading import local, Lock
77
from dataclasses import dataclass, field
88

99

@@ -66,34 +66,32 @@ class QueryExecutionTracker:
6666

6767
_thread_local = local()
6868
_contexts: t.Dict[str, QueryExecutionContext] = {}
69+
_contexts_lock = Lock()
6970

70-
@classmethod
71-
def get_execution_context(cls, snapshot_id_batch: str) -> t.Optional[QueryExecutionContext]:
72-
return cls._contexts.get(snapshot_id_batch)
71+
def get_execution_context(self, snapshot_id_batch: str) -> t.Optional[QueryExecutionContext]:
72+
with self._contexts_lock:
73+
return self._contexts.get(snapshot_id_batch)
7374

7475
@classmethod
7576
def is_tracking(cls) -> bool:
7677
return getattr(cls._thread_local, "context", None) is not None
7778

78-
@classmethod
7979
@contextmanager
8080
def track_execution(
81-
cls, snapshot_id_batch: str, condition: bool = True
81+
self, snapshot_id_batch: str
8282
) -> t.Iterator[t.Optional[QueryExecutionContext]]:
8383
"""
8484
Context manager for tracking snapshot execution statistics.
8585
"""
86-
if not condition:
87-
yield None
88-
return
89-
9086
context = QueryExecutionContext(snapshot_batch_id=snapshot_id_batch)
91-
cls._thread_local.context = context
92-
cls._contexts[snapshot_id_batch] = context
87+
self._thread_local.context = context
88+
with self._contexts_lock:
89+
self._contexts[snapshot_id_batch] = context
90+
9391
try:
9492
yield context
9593
finally:
96-
cls._thread_local.context = None
94+
self._thread_local.context = None
9795

9896
@classmethod
9997
def record_execution(
@@ -103,8 +101,8 @@ def record_execution(
103101
if context is not None:
104102
context.add_execution(sql, row_count, bytes_processed)
105103

106-
@classmethod
107-
def get_execution_stats(cls, snapshot_id_batch: str) -> t.Optional[QueryExecutionStats]:
108-
context = cls.get_execution_context(snapshot_id_batch)
109-
cls._contexts.pop(snapshot_id_batch, None)
104+
def get_execution_stats(self, snapshot_id_batch: str) -> t.Optional[QueryExecutionStats]:
105+
with self._contexts_lock:
106+
context = self._contexts.get(snapshot_id_batch)
107+
self._contexts.pop(snapshot_id_batch, None)
110108
return context.get_execution_stats() if context else None

sqlmesh/core/scheduler.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from sqlmesh.core import constants as c
1010
from sqlmesh.core.console import Console, get_console
1111
from sqlmesh.core.environment import EnvironmentNamingInfo, execute_environment_statements
12-
from sqlmesh.core.execution_tracker import QueryExecutionTracker
1312
from sqlmesh.core.macros import RuntimeStage
1413
from sqlmesh.core.model.definition import AuditResult
1514
from sqlmesh.core.node import IntervalUnit
@@ -536,7 +535,7 @@ def run_node(node: SchedulingUnit) -> None:
536535
num_audits = len(audit_results)
537536
num_audits_failed = sum(1 for result in audit_results if result.count)
538537

539-
execution_stats = QueryExecutionTracker.get_execution_stats(
538+
execution_stats = self.snapshot_evaluator.execution_tracker.get_execution_stats(
540539
f"{snapshot.snapshot_id}_{batch_idx}"
541540
)
542541

sqlmesh/core/snapshot/evaluator.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ def __init__(
136136
)
137137
self.selected_gateway = selected_gateway
138138
self.ddl_concurrent_tasks = ddl_concurrent_tasks
139+
self.execution_tracker = QueryExecutionTracker()
139140

140141
def evaluate(
141142
self,
@@ -170,9 +171,7 @@ def evaluate(
170171
Returns:
171172
The WAP ID of this evaluation if supported, None otherwise.
172173
"""
173-
with QueryExecutionTracker.track_execution(
174-
f"{snapshot.snapshot_id}_{batch_index}", condition=not snapshot.is_seed
175-
):
174+
with self.execution_tracker.track_execution(f"{snapshot.snapshot_id}_{batch_index}"):
176175
result = self._evaluate_snapshot(
177176
start=start,
178177
end=end,

tests/core/engine_adapter/integration/test_integration.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2454,13 +2454,10 @@ def capture_row_counts(
24542454
assert len(physical_layer_results.tables) == len(physical_layer_results.non_temp_tables) == 3
24552455

24562456
if ctx.engine_adapter.SUPPORTS_QUERY_EXECUTION_TRACKING:
2457-
assert len(actual_execution_stats) == 3
2458-
assert actual_execution_stats["seed_model"].total_rows_processed == 7
24592457
assert actual_execution_stats["incremental_model"].total_rows_processed == 7
24602458
assert actual_execution_stats["full_model"].total_rows_processed == 3
24612459

24622460
if ctx.mark.startswith("bigquery"):
2463-
assert actual_execution_stats["seed_model"].total_bytes_processed
24642461
assert actual_execution_stats["incremental_model"].total_bytes_processed
24652462
assert actual_execution_stats["full_model"].total_bytes_processed
24662463

tests/core/test_execution_tracker.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,17 @@
77

88
def test_execution_tracker_thread_isolation() -> None:
99
def worker(id: str, row_counts: list[int]) -> QueryExecutionStats:
10-
with QueryExecutionTracker.track_execution(id) as ctx:
11-
assert QueryExecutionTracker.is_tracking()
10+
with execution_tracker.track_execution(id) as ctx:
11+
assert execution_tracker.is_tracking()
1212

1313
for count in row_counts:
14-
QueryExecutionTracker.record_execution("SELECT 1", count, None)
14+
execution_tracker.record_execution("SELECT 1", count, None)
1515

1616
assert ctx is not None
1717
return ctx.get_execution_stats()
1818

19+
execution_tracker = QueryExecutionTracker()
20+
1921
with ThreadPoolExecutor() as executor:
2022
futures = [
2123
executor.submit(worker, "batch_A", [10, 5]),
@@ -24,9 +26,9 @@ def worker(id: str, row_counts: list[int]) -> QueryExecutionStats:
2426
results = [f.result() for f in futures]
2527

2628
# Main thread has no active tracking context
27-
assert not QueryExecutionTracker.is_tracking()
28-
QueryExecutionTracker.record_execution("q", 10, None)
29-
assert QueryExecutionTracker.get_execution_stats("q") is None
29+
assert not execution_tracker.is_tracking()
30+
execution_tracker.record_execution("q", 10, None)
31+
assert execution_tracker.get_execution_stats("q") is None
3032

3133
# Order of results is not deterministic, so look up by id
3234
by_batch = {s.snapshot_batch_id: s for s in results}

0 commit comments

Comments (0)