
Commit b7754b5

Remove seed tracking, have snapshot evaluator own tracker instance
1 parent 29f0c27

6 files changed: +46, -57 lines

sqlmesh/core/console.py

Lines changed: 1 addition & 1 deletion
@@ -4274,7 +4274,7 @@ def _calculate_annotation_str_len(
 def _format_bytes(num_bytes: t.Optional[int]) -> str:
     if num_bytes and num_bytes > 0:
         if num_bytes < 1024:
-            return f"{num_bytes} Bytes"
+            return f"{num_bytes} bytes"

         num_bytes_float = float(num_bytes) / 1024.0
         for unit in ["KiB", "MiB", "GiB", "TiB", "PiB"]:
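
Note: the only functional change in this hunk is the casing of the sub-KiB label. For reference, a standalone formatter in the same spirit might look like the sketch below; the hunk only shows the head of _format_bytes, so the loop body and the fallbacks here are assumptions, not sqlmesh's actual code.

import typing as t


def format_bytes(num_bytes: t.Optional[int]) -> str:
    # Hypothetical completion of the pattern shown above: step through
    # successively larger binary units once the value exceeds 1024.
    if num_bytes and num_bytes > 0:
        if num_bytes < 1024:
            return f"{num_bytes} bytes"

        num_bytes_float = float(num_bytes) / 1024.0
        for unit in ["KiB", "MiB", "GiB", "TiB", "PiB"]:
            if num_bytes_float < 1024.0:
                return f"{num_bytes_float:.1f} {unit}"
            num_bytes_float /= 1024.0
        return f"{num_bytes_float:.1f} EiB"  # assumed fallback for very large values
    return "0 bytes"  # assumed handling for None/zero/negative input


print(format_bytes(2_500_000))  # 2.4 MiB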

sqlmesh/core/execution_tracker.py

Lines changed: 15 additions & 17 deletions
@@ -3,7 +3,7 @@
 import time
 import typing as t
 from contextlib import contextmanager
-from threading import local
+from threading import local, Lock
 from dataclasses import dataclass, field


@@ -66,34 +66,32 @@ class QueryExecutionTracker:

     _thread_local = local()
     _contexts: t.Dict[str, QueryExecutionContext] = {}
+    _contexts_lock = Lock()

-    @classmethod
-    def get_execution_context(cls, snapshot_id_batch: str) -> t.Optional[QueryExecutionContext]:
-        return cls._contexts.get(snapshot_id_batch)
+    def get_execution_context(self, snapshot_id_batch: str) -> t.Optional[QueryExecutionContext]:
+        with self._contexts_lock:
+            return self._contexts.get(snapshot_id_batch)

     @classmethod
     def is_tracking(cls) -> bool:
         return getattr(cls._thread_local, "context", None) is not None

-    @classmethod
     @contextmanager
     def track_execution(
-        cls, snapshot_id_batch: str, condition: bool = True
+        self, snapshot_id_batch: str
     ) -> t.Iterator[t.Optional[QueryExecutionContext]]:
         """
         Context manager for tracking snapshot execution statistics.
         """
-        if not condition:
-            yield None
-            return
-
         context = QueryExecutionContext(snapshot_batch_id=snapshot_id_batch)
-        cls._thread_local.context = context
-        cls._contexts[snapshot_id_batch] = context
+        self._thread_local.context = context
+        with self._contexts_lock:
+            self._contexts[snapshot_id_batch] = context
+
         try:
             yield context
         finally:
-            cls._thread_local.context = None
+            self._thread_local.context = None

     @classmethod
     def record_execution(
@@ -103,8 +101,8 @@ def record_execution(
         if context is not None:
             context.add_execution(sql, row_count, bytes_processed)

-    @classmethod
-    def get_execution_stats(cls, snapshot_id_batch: str) -> t.Optional[QueryExecutionStats]:
-        context = cls.get_execution_context(snapshot_id_batch)
-        cls._contexts.pop(snapshot_id_batch, None)
+    def get_execution_stats(self, snapshot_id_batch: str) -> t.Optional[QueryExecutionStats]:
+        with self._contexts_lock:
+            context = self._contexts.get(snapshot_id_batch)
+            self._contexts.pop(snapshot_id_batch, None)
         return context.get_execution_stats() if context else None
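
The shape of the refactor above: tracking state stays per-thread via threading.local, while the shared contexts dict is now guarded by a Lock, and the main entry points become instance methods instead of classmethods. Below is a minimal standalone sketch of that pattern; Tracker and QueryContext are simplified stand-ins for illustration, not the sqlmesh classes.

import threading
import typing as t
from contextlib import contextmanager
from dataclasses import dataclass


@dataclass
class QueryContext:
    # Simplified stand-in for QueryExecutionContext: just totals row counts.
    batch_id: str
    rows: int = 0

    def add_execution(self, row_count: t.Optional[int]) -> None:
        self.rows += row_count or 0


class Tracker:
    # Instance-owned tracker: a per-thread "current context" plus a
    # lock-guarded dict so concurrent batches can register and read safely.
    def __init__(self) -> None:
        self._thread_local = threading.local()
        self._contexts: t.Dict[str, QueryContext] = {}
        self._contexts_lock = threading.Lock()

    @contextmanager
    def track_execution(self, batch_id: str) -> t.Iterator[QueryContext]:
        context = QueryContext(batch_id)
        self._thread_local.context = context
        with self._contexts_lock:
            self._contexts[batch_id] = context
        try:
            yield context
        finally:
            self._thread_local.context = None

    def record_execution(self, row_count: t.Optional[int]) -> None:
        # No-op outside of track_execution, mirroring record_execution above.
        context = getattr(self._thread_local, "context", None)
        if context is not None:
            context.add_execution(row_count)

    def get_stats(self, batch_id: str) -> t.Optional[int]:
        with self._contexts_lock:
            context = self._contexts.pop(batch_id, None)
        return context.rows if context else None


tracker = Tracker()
with tracker.track_execution("snapshot_1_0"):
    tracker.record_execution(7)
print(tracker.get_stats("snapshot_1_0"))  # 7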

sqlmesh/core/scheduler.py

Lines changed: 1 addition & 2 deletions
@@ -7,7 +7,6 @@
 from sqlmesh.core import constants as c
 from sqlmesh.core.console import Console, get_console
 from sqlmesh.core.environment import EnvironmentNamingInfo, execute_environment_statements
-from sqlmesh.core.execution_tracker import QueryExecutionTracker
 from sqlmesh.core.macros import RuntimeStage
 from sqlmesh.core.model.definition import AuditResult
 from sqlmesh.core.node import IntervalUnit
@@ -463,7 +462,7 @@ def evaluate_node(node: SchedulingUnit) -> None:
             num_audits = len(audit_results)
             num_audits_failed = sum(1 for result in audit_results if result.count)

-            execution_stats = QueryExecutionTracker.get_execution_stats(
+            execution_stats = self.snapshot_evaluator.execution_tracker.get_execution_stats(
                 f"{snapshot.snapshot_id}_{batch_idx}"
             )

sqlmesh/core/snapshot/evaluator.py

Lines changed: 21 additions & 28 deletions
@@ -130,6 +130,7 @@ def __init__(
         )
         self.selected_gateway = selected_gateway
         self.ddl_concurrent_tasks = ddl_concurrent_tasks
+        self.execution_tracker = QueryExecutionTracker()

     def evaluate(
         self,
@@ -158,9 +159,7 @@ def evaluate(
         Returns:
             The WAP ID of this evaluation if supported, None otherwise.
         """
-        with QueryExecutionTracker.track_execution(
-            f"{snapshot.snapshot_id}_{batch_index}", condition=not snapshot.is_seed
-        ):
+        with self.execution_tracker.track_execution(f"{snapshot.snapshot_id}_{batch_index}"):
            result = self._evaluate_snapshot(
                snapshot,
                start,
@@ -204,19 +203,16 @@ def evaluate_and_fetch(
         Returns:
             The result of the evaluation as a dataframe.
         """
-        with QueryExecutionTracker.track_execution(
-            f"{snapshot.snapshot_id}_0", condition=not snapshot.is_seed
-        ):
-            result = self._evaluate_snapshot(
-                snapshot,
-                start,
-                end,
-                execution_time,
-                snapshots,
-                limit=limit,
-                deployability_index=deployability_index,
-                **kwargs,
-            )
+        result = self._evaluate_snapshot(
+            snapshot,
+            start,
+            end,
+            execution_time,
+            snapshots,
+            limit=limit,
+            deployability_index=deployability_index,
+            **kwargs,
+        )
         if result is None or isinstance(result, str):
             raise SQLMeshError(
                 f"Unexpected result {result} when evaluating snapshot {snapshot.snapshot_id}."
@@ -903,18 +899,15 @@ def _create_snapshot(
                )
                continue

-            with QueryExecutionTracker.track_execution(
-                f"{snapshot.snapshot_id}_0", condition=snapshot.is_seed
-            ):
-                self._execute_create(
-                    snapshot=snapshot,
-                    table_name=snapshot.table_name(is_deployable=is_table_deployable),
-                    is_table_deployable=is_table_deployable,
-                    deployability_index=deployability_index,
-                    create_render_kwargs=create_render_kwargs,
-                    rendered_physical_properties=rendered_physical_properties,
-                    dry_run=dry_run,
-                )
+            self._execute_create(
+                snapshot=snapshot,
+                table_name=snapshot.table_name(is_deployable=is_table_deployable),
+                is_table_deployable=is_table_deployable,
+                deployability_index=deployability_index,
+                create_render_kwargs=create_render_kwargs,
+                rendered_physical_properties=rendered_physical_properties,
+                dry_run=dry_run,
+            )

         if on_complete is not None:
             on_complete(snapshot)
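
Taken together with the scheduler.py hunk above, ownership now flows through the evaluator: SnapshotEvaluator creates its own QueryExecutionTracker, evaluate wraps every batch (the condition=not snapshot.is_seed flag is gone), and the scheduler reads stats via self.snapshot_evaluator.execution_tracker. A toy sketch of that flow, reusing the simplified Tracker from the earlier sketch; the Evaluator and Scheduler classes here are illustrative stand-ins, not the sqlmesh ones.

class Evaluator:
    # Owns its tracker, mirroring self.execution_tracker = QueryExecutionTracker().
    def __init__(self) -> None:
        self.execution_tracker = Tracker()

    def evaluate(self, snapshot_id: str, batch_index: int) -> None:
        # Every batch is tracked; there is no longer a seed-specific condition.
        with self.execution_tracker.track_execution(f"{snapshot_id}_{batch_index}"):
            self.execution_tracker.record_execution(3)  # stand-in for real query work


class Scheduler:
    def __init__(self, snapshot_evaluator: Evaluator) -> None:
        self.snapshot_evaluator = snapshot_evaluator

    def evaluate_node(self, snapshot_id: str, batch_idx: int) -> None:
        self.snapshot_evaluator.evaluate(snapshot_id, batch_idx)
        # The scheduler no longer imports a global tracker; it asks the evaluator's.
        stats = self.snapshot_evaluator.execution_tracker.get_stats(
            f"{snapshot_id}_{batch_idx}"
        )
        print(snapshot_id, batch_idx, stats)


Scheduler(Evaluator()).evaluate_node("full_model", 0)  # prints: full_model 0 3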

tests/core/engine_adapter/integration/test_integration.py

Lines changed: 0 additions & 3 deletions
@@ -2403,13 +2403,10 @@ def capture_row_counts(
     assert len(physical_layer_results.tables) == len(physical_layer_results.non_temp_tables) == 3

     if ctx.engine_adapter.SUPPORTS_QUERY_EXECUTION_TRACKING:
-        assert len(actual_execution_stats) == 3
-        assert actual_execution_stats["seed_model"].total_rows_processed == 7
         assert actual_execution_stats["incremental_model"].total_rows_processed == 7
         assert actual_execution_stats["full_model"].total_rows_processed == 3

         if ctx.mark.startswith("bigquery"):
-            assert actual_execution_stats["seed_model"].total_bytes_processed
             assert actual_execution_stats["incremental_model"].total_bytes_processed
             assert actual_execution_stats["full_model"].total_bytes_processed

tests/core/test_execution_tracker.py

Lines changed: 8 additions & 6 deletions
@@ -7,15 +7,17 @@

 def test_execution_tracker_thread_isolation() -> None:
     def worker(id: str, row_counts: list[int]) -> QueryExecutionStats:
-        with QueryExecutionTracker.track_execution(id) as ctx:
-            assert QueryExecutionTracker.is_tracking()
+        with execution_tracker.track_execution(id) as ctx:
+            assert execution_tracker.is_tracking()

             for count in row_counts:
-                QueryExecutionTracker.record_execution("SELECT 1", count, None)
+                execution_tracker.record_execution("SELECT 1", count, None)

             assert ctx is not None
             return ctx.get_execution_stats()

+    execution_tracker = QueryExecutionTracker()
+
     with ThreadPoolExecutor() as executor:
         futures = [
             executor.submit(worker, "batch_A", [10, 5]),
@@ -24,9 +26,9 @@ def worker(id: str, row_counts: list[int]) -> QueryExecutionStats:
     results = [f.result() for f in futures]

     # Main thread has no active tracking context
-    assert not QueryExecutionTracker.is_tracking()
-    QueryExecutionTracker.record_execution("q", 10, None)
-    assert QueryExecutionTracker.get_execution_stats("q") is None
+    assert not execution_tracker.is_tracking()
+    execution_tracker.record_execution("q", 10, None)
+    assert execution_tracker.get_execution_stats("q") is None

     # Order of results is not deterministic, so look up by id
     by_batch = {s.snapshot_batch_id: s for s in results}
