Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 44 additions & 11 deletions packages/lmi/src/lmi/cost_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,47 @@

logger = logging.getLogger(__name__)

# Module-level context variable to track the currently active cost tracker
_active_tracker: contextvars.ContextVar["CostTracker | None"] = contextvars.ContextVar(
"active_cost_tracker", default=None
)


def _get_active_tracker() -> "CostTracker":
    """Return the tracker most recently activated as a context manager.

    Falls back to GLOBAL_COST_TRACKER when no tracker context is active.
    """
    current = _active_tracker.get()
    return current or GLOBAL_COST_TRACKER
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on this and the PR description, this is where we have to choose between a temporary cost tracker and the global cost tracker?

I can see arguments for both sides (switching between global cost tracking vs always adding to it).

For me, I think having global cost tracking always on is the easiest and most predictable option. Summing across several separate cost trackers could get arduous.

What did you think?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah after talking about it, I'm on the same page. Will do.



class CostTracker:
def __init__(self, enabled: bool = True):
    """Initialize a cost tracker.

    Args:
        enabled: Initial value for the per-context cost-tracking flag.
    """
    self.lifetime_cost_usd = 0.0
    self.last_report = 0.0
    # A contextvar so that different coroutines don't affect each other's cost tracking
    self.enabled = contextvars.ContextVar[bool]("track_costs", default=enabled)
    # Not a contextvar because I can't imagine a scenario where you'd want more fine-grained control
    self.report_every_usd = 1.0
    self._callbacks: list[Callable[[LLMResponse], Awaitable]] = []

def add_callback(self, callback: Callable[[LLMResponse], Awaitable]) -> None:
    """Append *callback* to this tracker's callback list.

    NOTE(review): callbacks are presumably awaited when a response is
    recorded — the invocation site is not visible here; confirm in record().
    """
    self._callbacks.append(callback)

def set_reporting_threshold(self, threshold_usd: float) -> None:
    """Update the USD threshold used for cost reporting."""
    self.report_every_usd = threshold_usd

def enable_cost_tracking(self, enabled: bool = True) -> None:
    """Turn cost tracking on (default) or off for the current context."""
    self.enabled.set(enabled)

def __enter__(self) -> "CostTracker":
    """Enter the context manager, making this the active tracker.

    The returned token is kept so __exit__ can restore the previous tracker.
    """
    self._token = _active_tracker.set(self)
    return self

def __exit__(self, *exc_info):
    """Restore whichever tracker was active before this context was entered."""
    _active_tracker.reset(self._token)

async def record(self, response: LLMResponse) -> None:
# Only record on responses with usage information (final chunk in streaming)
# We check for usage presence rather than cost > 0 because:
Expand All @@ -50,15 +77,17 @@ async def record(self, response: LLMResponse) -> None:
)


# Disabled by default; opt in via enable_cost_tracking() or a tracker context.
GLOBAL_COST_TRACKER = CostTracker(enabled=False)


def set_reporting_threshold(threshold_usd: float) -> None:
    """Set the reporting threshold for the global cost tracker."""
    GLOBAL_COST_TRACKER.set_reporting_threshold(threshold_usd)


def enable_cost_tracking(enabled: bool = True) -> None:
    """Enable or disable cost tracking for the global cost tracker."""
    GLOBAL_COST_TRACKER.enable_cost_tracking(enabled)


@contextmanager
Expand All @@ -85,7 +114,8 @@ def track_costs(
"""Automatically track API costs of a coroutine call.

Note that the costs will only be recorded if `enable_cost_tracking()` is called,
or if in a `cost_tracking_ctx()` context.
or if in a `cost_tracking_ctx()` context, or if using a custom CostTracker
as a context manager.

Usage:
```
Expand All @@ -103,8 +133,9 @@ async def api_call(...) -> litellm.ModelResponse:

async def wrapped_func(*args, **kwargs):
    # Record cost against the active tracker: the context-local one if a
    # CostTracker context manager is entered, otherwise the global tracker.
    response = await func(*args, **kwargs)
    tracker = _get_active_tracker()
    if tracker.enabled.get():
        await tracker.record(response)
    return response

return wrapped_func
Expand Down Expand Up @@ -146,8 +177,9 @@ def __aiter__(self):

async def __anext__(self):
    """Forward to the wrapped stream, recording each chunk on the active tracker."""
    response = await self.stream.__anext__()
    tracker = _get_active_tracker()
    if tracker.enabled.get():
        await tracker.record(response)
    return response


Expand All @@ -161,7 +193,8 @@ def track_costs_iter(
`TrackedStreamWrapper.stream`.

Note that the costs will only be recorded if `enable_cost_tracking()` is called,
or if in a `cost_tracking_ctx()` context.
or if in a `cost_tracking_ctx()` context, or if using a custom CostTracker
as a context manager.

Usage:
```
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
interactions:
- request:
body:
'{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Say
hello"}]}],"temperature":1.0,"max_tokens":4096}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
anthropic-version:
- "2023-06-01"
connection:
- keep-alive
content-length:
- "149"
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- litellm/1.74.15.post2
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: !!binary |
H4sIAAAAAAAAA3TQTUvDQBAG4L8S3/MGmtSC7kV6EHoRROmlIsuyOyRrNztxP9RS8t8lYvELTwPz
vDMMc8TAljwkjNfFUr2sV3Wv3b7U7aI9bxZtCwFnITGkTi2amzXFO3u7vr5Y7reGd0/3u+2mg0A+
jDSnKCXdEQQi+7mhU3Ip65AhYDhkChny4XjKZ3qb5aNIbMh7Pqs2/FrpSNWBS2XZha7KbPXhCtOj
QMo8qkg6cYAEBatyiQGfkOi5UDAEGYr3AuXjGnmEC2PJKvOeQoK8FDDa9KRMJJ0dB/XTFyePpO1/
dpqd19PY00BRe7Ua/ua/tOl/6yTAJX9vNY1AovjiDKnsKEJi/qDV0WKa3gEAAP//AwBCBnFdswEA
AA==
headers:
CF-RAY:
- 992d7190ad48159c-SJC
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Thu, 23 Oct 2025 01:25:59 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
Via:
- 1.1 google
X-Robots-Tag:
- none
anthropic-organization-id:
- f2c99ed9-038a-406f-9cb5-1f840b758a20
anthropic-ratelimit-input-tokens-limit:
- "5000000"
anthropic-ratelimit-input-tokens-remaining:
- "5000000"
anthropic-ratelimit-input-tokens-reset:
- "2025-10-23T01:25:59Z"
anthropic-ratelimit-output-tokens-limit:
- "1000000"
anthropic-ratelimit-output-tokens-remaining:
- "1000000"
anthropic-ratelimit-output-tokens-reset:
- "2025-10-23T01:25:59Z"
anthropic-ratelimit-requests-limit:
- "5000"
anthropic-ratelimit-requests-remaining:
- "4999"
anthropic-ratelimit-requests-reset:
- "2025-10-23T01:25:58Z"
anthropic-ratelimit-tokens-limit:
- "6000000"
anthropic-ratelimit-tokens-remaining:
- "6000000"
anthropic-ratelimit-tokens-reset:
- "2025-10-23T01:25:59Z"
cf-cache-status:
- DYNAMIC
request-id:
- req_011CUPFzKDcuEAx41uaTFRDg
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-envoy-upstream-service-time:
- "684"
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
interactions:
- request:
body:
'{"model": "claude-3-5-haiku-20241022", "messages": [{"role": "user", "content":
[{"type": "text", "text": "Say hello"}]}], "temperature": 1.0, "max_tokens":
4096, "stream": true}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
anthropic-version:
- "2023-06-01"
connection:
- keep-alive
content-length:
- "178"
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- litellm/1.74.15.post2
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: 'event: message_start

data: {"type":"message_start","message":{"model":"claude-3-5-haiku-20241022","id":"msg_015Ec5tfQ5jkVrkJs1Vekcxk","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":2,"service_tier":"standard"}} }


event: content_block_start

data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }


event: content_block_delta

data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello!"} }


event: content_block_delta

data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
How"} }


event: ping

data: {"type": "ping"}


event: content_block_delta

data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
are you doing today?"} }


event: content_block_stop

data: {"type":"content_block_stop","index":0}


event: message_delta

data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":11}
}


event: message_stop

data: {"type":"message_stop" }


'
headers:
CF-RAY:
- 992d71964d171698-SJC
Cache-Control:
- no-cache
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Thu, 23 Oct 2025 01:25:59 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
Via:
- 1.1 google
X-Robots-Tag:
- none
anthropic-organization-id:
- f2c99ed9-038a-406f-9cb5-1f840b758a20
anthropic-ratelimit-input-tokens-limit:
- "5000000"
anthropic-ratelimit-input-tokens-remaining:
- "5000000"
anthropic-ratelimit-input-tokens-reset:
- "2025-10-23T01:25:59Z"
anthropic-ratelimit-output-tokens-limit:
- "1000000"
anthropic-ratelimit-output-tokens-remaining:
- "1000000"
anthropic-ratelimit-output-tokens-reset:
- "2025-10-23T01:25:59Z"
anthropic-ratelimit-requests-limit:
- "5000"
anthropic-ratelimit-requests-remaining:
- "4999"
anthropic-ratelimit-requests-reset:
- "2025-10-23T01:25:59Z"
anthropic-ratelimit-tokens-limit:
- "6000000"
anthropic-ratelimit-tokens-remaining:
- "6000000"
anthropic-ratelimit-tokens-reset:
- "2025-10-23T01:25:59Z"
cf-cache-status:
- DYNAMIC
request-id:
- req_011CUPFzNhUdpP2dMfVfrVBD
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-envoy-upstream-service-time:
- "355"
status:
code: 200
message: OK
version: 1
Loading
Loading