Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 44 additions & 11 deletions packages/lmi/src/lmi/cost_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,47 @@

logger = logging.getLogger(__name__)

# Module-level context variable to track the currently active cost tracker
_active_tracker: contextvars.ContextVar["CostTracker | None"] = contextvars.ContextVar(
"active_cost_tracker", default=None
)


def _get_active_tracker() -> "CostTracker":
    """Return the tracker most recently activated as a context manager.

    Falls back to GLOBAL_COST_TRACKER when no tracker context is active.
    """
    current = _active_tracker.get()
    return current or GLOBAL_COST_TRACKER
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on this and the PR description, this is where we have to choose between a temporary cost tracker and the global cost tracker?

I can see arguments for both sides (switching between global cost tracking vs always adding to it).

For me, I think having global cost tracking always on is the easiest and most predictable option. Summing across several separate cost trackers could get arduous.

What did you think?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah after talking about it, I'm on the same page. Will do.



class CostTracker:
def __init__(self, enabled: bool = True):
    """Initialize a cost tracker.

    Args:
        enabled: Initial value for the per-context cost-tracking flag.
    """
    self.lifetime_cost_usd = 0.0
    self.last_report = 0.0
    # A contextvar so that different coroutines don't affect each other's cost tracking
    self.enabled = contextvars.ContextVar[bool]("track_costs", default=enabled)
    # Not a contextvar because I can't imagine a scenario where you'd want more fine-grained control
    self.report_every_usd = 1.0
    self._callbacks: list[Callable[[LLMResponse], Awaitable]] = []

def add_callback(self, callback: Callable[[LLMResponse], Awaitable]) -> None:
    """Append *callback* to this tracker's callback list.

    NOTE(review): callbacks are presumably awaited when a response is
    recorded — the invocation site is not visible here; confirm in record().
    """
    self._callbacks.append(callback)

def set_reporting_threshold(self, threshold_usd: float) -> None:
    """Update the USD threshold used for cost reporting."""
    self.report_every_usd = threshold_usd

def enable_cost_tracking(self, enabled: bool = True) -> None:
    """Turn cost tracking on (default) or off for the current context."""
    self.enabled.set(enabled)

def __enter__(self) -> "CostTracker":
    """Enter the context manager, making this the active tracker.

    The returned token is kept so __exit__ can restore the previous tracker.
    """
    self._token = _active_tracker.set(self)
    return self

def __exit__(self, *exc_info):
    """Restore whichever tracker was active before this context was entered."""
    _active_tracker.reset(self._token)

async def record(self, response: LLMResponse) -> None:
# Only record on responses with usage information (final chunk in streaming)
# We check for usage presence rather than cost > 0 because:
Expand All @@ -50,15 +77,17 @@ async def record(self, response: LLMResponse) -> None:
)


# Disabled by default; opt in via enable_cost_tracking() or a tracker context.
GLOBAL_COST_TRACKER = CostTracker(enabled=False)


def set_reporting_threshold(threshold_usd: float) -> None:
    """Set the reporting threshold for the global cost tracker."""
    GLOBAL_COST_TRACKER.set_reporting_threshold(threshold_usd)


def enable_cost_tracking(enabled: bool = True) -> None:
    """Enable or disable cost tracking for the global cost tracker."""
    GLOBAL_COST_TRACKER.enable_cost_tracking(enabled)


@contextmanager
Expand All @@ -85,7 +114,8 @@ def track_costs(
"""Automatically track API costs of a coroutine call.

Note that the costs will only be recorded if `enable_cost_tracking()` is called,
or if in a `cost_tracking_ctx()` context.
or if in a `cost_tracking_ctx()` context, or if using a custom CostTracker
as a context manager.

Usage:
```
Expand All @@ -103,8 +133,9 @@ async def api_call(...) -> litellm.ModelResponse:

async def wrapped_func(*args, **kwargs):
    # Record cost against the active tracker: the context-local one if a
    # CostTracker context manager is entered, otherwise the global tracker.
    response = await func(*args, **kwargs)
    tracker = _get_active_tracker()
    if tracker.enabled.get():
        await tracker.record(response)
    return response

return wrapped_func
Expand Down Expand Up @@ -146,8 +177,9 @@ def __aiter__(self):

async def __anext__(self):
    """Forward to the wrapped stream, recording each chunk on the active tracker."""
    response = await self.stream.__anext__()
    tracker = _get_active_tracker()
    if tracker.enabled.get():
        await tracker.record(response)
    return response


Expand All @@ -161,7 +193,8 @@ def track_costs_iter(
`TrackedStreamWrapper.stream`.

Note that the costs will only be recorded if `enable_cost_tracking()` is called,
or if in a `cost_tracking_ctx()` context.
or if in a `cost_tracking_ctx()` context, or if using a custom CostTracker
as a context manager.

Usage:
```
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
interactions:
- request:
body:
'{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Say
hello"}]}],"temperature":1.0,"max_tokens":4096}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
anthropic-version:
- "2023-06-01"
connection:
- keep-alive
content-length:
- "149"
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- litellm/1.74.15.post2
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: !!binary |
H4sIAAAAAAAAA3TQTUvDQBAG4L8S3/MGmtSC7kV6EHoRROmlIsuyOyRrNztxP9RS8t8lYvELTwPz
vDMMc8TAljwkjNfFUr2sV3Wv3b7U7aI9bxZtCwFnITGkTi2amzXFO3u7vr5Y7reGd0/3u+2mg0A+
jDSnKCXdEQQi+7mhU3Ip65AhYDhkChny4XjKZ3qb5aNIbMh7Pqs2/FrpSNWBS2XZha7KbPXhCtOj
QMo8qkg6cYAEBatyiQGfkOi5UDAEGYr3AuXjGnmEC2PJKvOeQoK8FDDa9KRMJJ0dB/XTFyePpO1/
dpqd19PY00BRe7Ua/ua/tOl/6yTAJX9vNY1AovjiDKnsKEJi/qDV0WKa3gEAAP//AwBCBnFdswEA
AA==
headers:
CF-RAY:
- 992d7190ad48159c-SJC
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Thu, 23 Oct 2025 01:25:59 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
Via:
- 1.1 google
X-Robots-Tag:
- none
anthropic-organization-id:
- f2c99ed9-038a-406f-9cb5-1f840b758a20
anthropic-ratelimit-input-tokens-limit:
- "5000000"
anthropic-ratelimit-input-tokens-remaining:
- "5000000"
anthropic-ratelimit-input-tokens-reset:
- "2025-10-23T01:25:59Z"
anthropic-ratelimit-output-tokens-limit:
- "1000000"
anthropic-ratelimit-output-tokens-remaining:
- "1000000"
anthropic-ratelimit-output-tokens-reset:
- "2025-10-23T01:25:59Z"
anthropic-ratelimit-requests-limit:
- "5000"
anthropic-ratelimit-requests-remaining:
- "4999"
anthropic-ratelimit-requests-reset:
- "2025-10-23T01:25:58Z"
anthropic-ratelimit-tokens-limit:
- "6000000"
anthropic-ratelimit-tokens-remaining:
- "6000000"
anthropic-ratelimit-tokens-reset:
- "2025-10-23T01:25:59Z"
cf-cache-status:
- DYNAMIC
request-id:
- req_011CUPFzKDcuEAx41uaTFRDg
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-envoy-upstream-service-time:
- "684"
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
interactions:
- request:
body:
'{"model": "claude-3-5-haiku-20241022", "messages": [{"role": "user", "content":
[{"type": "text", "text": "Say hello"}]}], "temperature": 1.0, "max_tokens":
4096, "stream": true}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
anthropic-version:
- "2023-06-01"
connection:
- keep-alive
content-length:
- "178"
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- litellm/1.74.15.post2
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: 'event: message_start

data: {"type":"message_start","message":{"model":"claude-3-5-haiku-20241022","id":"msg_015Ec5tfQ5jkVrkJs1Vekcxk","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":2,"service_tier":"standard"}} }


event: content_block_start

data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }


event: content_block_delta

data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello!"} }


event: content_block_delta

data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
How"} }


event: ping

data: {"type": "ping"}


event: content_block_delta

data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
are you doing today?"} }


event: content_block_stop

data: {"type":"content_block_stop","index":0}


event: message_delta

data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":11}
}


event: message_stop

data: {"type":"message_stop" }


'
headers:
CF-RAY:
- 992d71964d171698-SJC
Cache-Control:
- no-cache
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Thu, 23 Oct 2025 01:25:59 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
Via:
- 1.1 google
X-Robots-Tag:
- none
anthropic-organization-id:
- f2c99ed9-038a-406f-9cb5-1f840b758a20
anthropic-ratelimit-input-tokens-limit:
- "5000000"
anthropic-ratelimit-input-tokens-remaining:
- "5000000"
anthropic-ratelimit-input-tokens-reset:
- "2025-10-23T01:25:59Z"
anthropic-ratelimit-output-tokens-limit:
- "1000000"
anthropic-ratelimit-output-tokens-remaining:
- "1000000"
anthropic-ratelimit-output-tokens-reset:
- "2025-10-23T01:25:59Z"
anthropic-ratelimit-requests-limit:
- "5000"
anthropic-ratelimit-requests-remaining:
- "4999"
anthropic-ratelimit-requests-reset:
- "2025-10-23T01:25:59Z"
anthropic-ratelimit-tokens-limit:
- "6000000"
anthropic-ratelimit-tokens-remaining:
- "6000000"
anthropic-ratelimit-tokens-reset:
- "2025-10-23T01:25:59Z"
cf-cache-status:
- DYNAMIC
request-id:
- req_011CUPFzNhUdpP2dMfVfrVBD
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-envoy-upstream-service-time:
- "355"
status:
code: 200
message: OK
version: 1
Loading
Loading