Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 6.8.0 - 2025-11-03

- feat(llma): send web search calls to be used for LLM cost calculations

# 6.7.14 - 2025-11-03

- fix(django): Handle request.user access in async middleware context to prevent SynchronousOnlyOperation errors in Django 5+ (fixes #355)
Expand Down
97 changes: 30 additions & 67 deletions posthog/ai/anthropic/anthropic_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,9 @@
from posthog.ai.types import StreamingContentBlock, TokenUsage, ToolInProgress
from posthog.ai.utils import (
call_llm_and_track_usage_async,
extract_available_tool_calls,
get_model_params,
merge_system_prompt,
merge_usage_stats,
with_privacy_mode,
)
from posthog.ai.anthropic.anthropic_converter import (
format_anthropic_streaming_content,
extract_anthropic_usage_from_event,
handle_anthropic_content_block_start,
handle_anthropic_text_delta,
Expand Down Expand Up @@ -220,66 +215,34 @@ async def _capture_streaming_event(
content_blocks: List[StreamingContentBlock],
accumulated_content: str,
):
if posthog_trace_id is None:
posthog_trace_id = str(uuid.uuid4())

# Format output using converter
formatted_content = format_anthropic_streaming_content(content_blocks)
formatted_output = []

if formatted_content:
formatted_output = [{"role": "assistant", "content": formatted_content}]
else:
# Fallback to accumulated content if no blocks
formatted_output = [
{
"role": "assistant",
"content": [{"type": "text", "text": accumulated_content}],
}
]

event_properties = {
"$ai_provider": "anthropic",
"$ai_model": kwargs.get("model"),
"$ai_model_parameters": get_model_params(kwargs),
"$ai_input": with_privacy_mode(
self._client._ph_client,
posthog_privacy_mode,
sanitize_anthropic(merge_system_prompt(kwargs, "anthropic")),
),
"$ai_output_choices": with_privacy_mode(
self._client._ph_client,
posthog_privacy_mode,
formatted_output,
),
"$ai_http_status": 200,
"$ai_input_tokens": usage_stats.get("input_tokens", 0),
"$ai_output_tokens": usage_stats.get("output_tokens", 0),
"$ai_cache_read_input_tokens": usage_stats.get(
"cache_read_input_tokens", 0
),
"$ai_cache_creation_input_tokens": usage_stats.get(
"cache_creation_input_tokens", 0
from posthog.ai.types import StreamingEventData
from posthog.ai.anthropic.anthropic_converter import (
format_anthropic_streaming_input,
format_anthropic_streaming_output_complete,
)
from posthog.ai.utils import capture_streaming_event

# Prepare standardized event data
formatted_input = format_anthropic_streaming_input(kwargs)
sanitized_input = sanitize_anthropic(formatted_input)

event_data = StreamingEventData(
provider="anthropic",
model=kwargs.get("model", "unknown"),
base_url=str(self._client.base_url),
kwargs=kwargs,
formatted_input=sanitized_input,
formatted_output=format_anthropic_streaming_output_complete(
content_blocks, accumulated_content
),
"$ai_latency": latency,
"$ai_trace_id": posthog_trace_id,
"$ai_base_url": str(self._client.base_url),
**(posthog_properties or {}),
}

# Add tools if available
available_tools = extract_available_tool_calls("anthropic", kwargs)

if available_tools:
event_properties["$ai_tools"] = available_tools

if posthog_distinct_id is None:
event_properties["$process_person_profile"] = False

if hasattr(self._client._ph_client, "capture"):
self._client._ph_client.capture(
distinct_id=posthog_distinct_id or posthog_trace_id,
event="$ai_generation",
properties=event_properties,
groups=posthog_groups,
)
usage_stats=usage_stats,
latency=latency,
distinct_id=posthog_distinct_id,
trace_id=posthog_trace_id,
properties=posthog_properties,
privacy_mode=posthog_privacy_mode,
groups=posthog_groups,
)

# Use the common capture function
capture_streaming_event(self._client._ph_client, event_data)
40 changes: 40 additions & 0 deletions posthog/ai/anthropic/anthropic_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,32 @@ def format_anthropic_streaming_content(
return formatted


def extract_anthropic_web_search_count(response: Any) -> int:
    """
    Extract web search count from Anthropic response.

    Anthropic provides exact web search counts via
    ``usage.server_tool_use.web_search_requests``.

    Args:
        response: The response from Anthropic API

    Returns:
        Number of web search requests (0 if none)
    """
    usage = getattr(response, "usage", None)
    if usage is None:
        return 0

    server_tool_use = getattr(usage, "server_tool_use", None)
    if server_tool_use is None:
        return 0

    # The field may be absent or explicitly None; treat both as "no searches"
    # instead of letting int(None) raise a TypeError.
    requests = getattr(server_tool_use, "web_search_requests", None)
    if requests is None:
        return 0

    # Clamp negative values defensively; the API should never report < 0.
    return max(0, int(requests))


def extract_anthropic_usage_from_response(response: Any) -> TokenUsage:
"""
Extract usage from a full Anthropic response (non-streaming).
Expand Down Expand Up @@ -191,6 +217,10 @@ def extract_anthropic_usage_from_response(response: Any) -> TokenUsage:
if cache_creation and cache_creation > 0:
result["cache_creation_input_tokens"] = cache_creation

web_search_count = extract_anthropic_web_search_count(response)
if web_search_count > 0:
result["web_search_count"] = web_search_count

return result


Expand Down Expand Up @@ -222,6 +252,16 @@ def extract_anthropic_usage_from_event(event: Any) -> TokenUsage:
if hasattr(event, "usage") and event.usage:
usage["output_tokens"] = getattr(event.usage, "output_tokens", 0)

# Extract web search count from usage
if hasattr(event.usage, "server_tool_use"):
server_tool_use = event.usage.server_tool_use
if hasattr(server_tool_use, "web_search_requests"):
web_search_count = int(
getattr(server_tool_use, "web_search_requests", 0)
)
if web_search_count > 0:
usage["web_search_count"] = web_search_count

return usage


Expand Down
76 changes: 73 additions & 3 deletions posthog/ai/gemini/gemini_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,61 @@ def format_gemini_input(contents: Any) -> List[FormattedMessage]:
return [_format_object_message(contents)]


def extract_gemini_web_search_count(response: Any) -> int:
    """
    Extract web search count from Gemini response.

    Gemini bills per request that uses grounding, not per query, so this
    returns at most 1.

    Args:
        response: The response from Gemini API

    Returns:
        1 if web search/grounding was used, 0 otherwise
    """
    # candidates may be absent or explicitly None; iterating None would raise.
    candidates = getattr(response, "candidates", None)
    if not candidates:
        return 0

    for candidate in candidates:
        grounding_metadata = getattr(candidate, "grounding_metadata", None)

        if grounding_metadata:
            # A non-empty web_search_queries list means a search actually ran.
            queries = getattr(grounding_metadata, "web_search_queries", None)
            if queries:
                return 1

            # Non-empty grounding_chunks also indicates grounding was used.
            chunks = getattr(grounding_metadata, "grounding_chunks", None)
            if chunks:
                return 1

        # Fallback: look for google_search / grounding tool calls in content parts.
        content = getattr(candidate, "content", None)
        parts = getattr(content, "parts", None) if content else None

        for part in parts or []:
            function_call = getattr(part, "function_call", None)

            if function_call:
                # name may be explicitly None; guard before .lower().
                function_name = (getattr(function_call, "name", "") or "").lower()

                if "google_search" in function_name or "grounding" in function_name:
                    return 1

    return 0


def _extract_usage_from_metadata(metadata: Any) -> TokenUsage:
"""
Common logic to extract usage from Gemini metadata.
Expand Down Expand Up @@ -382,7 +437,14 @@ def extract_gemini_usage_from_response(response: Any) -> TokenUsage:
if not hasattr(response, "usage_metadata") or not response.usage_metadata:
return TokenUsage(input_tokens=0, output_tokens=0)

return _extract_usage_from_metadata(response.usage_metadata)
usage = _extract_usage_from_metadata(response.usage_metadata)

# Add web search count if present
web_search_count = extract_gemini_web_search_count(response)
if web_search_count > 0:
usage["web_search_count"] = web_search_count

return usage


def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage:
Expand All @@ -398,11 +460,19 @@ def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage:

usage: TokenUsage = TokenUsage()

# Extract web search count from the chunk before checking for usage_metadata
# Web search indicators can appear on any chunk, not just those with usage data
web_search_count = extract_gemini_web_search_count(chunk)
if web_search_count > 0:
usage["web_search_count"] = web_search_count

if not hasattr(chunk, "usage_metadata") or not chunk.usage_metadata:
return usage

# Use the shared helper to extract usage
usage = _extract_usage_from_metadata(chunk.usage_metadata)
usage_from_metadata = _extract_usage_from_metadata(chunk.usage_metadata)

# Merge the usage from metadata with any web search count we found
usage.update(usage_from_metadata)

return usage

Expand Down
19 changes: 19 additions & 0 deletions posthog/ai/openai/openai_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,15 @@ async def _capture_streaming_event(
**(posthog_properties or {}),
}

# Add web search count if present
web_search_count = usage_stats.get("web_search_count")
if (
web_search_count is not None
and isinstance(web_search_count, int)
and web_search_count > 0
):
event_properties["$ai_web_search_count"] = web_search_count

if available_tool_calls:
event_properties["$ai_tools"] = available_tool_calls

Expand Down Expand Up @@ -444,6 +453,16 @@ async def _capture_streaming_event(
**(posthog_properties or {}),
}

# Add web search count if present
web_search_count = usage_stats.get("web_search_count")

if (
web_search_count is not None
and isinstance(web_search_count, int)
and web_search_count > 0
):
event_properties["$ai_web_search_count"] = web_search_count

if available_tool_calls:
event_properties["$ai_tools"] = available_tool_calls

Expand Down
Loading
Loading