Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 6.8.0 - 2025-11-03

- feat(llma): send web search calls to be used for LLM cost calculations

# 6.7.14 - 2025-11-03

- fix(django): Handle request.user access in async middleware context to prevent SynchronousOnlyOperation errors in Django 5+ (fixes #355)
Expand Down
97 changes: 30 additions & 67 deletions posthog/ai/anthropic/anthropic_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,9 @@
from posthog.ai.types import StreamingContentBlock, TokenUsage, ToolInProgress
from posthog.ai.utils import (
call_llm_and_track_usage_async,
extract_available_tool_calls,
get_model_params,
merge_system_prompt,
merge_usage_stats,
with_privacy_mode,
)
from posthog.ai.anthropic.anthropic_converter import (
format_anthropic_streaming_content,
extract_anthropic_usage_from_event,
handle_anthropic_content_block_start,
handle_anthropic_text_delta,
Expand Down Expand Up @@ -220,66 +215,34 @@ async def _capture_streaming_event(
content_blocks: List[StreamingContentBlock],
accumulated_content: str,
):
if posthog_trace_id is None:
posthog_trace_id = str(uuid.uuid4())

# Format output using converter
formatted_content = format_anthropic_streaming_content(content_blocks)
formatted_output = []

if formatted_content:
formatted_output = [{"role": "assistant", "content": formatted_content}]
else:
# Fallback to accumulated content if no blocks
formatted_output = [
{
"role": "assistant",
"content": [{"type": "text", "text": accumulated_content}],
}
]

event_properties = {
"$ai_provider": "anthropic",
"$ai_model": kwargs.get("model"),
"$ai_model_parameters": get_model_params(kwargs),
"$ai_input": with_privacy_mode(
self._client._ph_client,
posthog_privacy_mode,
sanitize_anthropic(merge_system_prompt(kwargs, "anthropic")),
),
"$ai_output_choices": with_privacy_mode(
self._client._ph_client,
posthog_privacy_mode,
formatted_output,
),
"$ai_http_status": 200,
"$ai_input_tokens": usage_stats.get("input_tokens", 0),
"$ai_output_tokens": usage_stats.get("output_tokens", 0),
"$ai_cache_read_input_tokens": usage_stats.get(
"cache_read_input_tokens", 0
),
"$ai_cache_creation_input_tokens": usage_stats.get(
"cache_creation_input_tokens", 0
from posthog.ai.types import StreamingEventData
from posthog.ai.anthropic.anthropic_converter import (
format_anthropic_streaming_input,
format_anthropic_streaming_output_complete,
)
from posthog.ai.utils import capture_streaming_event

# Prepare standardized event data
formatted_input = format_anthropic_streaming_input(kwargs)
sanitized_input = sanitize_anthropic(formatted_input)

event_data = StreamingEventData(
provider="anthropic",
model=kwargs.get("model", "unknown"),
base_url=str(self._client.base_url),
kwargs=kwargs,
formatted_input=sanitized_input,
formatted_output=format_anthropic_streaming_output_complete(
content_blocks, accumulated_content
),
"$ai_latency": latency,
"$ai_trace_id": posthog_trace_id,
"$ai_base_url": str(self._client.base_url),
**(posthog_properties or {}),
}

# Add tools if available
available_tools = extract_available_tool_calls("anthropic", kwargs)

if available_tools:
event_properties["$ai_tools"] = available_tools

if posthog_distinct_id is None:
event_properties["$process_person_profile"] = False

if hasattr(self._client._ph_client, "capture"):
self._client._ph_client.capture(
distinct_id=posthog_distinct_id or posthog_trace_id,
event="$ai_generation",
properties=event_properties,
groups=posthog_groups,
)
usage_stats=usage_stats,
latency=latency,
distinct_id=posthog_distinct_id,
trace_id=posthog_trace_id,
properties=posthog_properties,
privacy_mode=posthog_privacy_mode,
groups=posthog_groups,
)

# Use the common capture function
capture_streaming_event(self._client._ph_client, event_data)
40 changes: 40 additions & 0 deletions posthog/ai/anthropic/anthropic_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,32 @@ def format_anthropic_streaming_content(
return formatted


def extract_anthropic_web_search_count(response: Any) -> int:
    """
    Extract web search count from Anthropic response.

    Anthropic provides exact web search counts via
    ``usage.server_tool_use.web_search_requests``.

    Args:
        response: The response from Anthropic API

    Returns:
        Number of web search requests (0 if none)
    """
    usage = getattr(response, "usage", None)
    if usage is None:
        return 0

    server_tool_use = getattr(usage, "server_tool_use", None)
    if server_tool_use is None:
        return 0

    # The field may be absent or explicitly None; treat both as "no searches"
    # instead of letting int(None) raise a TypeError.
    requests = getattr(server_tool_use, "web_search_requests", None)
    if requests is None:
        return 0

    # Clamp negative values defensively; the API should never report < 0.
    return max(0, int(requests))


def extract_anthropic_usage_from_response(response: Any) -> TokenUsage:
"""
Extract usage from a full Anthropic response (non-streaming).
Expand Down Expand Up @@ -191,6 +217,10 @@ def extract_anthropic_usage_from_response(response: Any) -> TokenUsage:
if cache_creation and cache_creation > 0:
result["cache_creation_input_tokens"] = cache_creation

web_search_count = extract_anthropic_web_search_count(response)
if web_search_count > 0:
result["web_search_count"] = web_search_count

return result


Expand Down Expand Up @@ -222,6 +252,16 @@ def extract_anthropic_usage_from_event(event: Any) -> TokenUsage:
if hasattr(event, "usage") and event.usage:
usage["output_tokens"] = getattr(event.usage, "output_tokens", 0)

# Extract web search count from usage
if hasattr(event.usage, "server_tool_use"):
server_tool_use = event.usage.server_tool_use
if hasattr(server_tool_use, "web_search_requests"):
web_search_count = int(
getattr(server_tool_use, "web_search_requests", 0)
)
if web_search_count > 0:
usage["web_search_count"] = web_search_count

return usage


Expand Down
76 changes: 73 additions & 3 deletions posthog/ai/gemini/gemini_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,61 @@ def format_gemini_input(contents: Any) -> List[FormattedMessage]:
return [_format_object_message(contents)]


def extract_gemini_web_search_count(response: Any) -> int:
    """
    Extract web search count from Gemini response.

    Gemini bills per request that uses grounding, not per query, so this
    returns at most 1.

    Args:
        response: The response from Gemini API

    Returns:
        1 if web search/grounding was used, 0 otherwise
    """
    # candidates may be absent or explicitly None; iterating None would raise.
    candidates = getattr(response, "candidates", None)
    if not candidates:
        return 0

    for candidate in candidates:
        grounding_metadata = getattr(candidate, "grounding_metadata", None)

        if grounding_metadata:
            # A non-empty web_search_queries list means a search actually ran.
            queries = getattr(grounding_metadata, "web_search_queries", None)
            if queries:
                return 1

            # Non-empty grounding_chunks also indicates grounding was used.
            chunks = getattr(grounding_metadata, "grounding_chunks", None)
            if chunks:
                return 1

        # Fallback: look for google_search / grounding tool calls in content parts.
        content = getattr(candidate, "content", None)
        parts = getattr(content, "parts", None) if content else None

        for part in parts or []:
            function_call = getattr(part, "function_call", None)

            if function_call:
                # name may be explicitly None; guard before .lower().
                function_name = (getattr(function_call, "name", "") or "").lower()

                if "google_search" in function_name or "grounding" in function_name:
                    return 1

    return 0


def _extract_usage_from_metadata(metadata: Any) -> TokenUsage:
"""
Common logic to extract usage from Gemini metadata.
Expand Down Expand Up @@ -382,7 +437,14 @@ def extract_gemini_usage_from_response(response: Any) -> TokenUsage:
if not hasattr(response, "usage_metadata") or not response.usage_metadata:
return TokenUsage(input_tokens=0, output_tokens=0)

return _extract_usage_from_metadata(response.usage_metadata)
usage = _extract_usage_from_metadata(response.usage_metadata)

# Add web search count if present
web_search_count = extract_gemini_web_search_count(response)
if web_search_count > 0:
usage["web_search_count"] = web_search_count

return usage


def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage:
Expand All @@ -398,11 +460,19 @@ def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage:

usage: TokenUsage = TokenUsage()

# Extract web search count from the chunk before checking for usage_metadata
# Web search indicators can appear on any chunk, not just those with usage data
web_search_count = extract_gemini_web_search_count(chunk)
if web_search_count > 0:
usage["web_search_count"] = web_search_count

if not hasattr(chunk, "usage_metadata") or not chunk.usage_metadata:
return usage

# Use the shared helper to extract usage
usage = _extract_usage_from_metadata(chunk.usage_metadata)
usage_from_metadata = _extract_usage_from_metadata(chunk.usage_metadata)

# Merge the usage from metadata with any web search count we found
usage.update(usage_from_metadata)

return usage

Expand Down
19 changes: 19 additions & 0 deletions posthog/ai/openai/openai_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,15 @@ async def _capture_streaming_event(
**(posthog_properties or {}),
}

# Add web search count if present
web_search_count = usage_stats.get("web_search_count")
if (
web_search_count is not None
and isinstance(web_search_count, int)
and web_search_count > 0
):
event_properties["$ai_web_search_count"] = web_search_count

if available_tool_calls:
event_properties["$ai_tools"] = available_tool_calls

Expand Down Expand Up @@ -444,6 +453,16 @@ async def _capture_streaming_event(
**(posthog_properties or {}),
}

# Add web search count if present
web_search_count = usage_stats.get("web_search_count")

if (
web_search_count is not None
and isinstance(web_search_count, int)
and web_search_count > 0
):
event_properties["$ai_web_search_count"] = web_search_count

if available_tool_calls:
event_properties["$ai_tools"] = available_tool_calls

Expand Down
Loading
Loading