72 changes: 69 additions & 3 deletions libs/partners/deepseek/langchain_deepseek/chat_models.py
@@ -173,9 +173,16 @@
default_factory=from_env("DEEPSEEK_API_BASE", default=DEFAULT_API_BASE),
)
"""DeepSeek API base URL"""
strict: bool | None = Field(
default=None,
description=(
"Whether to enable strict mode for function calling. "
"When enabled, uses the Beta API endpoint and ensures "
"outputs strictly comply with the defined JSON schema."
),
)

model_config = ConfigDict(populate_by_name=True)

@property
def _llm_type(self) -> str:
"""Return type of chat model."""
@@ -198,16 +205,22 @@
@model_validator(mode="after")
def validate_environment(self) -> Self:
"""Validate necessary environment vars and client params."""
if self.api_base == DEFAULT_API_BASE and not (
# Use Beta API if strict mode is enabled
api_base = self.api_base
if self.strict and self.api_base == DEFAULT_API_BASE:
api_base = "https://api.deepseek.com/beta"

if api_base == DEFAULT_API_BASE and not (
self.api_key and self.api_key.get_secret_value()
):
msg = "If using default api base, DEEPSEEK_API_KEY must be set."
raise ValueError(msg)

client_params: dict = {
k: v
for k, v in {
"api_key": self.api_key.get_secret_value() if self.api_key else None,
"base_url": self.api_base,
"base_url": api_base,
"timeout": self.request_timeout,
"max_retries": self.max_retries,
"default_headers": self.default_headers,
@@ -229,6 +242,59 @@
self.async_client = self.root_async_client.chat.completions
return self

def bind_tools(
self,
tools: list,
*,
tool_choice: str | dict | None = None,
strict: bool | None = None,
**kwargs: Any,
) -> Runnable[LanguageModelInput, BaseMessage]:
"""Bind tools to the model with optional strict mode.

Args:
tools: A list of tool definitions or Pydantic models.
tool_choice: Which tool the model should use.
strict: Whether to enable strict mode for these tools.
If not provided, uses the instance's strict setting.
**kwargs: Additional arguments to pass to the parent method.

Returns:
A Runnable that will call the model with the bound tools.
"""
# Use instance strict setting if not explicitly provided
use_strict = strict if strict is not None else self.strict

# If strict mode is enabled, add strict: true to each tool
if use_strict:
    from langchain_core.utils.function_calling import convert_to_openai_tool

    formatted_tools = []
    for tool in tools:
        # Convert non-dict tools (e.g. Pydantic models) to OpenAI format
        if not isinstance(tool, dict):
            tool_dict = convert_to_openai_tool(tool)
        else:
            # Copy the top level and the nested function dict so the
            # caller's original tool definition is not mutated
            tool_dict = {**tool}
            if "function" in tool_dict:
                tool_dict["function"] = {**tool_dict["function"]}

        # Add strict: true to the function definition
        if "function" in tool_dict:
            tool_dict["function"]["strict"] = True

        formatted_tools.append(tool_dict)

    tools = formatted_tools

# Add strict to kwargs if it's being used
if use_strict is not None:
kwargs["strict"] = use_strict

return super().bind_tools(
tools,
tool_choice=tool_choice,
**kwargs,
)

def _get_request_payload(
self,
input_: LanguageModelInput,
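For context, a minimal usage sketch of the new strict mode. This assumes DEEPSEEK_API_KEY is set in the environment; the model name "deepseek-chat" and the GetWeather schema are illustrative, not part of this diff:

from pydantic import BaseModel, Field

from langchain_deepseek import ChatDeepSeek


class GetWeather(BaseModel):
    """Get the current weather in a given location."""

    location: str = Field(..., description="The city and state")


# strict=True routes requests to the Beta endpoint and stamps each bound
# tool with "strict": true, so tool arguments must match the JSON schema.
llm = ChatDeepSeek(model="deepseek-chat", strict=True)
llm_with_tools = llm.bind_tools([GetWeather])
result = llm_with_tools.invoke("What's the weather in San Francisco, CA?")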
82 changes: 82 additions & 0 deletions libs/partners/deepseek/tests/unit_tests/test_chat_models.py
@@ -311,3 +311,85 @@ def test_create_chat_result_with_model_provider_multiple_generations(
assert (
generation.message.response_metadata.get("model_provider") == "deepseek"
)


class TestChatDeepSeekStrictMode:
"""Test strict mode functionality."""

def test_strict_mode_uses_beta_api(self) -> None:
"""Test that strict mode switches to Beta API endpoint."""
model = ChatDeepSeek(
model=MODEL_NAME,
api_key=SecretStr("test-key"),
strict=True,
)

# Check that the client uses the beta endpoint
assert str(model.root_client.base_url) == "https://api.deepseek.com/beta/"

def test_strict_mode_disabled_uses_default_api(self) -> None:
"""Test that without strict mode, default API is used."""
model = ChatDeepSeek(
model=MODEL_NAME,
api_key=SecretStr("test-key"),
strict=False,
)

# Check that the client uses the default endpoint
assert str(model.root_client.base_url) == "https://api.deepseek.com/v1/"

def test_strict_mode_none_uses_default_api(self) -> None:
"""Test that strict=None uses default API."""
model = ChatDeepSeek(
model=MODEL_NAME,
api_key=SecretStr("test-key"),
)

# Check that the client uses the default endpoint
assert str(model.root_client.base_url) == "https://api.deepseek.com/v1/"

def test_bind_tools_with_strict_mode(self) -> None:
"""Test that bind_tools adds strict to tool definitions."""
from pydantic import BaseModel, Field

class GetWeather(BaseModel):
"""Get the current weather in a given location."""
location: str = Field(..., description="The city and state")

model = ChatDeepSeek(
model=MODEL_NAME,
api_key=SecretStr("test-key"),
strict=True,
)

# Bind tools
model_with_tools = model.bind_tools([GetWeather])

# Check that tools were bound
assert "tools" in model_with_tools.kwargs

# Verify that tools have strict property set
tools = model_with_tools.kwargs["tools"]
assert len(tools) > 0
assert tools[0]["function"]["strict"] is True

def test_bind_tools_override_strict(self) -> None:
"""Test that bind_tools can override instance strict setting."""
from pydantic import BaseModel, Field

class GetWeather(BaseModel):
"""Get the current weather in a given location."""
location: str = Field(..., description="The city and state")

model = ChatDeepSeek(
model=MODEL_NAME,
api_key=SecretStr("test-key"),
strict=False,
)

# Override with strict=True in bind_tools
model_with_tools = model.bind_tools([GetWeather], strict=True)

# Check that strict was passed to kwargs
assert "tools" in model_with_tools.kwargs
tools = model_with_tools.kwargs["tools"]
assert tools[0]["function"]["strict"] is True
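For reference, when strict mode is on, each bound tool sent to the API should carry a "strict" flag. A sketch of the expected wire shape, assuming the standard OpenAI tool format produced by convert_to_openai_tool (the exact parameter schema is illustrative):

# Hypothetical wire format for a strict tool definition (illustrative):
expected_tool = {
    "type": "function",
    "function": {
        "name": "GetWeather",
        "description": "Get the current weather in a given location.",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state",
                }
            },
            "required": ["location"],
        },
        "strict": True,  # added by bind_tools when strict mode is enabled
    },
}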
43 changes: 31 additions & 12 deletions libs/partners/openai/langchain_openai/chat_models/base.py
@@ -1268,31 +1268,53 @@
generation_info: dict | None = None,
) -> ChatResult:
generations = []

response_dict = (
response if isinstance(response, dict) else response.model_dump()
)

# Handle response serialization more robustly for non-OpenAI APIs
if isinstance(response, dict):
    response_dict = response
else:
    try:
        response_dict = response.model_dump()
    except Exception as e:
        # Fallback: some OpenAI-compatible servers return objects whose
        # model_dump() fails but which still serialize via model_dump_json()
        if not hasattr(response, "model_dump_json"):
            raise
        try:
            import json

            response_dict = json.loads(response.model_dump_json())
        except Exception:
            # If the fallback also fails, surface the original error
            raise e

# Sometimes the model call fails and the API returns an error payload;
# raise it here (such errors are typically accompanied by a null value
# for `choices`, which is raised for separately below).
if response_dict.get("error"):
raise ValueError(response_dict.get("error"))

# Raise informative error messages for non-OpenAI chat completions APIs
# that return malformed responses.
try:
choices = response_dict["choices"]
except KeyError as e:
msg = f"Response missing `choices` key: {response_dict.keys()}"
raise KeyError(msg) from e

if choices is None:
msg = "Received response with null value for `choices`."
# Provide more debugging info for non-OpenAI APIs
msg = (
    "Received response with null value for `choices`. "
    f"Response keys: {list(response_dict.keys())}. "
    "This may indicate an incompatibility with the API endpoint. "
    f"Raw response type: {type(response).__name__}"
)
raise TypeError(msg)

token_usage = response_dict.get("usage")
service_tier = response_dict.get("service_tier")

for res in choices:
message = _convert_dict_to_message(res["message"])
if token_usage and isinstance(message, AIMessage):
@@ -1319,7 +1341,6 @@
llm_output["id"] = response_dict["id"]
if service_tier:
llm_output["service_tier"] = service_tier

if isinstance(response, openai.BaseModel) and getattr(
response, "choices", None
):
@@ -1328,15 +1349,13 @@
generations[0].message.additional_kwargs["parsed"] = message.parsed
if hasattr(message, "refusal"):
generations[0].message.additional_kwargs["refusal"] = message.refusal

return ChatResult(generations=generations, llm_output=llm_output)

async def _astream(
self,
messages: list[BaseMessage],
stop: list[str] | None = None,
run_manager: AsyncCallbackManagerForLLMRun | None = None,
*,
stream_usage: bool | None = None,
**kwargs: Any,
) -> AsyncIterator[ChatGenerationChunk]:
@@ -1347,7 +1366,7 @@
payload = self._get_request_payload(messages, stop=stop, **kwargs)
default_chunk_class: type[BaseMessageChunk] = AIMessageChunk
base_generation_info = {}

if "response_format" in payload:
if self.include_response_headers:
warnings.warn(
@@ -1355,13 +1374,13 @@
"specified."
)
payload.pop("stream")
response_stream = self.root_async_client.beta.chat.completions.stream(
    **payload
)
context_manager = response_stream
else:
if self.include_response_headers:
raw_response = await self.async_client.with_raw_response.create(
    **payload
)
response = raw_response.parse()
@@ -1369,7 +1388,7 @@
else:
response = await self.async_client.create(**payload)
context_manager = response
try:
    async with context_manager as response:
is_first_chunk = True
async for chunk in response:
@@ -1380,7 +1399,7 @@
default_chunk_class,
base_generation_info if is_first_chunk else {},
)
if generation_chunk is None:
    continue
default_chunk_class = generation_chunk.message.__class__
logprobs = (generation_chunk.generation_info or {}).get("logprobs")
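As a sanity check on the new fallback path, a minimal sketch; FakeResponse is a hypothetical stand-in for an SDK object whose model_dump() fails but which still serializes via model_dump_json():

import json


class FakeResponse:
    """Hypothetical stand-in: model_dump() raises, JSON serialization works."""

    def model_dump(self) -> dict:
        raise RuntimeError("cannot serialize directly")

    def model_dump_json(self) -> str:
        return json.dumps({"choices": [], "id": "chatcmpl-0", "model": "m"})


response = FakeResponse()
try:
    response_dict = response.model_dump()
except Exception:
    # Same recovery _create_chat_result now attempts before giving up
    response_dict = json.loads(response.model_dump_json())

assert response_dict["id"] == "chatcmpl-0"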
70 changes: 70 additions & 0 deletions libs/partners/openai/tests/unit_tests/chat_models/test_base.py
@@ -2997,3 +2997,73 @@ def test_gpt_5_temperature(use_responses_api: bool) -> None:
messages = [HumanMessage(content="Hello")]
payload = llm._get_request_payload(messages)
assert payload["temperature"] == 0.5 # gpt-5-chat is exception


def test_vllm_response_with_valid_choices() -> None:
"""Test that vLLM-style responses with valid choices don't raise null error.

This tests the fix for issue #32252 where vLLM responses were incorrectly
identified as having null choices.
"""
from langchain_openai import ChatOpenAI

# Simulate a vLLM-style response (as a dict)
vllm_response = {
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": None,
"message": {
"content": "Test response content",
"role": "assistant",
"tool_calls": []
},
"stop_reason": None
}
],
"created": 1753518740,
"id": "chatcmpl-test123",
"model": "test-model",
"object": "chat.completion",
"usage": {
"completion_tokens": 10,
"prompt_tokens": 20,
"total_tokens": 30
}
}

llm = ChatOpenAI(model="gpt-3.5-turbo", api_key="test")

# This should not raise "Received response with null value for choices"
result = llm._create_chat_result(vllm_response)

assert result is not None
assert len(result.generations) == 1
assert result.generations[0].message.content == "Test response content"
assert result.llm_output["token_usage"]["total_tokens"] == 30


def test_improved_null_choices_error_message() -> None:
"""Test that the improved error message provides better debugging info."""
from langchain_openai import ChatOpenAI
import pytest

# Create a response with null choices
bad_response = {
"choices": None,
"created": 1753518740,
"id": "chatcmpl-test123",
"model": "test-model",
}

llm = ChatOpenAI(model="gpt-3.5-turbo", api_key="test")

# Should raise TypeError with improved message
with pytest.raises(TypeError) as exc_info:
llm._create_chat_result(bad_response)

error_msg = str(exc_info.value)
# Check that the improved error message contains debugging info
assert "Response keys:" in error_msg
assert "Raw response type:" in error_msg
assert "incompatibility with the API endpoint" in error_msg