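Improve test coverage for LLM utils and base modules

Besides the new tests, this commit changes runtime behaviour: list message
histories are now validated with a pydantic TypeAdapter in
legacy_inputs_to_messages, and the base LLM invoke / invoke_with_tools
methods re-raise a pydantic ValidationError as
LLMGenerationError("Input validation failed").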

stellasia · stellasia · commit 02e99d424070 · 2025-09-14T11:49:42.000+02:00
diff --git a/src/neo4j_graphrag/llm/base.py b/src/neo4j_graphrag/llm/base.py
@@ -17,6 +17,8 @@
 from abc import ABC, abstractmethod
 from typing import Any, List, Optional, Sequence, Union
 
+from pydantic import ValidationError
+
 from neo4j_graphrag.message_history import MessageHistory
 from neo4j_graphrag.types import LLMMessage
 from .rate_limit import rate_limit_handler
@@ -31,6 +33,7 @@
 
 from .rate_limit import RateLimitHandler
 from .utils import legacy_inputs_to_messages
+from ..exceptions import LLMGenerationError
 
 
 class LLMInterface(ABC):
@@ -65,7 +68,12 @@ def invoke(
         message_history: Optional[Union[List[LLMMessage], MessageHistory]] = None,
         system_instruction: Optional[str] = None,
     ) -> LLMResponse:
-        messages = legacy_inputs_to_messages(input, message_history, system_instruction)
+        try:
+            messages = legacy_inputs_to_messages(
+                input, message_history, system_instruction
+            )
+        except ValidationError as e:
+            raise LLMGenerationError("Input validation failed") from e
         return self._invoke(messages)
 
     @abstractmethod
@@ -138,7 +146,12 @@ def invoke_with_tools(
             LLMGenerationError: If anything goes wrong.
             NotImplementedError: If the LLM provider does not support tool calling.
         """
-        messages = legacy_inputs_to_messages(input, message_history, system_instruction)
+        try:
+            messages = legacy_inputs_to_messages(
+                input, message_history, system_instruction
+            )
+        except ValidationError as e:
+            raise LLMGenerationError("Input validation failed") from e
         return self._invoke_with_tools(messages, tools)
 
     def _invoke_with_tools(
diff --git a/src/neo4j_graphrag/llm/utils.py b/src/neo4j_graphrag/llm/utils.py
@@ -1,6 +1,22 @@
+#  Copyright (c) "Neo4j"
+#  Neo4j Sweden AB [https://neo4j.com]
+#  #
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#  #
+#      https://www.apache.org/licenses/LICENSE-2.0
+#  #
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
 import warnings
 from typing import Union, Optional
 
+from pydantic import TypeAdapter
+
 from neo4j_graphrag.message_history import MessageHistory
 from neo4j_graphrag.types import LLMMessage
 
@@ -12,6 +28,9 @@ def system_instruction_from_messages(messages: list[LLMMessage]) -> str | None:
     return None
 
 
+llm_messages_adapter = TypeAdapter(list[LLMMessage])
+
+
 def legacy_inputs_to_messages(
     input: Union[str, list[LLMMessage], MessageHistory],
     message_history: Optional[Union[list[LLMMessage], MessageHistory]] = None,
@@ -21,7 +40,7 @@ def legacy_inputs_to_messages(
         if isinstance(message_history, MessageHistory):
             messages = message_history.messages
         else:  # list[LLMMessage]
-            messages = [LLMMessage(**m) for m in message_history]
+            messages = llm_messages_adapter.validate_python(message_history)
     else:
         messages = []
     if system_instruction is not None:
diff --git a/tests/unit/llm/test_base.py b/tests/unit/llm/test_base.py
@@ -1,8 +1,11 @@
-from typing import Type, Generator, Optional, Any
+from typing import Type, Generator
 from unittest.mock import patch, Mock
 
+import pytest
 from joblib.testing import fixture
+from pydantic import ValidationError
 
+from neo4j_graphrag.exceptions import LLMGenerationError
 from neo4j_graphrag.llm import LLMInterface
 from neo4j_graphrag.types import LLMMessage
 
@@ -21,11 +24,20 @@ class CustomLLMInterface(LLMInterface):
 
 
 @patch("neo4j_graphrag.llm.base.legacy_inputs_to_messages")
-def test_base_llm_interface_invoke_with_input_as_str(mock_inputs: Mock, llm_interface: Type[LLMInterface]) -> None:
-    mock_inputs.return_value = [LLMMessage(role="user", content="return value of the legacy_inputs_to_messages function")]
+def test_base_llm_interface_invoke_with_input_as_str(
+    mock_inputs: Mock, llm_interface: Type[LLMInterface]
+) -> None:
+    mock_inputs.return_value = [
+        LLMMessage(
+            role="user",
+            content="return value of the legacy_inputs_to_messages function",
+        )
+    ]
     llm = llm_interface(model_name="test")
     message_history = [
-        LLMMessage(**{"role": "user", "content": "When does the sun come up in the summer?"}),
+        LLMMessage(
+            **{"role": "user", "content": "When does the sun come up in the summer?"}
+        ),
         LLMMessage(**{"role": "assistant", "content": "Usually around 6am."}),
     ]
     question = "What about next season?"
@@ -34,10 +46,91 @@ def test_base_llm_interface_invoke_with_input_as_str(mock_inputs: Mock, llm_inte
     with patch.object(llm, "_invoke") as mock_invoke:
         llm.invoke(question, message_history, system_instruction)
         mock_invoke.assert_called_once_with(
-            [LLMMessage(role="user", content="return value of the legacy_inputs_to_messages function")]
+            [
+                LLMMessage(
+                    role="user",
+                    content="return value of the legacy_inputs_to_messages function",
+                )
+            ]
         )
     mock_inputs.assert_called_once_with(
         question,
         message_history,
         system_instruction,
     )
+
+
+@patch("neo4j_graphrag.llm.base.legacy_inputs_to_messages")
+def test_base_llm_interface_invoke_with_invalid_inputs(
+    mock_inputs: Mock, llm_interface: Type[LLMInterface]
+) -> None:
+    mock_inputs.side_effect = [
+        ValidationError.from_exception_data("Invalid data", line_errors=[])
+    ]
+    llm = llm_interface(model_name="test")
+    question = "What about next season?"
+
+    with pytest.raises(LLMGenerationError, match="Input validation failed"):
+        llm.invoke(question)
+    mock_inputs.assert_called_once_with(
+        question,
+        None,
+        None,
+    )
+
+
+@patch("neo4j_graphrag.llm.base.legacy_inputs_to_messages")
+def test_base_llm_interface_invoke_with_tools_with_input_as_str(
+    mock_inputs: Mock, llm_interface: Type[LLMInterface]
+) -> None:
+    mock_inputs.return_value = [
+        LLMMessage(
+            role="user",
+            content="return value of the legacy_inputs_to_messages function",
+        )
+    ]
+    llm = llm_interface(model_name="test")
+    message_history = [
+        LLMMessage(
+            **{"role": "user", "content": "When does the sun come up in the summer?"}
+        ),
+        LLMMessage(**{"role": "assistant", "content": "Usually around 6am."}),
+    ]
+    question = "What about next season?"
+    system_instruction = "You are a genius."
+
+    with patch.object(llm, "_invoke_with_tools") as mock_invoke:
+        llm.invoke_with_tools(question, [], message_history, system_instruction)
+        mock_invoke.assert_called_once_with(
+            [
+                LLMMessage(
+                    role="user",
+                    content="return value of the legacy_inputs_to_messages function",
+                )
+            ],
+            [],  # tools
+        )
+    mock_inputs.assert_called_once_with(
+        question,
+        message_history,
+        system_instruction,
+    )
+
+
+@patch("neo4j_graphrag.llm.base.legacy_inputs_to_messages")
+def test_base_llm_interface_invoke_with_tools_with_invalid_inputs(
+    mock_inputs: Mock, llm_interface: Type[LLMInterface]
+) -> None:
+    mock_inputs.side_effect = [
+        ValidationError.from_exception_data("Invalid data", line_errors=[])
+    ]
+    llm = llm_interface(model_name="test")
+    question = "What about next season?"
+
+    with pytest.raises(LLMGenerationError, match="Input validation failed"):
+        llm.invoke_with_tools(question, [])
+    mock_inputs.assert_called_once_with(
+        question,
+        None,
+        None,
+    )
diff --git a/tests/unit/llm/test_utils.py b/tests/unit/llm/test_utils.py
@@ -1,12 +1,29 @@
+#  Copyright (c) "Neo4j"
+#  Neo4j Sweden AB [https://neo4j.com]
+#  #
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#  #
+#      https://www.apache.org/licenses/LICENSE-2.0
+#  #
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
 import pytest
+from pydantic import ValidationError
 
-from neo4j_graphrag.llm.utils import system_instruction_from_messages, \
-    legacy_inputs_to_messages
+from neo4j_graphrag.llm.utils import (
+    system_instruction_from_messages,
+    legacy_inputs_to_messages,
+)
 from neo4j_graphrag.message_history import InMemoryMessageHistory
 from neo4j_graphrag.types import LLMMessage
 
 
-def test_system_instruction_from_messages():
+def test_system_instruction_from_messages() -> None:
     messages = [
         LLMMessage(role="system", content="text"),
     ]
@@ -21,66 +38,76 @@ def test_system_instruction_from_messages():
     assert system_instruction_from_messages(messages) is None
 
 
-def test_legacy_inputs_to_messages_only_input_as_llm_message_list():
-    messages = legacy_inputs_to_messages(input=[
-        LLMMessage(role="user", content="text"),
-    ])
+def test_legacy_inputs_to_messages_only_input_as_llm_message_list() -> None:
+    messages = legacy_inputs_to_messages(
+        input=[
+            LLMMessage(role="user", content="text"),
+        ]
+    )
     assert messages == [
         LLMMessage(role="user", content="text"),
     ]
 
 
-def test_legacy_inputs_to_messages_only_input_as_message_history():
-    messages = legacy_inputs_to_messages(input=InMemoryMessageHistory(
-        messages=[
-            LLMMessage(role="user", content="text"),
-        ]
-    ))
+def test_legacy_inputs_to_messages_only_input_as_message_history() -> None:
+    messages = legacy_inputs_to_messages(
+        input=InMemoryMessageHistory(
+            messages=[
+                LLMMessage(role="user", content="text"),
+            ]
+        )
+    )
     assert messages == [
         LLMMessage(role="user", content="text"),
     ]
 
 
-def test_legacy_inputs_to_messages_only_input_as_str():
+def test_legacy_inputs_to_messages_only_input_as_str() -> None:
     messages = legacy_inputs_to_messages(input="text")
     assert messages == [
         LLMMessage(role="user", content="text"),
     ]
 
 
-def test_legacy_inputs_to_messages_input_as_str_and_message_history_as_llm_message_list():
+def test_legacy_inputs_to_messages_input_as_str_and_message_history_as_llm_message_list() -> (
+    None
+):
     messages = legacy_inputs_to_messages(
         input="text",
         message_history=[
             LLMMessage(role="assistant", content="How can I assist you today?"),
-        ]
+        ],
     )
     assert messages == [
         LLMMessage(role="assistant", content="How can I assist you today?"),
         LLMMessage(role="user", content="text"),
     ]
 
 
-def test_legacy_inputs_to_messages_input_as_str_and_message_history_as_message_history():
+def test_legacy_inputs_to_messages_input_as_str_and_message_history_as_message_history() -> (
+    None
+):
     messages = legacy_inputs_to_messages(
         input="text",
-        message_history=InMemoryMessageHistory(messages=[
-            LLMMessage(role="assistant", content="How can I assist you today?"),
-        ])
+        message_history=InMemoryMessageHistory(
+            messages=[
+                LLMMessage(role="assistant", content="How can I assist you today?"),
+            ]
+        ),
     )
     assert messages == [
         LLMMessage(role="assistant", content="How can I assist you today?"),
         LLMMessage(role="user", content="text"),
     ]
 
 
-def test_legacy_inputs_to_messages_with_explicit_system_instruction():
+def test_legacy_inputs_to_messages_with_explicit_system_instruction() -> None:
     messages = legacy_inputs_to_messages(
         input="text",
         message_history=[
             LLMMessage(role="assistant", content="How can I assist you today?"),
         ],
-        system_instruction="You are a genius."
+        system_instruction="You are a genius.",
     )
     assert messages == [
         LLMMessage(role="system", content="You are a genius."),
@@ -89,19 +116,29 @@ def test_legacy_inputs_to_messages_with_explicit_system_instruction():
     ]
 
 
-def test_legacy_inputs_to_messages_do_not_duplicate_system_instruction():
+def test_legacy_inputs_to_messages_do_not_duplicate_system_instruction() -> None:
     with pytest.warns(
         UserWarning,
-        match="system_instruction provided but ignored as the message history already contains a system message"
+        match="system_instruction provided but ignored as the message history already contains a system message",
     ):
         messages = legacy_inputs_to_messages(
             input="text",
             message_history=[
                 LLMMessage(role="system", content="You are super smart."),
             ],
-            system_instruction="You are a genius."
+            system_instruction="You are a genius.",
         )
         assert messages == [
             LLMMessage(role="system", content="You are super smart."),
             LLMMessage(role="user", content="text"),
         ]
+
+
+def test_legacy_inputs_to_messages_wrong_type_in_message_list() -> None:
+    with pytest.raises(ValidationError, match="Input should be a valid string"):
+        legacy_inputs_to_messages(
+            input="text",
+            message_history=[
+                {"role": "system", "content": 10},  # type: ignore
+            ],
+        )