Commit 3bd5480

Merge branch 'main' of github.com:openai/openai-guardrails-python into dev/steven/safety_header
2 parents: 1da2ecc + b2d7a81

22 files changed: +3805 additions, −3 deletions

pyproject.toml

Lines changed: 17 additions & 0 deletions
@@ -58,6 +58,7 @@ dev = [
     "pymdown-extensions>=10.0.0",
     "coverage>=7.8.0",
     "hypothesis>=6.131.20",
+    "pytest-cov>=6.3.0",
 ]
 
 [tool.uv.workspace]
@@ -103,8 +104,24 @@ convention = "google"
 [tool.ruff.format]
 docstring-code-format = true
 
+[tool.coverage.run]
+source = ["guardrails"]
+omit = [
+    "src/guardrails/evals/*",
+]
+
 [tool.mypy]
 strict = true
 disallow_incomplete_defs = false
 disallow_untyped_defs = false
 disallow_untyped_calls = false
+exclude = [
+    "examples",
+    "src/guardrails/evals",
+]
+
+[tool.pyright]
+ignore = [
+    "examples",
+    "src/guardrails/evals",
+]

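For orientation, the new [tool.coverage.run] table is standard coverage.py configuration, which pytest-cov reads when coverage is requested. A minimal sketch of the equivalent programmatic setup using the upstream coverage.py API (this script is illustrative, not part of the commit):

import coverage

# Mirrors the [tool.coverage.run] table above: measure the guardrails package
# and skip the evals tree.
cov = coverage.Coverage(source=["guardrails"], omit=["src/guardrails/evals/*"])
cov.start()
import guardrails  # code imported/executed while coverage is active gets measured
cov.stop()
cov.report()
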
tests/conftest.py

Lines changed: 129 additions & 0 deletions
@@ -0,0 +1,129 @@
"""Shared pytest fixtures for guardrails tests.

These fixtures provide deterministic test environments by stubbing the OpenAI
client library, seeding environment variables, and preventing accidental live
network activity during the suite.
"""

from __future__ import annotations

import logging
import sys
import types
from collections.abc import Iterator
from dataclasses import dataclass
from types import SimpleNamespace
from typing import Any

import pytest


class _StubOpenAIBase:
    """Base stub with attribute bag behaviour for OpenAI client classes."""

    def __init__(self, **kwargs: Any) -> None:
        self._client_kwargs = kwargs
        self.chat = SimpleNamespace()
        self.responses = SimpleNamespace()
        self.api_key = kwargs.get("api_key", "test-key")
        self.base_url = kwargs.get("base_url")
        self.organization = kwargs.get("organization")
        self.timeout = kwargs.get("timeout")
        self.max_retries = kwargs.get("max_retries")

    def __getattr__(self, item: str) -> Any:
        """Return None for unknown attributes to emulate real client laziness."""
        return None


class _StubAsyncOpenAI(_StubOpenAIBase):
    """Stub asynchronous OpenAI client."""


class _StubSyncOpenAI(_StubOpenAIBase):
    """Stub synchronous OpenAI client."""


@dataclass(frozen=True, slots=True)
class _DummyResponse:
    """Minimal response type with choices and output."""

    choices: list[Any] | None = None
    output: list[Any] | None = None
    output_text: str | None = None
    type: str | None = None
    delta: str | None = None


_STUB_OPENAI_MODULE = types.ModuleType("openai")
_STUB_OPENAI_MODULE.AsyncOpenAI = _StubAsyncOpenAI
_STUB_OPENAI_MODULE.OpenAI = _StubSyncOpenAI
_STUB_OPENAI_MODULE.AsyncAzureOpenAI = _StubAsyncOpenAI
_STUB_OPENAI_MODULE.AzureOpenAI = _StubSyncOpenAI
_STUB_OPENAI_MODULE.NOT_GIVEN = object()


class APITimeoutError(Exception):
    """Stub API timeout error."""


_STUB_OPENAI_MODULE.APITimeoutError = APITimeoutError

_OPENAI_TYPES_MODULE = types.ModuleType("openai.types")
_OPENAI_TYPES_MODULE.Completion = _DummyResponse
_OPENAI_TYPES_MODULE.Response = _DummyResponse

_OPENAI_CHAT_MODULE = types.ModuleType("openai.types.chat")
_OPENAI_CHAT_MODULE.ChatCompletion = _DummyResponse
_OPENAI_CHAT_MODULE.ChatCompletionChunk = _DummyResponse

_OPENAI_RESPONSES_MODULE = types.ModuleType("openai.types.responses")
_OPENAI_RESPONSES_MODULE.Response = _DummyResponse
_OPENAI_RESPONSES_MODULE.ResponseInputItemParam = dict  # type: ignore[attr-defined]
_OPENAI_RESPONSES_MODULE.ResponseOutputItem = dict  # type: ignore[attr-defined]
_OPENAI_RESPONSES_MODULE.ResponseStreamEvent = dict  # type: ignore[attr-defined]


_OPENAI_RESPONSES_RESPONSE_MODULE = types.ModuleType("openai.types.responses.response")
_OPENAI_RESPONSES_RESPONSE_MODULE.Response = _DummyResponse


class _ResponseTextConfigParam(dict):
    """Stub config param used for response formatting."""


_OPENAI_RESPONSES_MODULE.ResponseTextConfigParam = _ResponseTextConfigParam

sys.modules["openai"] = _STUB_OPENAI_MODULE
sys.modules["openai.types"] = _OPENAI_TYPES_MODULE
sys.modules["openai.types.chat"] = _OPENAI_CHAT_MODULE
sys.modules["openai.types.responses"] = _OPENAI_RESPONSES_MODULE
sys.modules["openai.types.responses.response"] = _OPENAI_RESPONSES_RESPONSE_MODULE


@pytest.fixture(autouse=True)
def stub_openai_module(monkeypatch: pytest.MonkeyPatch) -> Iterator[types.ModuleType]:
    """Provide stub OpenAI module so tests avoid real network-bound clients."""
    # Patch imported symbols in guardrails modules
    from guardrails import _base_client, client, types as guardrail_types  # type: ignore

    monkeypatch.setattr(_base_client, "AsyncOpenAI", _StubAsyncOpenAI, raising=False)
    monkeypatch.setattr(_base_client, "OpenAI", _StubSyncOpenAI, raising=False)
    monkeypatch.setattr(client, "AsyncOpenAI", _StubAsyncOpenAI, raising=False)
    monkeypatch.setattr(client, "OpenAI", _StubSyncOpenAI, raising=False)
    monkeypatch.setattr(client, "AsyncAzureOpenAI", _StubAsyncOpenAI, raising=False)
    monkeypatch.setattr(client, "AzureOpenAI", _StubSyncOpenAI, raising=False)
    monkeypatch.setattr(guardrail_types, "AsyncOpenAI", _StubAsyncOpenAI, raising=False)
    monkeypatch.setattr(guardrail_types, "OpenAI", _StubSyncOpenAI, raising=False)
    monkeypatch.setattr(guardrail_types, "AsyncAzureOpenAI", _StubAsyncOpenAI, raising=False)
    monkeypatch.setattr(guardrail_types, "AzureOpenAI", _StubSyncOpenAI, raising=False)

    monkeypatch.setenv("OPENAI_API_KEY", "test-key")

    yield _STUB_OPENAI_MODULE


@pytest.fixture(autouse=True)
def configure_logging() -> None:
    """Ensure logging defaults to DEBUG for deterministic assertions."""
    logging.basicConfig(level=logging.DEBUG)

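A minimal sketch (a hypothetical test file, not one added by this commit) of what these autouse fixtures give every test for free: `import openai` resolves to the injected stub module, client construction stays offline, and OPENAI_API_KEY is pre-seeded:

import os

import openai  # resolves to _STUB_OPENAI_MODULE via the sys.modules injection above


def test_stub_client_never_touches_the_network() -> None:
    client = openai.AsyncOpenAI(api_key="anything")
    # The stub only records constructor kwargs instead of opening connections.
    assert client.api_key == "anything"
    assert os.environ["OPENAI_API_KEY"] == "test-key"
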
tests/unit/checks/test_keywords.py

Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
"""Tests for keyword-based guardrail helpers."""

from __future__ import annotations

import pytest
from pydantic import ValidationError

from guardrails.checks.text.competitors import CompetitorCfg, competitors
from guardrails.checks.text.keywords import KeywordCfg, keywords, match_keywords
from guardrails.types import GuardrailResult


def test_match_keywords_sanitizes_trailing_punctuation() -> None:
    """Ensure keyword sanitization strips trailing punctuation before matching."""
    config = KeywordCfg(keywords=["token.", "secret!", "KEY?"])
    result = match_keywords("Leaked token appears here.", config, guardrail_name="Test Guardrail")

    assert result.tripwire_triggered is True  # noqa: S101
    assert result.info["sanitized_keywords"] == ["token", "secret", "KEY"]  # noqa: S101
    assert result.info["matched"] == ["token"]  # noqa: S101
    assert result.info["guardrail_name"] == "Test Guardrail"  # noqa: S101
    assert result.info["checked_text"] == "Leaked token appears here."  # noqa: S101


def test_match_keywords_deduplicates_case_insensitive_matches() -> None:
    """Repeated matches differing by case should be deduplicated."""
    config = KeywordCfg(keywords=["Alert"])
    result = match_keywords("alert ALERT Alert", config, guardrail_name="Keyword Filter")

    assert result.tripwire_triggered is True  # noqa: S101
    assert result.info["matched"] == ["alert"]  # noqa: S101


@pytest.mark.asyncio
async def test_keywords_guardrail_wraps_match_keywords() -> None:
    """Async guardrail should mirror match_keywords behaviour."""
    config = KeywordCfg(keywords=["breach"])
    result = await keywords(ctx=None, data="Potential breach detected", config=config)

    assert isinstance(result, GuardrailResult)  # noqa: S101
    assert result.tripwire_triggered is True  # noqa: S101
    assert result.info["guardrail_name"] == "Keyword Filter"  # noqa: S101


@pytest.mark.asyncio
async def test_competitors_uses_keyword_matching() -> None:
    """Competitors guardrail delegates to keyword matching with a distinct name."""
    config = CompetitorCfg(keywords=["ACME Corp"])
    result = await competitors(ctx=None, data="Comparing against ACME Corp today", config=config)

    assert result.tripwire_triggered is True  # noqa: S101
    assert result.info["guardrail_name"] == "Competitors"  # noqa: S101
    assert result.info["matched"] == ["ACME Corp"]  # noqa: S101


def test_keyword_cfg_requires_non_empty_keywords() -> None:
    """KeywordCfg should enforce at least one keyword."""
    with pytest.raises(ValidationError):
        KeywordCfg(keywords=[])


@pytest.mark.asyncio
async def test_keywords_does_not_trigger_on_benign_text() -> None:
    """Guardrail should not trigger when no keywords are present."""
    config = KeywordCfg(keywords=["restricted"])
    result = await keywords(ctx=None, data="Safe content", config=config)

    assert result.tripwire_triggered is False  # noqa: S101

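Based only on the call patterns exercised above, a usage sketch of the keyword-matching API under test; the keyword list, text, and "Demo Filter" name are illustrative:

from guardrails.checks.text.keywords import KeywordCfg, match_keywords

config = KeywordCfg(keywords=["token.", "secret!"])  # trailing punctuation is stripped before matching
result = match_keywords("Leaked token appears here.", config, guardrail_name="Demo Filter")

if result.tripwire_triggered:
    print(result.info["matched"])  # e.g. ["token"], deduplicated case-insensitively
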
tests/unit/checks/test_llm_base.py

Lines changed: 158 additions & 0 deletions
@@ -0,0 +1,158 @@
"""Tests for LLM-based guardrail helpers."""

from __future__ import annotations

from types import SimpleNamespace
from typing import Any

import pytest

from guardrails.checks.text import llm_base
from guardrails.checks.text.llm_base import (
    LLMConfig,
    LLMErrorOutput,
    LLMOutput,
    _build_full_prompt,
    _strip_json_code_fence,
    create_llm_check_fn,
    run_llm,
)
from guardrails.types import GuardrailResult


class _FakeCompletions:
    def __init__(self, content: str | None) -> None:
        self._content = content

    async def create(self, **kwargs: Any) -> Any:
        _ = kwargs
        return SimpleNamespace(choices=[SimpleNamespace(message=SimpleNamespace(content=self._content))])


class _FakeAsyncClient:
    def __init__(self, content: str | None) -> None:
        self.chat = SimpleNamespace(completions=_FakeCompletions(content))


def test_strip_json_code_fence_removes_wrapping() -> None:
    """Valid JSON code fences should be removed."""
    fenced = """```json
{"flagged": false, "confidence": 0.2}
```"""
    assert _strip_json_code_fence(fenced) == '{"flagged": false, "confidence": 0.2}'  # noqa: S101


def test_build_full_prompt_includes_instructions() -> None:
    """Generated prompt should embed system instructions and schema guidance."""
    prompt = _build_full_prompt("Analyze text")
    assert "Analyze text" in prompt  # noqa: S101
    assert "Respond with a json object" in prompt  # noqa: S101


@pytest.mark.asyncio
async def test_run_llm_returns_valid_output() -> None:
    """run_llm should parse the JSON response into the provided output model."""
    client = _FakeAsyncClient('{"flagged": true, "confidence": 0.9}')
    result = await run_llm(
        text="Sensitive text",
        system_prompt="Detect problems.",
        client=client,  # type: ignore[arg-type]
        model="gpt-test",
        output_model=LLMOutput,
    )
    assert isinstance(result, LLMOutput)  # noqa: S101
    assert result.flagged is True and result.confidence == 0.9  # noqa: S101


@pytest.mark.asyncio
async def test_run_llm_handles_content_filter_error(monkeypatch: pytest.MonkeyPatch) -> None:
    """Content filter errors should return LLMErrorOutput with flagged=True."""

    class _FailingClient:
        class _Chat:
            class _Completions:
                async def create(self, **kwargs: Any) -> Any:
                    raise RuntimeError("content_filter triggered by provider")

            completions = _Completions()

        chat = _Chat()

    result = await run_llm(
        text="Sensitive",
        system_prompt="Detect.",
        client=_FailingClient(),  # type: ignore[arg-type]
        model="gpt-test",
        output_model=LLMOutput,
    )

    assert isinstance(result, LLMErrorOutput)  # noqa: S101
    assert result.flagged is True  # noqa: S101
    assert result.info["third_party_filter"] is True  # noqa: S101


@pytest.mark.asyncio
async def test_create_llm_check_fn_triggers_on_confident_flag(monkeypatch: pytest.MonkeyPatch) -> None:
    """Generated guardrail function should trip when confidence exceeds the threshold."""

    async def fake_run_llm(
        text: str,
        system_prompt: str,
        client: Any,
        model: str,
        output_model: type[LLMOutput],
    ) -> LLMOutput:
        assert system_prompt == "Check with details"  # noqa: S101
        return LLMOutput(flagged=True, confidence=0.95)

    monkeypatch.setattr(llm_base, "run_llm", fake_run_llm)

    class DetailedConfig(LLMConfig):
        system_prompt_details: str = "details"

    guardrail_fn = create_llm_check_fn(
        name="HighConfidence",
        description="Test guardrail",
        system_prompt="Check with {system_prompt_details}",
        output_model=LLMOutput,
        config_model=DetailedConfig,
    )

    config = DetailedConfig(model="gpt-test", confidence_threshold=0.9)
    context = SimpleNamespace(guardrail_llm="fake-client")

    result = await guardrail_fn(context, "content", config)

    assert isinstance(result, GuardrailResult)  # noqa: S101
    assert result.tripwire_triggered is True  # noqa: S101
    assert result.info["threshold"] == 0.9  # noqa: S101


@pytest.mark.asyncio
async def test_create_llm_check_fn_handles_llm_error(monkeypatch: pytest.MonkeyPatch) -> None:
    """LLM error results should mark execution_failed without triggering the tripwire."""

    async def fake_run_llm(
        text: str,
        system_prompt: str,
        client: Any,
        model: str,
        output_model: type[LLMOutput],
    ) -> LLMErrorOutput:
        return LLMErrorOutput(flagged=False, confidence=0.0, info={"error_message": "timeout"})

    monkeypatch.setattr(llm_base, "run_llm", fake_run_llm)

    guardrail_fn = create_llm_check_fn(
        name="Resilient",
        description="Test guardrail",
        system_prompt="Prompt",
    )

    config = LLMConfig(model="gpt-test", confidence_threshold=0.5)
    context = SimpleNamespace(guardrail_llm="fake-client")
    result = await guardrail_fn(context, "text", config)

    assert result.tripwire_triggered is False  # noqa: S101
    assert result.execution_failed is True  # noqa: S101
    assert "timeout" in str(result.original_exception)  # noqa: S101

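Finally, a sketch of how a custom LLM-backed check could be assembled with create_llm_check_fn, using only the arguments these tests exercise; the "Profanity" name, prompt text, and client placeholder are illustrative assumptions, not code from the repo:

from types import SimpleNamespace

from guardrails.checks.text.llm_base import LLMConfig, LLMOutput, create_llm_check_fn

# Build an async guardrail function from a name, description, and system prompt.
profanity_check = create_llm_check_fn(
    name="Profanity",
    description="Flags profane or abusive wording",
    system_prompt="Flag profanity or abuse in the provided text.",
    output_model=LLMOutput,
)

config = LLMConfig(model="gpt-test", confidence_threshold=0.7)
context = SimpleNamespace(guardrail_llm=None)  # replace None with a real async OpenAI client

# Inside an async caller:
#     result = await profanity_check(context, "text to screen", config)
# result.tripwire_triggered should be True only when the model flags the text
# with confidence that clears the configured threshold.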