Skip to content

Commit d9569ba

Browse files
authored
Merge branch 'feat/toxicity-hub-validators' into feat/toxicity-huggingface-model
2 parents d7c5eba + 02fd043 commit d9569ba

File tree

4 files changed

+25
-12
lines changed

4 files changed

+25
-12
lines changed

backend/app/core/validators/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ Notes / limitations:
407407
```bash
408408
GUARDRAILS_HUB_API_KEY=<your-key> ENABLE_REMOTE_INFERENCING=true bash scripts/install_guardrails_from_hub.sh
409409
```
410-
- `on_fail=fix` behaves like `on_fail=exception` — LlamaGuard has no programmatic fix, so validation stops immediately on failure to prevent downstream validators from receiving `None` as input.
410+
- With `on_fail=fix`, a failed validation returns an empty string (`""`). LlamaGuard has no programmatic fix, so `safe_text` will be `""` and the response's `metadata.reason` will identify this validator as the cause.
411411
- LlamaGuard policy classification may produce false positives in news, clinical, or legal contexts.
412412

413413
### 9) NSFW Text Validator (`nsfw_text`)

backend/app/core/validators/config/base_validator_config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,14 @@ def _on_fix(self, value: str, fail_result: FailResult):
1919
fix_value = fail_result.fix_value if fail_result else None
2020
if not fix_value:
2121
self.validator_metadata = {
22+
<<<<<<< feat/toxicity-huggingface-model
2223
"reason": f"Empty string has been returned since the validation failed for: {self.type}"
2324
}
25+
=======
26+
"reason": f"Empty string has been returned since the validation failed for: {self.type}" # type: ignore[attr-defined]
27+
}
28+
return ""
29+
>>>>>>> feat/toxicity-hub-validators
2430
return fix_value
2531

2632
def resolve_on_fail(self):

backend/app/core/validators/config/llamaguard_7b_safety_validator_config.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from typing import List, Literal, Optional
22

3-
from guardrails import OnFailAction
43
from guardrails.hub import LlamaGuard7B
54

65
from app.core.enum import GuardrailOnFail
@@ -34,12 +33,7 @@ def _resolve_policies(self) -> Optional[List[str]]:
3433
return resolved
3534

3635
def build(self):
37-
on_fail = self.resolve_on_fail()
38-
# LlamaGuard7B has no programmatic fix. If on_fail=fix is requested,
39-
# fall back to exception so downstream validators don't receive None as input.
40-
if self.on_fail == GuardrailOnFail.Fix:
41-
on_fail = OnFailAction.EXCEPTION
4236
return LlamaGuard7B(
4337
policies=self._resolve_policies(),
44-
on_fail=on_fail, # type: ignore[arg-type]
38+
on_fail=self.resolve_on_fail(), # type: ignore[arg-type]
4539
)

backend/app/tests/test_toxicity_hub_validators.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,16 +107,29 @@ def test_build_returns_validator_instance(self):
107107

108108
assert result == mock_validator.return_value
109109

110-
def test_on_fail_fix_remaps_to_exception(self):
111-
# LlamaGuard has no programmatic fix; on_fail=fix is silently remapped to
112-
# exception to prevent downstream validators from receiving None as input.
110+
def test_on_fail_fix_resolves_to_callable(self):
113111
config = LlamaGuard7BSafetyValidatorConfig(type="llamaguard_7b", on_fail="fix")
114112

115113
with patch(_LLAMAGUARD_PATCH) as mock_validator:
116114
config.build()
117115

118116
_, kwargs = mock_validator.call_args
119-
assert kwargs["on_fail"] == OnFailAction.EXCEPTION
117+
assert callable(kwargs["on_fail"])
118+
119+
def test_on_fix_sets_validator_metadata_when_fix_value_empty(self):
120+
from unittest.mock import MagicMock
121+
from guardrails.validators import FailResult
122+
123+
config = LlamaGuard7BSafetyValidatorConfig(type="llamaguard_7b", on_fail="fix")
124+
fail_result = MagicMock(spec=FailResult)
125+
fail_result.fix_value = None
126+
127+
result = config._on_fix("some unsafe input", fail_result)
128+
129+
assert result == ""
130+
assert config.validator_metadata == {
131+
"reason": "Empty string has been returned since the validation failed for: llamaguard_7b"
132+
}
120133

121134
def test_on_fail_exception_resolves_to_exception_action(self):
122135
config = LlamaGuard7BSafetyValidatorConfig(

0 commit comments

Comments
 (0)