Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions config/testing/config.e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,11 @@ classifier:
use_cpu: true
category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model" # TODO: Use local model for now before the code can download the entire model from huggingface
use_modernbert: true
model_id: "models/lora_pii_detector_bert-base-uncased_model"
use_modernbert: false  # BERT-based LoRA model (this field is ignored; the model type is always auto-detected)
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
pii_mapping_path: "models/lora_pii_detector_bert-base-uncased_model/pii_type_mapping.json"
categories:
- name: business
description: "Business and management related queries"
Expand Down Expand Up @@ -359,6 +359,24 @@ decisions:
enabled: true
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER", "US_SSN", "CREDIT_CARD"]

# Default catch-all decision for unmatched requests (E2E PII test fix)
# This ensures PII detection is always enabled, even when no specific decision matches
- name: "default_decision"
description: "Default catch-all decision - blocks all PII for safety"
priority: 1  # Lowest priority - selected only when no higher-priority decision matches
rules:
operator: "OR"
conditions:
- type: "always" # Always matches as fallback
modelRefs:
- model: "Model-B"
use_reasoning: false
plugins:
- type: "pii"
configuration:
enabled: true
pii_types_allowed: [] # Block ALL PII - empty list means nothing allowed

default_model: "Model-A"

# API Configuration
Expand Down
28 changes: 26 additions & 2 deletions deploy/helm/semantic-router/crds/vllm.ai_intelligentpools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,12 @@ spec:
properties:
inputTokenPrice:
description: InputTokenPrice is the cost per input token
minimum: 0
type: number
minimum: 0
outputTokenPrice:
description: OutputTokenPrice is the cost per output token
minimum: 0
type: number
minimum: 0
type: object
reasoningFamily:
description: |-
Expand All @@ -120,6 +120,30 @@ spec:
maxItems: 100
minItems: 1
type: array
piiModel:
description: PIIModel defines the PII detection model configuration
properties:
modelPath:
description: ModelPath is the path to the PII detection model
maxLength: 500
minLength: 1
type: string
modelType:
description: ModelType specifies the model type (e.g., "auto"
for auto-detection)
maxLength: 50
type: string
threshold:
description: Threshold is the confidence threshold for PII detection
type: number
minimum: 0
maximum: 1
useCPU:
description: UseCPU specifies whether to use CPU for inference
type: boolean
required:
- modelPath
type: object
required:
- defaultModel
- models
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,9 @@ spec:
threshold:
description: Threshold is the similarity threshold for matching
(0.0-1.0)
maximum: 1
minimum: 0
type: number
minimum: 0
maximum: 1
required:
- candidates
- name
Expand Down
3 changes: 3 additions & 0 deletions deploy/helm/semantic-router/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Declare variables to be passed into your templates.

# Global settings
global:

Check warning on line 6 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

6:1 [document-start] missing document start "---"
# -- Namespace for all resources (if not specified, uses Release.Namespace)
namespace: ""

Expand Down Expand Up @@ -47,7 +47,7 @@

# Pod security context
podSecurityContext: {}
# fsGroup: 2000

Check warning on line 50 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

50:3 [comments-indentation] comment not indented like content

# Container security context
securityContext:
Expand Down Expand Up @@ -100,7 +100,7 @@
className: ""
# -- Ingress annotations
annotations: {}
# kubernetes.io/ingress.class: nginx

Check warning on line 103 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

103:5 [comments-indentation] comment not indented like content
# kubernetes.io/tls-acme: "true"
# -- Ingress hosts configuration
hosts:
Expand Down Expand Up @@ -166,7 +166,10 @@
- name: jailbreak_classifier_modernbert-base_model
repo: LLM-Semantic-Router/jailbreak_classifier_modernbert-base_model
- name: pii_classifier_modernbert-base_presidio_token_model
repo: LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model

Check failure on line 169 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

169:81 [line-length] line too long (83 > 80 characters)
# LoRA PII detector (for auto-detection feature)
- name: lora_pii_detector_bert-base-uncased_model
repo: LLM-Semantic-Router/lora_pii_detector_bert-base-uncased_model


# Autoscaling configuration
Expand Down Expand Up @@ -229,7 +232,7 @@
size: 10Gi
# -- Annotations for PVC
annotations: {}
# -- Existing claim name (if provided, will use existing PVC instead of creating new one)

Check failure on line 235 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

235:81 [line-length] line too long (91 > 80 characters)
existingClaim: ""

# Application configuration
Expand Down Expand Up @@ -264,7 +267,7 @@
model_id: "models/jailbreak_classifier_modernbert-base_model"
threshold: 0.7
use_cpu: true
jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"

Check failure on line 270 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

270:81 [line-length] line too long (107 > 80 characters)

# Classifier configuration
classifier:
Expand All @@ -273,13 +276,13 @@
use_modernbert: true
threshold: 0.6
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"

Check failure on line 279 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

279:81 [line-length] line too long (101 > 80 characters)
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
use_modernbert: true
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"

Check failure on line 285 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

285:81 [line-length] line too long (106 > 80 characters)

# Reasoning families
reasoning_families:
Expand Down Expand Up @@ -310,7 +313,7 @@
detailed_goroutine_tracking: true
high_resolution_timing: false
sample_rate: 1.0
duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]

Check failure on line 316 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

316:81 [line-length] line too long (94 > 80 characters)
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]

# Observability configuration
Expand Down Expand Up @@ -348,7 +351,7 @@
enum: ["celsius", "fahrenheit"]
description: "Temperature unit"
required: ["location"]
description: "Get current weather information, temperature, conditions, forecast for any location, city, or place. Check weather today, now, current conditions, temperature, rain, sun, cloudy, hot, cold, storm, snow"

Check failure on line 354 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

354:81 [line-length] line too long (220 > 80 characters)
category: "weather"
tags: ["weather", "temperature", "forecast", "climate"]
- tool:
Expand All @@ -367,7 +370,7 @@
description: "Number of results to return"
default: 5
required: ["query"]
description: "Search the internet, web search, find information online, browse web content, lookup, research, google, find answers, discover, investigate"

Check failure on line 373 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

373:81 [line-length] line too long (158 > 80 characters)
category: "search"
tags: ["search", "web", "internet", "information", "browse"]
- tool:
Expand All @@ -382,7 +385,7 @@
type: "string"
description: "Mathematical expression to evaluate"
required: ["expression"]
description: "Calculate mathematical expressions, solve math problems, arithmetic operations, compute numbers, addition, subtraction, multiplication, division, equations, formula"

Check failure on line 388 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

388:81 [line-length] line too long (183 > 80 characters)
category: "math"
tags: ["math", "calculation", "arithmetic", "compute", "numbers"]
- tool:
Expand All @@ -403,7 +406,7 @@
type: "string"
description: "Email body content"
required: ["to", "subject", "body"]
description: "Send email messages, email communication, contact people via email, mail, message, correspondence, notify, inform"

Check failure on line 409 in deploy/helm/semantic-router/values.yaml

View workflow job for this annotation

GitHub Actions / Run Validation Script

409:81 [line-length] line too long (132 > 80 characters)
category: "communication"
tags: ["email", "send", "communication", "message", "contact"]
- tool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -467,11 +467,13 @@ config:
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
use_modernbert: true
# Support both traditional (modernbert) and LoRA-based PII detection
# When model_type is "auto", the system will auto-detect LoRA configuration
model_id: "models/lora_pii_detector_bert-base-uncased_model"
model_type: "auto" # Enables LoRA auto-detection
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
pii_mapping_path: "models/lora_pii_detector_bert-base-uncased_model/label_mapping.json"

keyword_rules:
- category: "thinking"
Expand Down
28 changes: 25 additions & 3 deletions deploy/kubernetes/aibrix/semantic-router-values/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,26 @@ config:
system_prompt: "You are a thinking expert, should think multiple steps before answering. Please answer the question step by step."
mode: "replace"

# Default catch-all decision for unmatched requests (E2E PII test fix)
# This ensures PII detection is always enabled via the fallback mechanism in policy.go.
# When no other decision matches, CheckPolicy and IsPIIEnabled fall back to this decision.
- name: default_decision
description: "Default catch-all decision - blocks all PII for safety"
priority: 0
rules:
operator: "OR"
conditions:
- type: "domain"
name: "other"
modelRefs:
- model: vllm-llama3-8b-instruct
use_reasoning: false
plugins:
- type: "pii"
configuration:
enabled: true
pii_types_allowed: []

# Strategy for selecting between multiple matching decisions
# Options: "priority" (use decision with highest priority) or "confidence" (use decision with highest confidence)
strategy: "priority"
Expand Down Expand Up @@ -437,11 +457,13 @@ config:
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
use_modernbert: true
# Support both traditional (modernbert) and LoRA-based PII detection
# When model_type is "auto", the system will auto-detect LoRA configuration
model_id: "models/lora_pii_detector_bert-base-uncased_model"
model_type: "auto" # Enables LoRA auto-detection
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
pii_mapping_path: "models/lora_pii_detector_bert-base-uncased_model/pii_type_mapping.json"

keyword_rules:
- name: "thinking"
Expand Down
28 changes: 26 additions & 2 deletions deploy/kubernetes/crds/vllm.ai_intelligentpools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,12 @@ spec:
properties:
inputTokenPrice:
description: InputTokenPrice is the cost per input token
minimum: 0
type: number
minimum: 0
outputTokenPrice:
description: OutputTokenPrice is the cost per output token
minimum: 0
type: number
minimum: 0
type: object
reasoningFamily:
description: |-
Expand All @@ -120,6 +120,30 @@ spec:
maxItems: 100
minItems: 1
type: array
piiModel:
description: PIIModel defines the PII detection model configuration
properties:
modelPath:
description: ModelPath is the path to the PII detection model
maxLength: 500
minLength: 1
type: string
modelType:
description: ModelType specifies the model type (e.g., "auto"
for auto-detection)
maxLength: 50
type: string
threshold:
description: Threshold is the confidence threshold for PII detection
type: number
minimum: 0
maximum: 1
useCPU:
description: UseCPU specifies whether to use CPU for inference
type: boolean
required:
- modelPath
type: object
required:
- defaultModel
- models
Expand Down
4 changes: 2 additions & 2 deletions deploy/kubernetes/crds/vllm.ai_intelligentroutes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,9 @@ spec:
threshold:
description: Threshold is the similarity threshold for matching
(0.0-1.0)
maximum: 1
minimum: 0
type: number
minimum: 0
maximum: 1
required:
- candidates
- name
Expand Down
Loading
Loading