vllm-project · yossiovadia · Nov 11, 2025 · Nov 12, 2025 · Nov 12, 2025 · Nov 13, 2025
@@ -69,11 +69,11 @@ classifier:
     use_cpu: true
     category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"
   pii_model:
-    model_id: "models/pii_classifier_modernbert-base_presidio_token_model"  # TODO: Use local model for now before the code can download the entire model from huggingface
-    use_modernbert: true
+    model_id: "models/lora_pii_detector_bert-base-uncased_model"
+    use_modernbert: false  # Ignored - the model type is always auto-detected; this model is a BERT-based LoRA model
     threshold: 0.7
     use_cpu: true
-    pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
+    pii_mapping_path: "models/lora_pii_detector_bert-base-uncased_model/pii_type_mapping.json"
 categories:
   - name: business
     description: "Business and management related queries"
@@ -359,6 +359,24 @@ decisions:
           enabled: true
           pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER", "US_SSN", "CREDIT_CARD"]
 
+  # Default catch-all decision for requests that match no other decision (added for the E2E PII test).
+  # It keeps PII detection enabled even when no specific decision matches.
+  - name: "default_decision"
+    description: "Default catch-all decision - blocks all PII for safety"
+    priority: 1  # Lowest priority - intended to be chosen only when no other decision matches
+    rules:
+      operator: "OR"
+      conditions:
+        - type: "always"  # Unconditional match, so this decision is always a candidate
+    modelRefs:
+      - model: "Model-B"  # NOTE(review): default_model is "Model-A" - confirm Model-B is intended here
+        use_reasoning: false
+    plugins:
+      - type: "pii"
+        configuration:
+          enabled: true
+          pii_types_allowed: []  # Empty allow-list: no PII type is permitted, so all detected PII is blocked
+
 default_model: "Model-A"
 
 # API Configuration

@@ -101,12 +101,12 @@ spec:
                       properties:
                         inputTokenPrice:
                           description: InputTokenPrice is the cost per input token
-                          minimum: 0
                           type: number
+                          minimum: 0
                         outputTokenPrice:
                           description: OutputTokenPrice is the cost per output token
-                          minimum: 0
                           type: number
+                          minimum: 0
                       type: object
                     reasoningFamily:
                       description: |-
@@ -120,6 +120,30 @@ spec:
                 maxItems: 100
                 minItems: 1
                 type: array
+              piiModel:
+                description: PIIModel defines the PII detection model configuration
+                properties:
+                  modelPath:
+                    description: ModelPath is the path to the PII detection model
+                    maxLength: 500
+                    minLength: 1
+                    type: string
+                  modelType:
+                    description: ModelType specifies the model type (e.g., "auto"
+                      for auto-detection)
+                    maxLength: 50
+                    type: string
+                  threshold:
+                    description: Threshold is the confidence threshold for PII detection
+                    type: number
+                    minimum: 0
+                    maximum: 1
+                  useCPU:
+                    description: UseCPU specifies whether to use CPU for inference
+                    type: boolean
+                required:
+                - modelPath
+                type: object
             required:
             - defaultModel
             - models

@@ -257,9 +257,9 @@ spec:
                         threshold:
                           description: Threshold is the similarity threshold for matching
                             (0.0-1.0)
-                          maximum: 1
-                          minimum: 0
                           type: number
+                          minimum: 0
+                          maximum: 1
                       required:
                       - candidates
                       - name

@@ -3,7 +3,7 @@
 # Declare variables to be passed into your templates.

 # Global settings
 global:
  # -- Namespace for all resources (if not specified, uses Release.Namespace)
  namespace: ""

@@ -47,7 +47,7 @@

 # Pod security context
 podSecurityContext: {}
  # fsGroup: 2000

 # Container security context
 securityContext:
@@ -100,7 +100,7 @@
  className: ""
  # -- Ingress annotations
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  # -- Ingress hosts configuration
  hosts:
@@ -166,7 +166,10 @@
    - name: jailbreak_classifier_modernbert-base_model
       repo: LLM-Semantic-Router/jailbreak_classifier_modernbert-base_model
     - name: pii_classifier_modernbert-base_presidio_token_model
       repo: LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model
+    # LoRA PII detector (for auto-detection feature)
+    - name: lora_pii_detector_bert-base-uncased_model
+      repo: LLM-Semantic-Router/lora_pii_detector_bert-base-uncased_model
 
 
 # Autoscaling configuration
@@ -229,7 +232,7 @@
  size: 10Gi
  # -- Annotations for PVC
  annotations: {}
  # -- Existing claim name (if provided, will use existing PVC instead of creating new one)
  existingClaim: ""

 # Application configuration
@@ -264,7 +267,7 @@
    model_id: "models/jailbreak_classifier_modernbert-base_model"
    threshold: 0.7
    use_cpu: true
    jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"

  # Classifier configuration
  classifier:
@@ -273,13 +276,13 @@
      use_modernbert: true
      threshold: 0.6
      use_cpu: true
      category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
    pii_model:
      model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
      use_modernbert: true
      threshold: 0.7
      use_cpu: true
      pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"

  # Reasoning families
  reasoning_families:
@@ -310,7 +313,7 @@
        detailed_goroutine_tracking: true
        high_resolution_timing: false
        sample_rate: 1.0
        duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
        size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]

  # Observability configuration
@@ -348,7 +351,7 @@
              enum: ["celsius", "fahrenheit"]
              description: "Temperature unit"
          required: ["location"]
    description: "Get current weather information, temperature, conditions, forecast for any location, city, or place. Check weather today, now, current conditions, temperature, rain, sun, cloudy, hot, cold, storm, snow"
    category: "weather"
    tags: ["weather", "temperature", "forecast", "climate"]
  - tool:
@@ -367,7 +370,7 @@
              description: "Number of results to return"
              default: 5
          required: ["query"]
    description: "Search the internet, web search, find information online, browse web content, lookup, research, google, find answers, discover, investigate"
    category: "search"
    tags: ["search", "web", "internet", "information", "browse"]
  - tool:
@@ -382,7 +385,7 @@
              type: "string"
              description: "Mathematical expression to evaluate"
          required: ["expression"]
    description: "Calculate mathematical expressions, solve math problems, arithmetic operations, compute numbers, addition, subtraction, multiplication, division, equations, formula"
    category: "math"
    tags: ["math", "calculation", "arithmetic", "compute", "numbers"]
  - tool:
@@ -403,7 +406,7 @@
              type: "string"
              description: "Email body content"
          required: ["to", "subject", "body"]
    description: "Send email messages, email communication, contact people via email, mail, message, correspondence, notify, inform"
    category: "communication"
    tags: ["email", "send", "communication", "message", "contact"]
  - tool:

@@ -467,11 +467,13 @@ config:
       use_cpu: true
       category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
     pii_model:
-      model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
-      use_modernbert: true
+      # Both traditional (ModernBERT) and LoRA-based PII detection models are supported.
+      # With model_type set to "auto", the system auto-detects the LoRA configuration.
+      model_id: "models/lora_pii_detector_bert-base-uncased_model"
+      model_type: "auto"  # Enables LoRA auto-detection
       threshold: 0.7
       use_cpu: true
-      pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
+      pii_mapping_path: "models/lora_pii_detector_bert-base-uncased_model/pii_type_mapping.json"
 
   keyword_rules:
     - category: "thinking"

@@ -380,6 +380,26 @@ config:
             system_prompt: "You are a thinking expert, should think multiple steps before answering. Please answer the question step by step."
             mode: "replace"
 
+    # Default catch-all decision for requests that match no other decision (added for the E2E PII test).
+    # It keeps PII detection enabled through the fallback mechanism in policy.go:
+    # when no decision matches, CheckPolicy and IsPIIEnabled fall back to this decision.
+    - name: default_decision
+      description: "Default catch-all decision - blocks all PII for safety"
+      priority: 0  # NOTE(review): presumably the lowest priority among the decisions - confirm
+      rules:
+        operator: "OR"
+        conditions:
+          - type: "domain"  # The rule itself only matches the "other" domain
+            name: "other"
+      modelRefs:
+        - model: vllm-llama3-8b-instruct
+          use_reasoning: false
+      plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            pii_types_allowed: []  # Empty allow-list: no PII type is permitted
+
   # Strategy for selecting between multiple matching decisions
   # Options: "priority" (use decision with highest priority) or "confidence" (use decision with highest confidence)
   strategy: "priority"
@@ -437,11 +457,13 @@ config:
       use_cpu: true
       category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
     pii_model:
-      model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
-      use_modernbert: true
+      # Support both traditional (modernbert) and LoRA-based PII detection
+      # When model_type is "auto", the system will auto-detect LoRA configuration
+      model_id: "models/lora_pii_detector_bert-base-uncased_model"
+      model_type: "auto"  # Enables LoRA auto-detection
       threshold: 0.7
       use_cpu: true
-      pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
+      pii_mapping_path: "models/lora_pii_detector_bert-base-uncased_model/pii_type_mapping.json"
 
   keyword_rules:
     - name: "thinking"

@@ -101,12 +101,12 @@ spec:
                       properties:
                         inputTokenPrice:
                           description: InputTokenPrice is the cost per input token
-                          minimum: 0
                           type: number
+                          minimum: 0
                         outputTokenPrice:
                           description: OutputTokenPrice is the cost per output token
-                          minimum: 0
                           type: number
+                          minimum: 0
                       type: object
                     reasoningFamily:
                       description: |-
@@ -120,6 +120,30 @@ spec:
                 maxItems: 100
                 minItems: 1
                 type: array
+              piiModel:
+                description: PIIModel defines the PII detection model configuration
+                properties:
+                  modelPath:
+                    description: ModelPath is the path to the PII detection model
+                    maxLength: 500
+                    minLength: 1
+                    type: string
+                  modelType:
+                    description: ModelType specifies the model type (e.g., "auto"
+                      for auto-detection)
+                    maxLength: 50
+                    type: string
+                  threshold:
+                    description: Threshold is the confidence threshold for PII detection
+                    type: number
+                    minimum: 0
+                    maximum: 1
+                  useCPU:
+                    description: UseCPU specifies whether to use CPU for inference
+                    type: boolean
+                required:
+                - modelPath
+                type: object
             required:
             - defaultModel
             - models

@@ -257,9 +257,9 @@ spec:
                         threshold:
                           description: Threshold is the similarity threshold for matching
                             (0.0-1.0)
-                          maximum: 1
-                          minimum: 0
                           type: number
+                          minimum: 0
+                          maximum: 1
                       required:
                       - candidates
                       - name