vllm-project · Xunzhuo · Nov 20, 2025 · Nov 19, 2025 · Nov 20, 2025
@@ -35,7 +35,6 @@ config:
     - name: computer science
     - name: philosophy
     - name: engineering
-    - name: thinking
 
   # Decisions - define routing logic with rules, model selection, and plugins
   decisions:
@@ -365,7 +364,7 @@ config:
       rules:
         operator: "OR"
         conditions:
-          - type: "domain"
+          - type: "keyword"
             name: "thinking"
       modelRefs:
         - model: vllm-llama3-8b-instruct
@@ -445,7 +444,7 @@ config:
       pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
 
   keyword_rules:
-    - category: "thinking"
+    - name: "thinking"
       operator: "OR"
       keywords: ["urgent", "immediate", "asap", "think", "careful"]
       case_sensitive: false

@@ -39,7 +39,13 @@ e2e/
 │   ├── domain_classify.go
 │   ├── cache.go
 │   ├── pii_detection.go
-│   └── jailbreak_detection.go
+│   ├── jailbreak_detection.go
+│   ├── decision_priority.go           # Signal-decision: Priority selection
+│   ├── plugin_chain_execution.go      # Signal-decision: Plugin chains
+│   ├── rule_condition_logic.go        # Signal-decision: AND/OR operators
+│   ├── decision_fallback.go           # Signal-decision: Fallback behavior
+│   ├── keyword_routing.go             # Signal-decision: Keyword matching
+│   └── plugin_config_variations.go    # Signal-decision: Plugin configs
 ├── profiles/
 │   └── ai-gateway/       # AI Gateway test profile
 │       └── profile.go    # Profile definition and environment setup
@@ -50,19 +56,48 @@ e2e/
 
 The framework includes the following test cases (all in `e2e/testcases/`):
 
+### Basic Functionality Tests
+
 | Test Case | Description | Metrics |
 |-----------|-------------|---------|
 | `chat-completions-request` | Basic chat completions API test | Response validation |
 | `chat-completions-stress-request` | Sequential stress test with 1000 requests | Success rate, avg duration |
 | `chat-completions-progressive-stress` | Progressive QPS stress test (10/20/50/100 QPS) | Per-stage success rate, latency stats |
+
+### Classification and Feature Tests
+
+| Test Case | Description | Metrics |
+|-----------|-------------|---------|
 | `domain-classify` | Domain classification accuracy | 65 cases, accuracy rate |
 | `semantic-cache` | Semantic cache hit rate | 5 groups, cache hit rate |
 | `pii-detection` | PII detection and blocking | 10 PII types, detection rate, block rate |
 | `jailbreak-detection` | Jailbreak attack detection | 10 attack types, detection rate, block rate |
 
+### Signal-Decision Engine Tests
+
+| Test Case | Description | Metrics |
+|-----------|-------------|---------|
+| `decision-priority-selection` | Decision priority selection with multiple matches | 4 cases, priority validation (indirect) |
+| `plugin-chain-execution` | Plugin execution order (PII → Cache → System Prompt) | 4 cases, chain validation, blocking behavior |
+| `rule-condition-logic` | AND/OR operators and keyword matching | 6 cases, operator validation |
+| `decision-fallback-behavior` | Fallback to default decision when no match | 5 cases, fallback validation |
+| `keyword-routing` | Keyword-based routing decisions | 6 cases, keyword matching (case-insensitive) |
+| `plugin-config-variations` | Plugin configuration variations (PII allowlist, cache thresholds) | 6 cases, config validation |
+
+**Signal-Decision Engine Features Tested:**
+
+- ✅ Decision priority selection (priority 15 > 10) - validated by checking which decision wins when multiple decisions match
+- ✅ Plugin chain execution order and blocking
+- ✅ Rule condition logic (AND/OR operators)
+- ✅ Keyword-based routing (case-insensitive)
+- ✅ Decision fallback behavior
+- ✅ Per-decision plugin configurations
+- ✅ PII allowlist handling
+- ✅ Per-decision cache thresholds (0.75, 0.92, 0.95)
+
 All test cases:
 
-- Use model name `"MoM"`
+- Use model name `"MoM"` to trigger the decision engine
 - Automatically clean up port forwarding
 - Generate detailed reports with statistics
 - Support verbose logging
@@ -312,6 +347,15 @@ Test data is stored in `e2e/testcases/testdata/` as JSON files. Each test case l
 - `pii_detection_cases.json`: 10 PII types (email, phone, SSN, etc.)
 - `jailbreak_detection_cases.json`: 10 attack types (prompt injection, DAN, etc.)
 
+**Signal-Decision Engine Tests** use embedded test cases (defined inline in test files) to validate:
+
+- Decision priority mechanisms (4 test cases)
+- Plugin chain execution and blocking (4 test cases)
+- Rule condition logic with AND/OR operators (6 test cases)
+- Decision fallback behavior (5 test cases)
+- Keyword-based routing (6 test cases)
+- Plugin configuration variations (6 test cases)
+
 **Test Data Format Example:**
 
 ```json

@@ -107,12 +107,27 @@ func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions)
 // GetTestCases returns the list of test cases for this profile.
 //
 // The returned names are grouped as basic functionality, classification and
 // routing, feature, Signal-Decision engine, and load tests, with the
 // progressive stress test listed last. The receiver p is not read.
 // NOTE(review): presumably each name must match a test case registered under
 // e2e/testcases — confirm against the registration code.
 func (p *Profile) GetTestCases() []string {
 	return []string{
+		// Basic functionality tests
 		"chat-completions-request",
 		"chat-completions-stress-request",
+
+		// Classification and routing tests
 		"domain-classify",
+
+		// Feature tests
 		"semantic-cache",
 		"pii-detection",
 		"jailbreak-detection",
+
+		// Signal-Decision engine tests (new architecture)
+		"decision-priority-selection", // Priority-based routing
+		"plugin-chain-execution",      // Plugin ordering and blocking
+		"rule-condition-logic",        // AND/OR operators
+		"decision-fallback-behavior",  // Fallback to default
+		"keyword-routing",             // Keyword-based decisions
+		"plugin-config-variations",    // Plugin configuration testing
+
+		// Load tests
 		"chat-completions-progressive-stress",
 	}
 }

@@ -367,7 +367,7 @@ config:
         operator: "OR"
         conditions:
           - type: "keyword"
-            rule_name: "thinking"
+            name: "thinking"
       modelRefs:
         - model: base-model
           lora_name: general-expert
@@ -383,7 +383,7 @@ config:
             system_prompt: "You are a thinking expert, should think multiple steps before answering. Please answer the question step by step."
             mode: "replace"
 
-    - name: general_decision
+    - name: other_decision
       description: "General knowledge and miscellaneous topics"
       priority: 1
       rules:
@@ -474,7 +474,7 @@ config:
       pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
 
   keyword_rules:
-    - category: "thinking"
+    - name: "thinking"
       operator: "OR"
       keywords: ["urgent", "immediate", "asap", "think", "careful"]
       case_sensitive: false

@@ -167,12 +167,27 @@ func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions)
 // GetTestCases returns the list of test cases for this profile.
 //
 // The returned names are grouped as basic functionality, classification and
 // routing, feature, Signal-Decision engine, and load tests, with the
 // progressive stress test listed last. The receiver p is not read.
 // NOTE(review): presumably each name must match a test case registered under
 // e2e/testcases — confirm against the registration code.
 func (p *Profile) GetTestCases() []string {
 	return []string{
+		// Basic functionality tests
 		"chat-completions-request",
 		"chat-completions-stress-request",
+
+		// Classification and routing tests
 		"domain-classify",
+
+		// Feature tests
 		"semantic-cache",
 		"pii-detection",
 		"jailbreak-detection",
+
+		// Signal-Decision engine tests (new architecture)
+		"decision-priority-selection", // Priority-based routing
+		"plugin-chain-execution",      // Plugin ordering and blocking
+		"rule-condition-logic",        // AND/OR operators
+		"decision-fallback-behavior",  // Fallback to default
+		"keyword-routing",             // Keyword-based decisions
+		"plugin-config-variations",    // Plugin configuration testing
+
+		// Load tests
 		"chat-completions-progressive-stress",
 	}
 }

@@ -35,7 +35,36 @@ spec:
       - name: "other"
         description: "General knowledge and miscellaneous topics"
 
+    keywords:
+      - name: "thinking"
+        operator: "OR"
+        keywords: ["urgent", "immediate", "asap", "think", "careful"]
+        caseSensitive: false
+
   decisions:
+    - name: "thinking_decision"
+      priority: 15
+      description: "Queries requiring careful thought or urgent attention"
+      signals:
+        operator: "OR"
+        conditions:
+          - type: "keyword"
+            name: "thinking"
+      modelRefs:
+        - model: "base-model"
+          loraName: "math-expert"
+          useReasoning: true
+      plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            pii_types_allowed: []
+        - type: "system_prompt"
+          configuration:
+            enabled: true
+            system_prompt: "You are a thoughtful assistant trained to approach problems systematically. When handling urgent matters or complex questions, break down the problem into clear steps, consider multiple angles, and provide thorough, well-reasoned responses. Take your time to think through implications and edge cases."
+            mode: "replace"
+
     - name: "business_decision"
       priority: 10
       description: "Business and management related queries"
@@ -328,6 +357,27 @@ spec:
         conditions:
           - type: "domain"
             name: "engineering"
+      modelRefs:
+        - model: "base-model"
+          loraName: "science-expert"
+          useReasoning: false
+      plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            pii_types_allowed: []
+        - type: "system_prompt"
+          configuration:
+            enabled: true
+            system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
+            mode: "replace"
+
+    - name: "other_decision"
+      priority: 1
+      description: "General knowledge and miscellaneous topics"
+      signals:
+        operator: "OR"
+        conditions:
           - type: "domain"
             name: "other"
       modelRefs:
@@ -339,8 +389,12 @@ spec:
           configuration:
             enabled: true
             pii_types_allowed: []
+        - type: "semantic-cache"
+          configuration:
+            enabled: true
+            similarity_threshold: 0.75
         - type: "system_prompt"
           configuration:
             enabled: true
-            system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
+            system_prompt: "You are a knowledgeable AI assistant with broad expertise across many domains. Provide accurate, helpful, and well-structured responses to general questions. When uncertain, acknowledge limitations and suggest where to find authoritative information."
             mode: "replace"
@@ -104,12 +104,27 @@ func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions)
 // GetTestCases returns the list of test cases for this profile.
 //
 // The returned names are grouped as basic functionality, classification and
 // routing, feature, Signal-Decision engine, and load tests, with the
 // progressive stress test listed last. The receiver p is not read.
 // NOTE(review): presumably each name must match a test case registered under
 // e2e/testcases — confirm against the registration code.
 func (p *Profile) GetTestCases() []string {
 	return []string{
+		// Basic functionality tests
 		"chat-completions-request",
 		"chat-completions-stress-request",
+
+		// Classification and routing tests
 		"domain-classify",
+
+		// Feature tests
 		"semantic-cache",
 		"pii-detection",
 		"jailbreak-detection",
+
+		// Signal-Decision engine tests (new architecture)
+		"decision-priority-selection", // Priority-based routing
+		"plugin-chain-execution",      // Plugin ordering and blocking
+		"rule-condition-logic",        // AND/OR operators
+		"decision-fallback-behavior",  // Fallback to default
+		"keyword-routing",             // Keyword-based decisions
+		"plugin-config-variations",    // Plugin configuration testing
+
+		// Load tests
 		"chat-completions-progressive-stress",
 	}
 }

@@ -144,3 +144,11 @@ func formatResponseHeaders(headers map[string][]string) string {
 	}
 	return sb.String()
 }
+
+// truncateString shortens s to at most maxLen bytes, ending the result with
+// "..." when text was cut and maxLen leaves room for the ellipsis.
+//
+// Strings that already fit (len(s) <= maxLen) are returned unchanged. When
+// maxLen is below 3 the ellipsis itself would not fit, so s is cut hard to
+// maxLen bytes (the empty string for maxLen <= 0) instead of slicing with a
+// negative bound, which panics. The cut is moved back off UTF-8 continuation
+// bytes so a multi-byte character is never split; for non-ASCII input the
+// result can therefore be a little shorter than maxLen.
+func truncateString(s string, maxLen int) string {
+	if len(s) <= maxLen {
+		return s
+	}
+	if maxLen <= 0 {
+		return ""
+	}
+
+	const ellipsis = "..."
+	cut, suffix := maxLen-len(ellipsis), ellipsis
+	if cut < 0 {
+		// Not enough room for the ellipsis: keep as much of s as allowed.
+		cut, suffix = maxLen, ""
+	}
+
+	// cut <= maxLen < len(s), so s[cut] is in range. Bytes of the form
+	// 10xxxxxx are UTF-8 continuation bytes; step back to the rune start.
+	for cut > 0 && s[cut]&0xC0 == 0x80 {
+		cut--
+	}
+	return s[:cut] + suffix
+}