From 03b5f9ce29541a861d9da3d47482b9f1462090f4 Mon Sep 17 00:00:00 2001 From: Yehudit Kerido Date: Wed, 19 Nov 2025 10:16:48 +0200 Subject: [PATCH 1/2] [E2E] Add Signal-Decision Engine Test Coverage for New Plugin Architecture Signed-off-by: Yehudit Kerido --- .../aibrix/semantic-router-values/values.yaml | 4 +- e2e/README.md | 48 ++- e2e/profiles/ai-gateway/profile.go | 15 + e2e/profiles/ai-gateway/values.yaml | 6 +- e2e/profiles/aibrix/profile.go | 15 + .../dynamic-config/crds/intelligentroute.yaml | 58 ++++ e2e/profiles/dynamic-config/profile.go | 15 + e2e/testcases/common.go | 8 + e2e/testcases/decision_fallback.go | 293 ++++++++++++++++ e2e/testcases/decision_priority.go | 262 ++++++++++++++ e2e/testcases/keyword_routing.go | 308 ++++++++++++++++ e2e/testcases/plugin_chain_execution.go | 314 +++++++++++++++++ e2e/testcases/plugin_config_variations.go | 328 ++++++++++++++++++ e2e/testcases/rule_condition_logic.go | 293 ++++++++++++++++ website/docs/api/crd-reference.md | 136 ++------ 15 files changed, 1982 insertions(+), 121 deletions(-) create mode 100644 e2e/testcases/decision_fallback.go create mode 100644 e2e/testcases/decision_priority.go create mode 100644 e2e/testcases/keyword_routing.go create mode 100644 e2e/testcases/plugin_chain_execution.go create mode 100644 e2e/testcases/plugin_config_variations.go create mode 100644 e2e/testcases/rule_condition_logic.go diff --git a/deploy/kubernetes/aibrix/semantic-router-values/values.yaml b/deploy/kubernetes/aibrix/semantic-router-values/values.yaml index 5b0a242c9..fd5917389 100644 --- a/deploy/kubernetes/aibrix/semantic-router-values/values.yaml +++ b/deploy/kubernetes/aibrix/semantic-router-values/values.yaml @@ -367,6 +367,8 @@ config: conditions: - type: "domain" name: "thinking" + - type: "keyword" + name: "thinking" modelRefs: - model: vllm-llama3-8b-instruct use_reasoning: true @@ -445,7 +447,7 @@ config: pii_mapping_path: 
"models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json" keyword_rules: - - category: "thinking" + - name: "thinking" operator: "OR" keywords: ["urgent", "immediate", "asap", "think", "careful"] case_sensitive: false diff --git a/e2e/README.md b/e2e/README.md index 777169dc3..018d7f935 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -39,7 +39,13 @@ e2e/ │ ├── domain_classify.go │ ├── cache.go │ ├── pii_detection.go -│ └── jailbreak_detection.go +│ ├── jailbreak_detection.go +│ ├── decision_priority.go # Signal-decision: Priority selection +│ ├── plugin_chain_execution.go # Signal-decision: Plugin chains +│ ├── rule_condition_logic.go # Signal-decision: AND/OR operators +│ ├── decision_fallback.go # Signal-decision: Fallback behavior +│ ├── keyword_routing.go # Signal-decision: Keyword matching +│ └── plugin_config_variations.go # Signal-decision: Plugin configs ├── profiles/ │ └── ai-gateway/ # AI Gateway test profile │ └── profile.go # Profile definition and environment setup @@ -50,19 +56,48 @@ e2e/ The framework includes the following test cases (all in `e2e/testcases/`): +### Basic Functionality Tests + | Test Case | Description | Metrics | |-----------|-------------|---------| | `chat-completions-request` | Basic chat completions API test | Response validation | | `chat-completions-stress-request` | Sequential stress test with 1000 requests | Success rate, avg duration | | `chat-completions-progressive-stress` | Progressive QPS stress test (10/20/50/100 QPS) | Per-stage success rate, latency stats | + +### Classification and Feature Tests + +| Test Case | Description | Metrics | +|-----------|-------------|---------| | `domain-classify` | Domain classification accuracy | 65 cases, accuracy rate | | `semantic-cache` | Semantic cache hit rate | 5 groups, cache hit rate | | `pii-detection` | PII detection and blocking | 10 PII types, detection rate, block rate | | `jailbreak-detection` | Jailbreak attack detection | 10 attack types, 
detection rate, block rate | +### Signal-Decision Engine Tests + +| Test Case | Description | Metrics | +|-----------|-------------|---------| +| `decision-priority-selection` | Decision priority selection with multiple matches | 4 cases, priority validation | +| `plugin-chain-execution` | Plugin execution order (PII → Cache → System Prompt) | 4 cases, chain validation, blocking behavior | +| `rule-condition-logic` | AND/OR operators and keyword matching | 6 cases, operator validation | +| `decision-fallback-behavior` | Fallback to default decision when no match | 5 cases, fallback validation | +| `keyword-routing` | Keyword-based routing decisions | 6 cases, keyword matching (case-insensitive) | +| `plugin-config-variations` | Plugin configuration variations (PII allowlist, cache thresholds) | 6 cases, config validation | + +**Signal-Decision Engine Features Tested:** + +- ✅ Decision priority selection (priority 15 > 10) +- ✅ Plugin chain execution order and blocking +- ✅ Rule condition logic (AND/OR operators) +- ✅ Keyword-based routing (case-insensitive) +- ✅ Decision fallback behavior +- ✅ Per-decision plugin configurations +- ✅ PII allowlist handling +- ✅ Per-decision cache thresholds (0.75, 0.92, 0.95) + All test cases: -- Use model name `"MoM"` +- Use model name `"MoM"` to trigger decision engine - Automatically clean up port forwarding - Generate detailed reports with statistics - Support verbose logging @@ -312,6 +347,15 @@ Test data is stored in `e2e/testcases/testdata/` as JSON files. Each test case l - `pii_detection_cases.json`: 10 PII types (email, phone, SSN, etc.) - `jailbreak_detection_cases.json`: 10 attack types (prompt injection, DAN, etc.) 
+**Signal-Decision Engine Tests** use embedded test cases (defined inline in test files) to validate: + +- Decision priority mechanisms (4 test cases) +- Plugin chain execution and blocking (4 test cases) +- Rule condition logic with AND/OR operators (6 test cases) +- Decision fallback behavior (5 test cases) +- Keyword-based routing (6 test cases) +- Plugin configuration variations (6 test cases) + **Test Data Format Example:** ```json diff --git a/e2e/profiles/ai-gateway/profile.go b/e2e/profiles/ai-gateway/profile.go index 46b447d99..9c7e3d0bf 100644 --- a/e2e/profiles/ai-gateway/profile.go +++ b/e2e/profiles/ai-gateway/profile.go @@ -107,12 +107,27 @@ func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions) // GetTestCases returns the list of test cases for this profile func (p *Profile) GetTestCases() []string { return []string{ + // Basic functionality tests "chat-completions-request", "chat-completions-stress-request", + + // Classification and routing tests "domain-classify", + + // Feature tests "semantic-cache", "pii-detection", "jailbreak-detection", + + // Signal-Decision engine tests (new architecture) + "decision-priority-selection", // Priority-based routing + "plugin-chain-execution", // Plugin ordering and blocking + "rule-condition-logic", // AND/OR operators + "decision-fallback-behavior", // Fallback to default + "keyword-routing", // Keyword-based decisions + "plugin-config-variations", // Plugin configuration testing + + // Load tests "chat-completions-progressive-stress", } } diff --git a/e2e/profiles/ai-gateway/values.yaml b/e2e/profiles/ai-gateway/values.yaml index 1b3f4fe36..8523cdeb3 100644 --- a/e2e/profiles/ai-gateway/values.yaml +++ b/e2e/profiles/ai-gateway/values.yaml @@ -367,7 +367,7 @@ config: operator: "OR" conditions: - type: "keyword" - rule_name: "thinking" + name: "thinking" modelRefs: - model: base-model lora_name: general-expert @@ -383,7 +383,7 @@ config: system_prompt: "You are a thinking expert, 
should think multiple steps before answering. Please answer the question step by step." mode: "replace" - - name: general_decision + - name: other_decision description: "General knowledge and miscellaneous topics" priority: 1 rules: @@ -474,7 +474,7 @@ config: pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json" keyword_rules: - - category: "thinking" + - name: "thinking" operator: "OR" keywords: ["urgent", "immediate", "asap", "think", "careful"] case_sensitive: false diff --git a/e2e/profiles/aibrix/profile.go b/e2e/profiles/aibrix/profile.go index e25480197..a9c35d9f3 100644 --- a/e2e/profiles/aibrix/profile.go +++ b/e2e/profiles/aibrix/profile.go @@ -167,12 +167,27 @@ func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions) // GetTestCases returns the list of test cases for this profile func (p *Profile) GetTestCases() []string { return []string{ + // Basic functionality tests "chat-completions-request", "chat-completions-stress-request", + + // Classification and routing tests "domain-classify", + + // Feature tests "semantic-cache", "pii-detection", "jailbreak-detection", + + // Signal-Decision engine tests (new architecture) + "decision-priority-selection", // Priority-based routing + "plugin-chain-execution", // Plugin ordering and blocking + "rule-condition-logic", // AND/OR operators + "decision-fallback-behavior", // Fallback to default + "keyword-routing", // Keyword-based decisions + "plugin-config-variations", // Plugin configuration testing + + // Load tests "chat-completions-progressive-stress", } } diff --git a/e2e/profiles/dynamic-config/crds/intelligentroute.yaml b/e2e/profiles/dynamic-config/crds/intelligentroute.yaml index b04ac926d..a2b417bfc 100644 --- a/e2e/profiles/dynamic-config/crds/intelligentroute.yaml +++ b/e2e/profiles/dynamic-config/crds/intelligentroute.yaml @@ -32,10 +32,43 @@ spec: description: "Philosophy and ethical questions" - name: "engineering" 
description: "Engineering and technical problem-solving" + - name: "thinking" + description: "Queries requiring careful thought or urgent attention" - name: "other" description: "General knowledge and miscellaneous topics" + keywords: + - name: "thinking" + operator: "OR" + keywords: ["urgent", "immediate", "asap", "think", "careful"] + case_sensitive: false + decisions: + - name: "thinking_decision" + priority: 15 + description: "Queries requiring careful thought or urgent attention" + signals: + operator: "OR" + conditions: + - type: "domain" + name: "thinking" + - type: "keyword" + name: "thinking" + modelRefs: + - model: "base-model" + loraName: "math-expert" + useReasoning: true + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are a thoughtful assistant trained to approach problems systematically. When handling urgent matters or complex questions, break down the problem into clear steps, consider multiple angles, and provide thorough, well-reasoned responses. Take your time to think through implications and edge cases." + mode: "replace" + - name: "business_decision" priority: 10 description: "Business and management related queries" @@ -328,6 +361,27 @@ spec: conditions: - type: "domain" name: "engineering" + modelRefs: + - model: "base-model" + loraName: "science-expert" + useReasoning: false + plugins: + - type: "pii" + configuration: + enabled: true + pii_types_allowed: [] + - type: "system_prompt" + configuration: + enabled: true + system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. 
// truncateString shortens s to at most maxLen characters (runes). When the
// string has to be cut, the last three characters of the budget are replaced
// by "..." so the reader can see that truncation happened.
//
// Edge cases:
//   - maxLen <= 0 yields the empty string.
//   - maxLen < 3 leaves no room for the ellipsis, so the first maxLen
//     characters are returned without it.
//   - Strings that already fit are returned unchanged.
//
// Length is measured in runes rather than bytes so multi-byte UTF-8 text is
// never cut in the middle of a character.
func truncateString(s string, maxLen int) string {
	const ellipsis = "..."

	if maxLen <= 0 {
		return ""
	}

	// The byte length is an upper bound on the rune count, so a string that
	// fits by bytes also fits by runes; this avoids the []rune allocation for
	// the common short-string case.
	if len(s) <= maxLen {
		return s
	}

	runes := []rune(s)
	if len(runes) <= maxLen {
		return s
	}

	// Not enough room for the ellipsis: fall back to a plain cut instead of
	// computing a negative slice bound.
	if maxLen < len(ellipsis) {
		return string(runes[:maxLen])
	}

	return string(runes[:maxLen-len(ellipsis)]) + ellipsis
}
Description: "Weather query should fall back to general/other decision", + }, + { + Query: "Tell me a joke", + ExpectedDecision: "other_decision", + ShouldFallback: true, + Description: "Entertainment query should fall back to general decision", + }, + { + Query: "Random unclassified query about nothing specific", + ExpectedDecision: "other_decision", + ShouldFallback: true, + Description: "Unclassified query should fall back to general decision", + }, + { + Query: "What is 15 * 23?", + ExpectedDecision: "math_decision", + ShouldFallback: false, + Description: "Math query should match specific decision, not fallback", + }, + { + Query: "Explain photosynthesis", + ExpectedDecision: "biology_decision", + ShouldFallback: false, + Description: "Biology query should match specific decision, not fallback", + }, + } + + // Run fallback tests + var results []DecisionFallbackResult + totalTests := 0 + correctTests := 0 + + for _, testCase := range testCases { + totalTests++ + result := testSingleFallback(ctx, testCase, localPort, opts.Verbose) + results = append(results, result) + if result.Correct { + correctTests++ + } + + // Small delay between tests + time.Sleep(500 * time.Millisecond) + } + + // Calculate accuracy + accuracy := float64(correctTests) / float64(totalTests) * 100 + + // Set details for reporting + if opts.SetDetails != nil { + opts.SetDetails(map[string]interface{}{ + "total_tests": totalTests, + "correct_tests": correctTests, + "accuracy_rate": fmt.Sprintf("%.2f%%", accuracy), + "failed_tests": totalTests - correctTests, + }) + } + + // Print results + printDecisionFallbackResults(results, totalTests, correctTests, accuracy) + + if opts.Verbose { + fmt.Printf("[Test] Decision fallback test completed: %d/%d correct (%.2f%% accuracy)\n", + correctTests, totalTests, accuracy) + } + + // Return error if accuracy is below threshold + if correctTests == 0 { + return fmt.Errorf("decision fallback test failed: 0%% accuracy (0/%d correct)", totalTests) + } + + 
return nil +} + +func testSingleFallback(ctx context.Context, testCase DecisionFallbackCase, localPort string, verbose bool) DecisionFallbackResult { + result := DecisionFallbackResult{ + Query: testCase.Query, + ExpectedDecision: testCase.ExpectedDecision, + ShouldFallback: testCase.ShouldFallback, + } + + // Create chat completion request + requestBody := map[string]interface{}{ + "model": "MoM", // Use Mixture of Models to trigger decision engine + "messages": []map[string]string{ + {"role": "user", "content": testCase.Query}, + }, + } + + jsonData, err := json.Marshal(requestBody) + if err != nil { + result.Error = fmt.Sprintf("failed to marshal request: %v", err) + return result + } + + // Send request + url := fmt.Sprintf("http://localhost:%s/v1/chat/completions", localPort) + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + result.Error = fmt.Sprintf("failed to create request: %v", err) + return result + } + req.Header.Set("Content-Type", "application/json") + + httpClient := &http.Client{Timeout: 30 * time.Second} + resp, err := httpClient.Do(req) + if err != nil { + result.Error = fmt.Sprintf("failed to send request: %v", err) + return result + } + defer resp.Body.Close() + + // Check response status + if resp.StatusCode != http.StatusOK { + bodyBytes, _ := io.ReadAll(resp.Body) + result.Error = fmt.Sprintf("HTTP %d: %s", resp.StatusCode, string(bodyBytes)) + + if verbose { + fmt.Printf("[Test] ✗ HTTP %d Error for query: %s\n", resp.StatusCode, testCase.Query) + fmt.Printf(" Expected decision: %s\n", testCase.ExpectedDecision) + fmt.Printf(" Response: %s\n", string(bodyBytes)) + } + + return result + } + + // Extract VSR decision headers + result.ActualDecision = resp.Header.Get("x-vsr-selected-decision") + + // Determine if fallback occurred + // "other_decision" or "general_decision" indicates fallback + result.DidFallback = (result.ActualDecision == "other_decision" || + result.ActualDecision == 
"general_decision") + + // Check if the result matches expectations + result.Correct = (result.ActualDecision == testCase.ExpectedDecision) + + if verbose { + if result.Correct { + fmt.Printf("[Test] ✓ Fallback behavior correct\n") + fmt.Printf(" Query: %s\n", truncateString(testCase.Query, 60)) + fmt.Printf(" Decision: %s (fallback: %v)\n", result.ActualDecision, result.DidFallback) + } else { + fmt.Printf("[Test] ✗ Fallback behavior incorrect\n") + fmt.Printf(" Query: %s\n", testCase.Query) + fmt.Printf(" Expected: %s (should fallback: %v)\n", testCase.ExpectedDecision, testCase.ShouldFallback) + fmt.Printf(" Actual: %s (did fallback: %v)\n", result.ActualDecision, result.DidFallback) + fmt.Printf(" Description: %s\n", testCase.Description) + } + } + + return result +} + +func printDecisionFallbackResults(results []DecisionFallbackResult, totalTests, correctTests int, accuracy float64) { + separator := "================================================================================" + fmt.Println("\n" + separator) + fmt.Println("DECISION FALLBACK BEHAVIOR TEST RESULTS") + fmt.Println(separator) + fmt.Printf("Total Tests: %d\n", totalTests) + fmt.Printf("Correct Behaviors: %d\n", correctTests) + fmt.Printf("Accuracy Rate: %.2f%%\n", accuracy) + fmt.Println(separator) + + // Print summary by behavior type + fallbackTests := 0 + fallbackCorrect := 0 + specificTests := 0 + specificCorrect := 0 + + for _, result := range results { + if result.ShouldFallback { + fallbackTests++ + if result.Correct { + fallbackCorrect++ + } + } else { + specificTests++ + if result.Correct { + specificCorrect++ + } + } + } + + fmt.Println("\nTest Breakdown:") + if fallbackTests > 0 { + fallbackAccuracy := float64(fallbackCorrect) / float64(fallbackTests) * 100 + fmt.Printf(" - Fallback Tests: %d/%d (%.2f%%)\n", fallbackCorrect, fallbackTests, fallbackAccuracy) + } + if specificTests > 0 { + specificAccuracy := float64(specificCorrect) / float64(specificTests) * 100 + fmt.Printf(" - 
Specific Tests: %d/%d (%.2f%%)\n", specificCorrect, specificTests, specificAccuracy) + } + + // Print failed cases + failedCount := 0 + for _, result := range results { + if !result.Correct && result.Error == "" { + failedCount++ + } + } + + if failedCount > 0 { + fmt.Println("\nFailed Fallback Behaviors:") + for _, result := range results { + if !result.Correct && result.Error == "" { + fmt.Printf(" - Query: %s\n", truncateString(result.Query, 70)) + fmt.Printf(" Expected: %s (should fallback: %v)\n", result.ExpectedDecision, result.ShouldFallback) + fmt.Printf(" Actual: %s (did fallback: %v)\n", result.ActualDecision, result.DidFallback) + } + } + } + + // Print errors + errorCount := 0 + for _, result := range results { + if result.Error != "" { + errorCount++ + } + } + + if errorCount > 0 { + fmt.Println("\nErrors:") + for _, result := range results { + if result.Error != "" { + fmt.Printf(" - Query: %s\n", truncateString(result.Query, 70)) + fmt.Printf(" Error: %s\n", result.Error) + } + } + } + + fmt.Println(separator + "\n") +} diff --git a/e2e/testcases/decision_priority.go b/e2e/testcases/decision_priority.go new file mode 100644 index 000000000..438e745d8 --- /dev/null +++ b/e2e/testcases/decision_priority.go @@ -0,0 +1,262 @@ +package testcases + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "time" + + pkgtestcases "github.com/vllm-project/semantic-router/e2e/pkg/testcases" + "k8s.io/client-go/kubernetes" +) + +func init() { + pkgtestcases.Register("decision-priority-selection", pkgtestcases.TestCase{ + Description: "Test decision priority when multiple decisions match", + Tags: []string{"signal-decision", "priority", "routing"}, + Fn: testDecisionPriority, + }) +} + +// DecisionPriorityCase represents a test case for decision priority +type DecisionPriorityCase struct { + Query string `json:"query"` + ExpectedDecision string `json:"expected_decision"` + ExpectedPriority int `json:"expected_priority"` + MatchingDecisions 
[]string `json:"matching_decisions"` // Decisions that should match + Description string `json:"description"` +} + +// DecisionPriorityResult tracks the result of a single priority test +type DecisionPriorityResult struct { + Query string + ExpectedDecision string + ActualDecision string + ExpectedPriority int + ActualPriority string + Correct bool + Error string +} + +func testDecisionPriority(ctx context.Context, client *kubernetes.Clientset, opts pkgtestcases.TestCaseOptions) error { + if opts.Verbose { + fmt.Println("[Test] Testing decision priority selection with multiple matches") + } + + // Setup service connection and get local port + localPort, stopPortForward, err := setupServiceConnection(ctx, client, opts) + if err != nil { + return err + } + defer stopPortForward() + + // Define test cases inline + testCases := []DecisionPriorityCase{ + { + Query: "Think carefully about this urgent business decision", + ExpectedDecision: "thinking_decision", + ExpectedPriority: 15, + MatchingDecisions: []string{"thinking_decision", "business_decision"}, + Description: "Query matches both thinking (priority 15) and business (priority 10) - should select higher priority", + }, + { + Query: "I need to think about complex math problems", + ExpectedDecision: "thinking_decision", + ExpectedPriority: 15, + MatchingDecisions: []string{"thinking_decision", "math_decision"}, + Description: "Query matches thinking (priority 15) and math (priority 10) - should select higher priority", + }, + { + Query: "What is 2 + 2?", + ExpectedDecision: "math_decision", + ExpectedPriority: 10, + MatchingDecisions: []string{"math_decision"}, + Description: "Simple math query should match math decision (priority 10)", + }, + { + Query: "Tell me about cellular biology", + ExpectedDecision: "biology_decision", + ExpectedPriority: 10, + MatchingDecisions: []string{"biology_decision"}, + Description: "Biology query should match biology decision (priority 10)", + }, + } + + // Run priority tests + var 
results []DecisionPriorityResult + totalTests := 0 + correctTests := 0 + + for _, testCase := range testCases { + totalTests++ + result := testSinglePrioritySelection(ctx, testCase, localPort, opts.Verbose) + results = append(results, result) + if result.Correct { + correctTests++ + } + } + + // Calculate accuracy + accuracy := float64(correctTests) / float64(totalTests) * 100 + + // Set details for reporting + if opts.SetDetails != nil { + opts.SetDetails(map[string]interface{}{ + "total_tests": totalTests, + "correct_tests": correctTests, + "accuracy_rate": fmt.Sprintf("%.2f%%", accuracy), + "failed_tests": totalTests - correctTests, + }) + } + + // Print results + printDecisionPriorityResults(results, totalTests, correctTests, accuracy) + + if opts.Verbose { + fmt.Printf("[Test] Decision priority test completed: %d/%d correct (%.2f%% accuracy)\n", + correctTests, totalTests, accuracy) + } + + // Return error if accuracy is below threshold + if correctTests == 0 { + return fmt.Errorf("decision priority test failed: 0%% accuracy (0/%d correct)", totalTests) + } + + return nil +} + +func testSinglePrioritySelection(ctx context.Context, testCase DecisionPriorityCase, localPort string, verbose bool) DecisionPriorityResult { + result := DecisionPriorityResult{ + Query: testCase.Query, + ExpectedDecision: testCase.ExpectedDecision, + ExpectedPriority: testCase.ExpectedPriority, + } + + // Create chat completion request + requestBody := map[string]interface{}{ + "model": "MoM", // Use Mixture of Models to trigger decision engine + "messages": []map[string]string{ + {"role": "user", "content": testCase.Query}, + }, + } + + jsonData, err := json.Marshal(requestBody) + if err != nil { + result.Error = fmt.Sprintf("failed to marshal request: %v", err) + return result + } + + // Send request + url := fmt.Sprintf("http://localhost:%s/v1/chat/completions", localPort) + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + 
result.Error = fmt.Sprintf("failed to create request: %v", err) + return result + } + req.Header.Set("Content-Type", "application/json") + + httpClient := &http.Client{Timeout: 30 * time.Second} + resp, err := httpClient.Do(req) + if err != nil { + result.Error = fmt.Sprintf("failed to send request: %v", err) + return result + } + defer resp.Body.Close() + + // Check response status + if resp.StatusCode != http.StatusOK { + bodyBytes, _ := io.ReadAll(resp.Body) + result.Error = fmt.Sprintf("HTTP %d: %s", resp.StatusCode, string(bodyBytes)) + + if verbose { + fmt.Printf("[Test] ✗ HTTP %d Error for query: %s\n", resp.StatusCode, testCase.Query) + fmt.Printf(" Expected decision: %s (priority %d)\n", testCase.ExpectedDecision, testCase.ExpectedPriority) + fmt.Printf(" Response: %s\n", string(bodyBytes)) + } + + return result + } + + // Extract VSR decision headers + result.ActualDecision = resp.Header.Get("x-vsr-selected-decision") + result.ActualPriority = resp.Header.Get("x-vsr-decision-priority") + + // Check if the highest priority decision was selected + // Validate both decision name AND priority value + decisionMatches := (result.ActualDecision == testCase.ExpectedDecision) + priorityMatches := (result.ActualPriority == fmt.Sprintf("%d", testCase.ExpectedPriority)) + result.Correct = decisionMatches && priorityMatches + + if verbose { + if result.Correct { + fmt.Printf("[Test] ✓ Correct priority selection: %s (priority %s)\n", result.ActualDecision, result.ActualPriority) + } else { + fmt.Printf("[Test] ✗ Wrong decision or priority\n") + fmt.Printf(" Query: %s\n", testCase.Query) + fmt.Printf(" Expected: %s (priority %d)\n", testCase.ExpectedDecision, testCase.ExpectedPriority) + fmt.Printf(" Actual: %s (priority %s)\n", result.ActualDecision, result.ActualPriority) + if !decisionMatches { + fmt.Printf(" ⚠ Decision mismatch\n") + } + if !priorityMatches { + fmt.Printf(" ⚠ Priority mismatch\n") + } + fmt.Printf(" Description: %s\n", testCase.Description) + } + } 
+ + return result +} + +func printDecisionPriorityResults(results []DecisionPriorityResult, totalTests, correctTests int, accuracy float64) { + separator := "================================================================================" + fmt.Println("\n" + separator) + fmt.Println("DECISION PRIORITY SELECTION TEST RESULTS") + fmt.Println(separator) + fmt.Printf("Total Tests: %d\n", totalTests) + fmt.Printf("Correct Selections: %d\n", correctTests) + fmt.Printf("Accuracy Rate: %.2f%%\n", accuracy) + fmt.Println(separator) + + // Print failed cases + failedCount := 0 + for _, result := range results { + if !result.Correct && result.Error == "" { + failedCount++ + } + } + + if failedCount > 0 { + fmt.Println("\nFailed Priority Selections:") + for _, result := range results { + if !result.Correct && result.Error == "" { + fmt.Printf(" - Query: %s\n", result.Query) + fmt.Printf(" Expected Decision: %s (priority %d)\n", result.ExpectedDecision, result.ExpectedPriority) + fmt.Printf(" Actual Decision: %s (priority %s)\n", result.ActualDecision, result.ActualPriority) + } + } + } + + // Print errors + errorCount := 0 + for _, result := range results { + if result.Error != "" { + errorCount++ + } + } + + if errorCount > 0 { + fmt.Println("\nErrors:") + for _, result := range results { + if result.Error != "" { + fmt.Printf(" - Query: %s\n", result.Query) + fmt.Printf(" Error: %s\n", result.Error) + } + } + } + + fmt.Println(separator + "\n") +} diff --git a/e2e/testcases/keyword_routing.go b/e2e/testcases/keyword_routing.go new file mode 100644 index 000000000..1b925ef6c --- /dev/null +++ b/e2e/testcases/keyword_routing.go @@ -0,0 +1,308 @@ +package testcases + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "time" + + pkgtestcases "github.com/vllm-project/semantic-router/e2e/pkg/testcases" + "k8s.io/client-go/kubernetes" +) + +func init() { + pkgtestcases.Register("keyword-routing", pkgtestcases.TestCase{ + Description: "Test 
keyword-based routing with case sensitivity and operators", + Tags: []string{"signal-decision", "keyword", "routing"}, + Fn: testKeywordRouting, + }) +} + +// KeywordRoutingCase represents a test case for keyword-based routing +type KeywordRoutingCase struct { + Query string `json:"query"` + ExpectedDecision string `json:"expected_decision"` + MatchingKeywords []string `json:"matching_keywords"` + CaseSensitive bool `json:"case_sensitive"` + Description string `json:"description"` +} + +// KeywordRoutingResult tracks the result of a single keyword routing test +type KeywordRoutingResult struct { + Query string + ExpectedDecision string + ActualDecision string + MatchingKeywords []string + Correct bool + Error string +} + +func testKeywordRouting(ctx context.Context, client *kubernetes.Clientset, opts pkgtestcases.TestCaseOptions) error { + if opts.Verbose { + fmt.Println("[Test] Testing keyword-based routing") + } + + // Setup service connection and get local port + localPort, stopPortForward, err := setupServiceConnection(ctx, client, opts) + if err != nil { + return err + } + defer stopPortForward() + + // Define test cases for keyword routing + // Based on the keyword_rules configuration: ["urgent", "immediate", "asap", "think", "careful"] + testCases := []KeywordRoutingCase{ + { + Query: "This is URGENT and needs immediate attention", + ExpectedDecision: "thinking_decision", + MatchingKeywords: []string{"urgent", "immediate"}, + CaseSensitive: false, + Description: "Uppercase keywords should match (case-insensitive)", + }, + { + Query: "Please think carefully about this problem", + ExpectedDecision: "thinking_decision", + MatchingKeywords: []string{"think", "careful"}, + CaseSensitive: false, + Description: "Multiple thinking keywords should trigger thinking decision", + }, + { + Query: "We need this done ASAP", + ExpectedDecision: "thinking_decision", + MatchingKeywords: []string{"asap"}, + CaseSensitive: false, + Description: "Single ASAP keyword should 
trigger thinking decision", + }, + { + Query: "urgent: please think about this immediately", + ExpectedDecision: "thinking_decision", + MatchingKeywords: []string{"urgent", "think", "immediately"}, + CaseSensitive: false, + Description: "Multiple keywords in one query should match", + }, + { + Query: "What is 2 + 2?", + ExpectedDecision: "math_decision", + MatchingKeywords: []string{}, + CaseSensitive: false, + Description: "Query without thinking keywords should not trigger thinking decision", + }, + { + Query: "I need you to think through this step by step carefully", + ExpectedDecision: "thinking_decision", + MatchingKeywords: []string{"think", "careful"}, + CaseSensitive: false, + Description: "Embedded keywords should be detected", + }, + } + + // Run keyword routing tests + var results []KeywordRoutingResult + totalTests := 0 + correctTests := 0 + + for _, testCase := range testCases { + totalTests++ + result := testSingleKeywordRouting(ctx, testCase, localPort, opts.Verbose) + results = append(results, result) + if result.Correct { + correctTests++ + } + + // Small delay between tests + time.Sleep(500 * time.Millisecond) + } + + // Calculate accuracy + accuracy := float64(correctTests) / float64(totalTests) * 100 + + // Set details for reporting + if opts.SetDetails != nil { + opts.SetDetails(map[string]interface{}{ + "total_tests": totalTests, + "correct_tests": correctTests, + "accuracy_rate": fmt.Sprintf("%.2f%%", accuracy), + "failed_tests": totalTests - correctTests, + }) + } + + // Print results + printKeywordRoutingResults(results, totalTests, correctTests, accuracy) + + if opts.Verbose { + fmt.Printf("[Test] Keyword routing test completed: %d/%d correct (%.2f%% accuracy)\n", + correctTests, totalTests, accuracy) + } + + // Return error if accuracy is below threshold + if correctTests == 0 { + return fmt.Errorf("keyword routing test failed: 0%% accuracy (0/%d correct)", totalTests) + } + + return nil +} + +func testSingleKeywordRouting(ctx 
context.Context, testCase KeywordRoutingCase, localPort string, verbose bool) KeywordRoutingResult { + result := KeywordRoutingResult{ + Query: testCase.Query, + ExpectedDecision: testCase.ExpectedDecision, + MatchingKeywords: testCase.MatchingKeywords, + } + + // Create chat completion request + requestBody := map[string]interface{}{ + "model": "MoM", // Use Mixture of Models to trigger decision engine + "messages": []map[string]string{ + {"role": "user", "content": testCase.Query}, + }, + } + + jsonData, err := json.Marshal(requestBody) + if err != nil { + result.Error = fmt.Sprintf("failed to marshal request: %v", err) + return result + } + + // Send request + url := fmt.Sprintf("http://localhost:%s/v1/chat/completions", localPort) + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + result.Error = fmt.Sprintf("failed to create request: %v", err) + return result + } + req.Header.Set("Content-Type", "application/json") + + httpClient := &http.Client{Timeout: 30 * time.Second} + resp, err := httpClient.Do(req) + if err != nil { + result.Error = fmt.Sprintf("failed to send request: %v", err) + return result + } + defer resp.Body.Close() + + // Check response status + if resp.StatusCode != http.StatusOK { + bodyBytes, _ := io.ReadAll(resp.Body) + result.Error = fmt.Sprintf("HTTP %d: %s", resp.StatusCode, string(bodyBytes)) + + if verbose { + fmt.Printf("[Test] ✗ HTTP %d Error for query: %s\n", resp.StatusCode, testCase.Query) + fmt.Printf(" Expected decision: %s\n", testCase.ExpectedDecision) + fmt.Printf(" Response: %s\n", string(bodyBytes)) + } + + return result + } + + // Extract VSR decision headers + result.ActualDecision = resp.Header.Get("x-vsr-selected-decision") + + // Check if the correct decision was selected + result.Correct = (result.ActualDecision == testCase.ExpectedDecision) + + if verbose { + if result.Correct { + fmt.Printf("[Test] ✓ Keyword routing correct\n") + fmt.Printf(" Query: %s\n", 
truncateString(testCase.Query, 60)) + fmt.Printf(" Decision: %s\n", result.ActualDecision) + if len(testCase.MatchingKeywords) > 0 { + fmt.Printf(" Matching Keywords: %v\n", testCase.MatchingKeywords) + } + } else { + fmt.Printf("[Test] ✗ Keyword routing incorrect\n") + fmt.Printf(" Query: %s\n", testCase.Query) + fmt.Printf(" Expected: %s, Actual: %s\n", testCase.ExpectedDecision, result.ActualDecision) + if len(testCase.MatchingKeywords) > 0 { + fmt.Printf(" Expected Keywords: %v\n", testCase.MatchingKeywords) + } + fmt.Printf(" Description: %s\n", testCase.Description) + } + } + + return result +} + +func printKeywordRoutingResults(results []KeywordRoutingResult, totalTests, correctTests int, accuracy float64) { + separator := "================================================================================" + fmt.Println("\n" + separator) + fmt.Println("KEYWORD ROUTING TEST RESULTS") + fmt.Println(separator) + fmt.Printf("Total Tests: %d\n", totalTests) + fmt.Printf("Correct Routings: %d\n", correctTests) + fmt.Printf("Accuracy Rate: %.2f%%\n", accuracy) + fmt.Println(separator) + + // Print summary by keyword presence + keywordTests := 0 + keywordCorrect := 0 + noKeywordTests := 0 + noKeywordCorrect := 0 + + for _, result := range results { + if len(result.MatchingKeywords) > 0 { + keywordTests++ + if result.Correct { + keywordCorrect++ + } + } else { + noKeywordTests++ + if result.Correct { + noKeywordCorrect++ + } + } + } + + fmt.Println("\nTest Breakdown:") + if keywordTests > 0 { + keywordAccuracy := float64(keywordCorrect) / float64(keywordTests) * 100 + fmt.Printf(" - With Keywords: %d/%d (%.2f%%)\n", keywordCorrect, keywordTests, keywordAccuracy) + } + if noKeywordTests > 0 { + noKeywordAccuracy := float64(noKeywordCorrect) / float64(noKeywordTests) * 100 + fmt.Printf(" - Without Keywords: %d/%d (%.2f%%)\n", noKeywordCorrect, noKeywordTests, noKeywordAccuracy) + } + + // Print failed cases + failedCount := 0 + for _, result := range results { + if 
!result.Correct && result.Error == "" { + failedCount++ + } + } + + if failedCount > 0 { + fmt.Println("\nFailed Keyword Routings:") + for _, result := range results { + if !result.Correct && result.Error == "" { + fmt.Printf(" - Query: %s\n", truncateString(result.Query, 70)) + fmt.Printf(" Expected: %s, Actual: %s\n", result.ExpectedDecision, result.ActualDecision) + if len(result.MatchingKeywords) > 0 { + fmt.Printf(" Expected Keywords: %v\n", result.MatchingKeywords) + } + } + } + } + + // Print errors + errorCount := 0 + for _, result := range results { + if result.Error != "" { + errorCount++ + } + } + + if errorCount > 0 { + fmt.Println("\nErrors:") + for _, result := range results { + if result.Error != "" { + fmt.Printf(" - Query: %s\n", truncateString(result.Query, 70)) + fmt.Printf(" Error: %s\n", result.Error) + } + } + } + + fmt.Println(separator + "\n") +} diff --git a/e2e/testcases/plugin_chain_execution.go b/e2e/testcases/plugin_chain_execution.go new file mode 100644 index 000000000..8c1099e4f --- /dev/null +++ b/e2e/testcases/plugin_chain_execution.go @@ -0,0 +1,314 @@ +package testcases + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "time" + + pkgtestcases "github.com/vllm-project/semantic-router/e2e/pkg/testcases" + "k8s.io/client-go/kubernetes" +) + +func init() { + pkgtestcases.Register("plugin-chain-execution", pkgtestcases.TestCase{ + Description: "Test plugin chain execution order and blocking behavior", + Tags: []string{"signal-decision", "plugin", "pii"}, + Fn: testPluginChainExecution, + }) +} + +// PluginChainCase represents a test case for plugin chain execution +type PluginChainCase struct { + Query string `json:"query"` + ExpectPIIBlock bool `json:"expect_pii_block"` + ExpectCacheUsed bool `json:"expect_cache_used"` + ExpectPromptApplied bool `json:"expect_prompt_applied"` + Description string `json:"description"` + PIITypes []string `json:"pii_types"` // Expected PII types detected +} + +// 
PluginChainResult tracks the result of a single plugin chain test +type PluginChainResult struct { + Query string + PIIBlocked bool + PIIDetected string + CacheHit bool + PromptApplied bool + StatusCode int + ExpectPIIBlock bool + ExpectCacheUsed bool + ExpectPromptApplied bool + Correct bool + Error string +} + +func testPluginChainExecution(ctx context.Context, client *kubernetes.Clientset, opts pkgtestcases.TestCaseOptions) error { + if opts.Verbose { + fmt.Println("[Test] Testing plugin chain execution order and blocking") + } + + // Setup service connection and get local port + localPort, stopPortForward, err := setupServiceConnection(ctx, client, opts) + if err != nil { + return err + } + defer stopPortForward() + + // Define test cases + testCases := []PluginChainCase{ + { + Query: "My social security number is 123-45-6789", + ExpectPIIBlock: true, + ExpectCacheUsed: false, // PII blocks before cache + ExpectPromptApplied: false, // PII blocks before prompt + Description: "PII (SSN) should block entire plugin chain", + PIITypes: []string{"US_SSN"}, + }, + { + Query: "Contact me at john.doe@example.com", + ExpectPIIBlock: true, + ExpectCacheUsed: false, + ExpectPromptApplied: false, + Description: "PII (EMAIL) should block entire plugin chain", + PIITypes: []string{"EMAIL"}, + }, + { + Query: "What is 5 + 7?", + ExpectPIIBlock: false, + ExpectCacheUsed: false, // First request, cache miss + ExpectPromptApplied: true, // Should apply math expert prompt + Description: "Clean query should pass PII and apply prompt", + }, + { + Query: "Tell me about photosynthesis", + ExpectPIIBlock: false, + ExpectCacheUsed: false, + ExpectPromptApplied: true, + Description: "Biology query should pass PII plugin", + }, + } + + // Run plugin chain tests + var results []PluginChainResult + totalTests := 0 + correctTests := 0 + + for _, testCase := range testCases { + totalTests++ + result := testSinglePluginChain(ctx, testCase, localPort, opts.Verbose) + results = append(results, 
result) + if result.Correct { + correctTests++ + } + + // Small delay between tests to avoid overwhelming the system + time.Sleep(500 * time.Millisecond) + } + + // Calculate accuracy + accuracy := float64(correctTests) / float64(totalTests) * 100 + + // Set details for reporting + if opts.SetDetails != nil { + opts.SetDetails(map[string]interface{}{ + "total_tests": totalTests, + "correct_tests": correctTests, + "accuracy_rate": fmt.Sprintf("%.2f%%", accuracy), + "failed_tests": totalTests - correctTests, + }) + } + + // Print results + printPluginChainResults(results, totalTests, correctTests, accuracy) + + if opts.Verbose { + fmt.Printf("[Test] Plugin chain execution test completed: %d/%d correct (%.2f%% accuracy)\n", + correctTests, totalTests, accuracy) + } + + // Return error if accuracy is below threshold + if correctTests == 0 { + return fmt.Errorf("plugin chain execution test failed: 0%% accuracy (0/%d correct)", totalTests) + } + + return nil +} + +func testSinglePluginChain(ctx context.Context, testCase PluginChainCase, localPort string, verbose bool) PluginChainResult { + result := PluginChainResult{ + Query: testCase.Query, + ExpectPIIBlock: testCase.ExpectPIIBlock, + ExpectCacheUsed: testCase.ExpectCacheUsed, + ExpectPromptApplied: testCase.ExpectPromptApplied, + } + + // Create chat completion request + requestBody := map[string]interface{}{ + "model": "MoM", // Use Mixture of Models to trigger decision engine + "messages": []map[string]string{ + {"role": "user", "content": testCase.Query}, + }, + } + + jsonData, err := json.Marshal(requestBody) + if err != nil { + result.Error = fmt.Sprintf("failed to marshal request: %v", err) + return result + } + + // Send request + url := fmt.Sprintf("http://localhost:%s/v1/chat/completions", localPort) + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + result.Error = fmt.Sprintf("failed to create request: %v", err) + return result + } + 
req.Header.Set("Content-Type", "application/json") + + httpClient := &http.Client{Timeout: 30 * time.Second} + resp, err := httpClient.Do(req) + if err != nil { + result.Error = fmt.Sprintf("failed to send request: %v", err) + return result + } + defer resp.Body.Close() + + result.StatusCode = resp.StatusCode + + // Extract plugin execution headers + result.PIIDetected = resp.Header.Get("x-vsr-pii-detected") + result.PIIBlocked = (resp.StatusCode == http.StatusForbidden || result.PIIDetected != "") + + // Check cache headers (x-vsr-cache-hit or similar) + cacheHeader := resp.Header.Get("x-vsr-cache-hit") + result.CacheHit = (cacheHeader == "true") + + // Check if system prompt was applied (x-vsr-selected-decision indicates routing happened) + selectedDecision := resp.Header.Get("x-vsr-selected-decision") + result.PromptApplied = (selectedDecision != "" && !result.PIIBlocked) + + // Determine correctness based on expectations + piiCorrect := (result.PIIBlocked == testCase.ExpectPIIBlock) + cacheCorrect := true // We don't strictly enforce cache behavior in this test + promptCorrect := true + + // If PII was expected to block, other plugins shouldn't run + if testCase.ExpectPIIBlock { + promptCorrect = !result.PromptApplied // Prompt should NOT be applied if PII blocked + } + + result.Correct = piiCorrect && cacheCorrect && promptCorrect + + if verbose { + if result.Correct { + fmt.Printf("[Test] ✓ Plugin chain executed correctly\n") + fmt.Printf(" Query: %s\n", truncateString(testCase.Query, 60)) + fmt.Printf(" PII Blocked: %v (expected: %v)\n", result.PIIBlocked, testCase.ExpectPIIBlock) + if result.PIIDetected != "" { + fmt.Printf(" PII Detected: %s\n", result.PIIDetected) + } + } else { + fmt.Printf("[Test] ✗ Plugin chain execution failed\n") + fmt.Printf(" Query: %s\n", testCase.Query) + fmt.Printf(" Expected PII Block: %v, Actual: %v\n", testCase.ExpectPIIBlock, result.PIIBlocked) + fmt.Printf(" PII Detected: %s\n", result.PIIDetected) + fmt.Printf(" Status 
Code: %d\n", result.StatusCode) + fmt.Printf(" Description: %s\n", testCase.Description) + } + } + + // Read response body for detailed error + if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusForbidden { + bodyBytes, _ := io.ReadAll(resp.Body) + if verbose { + fmt.Printf(" Response: %s\n", string(bodyBytes)) + } + } + + return result +} + +func printPluginChainResults(results []PluginChainResult, totalTests, correctTests int, accuracy float64) { + separator := "================================================================================" + fmt.Println("\n" + separator) + fmt.Println("PLUGIN CHAIN EXECUTION TEST RESULTS") + fmt.Println(separator) + fmt.Printf("Total Tests: %d\n", totalTests) + fmt.Printf("Correct Executions: %d\n", correctTests) + fmt.Printf("Accuracy Rate: %.2f%%\n", accuracy) + fmt.Println(separator) + + // Print summary by behavior type + piiBlockTests := 0 + piiBlockCorrect := 0 + cleanQueryTests := 0 + cleanQueryCorrect := 0 + + for _, result := range results { + if result.ExpectPIIBlock { + piiBlockTests++ + if result.Correct { + piiBlockCorrect++ + } + } else { + cleanQueryTests++ + if result.Correct { + cleanQueryCorrect++ + } + } + } + + fmt.Println("\nTest Breakdown:") + if piiBlockTests > 0 { + piiBlockAccuracy := float64(piiBlockCorrect) / float64(piiBlockTests) * 100 + fmt.Printf(" - PII Blocking Tests: %d/%d (%.2f%%)\n", piiBlockCorrect, piiBlockTests, piiBlockAccuracy) + } + if cleanQueryTests > 0 { + cleanQueryAccuracy := float64(cleanQueryCorrect) / float64(cleanQueryTests) * 100 + fmt.Printf(" - Clean Query Tests: %d/%d (%.2f%%)\n", cleanQueryCorrect, cleanQueryTests, cleanQueryAccuracy) + } + + // Print failed cases + failedCount := 0 + for _, result := range results { + if !result.Correct && result.Error == "" { + failedCount++ + } + } + + if failedCount > 0 { + fmt.Println("\nFailed Plugin Chain Executions:") + for _, result := range results { + if !result.Correct && result.Error == "" { + fmt.Printf(" - 
Query: %s\n", truncateString(result.Query, 70)) + fmt.Printf(" Expected PII Block: %v, Actual: %v\n", result.ExpectPIIBlock, result.PIIBlocked) + fmt.Printf(" PII Detected: %s\n", result.PIIDetected) + fmt.Printf(" Status Code: %d\n", result.StatusCode) + } + } + } + + // Print errors + errorCount := 0 + for _, result := range results { + if result.Error != "" { + errorCount++ + } + } + + if errorCount > 0 { + fmt.Println("\nErrors:") + for _, result := range results { + if result.Error != "" { + fmt.Printf(" - Query: %s\n", truncateString(result.Query, 70)) + fmt.Printf(" Error: %s\n", result.Error) + } + } + } + + fmt.Println(separator + "\n") +} diff --git a/e2e/testcases/plugin_config_variations.go b/e2e/testcases/plugin_config_variations.go new file mode 100644 index 000000000..7765c818d --- /dev/null +++ b/e2e/testcases/plugin_config_variations.go @@ -0,0 +1,328 @@ +package testcases + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "time" + + pkgtestcases "github.com/vllm-project/semantic-router/e2e/pkg/testcases" + "k8s.io/client-go/kubernetes" +) + +func init() { + pkgtestcases.Register("plugin-config-variations", pkgtestcases.TestCase{ + Description: "Test different plugin configuration variations", + Tags: []string{"signal-decision", "plugin", "configuration"}, + Fn: testPluginConfigVariations, + }) +} + +// PluginConfigCase represents a test case for plugin configuration variations +type PluginConfigCase struct { + Query string `json:"query"` + ExpectedDecision string `json:"expected_decision"` + PluginType string `json:"plugin_type"` // "pii", "semantic-cache", "system_prompt" + ExpectedBehavior string `json:"expected_behavior"` + CacheSimilarity float64 `json:"cache_similarity,omitempty"` + Description string `json:"description"` +} + +// PluginConfigResult tracks the result of a single plugin config test +type PluginConfigResult struct { + Query string + ExpectedDecision string + ActualDecision string + PluginType 
string + ExpectedBehavior string + ActualBehavior string + Correct bool + Error string +} + +func testPluginConfigVariations(ctx context.Context, client *kubernetes.Clientset, opts pkgtestcases.TestCaseOptions) error { + if opts.Verbose { + fmt.Println("[Test] Testing plugin configuration variations") + } + + // Setup service connection and get local port + localPort, stopPortForward, err := setupServiceConnection(ctx, client, opts) + if err != nil { + return err + } + defer stopPortForward() + + // Define test cases for different plugin configurations + testCases := []PluginConfigCase{ + // Semantic cache threshold variations + { + Query: "What is photosynthesis?", + ExpectedDecision: "biology_decision", + PluginType: "semantic-cache", + ExpectedBehavior: "cache_miss", // First request + Description: "First biology query should be cache miss", + }, + { + Query: "Explain the process of photosynthesis", + ExpectedDecision: "biology_decision", + PluginType: "semantic-cache", + ExpectedBehavior: "cache_hit_possible", // Similar query, might hit + Description: "Similar biology query might hit cache depending on threshold", + }, + // Psychology with high cache threshold (0.92) + { + Query: "What is cognitive behavioral therapy?", + ExpectedDecision: "psychology_decision", + PluginType: "semantic-cache", + ExpectedBehavior: "cache_miss", + CacheSimilarity: 0.92, + Description: "Psychology query with strict cache threshold (0.92)", + }, + // Other/general with relaxed cache threshold (0.75) + { + Query: "Tell me something interesting", + ExpectedDecision: "other_decision", + PluginType: "semantic-cache", + ExpectedBehavior: "cache_miss", + CacheSimilarity: 0.75, + Description: "General query with relaxed cache threshold (0.75)", + }, + // System prompt variations + { + Query: "What is 100 divided by 5?", + ExpectedDecision: "math_decision", + PluginType: "system_prompt", + ExpectedBehavior: "prompt_applied", + Description: "Math query should have math expert system prompt 
applied", + }, + { + Query: "Explain Newton's laws of motion", + ExpectedDecision: "physics_decision", + PluginType: "system_prompt", + ExpectedBehavior: "prompt_applied", + Description: "Physics query should have physics expert system prompt applied", + }, + } + + // Run plugin config tests + var results []PluginConfigResult + totalTests := 0 + correctTests := 0 + + for _, testCase := range testCases { + totalTests++ + result := testSinglePluginConfig(ctx, testCase, localPort, opts.Verbose) + results = append(results, result) + if result.Correct { + correctTests++ + } + + // Delay between tests to allow cache to settle + time.Sleep(1 * time.Second) + } + + // Calculate accuracy + accuracy := float64(correctTests) / float64(totalTests) * 100 + + // Set details for reporting + if opts.SetDetails != nil { + opts.SetDetails(map[string]interface{}{ + "total_tests": totalTests, + "correct_tests": correctTests, + "accuracy_rate": fmt.Sprintf("%.2f%%", accuracy), + "failed_tests": totalTests - correctTests, + }) + } + + // Print results + printPluginConfigResults(results, totalTests, correctTests, accuracy) + + if opts.Verbose { + fmt.Printf("[Test] Plugin config variations test completed: %d/%d correct (%.2f%% accuracy)\n", + correctTests, totalTests, accuracy) + } + + // Return error if accuracy is below threshold + if correctTests == 0 { + return fmt.Errorf("plugin config variations test failed: 0%% accuracy (0/%d correct)", totalTests) + } + + return nil +} + +func testSinglePluginConfig(ctx context.Context, testCase PluginConfigCase, localPort string, verbose bool) PluginConfigResult { + result := PluginConfigResult{ + Query: testCase.Query, + ExpectedDecision: testCase.ExpectedDecision, + PluginType: testCase.PluginType, + ExpectedBehavior: testCase.ExpectedBehavior, + } + + // Create chat completion request + requestBody := map[string]interface{}{ + "model": "MoM", // Use Mixture of Models to trigger decision engine + "messages": []map[string]string{ + {"role": 
"user", "content": testCase.Query}, + }, + } + + jsonData, err := json.Marshal(requestBody) + if err != nil { + result.Error = fmt.Sprintf("failed to marshal request: %v", err) + return result + } + + // Send request + url := fmt.Sprintf("http://localhost:%s/v1/chat/completions", localPort) + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + result.Error = fmt.Sprintf("failed to create request: %v", err) + return result + } + req.Header.Set("Content-Type", "application/json") + + httpClient := &http.Client{Timeout: 30 * time.Second} + resp, err := httpClient.Do(req) + if err != nil { + result.Error = fmt.Sprintf("failed to send request: %v", err) + return result + } + defer resp.Body.Close() + + // Check response status + if resp.StatusCode != http.StatusOK { + bodyBytes, _ := io.ReadAll(resp.Body) + result.Error = fmt.Sprintf("HTTP %d: %s", resp.StatusCode, string(bodyBytes)) + + if verbose { + fmt.Printf("[Test] ✗ HTTP %d Error for query: %s\n", resp.StatusCode, testCase.Query) + fmt.Printf(" Expected decision: %s\n", testCase.ExpectedDecision) + fmt.Printf(" Response: %s\n", string(bodyBytes)) + } + + return result + } + + // Extract VSR decision headers + result.ActualDecision = resp.Header.Get("x-vsr-selected-decision") + + // Determine actual behavior based on plugin type + switch testCase.PluginType { + case "semantic-cache": + cacheHit := resp.Header.Get("x-vsr-cache-hit") + if cacheHit == "true" { + result.ActualBehavior = "cache_hit" + } else { + result.ActualBehavior = "cache_miss" + } + // For "cache_hit_possible", we accept either hit or miss + if testCase.ExpectedBehavior == "cache_hit_possible" { + result.Correct = (result.ActualDecision == testCase.ExpectedDecision) + } else { + result.Correct = (result.ActualDecision == testCase.ExpectedDecision && + result.ActualBehavior == testCase.ExpectedBehavior) + } + + case "system_prompt": + // If decision matched, system prompt should have been applied 
+ result.ActualBehavior = "prompt_applied" + result.Correct = (result.ActualDecision == testCase.ExpectedDecision) + + default: + result.Correct = (result.ActualDecision == testCase.ExpectedDecision) + } + + if verbose { + if result.Correct { + fmt.Printf("[Test] ✓ Plugin config correct\n") + fmt.Printf(" Query: %s\n", truncateString(testCase.Query, 60)) + fmt.Printf(" Decision: %s\n", result.ActualDecision) + fmt.Printf(" Plugin: %s, Behavior: %s\n", testCase.PluginType, result.ActualBehavior) + } else { + fmt.Printf("[Test] ✗ Plugin config incorrect\n") + fmt.Printf(" Query: %s\n", testCase.Query) + fmt.Printf(" Expected Decision: %s, Actual: %s\n", testCase.ExpectedDecision, result.ActualDecision) + fmt.Printf(" Plugin: %s\n", testCase.PluginType) + fmt.Printf(" Expected Behavior: %s, Actual: %s\n", testCase.ExpectedBehavior, result.ActualBehavior) + fmt.Printf(" Description: %s\n", testCase.Description) + } + } + + return result +} + +func printPluginConfigResults(results []PluginConfigResult, totalTests, correctTests int, accuracy float64) { + separator := "================================================================================" + fmt.Println("\n" + separator) + fmt.Println("PLUGIN CONFIGURATION VARIATIONS TEST RESULTS") + fmt.Println(separator) + fmt.Printf("Total Tests: %d\n", totalTests) + fmt.Printf("Correct Configurations: %d\n", correctTests) + fmt.Printf("Accuracy Rate: %.2f%%\n", accuracy) + fmt.Println(separator) + + // Print summary by plugin type + pluginStats := make(map[string]struct { + total int + correct int + }) + + for _, result := range results { + stats := pluginStats[result.PluginType] + stats.total++ + if result.Correct { + stats.correct++ + } + pluginStats[result.PluginType] = stats + } + + fmt.Println("\nTest Breakdown by Plugin Type:") + for pluginType, stats := range pluginStats { + pluginAccuracy := float64(stats.correct) / float64(stats.total) * 100 + fmt.Printf(" - %-20s: %d/%d (%.2f%%)\n", pluginType, stats.correct, 
stats.total, pluginAccuracy) + } + + // Print failed cases + failedCount := 0 + for _, result := range results { + if !result.Correct && result.Error == "" { + failedCount++ + } + } + + if failedCount > 0 { + fmt.Println("\nFailed Plugin Configurations:") + for _, result := range results { + if !result.Correct && result.Error == "" { + fmt.Printf(" - Query: %s\n", truncateString(result.Query, 70)) + fmt.Printf(" Plugin: %s\n", result.PluginType) + fmt.Printf(" Expected Decision: %s, Actual: %s\n", result.ExpectedDecision, result.ActualDecision) + fmt.Printf(" Expected Behavior: %s, Actual: %s\n", result.ExpectedBehavior, result.ActualBehavior) + } + } + } + + // Print errors + errorCount := 0 + for _, result := range results { + if result.Error != "" { + errorCount++ + } + } + + if errorCount > 0 { + fmt.Println("\nErrors:") + for _, result := range results { + if result.Error != "" { + fmt.Printf(" - Query: %s\n", truncateString(result.Query, 70)) + fmt.Printf(" Plugin: %s\n", result.PluginType) + fmt.Printf(" Error: %s\n", result.Error) + } + } + } + + fmt.Println(separator + "\n") +} diff --git a/e2e/testcases/rule_condition_logic.go b/e2e/testcases/rule_condition_logic.go new file mode 100644 index 000000000..fbc9b66d8 --- /dev/null +++ b/e2e/testcases/rule_condition_logic.go @@ -0,0 +1,293 @@ +package testcases + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "time" + + pkgtestcases "github.com/vllm-project/semantic-router/e2e/pkg/testcases" + "k8s.io/client-go/kubernetes" +) + +func init() { + pkgtestcases.Register("rule-condition-logic", pkgtestcases.TestCase{ + Description: "Test rule condition logic with AND/OR operators", + Tags: []string{"signal-decision", "rules", "conditions"}, + Fn: testRuleConditionLogic, + }) +} + +// RuleConditionCase represents a test case for rule condition logic +type RuleConditionCase struct { + Query string `json:"query"` + ExpectedMatch bool `json:"expected_match"` + ExpectedDecision string 
`json:"expected_decision"` + RuleOperator string `json:"rule_operator"` // "AND" or "OR" + RequiredConditions []string `json:"required_conditions"` + Description string `json:"description"` +} + +// RuleConditionResult tracks the result of a single rule condition test +type RuleConditionResult struct { + Query string + ExpectedMatch bool + ActualMatch bool + ExpectedDecision string + ActualDecision string + Correct bool + Error string +} + +func testRuleConditionLogic(ctx context.Context, client *kubernetes.Clientset, opts pkgtestcases.TestCaseOptions) error { + if opts.Verbose { + fmt.Println("[Test] Testing rule condition logic with AND/OR operators") + } + + // Setup service connection and get local port + localPort, stopPortForward, err := setupServiceConnection(ctx, client, opts) + if err != nil { + return err + } + defer stopPortForward() + + // Define test cases for different rule operators + testCases := []RuleConditionCase{ + // OR operator tests - any condition matches + { + Query: "Think carefully about this urgent business problem", + ExpectedMatch: true, + ExpectedDecision: "thinking_decision", // Has keywords: "think", "urgent" + RuleOperator: "OR", + RequiredConditions: []string{"keyword:thinking", "domain:thinking"}, + Description: "Query with thinking keywords should match thinking decision (OR operator)", + }, + { + Query: "Calculate 25 * 4", + ExpectedMatch: true, + ExpectedDecision: "math_decision", + RuleOperator: "OR", + RequiredConditions: []string{"domain:math"}, + Description: "Math query should match math decision (OR with single domain condition)", + }, + { + Query: "Tell me about physics and quantum mechanics", + ExpectedMatch: true, + ExpectedDecision: "physics_decision", + RuleOperator: "OR", + RequiredConditions: []string{"domain:physics"}, + Description: "Physics query should match physics decision", + }, + // AND operator tests - both conditions must match + { + Query: "What is the capital of France?", + ExpectedMatch: false, + 
ExpectedDecision: "other_decision", // Falls back to general + RuleOperator: "AND", + RequiredConditions: []string{"keyword:urgent", "domain:business"}, + Description: "Query without urgent keyword should not match AND rule requiring both", + }, + // Keyword matching tests (case-insensitive) + { + Query: "This is URGENT and needs immediate attention", + ExpectedMatch: true, + ExpectedDecision: "thinking_decision", // Keywords: "urgent", "immediate" + RuleOperator: "OR", + RequiredConditions: []string{"keyword:urgent", "keyword:immediate"}, + Description: "Uppercase keywords should match (case-insensitive)", + }, + { + Query: "Please think about this carefully", + ExpectedMatch: true, + ExpectedDecision: "thinking_decision", // Keyword: "think", "careful" + RuleOperator: "OR", + RequiredConditions: []string{"keyword:think", "keyword:careful"}, + Description: "Multiple thinking keywords should trigger thinking decision", + }, + } + + // Run rule condition tests + var results []RuleConditionResult + totalTests := 0 + correctTests := 0 + + for _, testCase := range testCases { + totalTests++ + result := testSingleRuleCondition(ctx, testCase, localPort, opts.Verbose) + results = append(results, result) + if result.Correct { + correctTests++ + } + + // Small delay between tests + time.Sleep(500 * time.Millisecond) + } + + // Calculate accuracy + accuracy := float64(correctTests) / float64(totalTests) * 100 + + // Set details for reporting + if opts.SetDetails != nil { + opts.SetDetails(map[string]interface{}{ + "total_tests": totalTests, + "correct_tests": correctTests, + "accuracy_rate": fmt.Sprintf("%.2f%%", accuracy), + "failed_tests": totalTests - correctTests, + }) + } + + // Print results + printRuleConditionResults(results, totalTests, correctTests, accuracy) + + if opts.Verbose { + fmt.Printf("[Test] Rule condition logic test completed: %d/%d correct (%.2f%% accuracy)\n", + correctTests, totalTests, accuracy) + } + + // Return error if accuracy is below threshold 
+	if correctTests == 0 {
+		return fmt.Errorf("rule condition logic test failed: 0%% accuracy (0/%d correct)", totalTests)
+	}
+
+	return nil
+}
+
+// testSingleRuleCondition sends one chat-completion request carrying
+// testCase.Query to the router at localhost:localPort and inspects the
+// "x-vsr-selected-decision" response header.
+//
+// The returned result always carries the query and the expectations copied
+// from testCase. Marshalling, transport and non-200 failures are recorded in
+// result.Error and leave result.Correct false. Otherwise result.Correct is true
+// exactly when the reported decision equals testCase.ExpectedDecision.
+//
+// When verbose is true a per-case summary is printed to stdout.
+func testSingleRuleCondition(ctx context.Context, testCase RuleConditionCase, localPort string, verbose bool) RuleConditionResult {
+	result := RuleConditionResult{
+		Query:            testCase.Query,
+		ExpectedMatch:    testCase.ExpectedMatch,
+		ExpectedDecision: testCase.ExpectedDecision,
+	}
+
+	// Create chat completion request
+	requestBody := map[string]interface{}{
+		"model": "MoM", // Use Mixture of Models to trigger decision engine
+		"messages": []map[string]string{
+			{"role": "user", "content": testCase.Query},
+		},
+	}
+
+	jsonData, err := json.Marshal(requestBody)
+	if err != nil {
+		result.Error = fmt.Sprintf("failed to marshal request: %v", err)
+		return result
+	}
+
+	// Send request
+	endpoint := fmt.Sprintf("http://localhost:%s/v1/chat/completions", localPort)
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewBuffer(jsonData))
+	if err != nil {
+		result.Error = fmt.Sprintf("failed to create request: %v", err)
+		return result
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	httpClient := &http.Client{Timeout: 30 * time.Second}
+	resp, err := httpClient.Do(req)
+	if err != nil {
+		result.Error = fmt.Sprintf("failed to send request: %v", err)
+		return result
+	}
+	defer func() {
+		// Drain any unread body before closing so the underlying connection
+		// can be reused by the following test cases. Errors are ignored on
+		// purpose: the verdict has already been derived by this point.
+		_, _ = io.Copy(io.Discard, resp.Body)
+		resp.Body.Close()
+	}()
+
+	// Check response status
+	if resp.StatusCode != http.StatusOK {
+		// The body is only used as diagnostic text, so a read error is
+		// deliberately ignored and whatever was read is reported.
+		bodyBytes, _ := io.ReadAll(resp.Body)
+		result.Error = fmt.Sprintf("HTTP %d: %s", resp.StatusCode, string(bodyBytes))
+
+		if verbose {
+			fmt.Printf("[Test] ✗ HTTP %d Error for query: %s\n", resp.StatusCode, testCase.Query)
+			fmt.Printf("  Expected decision: %s\n", testCase.ExpectedDecision)
+			fmt.Printf("  Response: %s\n", string(bodyBytes))
+		}
+
+		return result
+	}
+
+	// Extract VSR decision headers
+	result.ActualDecision = resp.Header.Get("x-vsr-selected-decision")
+
+	// A case passes exactly when the router selected the decision the case
+	// names. ExpectedMatch must not invert this check: a case that expects the
+	// rule NOT to match names the fallback decision it should land on (e.g.
+	// "other_decision"), so reaching that decision is the correct outcome.
+	decisionOK := result.ActualDecision == testCase.ExpectedDecision
+	result.Correct = decisionOK
+
+	// Only the selected-decision header is read here, so the rule's match
+	// state is inferred: it agrees with ExpectedMatch when the expected
+	// decision was selected and disagrees otherwise.
+	result.ActualMatch = decisionOK == testCase.ExpectedMatch
+
+	if verbose {
+		if result.Correct {
+			fmt.Printf("[Test] ✓ Rule condition evaluated correctly\n")
+			fmt.Printf("  Query: %s\n", truncateString(testCase.Query, 60))
+			fmt.Printf("  Decision: %s\n", result.ActualDecision)
+			fmt.Printf("  Operator: %s, Conditions: %v\n", testCase.RuleOperator, testCase.RequiredConditions)
+		} else {
+			fmt.Printf("[Test] ✗ Rule condition evaluation failed\n")
+			fmt.Printf("  Query: %s\n", testCase.Query)
+			fmt.Printf("  Expected Decision: %s, Actual: %s\n", testCase.ExpectedDecision, result.ActualDecision)
+			fmt.Printf("  Expected Match: %v, Actual: %v\n", testCase.ExpectedMatch, result.ActualMatch)
+			fmt.Printf("  Operator: %s\n", testCase.RuleOperator)
+			fmt.Printf("  Required Conditions: %v\n", testCase.RequiredConditions)
+			fmt.Printf("  Description: %s\n", testCase.Description)
+		}
+	}
+
+	return result
+}
+
+// printRuleConditionResults writes the run summary to stdout: the totals and
+// accuracy, then every case that completed but was judged incorrect, then
+// every case that recorded an error. Each of the two detail sections is
+// omitted when it would be empty.
+func printRuleConditionResults(results []RuleConditionResult, totalTests, correctTests int, accuracy float64) {
+	separator := "================================================================================"
+	fmt.Println("\n" + separator)
+	fmt.Println("RULE CONDITION LOGIC TEST RESULTS")
+	fmt.Println(separator)
+	fmt.Printf("Total Tests: %d\n", totalTests)
+	fmt.Printf("Correct Evaluations: %d\n", correctTests)
+	fmt.Printf("Accuracy Rate: %.2f%%\n", accuracy)
+	fmt.Println(separator)
+
+	// Classify once: a recorded error takes precedence, so an errored case is
+	// never also listed as a failed evaluation.
+	var failed, errored []RuleConditionResult
+	for _, r := range results {
+		switch {
+		case r.Error != "":
+			errored = append(errored, r)
+		case !r.Correct:
+			failed = append(failed, r)
+		}
+	}
+
+	// Print failed cases
+	if len(failed) > 0 {
+		fmt.Println("\nFailed Rule Evaluations:")
+		for _, r := range failed {
+			fmt.Printf("  - Query: %s\n", truncateString(r.Query, 70))
+			fmt.Printf("    Expected Decision: %s\n", r.ExpectedDecision)
+			fmt.Printf("    Actual Decision: %s\n", r.ActualDecision)
+			fmt.Printf("    Expected Match: %v, Actual: %v\n", r.ExpectedMatch, r.ActualMatch)
+		}
+	}
+
+	// Print errors
+	if len(errored) > 0 {
+		fmt.Println("\nErrors:")
+		for _, r := range errored {
+			fmt.Printf("  - Query: %s\n", truncateString(r.Query, 70))
+			fmt.Printf("    Error: %s\n", r.Error)
+		}
+	}
+
+	fmt.Println(separator + "\n")
+}
diff --git a/website/docs/api/crd-reference.md b/website/docs/api/crd-reference.md index c28d9f697..ea8f47a2e 100644 --- a/website/docs/api/crd-reference.md +++ b/website/docs/api/crd-reference.md @@ -7,30 +7,26 @@ description: Kubernetes Custom Resource Definitions (CRDs) API reference for vLL # API Reference ## Packages -- [vllm.ai/v1alpha1](#vllmaiv1alpha1) +- [vllm.ai/v1alpha1](#vllmaiv1alpha1) ## vllm.ai/v1alpha1 Package v1alpha1 contains API Schema definitions for the v1alpha1 API group ### Resource Types + - [IntelligentPool](#intelligentpool) - [IntelligentPoolList](#intelligentpoollist) - [IntelligentRoute](#intelligentroute) - [IntelligentRouteList](#intelligentroutelist) - - #### Decision - - Decision defines a routing decision based on rule combinations - - _Appears in:_ + - [IntelligentRouteSpec](#intelligentroutespec) | Field | Description | Default | Validation | @@ -42,16 +38,12 @@ _Appears in:_ | `modelRefs` _[ModelRef](#modelref) array_ | ModelRefs defines the model references for this decision (currently only one model is supported) | | MaxItems: 1
MinItems: 1
Required: \{\}
| | `plugins` _[DecisionPlugin](#decisionplugin) array_ | Plugins defines the plugins to apply for this decision | | MaxItems: 10
| - #### DecisionPlugin - - DecisionPlugin defines a plugin configuration for a decision - - _Appears in:_ + - [Decision](#decision) | Field | Description | Default | Validation | @@ -59,16 +51,12 @@ _Appears in:_ | `type` _string_ | Type is the plugin type (semantic-cache, jailbreak, pii, system_prompt, header_mutation) | | Enum: [semantic-cache jailbreak pii system_prompt header_mutation]
Required: \{\}
| | `configuration` _[RawExtension](https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#rawextension-runtime-pkg)_ | Configuration is the plugin-specific configuration as a raw JSON object | | Schemaless: \{\}
| - #### DomainSignal - - DomainSignal defines a domain category for classification - - _Appears in:_ + - [Signals](#signals) | Field | Description | Default | Validation | @@ -76,16 +64,12 @@ _Appears in:_ | `name` _string_ | Name is the unique identifier for this domain | | MaxLength: 100
MinLength: 1
Required: \{\}
| | `description` _string_ | Description provides a human-readable description of this domain | | MaxLength: 500
| - #### EmbeddingSignal - - EmbeddingSignal defines an embedding-based signal extraction rule - - _Appears in:_ + - [Signals](#signals) | Field | Description | Default | Validation | @@ -95,16 +79,12 @@ _Appears in:_ | `candidates` _string array_ | Candidates is the list of candidate phrases for semantic matching | | MaxItems: 100
MinItems: 1
Required: \{\}
| | `aggregationMethod` _string_ | AggregationMethod defines how to aggregate multiple candidate similarities | max | Enum: [mean max any]
| - #### IntelligentPool - - IntelligentPool defines a pool of models with their configurations - - _Appears in:_ + - [IntelligentPoolList](#intelligentpoollist) | Field | Description | Default | Validation | @@ -115,17 +95,10 @@ _Appears in:_ | `spec` _[IntelligentPoolSpec](#intelligentpoolspec)_ | | | | | `status` _[IntelligentPoolStatus](#intelligentpoolstatus)_ | | | | - #### IntelligentPoolList - - IntelligentPoolList contains a list of IntelligentPool - - - - | Field | Description | Default | Validation | | --- | --- | --- | --- | | `apiVersion` _string_ | `vllm.ai/v1alpha1` | | | @@ -133,16 +106,12 @@ IntelligentPoolList contains a list of IntelligentPool | `metadata` _[ListMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#listmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | | `items` _[IntelligentPool](#intelligentpool) array_ | | | | - #### IntelligentPoolSpec - - IntelligentPoolSpec defines the desired state of IntelligentPool - - _Appears in:_ + - [IntelligentPool](#intelligentpool) | Field | Description | Default | Validation | @@ -150,16 +119,12 @@ _Appears in:_ | `defaultModel` _string_ | DefaultModel specifies the default model to use when no specific model is selected | | MaxLength: 100
MinLength: 1
Required: \{\}
| | `models` _[ModelConfig](#modelconfig) array_ | Models defines the list of available models in this pool | | MaxItems: 100
MinItems: 1
Required: \{\}
| - #### IntelligentPoolStatus - - IntelligentPoolStatus defines the observed state of IntelligentPool - - _Appears in:_ + - [IntelligentPool](#intelligentpool) | Field | Description | Default | Validation | @@ -168,16 +133,12 @@ _Appears in:_ | `observedGeneration` _integer_ | ObservedGeneration reflects the generation of the most recently observed IntelligentPool | | | | `modelCount` _integer_ | ModelCount indicates the number of models in the pool | | | - #### IntelligentRoute - - IntelligentRoute defines intelligent routing rules and decisions - - _Appears in:_ + - [IntelligentRouteList](#intelligentroutelist) | Field | Description | Default | Validation | @@ -188,17 +149,10 @@ _Appears in:_ | `spec` _[IntelligentRouteSpec](#intelligentroutespec)_ | | | | | `status` _[IntelligentRouteStatus](#intelligentroutestatus)_ | | | | - #### IntelligentRouteList - - IntelligentRouteList contains a list of IntelligentRoute - - - - | Field | Description | Default | Validation | | --- | --- | --- | --- | | `apiVersion` _string_ | `vllm.ai/v1alpha1` | | | @@ -206,16 +160,12 @@ IntelligentRouteList contains a list of IntelligentRoute | `metadata` _[ListMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#listmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | | `items` _[IntelligentRoute](#intelligentroute) array_ | | | | - #### IntelligentRouteSpec - - IntelligentRouteSpec defines the desired state of IntelligentRoute - - _Appears in:_ + - [IntelligentRoute](#intelligentroute) | Field | Description | Default | Validation | @@ -223,16 +173,12 @@ _Appears in:_ | `signals` _[Signals](#signals)_ | Signals defines signal extraction rules for routing decisions | | | | `decisions` _[Decision](#decision) array_ | Decisions defines the routing decisions based on signal combinations | | MaxItems: 100
MinItems: 1
Required: \{\}
| - #### IntelligentRouteStatus - - IntelligentRouteStatus defines the observed state of IntelligentRoute - - _Appears in:_ + - [IntelligentRoute](#intelligentroute) | Field | Description | Default | Validation | @@ -241,16 +187,12 @@ _Appears in:_ | `observedGeneration` _integer_ | ObservedGeneration reflects the generation of the most recently observed IntelligentRoute | | | | `statistics` _[RouteStatistics](#routestatistics)_ | Statistics provides statistics about configured decisions and signals | | | - #### KeywordSignal - - KeywordSignal defines a keyword-based signal extraction rule - - _Appears in:_ + - [Signals](#signals) | Field | Description | Default | Validation | @@ -260,16 +202,12 @@ _Appears in:_ | `keywords` _string array_ | Keywords is the list of keywords to match | | MaxItems: 100
MinItems: 1
Required: \{\}
| | `caseSensitive` _boolean_ | CaseSensitive specifies whether keyword matching is case-sensitive | false | | - #### LoRAConfig - - LoRAConfig defines a LoRA adapter configuration - - _Appears in:_ + - [ModelConfig](#modelconfig) | Field | Description | Default | Validation | @@ -277,16 +215,12 @@ _Appears in:_ | `name` _string_ | Name is the unique identifier for this LoRA adapter | | MaxLength: 100
MinLength: 1
Required: \{\}
| | `description` _string_ | Description provides a human-readable description of this LoRA adapter | | MaxLength: 500
| - #### ModelConfig - - ModelConfig defines the configuration for a single model - - _Appears in:_ + - [IntelligentPoolSpec](#intelligentpoolspec) | Field | Description | Default | Validation | @@ -296,16 +230,12 @@ _Appears in:_ | `pricing` _[ModelPricing](#modelpricing)_ | Pricing defines the cost structure for this model | | | | `loras` _[LoRAConfig](#loraconfig) array_ | LoRAs defines the list of LoRA adapters available for this model | | MaxItems: 50
| - #### ModelPricing - - ModelPricing defines the pricing structure for a model - - _Appears in:_ + - [ModelConfig](#modelconfig) | Field | Description | Default | Validation | @@ -313,16 +243,12 @@ _Appears in:_ | `inputTokenPrice` _float_ | InputTokenPrice is the cost per input token | | Minimum: 0
| | `outputTokenPrice` _float_ | OutputTokenPrice is the cost per output token | | Minimum: 0
| - #### ModelRef - - ModelRef defines a model reference without score - - _Appears in:_ + - [Decision](#decision) | Field | Description | Default | Validation | @@ -333,18 +259,12 @@ _Appears in:_ | `reasoningDescription` _string_ | ReasoningDescription provides context for when to use reasoning | | MaxLength: 500
| | `reasoningEffort` _string_ | ReasoningEffort defines the reasoning effort level (low/medium/high) | | Enum: [low medium high]
| - - - #### RouteStatistics - - RouteStatistics provides statistics about the IntelligentRoute configuration - - _Appears in:_ + - [IntelligentRouteStatus](#intelligentroutestatus) | Field | Description | Default | Validation | @@ -354,16 +274,12 @@ _Appears in:_ | `embeddings` _integer_ | Embeddings indicates the number of embedding signals | | | | `domains` _integer_ | Domains indicates the number of domain signals | | | - #### SignalCombination - - SignalCombination defines how to combine multiple signals - - _Appears in:_ + - [Decision](#decision) | Field | Description | Default | Validation | @@ -371,16 +287,12 @@ _Appears in:_ | `operator` _string_ | Operator defines the logical operator for combining conditions (AND/OR) | | Enum: [AND OR]
Required: \{\}
| | `conditions` _[SignalCondition](#signalcondition) array_ | Conditions defines the list of signal conditions | | MaxItems: 50
MinItems: 1
Required: \{\}
| - #### SignalCondition - - SignalCondition defines a single signal condition - - _Appears in:_ + - [SignalCombination](#signalcombination) | Field | Description | Default | Validation | @@ -388,16 +300,12 @@ _Appears in:_ | `type` _string_ | Type defines the type of signal (keyword/embedding/domain) | | Enum: [keyword embedding domain]
Required: \{\}
| | `name` _string_ | Name is the name of the signal to reference | | MaxLength: 100
MinLength: 1
Required: \{\}
| - #### Signals - - Signals defines signal extraction rules - - _Appears in:_ + - [IntelligentRouteSpec](#intelligentroutespec) | Field | Description | Default | Validation | @@ -405,5 +313,3 @@ _Appears in:_ | `keywords` _[KeywordSignal](#keywordsignal) array_ | Keywords defines keyword-based signal extraction rules | | MaxItems: 100
| | `embeddings` _[EmbeddingSignal](#embeddingsignal) array_ | Embeddings defines embedding-based signal extraction rules | | MaxItems: 100
| | `domains` _[DomainSignal](#domainsignal) array_ | Domains defines MMLU domain categories for classification | | MaxItems: 14
| - - From 0ba39858f604bee4469bce65d15e6ea90d27dccc Mon Sep 17 00:00:00 2001 From: Yehudit Kerido Date: Thu, 20 Nov 2025 10:19:45 +0200 Subject: [PATCH 2/2] fix wrong header in test Signed-off-by: Yehudit Kerido --- .../aibrix/semantic-router-values/values.yaml | 3 --- e2e/README.md | 4 ++-- .../dynamic-config/crds/intelligentroute.yaml | 8 ++----- e2e/testcases/decision_priority.go | 23 +++++++------------ e2e/testcases/plugin_chain_execution.go | 5 ++-- 5 files changed, 15 insertions(+), 28 deletions(-) diff --git a/deploy/kubernetes/aibrix/semantic-router-values/values.yaml b/deploy/kubernetes/aibrix/semantic-router-values/values.yaml index fd5917389..ffb17a1d4 100644 --- a/deploy/kubernetes/aibrix/semantic-router-values/values.yaml +++ b/deploy/kubernetes/aibrix/semantic-router-values/values.yaml @@ -35,7 +35,6 @@ config: - name: computer science - name: philosophy - name: engineering - - name: thinking # Decisions - define routing logic with rules, model selection, and plugins decisions: @@ -365,8 +364,6 @@ config: rules: operator: "OR" conditions: - - type: "domain" - name: "thinking" - type: "keyword" name: "thinking" modelRefs: diff --git a/e2e/README.md b/e2e/README.md index 018d7f935..6e977e5d6 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -77,7 +77,7 @@ The framework includes the following test cases (all in `e2e/testcases/`): | Test Case | Description | Metrics | |-----------|-------------|---------| -| `decision-priority-selection` | Decision priority selection with multiple matches | 4 cases, priority validation | +| `decision-priority-selection` | Decision priority selection with multiple matches | 4 cases, priority validation (indirect) | | `plugin-chain-execution` | Plugin execution order (PII → Cache → System Prompt) | 4 cases, chain validation, blocking behavior | | `rule-condition-logic` | AND/OR operators and keyword matching | 6 cases, operator validation | | `decision-fallback-behavior` | Fallback to default decision when no match | 5 
cases, fallback validation | @@ -86,7 +86,7 @@ The framework includes the following test cases (all in `e2e/testcases/`): **Signal-Decision Engine Features Tested:** -- ✅ Decision priority selection (priority 15 > 10) +- ✅ Decision priority selection (priority 15 > 10) - validated by checking which decision wins when multiple match - ✅ Plugin chain execution order and blocking - ✅ Rule condition logic (AND/OR operators) - ✅ Keyword-based routing (case-insensitive) diff --git a/e2e/profiles/dynamic-config/crds/intelligentroute.yaml b/e2e/profiles/dynamic-config/crds/intelligentroute.yaml index a2b417bfc..500c2b576 100644 --- a/e2e/profiles/dynamic-config/crds/intelligentroute.yaml +++ b/e2e/profiles/dynamic-config/crds/intelligentroute.yaml @@ -32,8 +32,6 @@ spec: description: "Philosophy and ethical questions" - name: "engineering" description: "Engineering and technical problem-solving" - - name: "thinking" - description: "Queries requiring careful thought or urgent attention" - name: "other" description: "General knowledge and miscellaneous topics" @@ -41,7 +39,7 @@ spec: - name: "thinking" operator: "OR" keywords: ["urgent", "immediate", "asap", "think", "careful"] - case_sensitive: false + caseSensitive: false decisions: - name: "thinking_decision" @@ -50,8 +48,6 @@ spec: signals: operator: "OR" conditions: - - type: "domain" - name: "thinking" - type: "keyword" name: "thinking" modelRefs: @@ -400,5 +396,5 @@ spec: - type: "system_prompt" configuration: enabled: true - system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. 
Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards." + system_prompt: "You are a knowledgeable AI assistant with broad expertise across many domains. Provide accurate, helpful, and well-structured responses to general questions. When uncertain, acknowledge limitations and suggest where to find authoritative information." mode: "replace" diff --git a/e2e/testcases/decision_priority.go b/e2e/testcases/decision_priority.go index 438e745d8..ddca0cc1e 100644 --- a/e2e/testcases/decision_priority.go +++ b/e2e/testcases/decision_priority.go @@ -182,28 +182,21 @@ func testSinglePrioritySelection(ctx context.Context, testCase DecisionPriorityC // Extract VSR decision headers result.ActualDecision = resp.Header.Get("x-vsr-selected-decision") - result.ActualPriority = resp.Header.Get("x-vsr-decision-priority") // Check if the highest priority decision was selected - // Validate both decision name AND priority value - decisionMatches := (result.ActualDecision == testCase.ExpectedDecision) - priorityMatches := (result.ActualPriority == fmt.Sprintf("%d", testCase.ExpectedPriority)) - result.Correct = decisionMatches && priorityMatches + // Note: We validate priority indirectly by checking which decision "wins" + // when multiple decisions match. The x-vsr-decision-priority header is not + // currently implemented in semantic-router backend. 
+ result.Correct = (result.ActualDecision == testCase.ExpectedDecision) if verbose { if result.Correct { - fmt.Printf("[Test] ✓ Correct priority selection: %s (priority %s)\n", result.ActualDecision, result.ActualPriority) + fmt.Printf("[Test] ✓ Correct priority selection: %s\n", result.ActualDecision) } else { - fmt.Printf("[Test] ✗ Wrong decision or priority\n") + fmt.Printf("[Test] ✗ Wrong decision selected\n") fmt.Printf(" Query: %s\n", testCase.Query) fmt.Printf(" Expected: %s (priority %d)\n", testCase.ExpectedDecision, testCase.ExpectedPriority) - fmt.Printf(" Actual: %s (priority %s)\n", result.ActualDecision, result.ActualPriority) - if !decisionMatches { - fmt.Printf(" ⚠ Decision mismatch\n") - } - if !priorityMatches { - fmt.Printf(" ⚠ Priority mismatch\n") - } + fmt.Printf(" Actual: %s\n", result.ActualDecision) fmt.Printf(" Description: %s\n", testCase.Description) } } @@ -235,7 +228,7 @@ func printDecisionPriorityResults(results []DecisionPriorityResult, totalTests, if !result.Correct && result.Error == "" { fmt.Printf(" - Query: %s\n", result.Query) fmt.Printf(" Expected Decision: %s (priority %d)\n", result.ExpectedDecision, result.ExpectedPriority) - fmt.Printf(" Actual Decision: %s (priority %s)\n", result.ActualDecision, result.ActualPriority) + fmt.Printf(" Actual Decision: %s\n", result.ActualDecision) } } } diff --git a/e2e/testcases/plugin_chain_execution.go b/e2e/testcases/plugin_chain_execution.go index 8c1099e4f..df01364eb 100644 --- a/e2e/testcases/plugin_chain_execution.go +++ b/e2e/testcases/plugin_chain_execution.go @@ -180,8 +180,9 @@ func testSinglePluginChain(ctx context.Context, testCase PluginChainCase, localP result.StatusCode = resp.StatusCode // Extract plugin execution headers - result.PIIDetected = resp.Header.Get("x-vsr-pii-detected") - result.PIIBlocked = (resp.StatusCode == http.StatusForbidden || result.PIIDetected != "") + piiViolationHeader := resp.Header.Get("x-vsr-pii-violation") + result.PIIDetected = 
piiViolationHeader // Store for display purposes + result.PIIBlocked = (resp.StatusCode == http.StatusForbidden || piiViolationHeader == "true") // Check cache headers (x-vsr-cache-hit or similar) cacheHeader := resp.Header.Get("x-vsr-cache-hit")