diff --git a/api/v1alpha1/api.go b/api/v1alpha1/api.go index e022ae201..3d7e598be 100644 --- a/api/v1alpha1/api.go +++ b/api/v1alpha1/api.go @@ -61,7 +61,7 @@ type AIGatewayRouteSpec struct { // modifications to achieve the necessary jobs, notably inserting the AI Gateway filter responsible for // the transformation of the request and response, etc. // - // In the matching conditions in the AIGatewayRouteRule, `x-envoy-ai-gateway-model` header is available + // In the matching conditions in the AIGatewayRouteRule, `x-ai-eg-model` header is available // if we want to describe the routing behavior based on the model name. The model name is extracted // from the request content before the routing decision. // @@ -355,7 +355,7 @@ const ( const ( // AIModelHeaderKey is the header key whose value is extracted from the request by the ai-gateway. // This can be used to describe the routing behavior in HTTPRoute referenced by AIGatewayRoute. - AIModelHeaderKey = "x-envoy-ai-gateway-model" + AIModelHeaderKey = "x-ai-eg-model" ) // BackendSecurityPolicyType specifies the type of auth mechanism used to access a backend. diff --git a/filterconfig/filterconfig.go b/filterconfig/filterconfig.go index 586bd15c6..92c0db6ac 100644 --- a/filterconfig/filterconfig.go +++ b/filterconfig/filterconfig.go @@ -21,8 +21,8 @@ import ( const DefaultConfig = ` schema: name: OpenAI -selectedBackendHeaderKey: x-envoy-ai-gateway-selected-backend -modelNameHeaderKey: x-envoy-ai-gateway-model +selectedBackendHeaderKey: x-ai-eg-selected-backend +modelNameHeaderKey: x-ai-eg-model ` // Config is the configuration schema for the filter. 
@@ -32,7 +32,7 @@ modelNameHeaderKey: x-envoy-ai-gateway-model // schema: // name: OpenAI -// selectedBackendHeaderKey: x-envoy-ai-gateway-selected-backend -// modelNameHeaderKey: x-envoy-ai-gateway-model +// selectedBackendHeaderKey: x-ai-eg-selected-backend +// modelNameHeaderKey: x-ai-eg-model // llmRequestCosts: // - metadataKey: token_usage_key // type: OutputToken @@ -47,24 +47,24 @@ modelNameHeaderKey: x-envoy-ai-gateway-model // schema: // name: AWSBedrock // headers: -// - name: x-envoy-ai-gateway-model +// - name: x-ai-eg-model // value: llama3.3333 // - backends: // - name: openai // schema: // name: OpenAI // headers: -// - name: x-envoy-ai-gateway-model +// - name: x-ai-eg-model // value: gpt4.4444 // -// where the input of the Gateway is in the OpenAI schema, the model name is populated in the header x-envoy-ai-gateway-model, -// The model name header `x-envoy-ai-gateway-model` is used in the header matching to make the routing decision. **After** the routing decision is made, -// the selected backend name is populated in the header `x-envoy-ai-gateway-selected-backend`. For example, when the model name is `llama3.3333`, +// where the input of the Gateway is in the OpenAI schema, the model name is populated in the header x-ai-eg-model. +// The model name header `x-ai-eg-model` is used in the header matching to make the routing decision. **After** the routing decision is made, +// the selected backend name is populated in the header `x-ai-eg-selected-backend`. For example, when the model name is `llama3.3333`, // the request is routed to either backends `kserve` or `awsbedrock` with weights 1 and 10 respectively, and the selected -// backend, say `awsbedrock`, is populated in the header `x-envoy-ai-gateway-selected-backend`. +// backend, say `awsbedrock`, is populated in the header `x-ai-eg-selected-backend`. // -// From Envoy configuration perspective, configuring the header matching based on `x-envoy-ai-gateway-selected-backend` is enough to route the request to the selected backend. 
-// That is because the matching decision is made by the filter and the selected backend is populated in the header `x-envoy-ai-gateway-selected-backend`. +// From Envoy configuration perspective, configuring the header matching based on `x-ai-eg-selected-backend` is enough to route the request to the selected backend. +// That is because the matching decision is made by the filter and the selected backend is populated in the header `x-ai-eg-selected-backend`. type Config struct { // MetadataNamespace is the namespace of the dynamic metadata to be used by the filter. MetadataNamespace string `json:"metadataNamespace"` diff --git a/filterconfig/filterconfig_test.go b/filterconfig/filterconfig_test.go index 06f8b4530..d89052610 100644 --- a/filterconfig/filterconfig_test.go +++ b/filterconfig/filterconfig_test.go @@ -31,8 +31,8 @@ func TestUnmarshalConfigYaml(t *testing.T) { const config = ` schema: name: OpenAI -selectedBackendHeaderKey: x-envoy-ai-gateway-selected-backend -modelNameHeaderKey: x-envoy-ai-gateway-model +selectedBackendHeaderKey: x-ai-eg-selected-backend +modelNameHeaderKey: x-ai-eg-model metadataNamespace: ai_gateway_llm_ns llmRequestCosts: - metadataKey: token_usage_key @@ -48,14 +48,14 @@ rules: schema: name: AWSBedrock headers: - - name: x-envoy-ai-gateway-model + - name: x-ai-eg-model value: llama3.3333 - backends: - name: openai schema: name: OpenAI headers: - - name: x-envoy-ai-gateway-model + - name: x-ai-eg-model value: gpt4.4444 ` require.NoError(t, os.WriteFile(configPath, []byte(config), 0o600)) @@ -65,8 +65,8 @@ rules: require.Equal(t, "token_usage_key", cfg.LLMRequestCosts[0].MetadataKey) require.Equal(t, "OutputToken", string(cfg.LLMRequestCosts[0].Type)) require.Equal(t, "OpenAI", string(cfg.Schema.Name)) - require.Equal(t, "x-envoy-ai-gateway-selected-backend", cfg.SelectedBackendHeaderKey) - require.Equal(t, "x-envoy-ai-gateway-model", cfg.ModelNameHeaderKey) + require.Equal(t, "x-ai-eg-selected-backend", 
cfg.SelectedBackendHeaderKey) + require.Equal(t, "x-ai-eg-model", cfg.ModelNameHeaderKey) require.Len(t, cfg.Rules, 2) require.Equal(t, "llama3.3333", cfg.Rules[0].Headers[0].Value) require.Equal(t, "gpt4.4444", cfg.Rules[1].Headers[0].Value) diff --git a/internal/controller/ai_gateway_route.go b/internal/controller/ai_gateway_route.go index 90c14220c..3117d78fe 100644 --- a/internal/controller/ai_gateway_route.go +++ b/internal/controller/ai_gateway_route.go @@ -58,7 +58,7 @@ func NewAIGatewayRouteController( return &aiGatewayRouteController{ client: client, kube: kube, - logger: logger.WithName("eaig-route-controller"), + logger: logger.WithName("ai-eg-route-controller"), defaultExtProcImage: options.ExtProcImage, eventChan: ch, } @@ -270,7 +270,7 @@ func (c *aiGatewayRouteController) reconcileExtProcDeployment(ctx context.Contex } func extProcName(route *aigv1a1.AIGatewayRoute) string { - return fmt.Sprintf("eaig-route-extproc-%s", route.Name) + return fmt.Sprintf("ai-eg-route-extproc-%s", route.Name) } func ownerReferenceForAIGatewayRoute(aiGatewayRoute *aigv1a1.AIGatewayRoute) []metav1.OwnerReference { diff --git a/internal/controller/ai_gateway_route_test.go b/internal/controller/ai_gateway_route_test.go index e3bb0d147..5abe6c5c1 100644 --- a/internal/controller/ai_gateway_route_test.go +++ b/internal/controller/ai_gateway_route_test.go @@ -27,7 +27,7 @@ func Test_extProcName(t *testing.T) { Name: "myroute", }, }) - require.Equal(t, "eaig-route-extproc-myroute", actual) + require.Equal(t, "ai-eg-route-extproc-myroute", actual) } func TestAIGatewayRouteController_ensuresExtProcConfigMapExists(t *testing.T) { diff --git a/internal/controller/sink.go b/internal/controller/sink.go index 0c13e41a7..7fdd88cd3 100644 --- a/internal/controller/sink.go +++ b/internal/controller/sink.go @@ -16,7 +16,7 @@ import ( "github.com/envoyproxy/ai-gateway/filterconfig" ) -const selectedBackendHeaderKey = "x-envoy-ai-gateway-selected-backend" +const selectedBackendHeaderKey = 
"x-ai-eg-selected-backend" // ConfigSinkEvent is the interface for the events that the configSink can handle. // It can be either an AIServiceBackend, an AIGatewayRoute, or a deletion event. diff --git a/internal/extproc/server_test.go b/internal/extproc/server_test.go index 793ca7a05..2c4f06816 100644 --- a/internal/extproc/server_test.go +++ b/internal/extproc/server_test.go @@ -39,7 +39,7 @@ func TestServer_LoadConfig(t *testing.T) { MetadataNamespace: "ns", LLMRequestCosts: []filterconfig.LLMRequestCost{{MetadataKey: "key", Type: filterconfig.LLMRequestCostTypeOutputToken}}, Schema: filterconfig.VersionedAPISchema{Name: filterconfig.APISchemaOpenAI}, - SelectedBackendHeaderKey: "x-envoy-ai-gateway-selected-backend", + SelectedBackendHeaderKey: "x-ai-eg-selected-backend", ModelNameHeaderKey: "x-model-name", Rules: []filterconfig.RouteRule{ { @@ -78,7 +78,7 @@ func TestServer_LoadConfig(t *testing.T) { require.Equal(t, filterconfig.LLMRequestCostTypeOutputToken, s.config.requestCosts[0].Type) require.NotNil(t, s.config.router) require.NotNil(t, s.config.bodyParser) - require.Equal(t, "x-envoy-ai-gateway-selected-backend", s.config.selectedBackendHeaderKey) + require.Equal(t, "x-ai-eg-selected-backend", s.config.selectedBackendHeaderKey) require.Equal(t, "x-model-name", s.config.ModelNameHeaderKey) require.Len(t, s.config.factories, 2) require.NotNil(t, s.config.factories[filterconfig.VersionedAPISchema{Name: filterconfig.APISchemaOpenAI}]) diff --git a/internal/extproc/watcher_test.go b/internal/extproc/watcher_test.go index bb652dccf..1f5e5faf9 100644 --- a/internal/extproc/watcher_test.go +++ b/internal/extproc/watcher_test.go @@ -44,7 +44,7 @@ func TestStartConfigWatcher(t *testing.T) { cfg := ` schema: name: OpenAI -selectedBackendHeaderKey: x-envoy-ai-gateway-selected-backend +selectedBackendHeaderKey: x-ai-eg-selected-backend modelNameHeaderKey: x-model-name rules: - backends: @@ -84,7 +84,7 @@ rules: cfg = ` schema: name: OpenAI -selectedBackendHeaderKey: 
x-envoy-ai-gateway-selected-backend +selectedBackendHeaderKey: x-ai-eg-selected-backend modelNameHeaderKey: x-model-name rules: - backends: diff --git a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aigatewayroutes.yaml b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aigatewayroutes.yaml index eb3ecb71f..038df7dd3 100644 --- a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aigatewayroutes.yaml +++ b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aigatewayroutes.yaml @@ -229,7 +229,7 @@ spec: modifications to achieve the necessary jobs, notably inserting the AI Gateway filter responsible for the transformation of the request and response, etc. - In the matching conditions in the AIGatewayRouteRule, `x-envoy-ai-gateway-model` header is available + In the matching conditions in the AIGatewayRouteRule, `x-ai-eg-model` header is available if we want to describe the routing behavior based on the model name. The model name is extracted from the request content before the routing decision. 
diff --git a/tests/cel-validation/testdata/aigatewayroutes/basic.yaml b/tests/cel-validation/testdata/aigatewayroutes/basic.yaml index 459d2d01d..6fe953dce 100644 --- a/tests/cel-validation/testdata/aigatewayroutes/basic.yaml +++ b/tests/cel-validation/testdata/aigatewayroutes/basic.yaml @@ -14,7 +14,7 @@ spec: - matches: - headers: - type: Exact - name: x-envoy-ai-gateway-model + name: x-ai-eg-model value: llama3-70b backendRefs: - name: kserve diff --git a/tests/cel-validation/testdata/aigatewayroutes/no_target_refs.yaml b/tests/cel-validation/testdata/aigatewayroutes/no_target_refs.yaml index 61f96b758..49627132a 100644 --- a/tests/cel-validation/testdata/aigatewayroutes/no_target_refs.yaml +++ b/tests/cel-validation/testdata/aigatewayroutes/no_target_refs.yaml @@ -11,7 +11,7 @@ spec: - matches: - headers: - type: Exact - name: x-envoy-ai-gateway-model + name: x-ai-eg-model value: llama3-70b backendRefs: - name: kserve diff --git a/tests/cel-validation/testdata/aigatewayroutes/non_openai_schema.yaml b/tests/cel-validation/testdata/aigatewayroutes/non_openai_schema.yaml index 9a9705515..aa7148fff 100644 --- a/tests/cel-validation/testdata/aigatewayroutes/non_openai_schema.yaml +++ b/tests/cel-validation/testdata/aigatewayroutes/non_openai_schema.yaml @@ -15,7 +15,7 @@ spec: - matches: - headers: - type: Exact - name: x-envoy-ai-gateway-model + name: x-ai-eg-model value: llama3-70b backendRefs: - name: kserve diff --git a/tests/cel-validation/testdata/aigatewayroutes/unknown_schema.yaml b/tests/cel-validation/testdata/aigatewayroutes/unknown_schema.yaml index 7cf4f0f13..8bfb20330 100644 --- a/tests/cel-validation/testdata/aigatewayroutes/unknown_schema.yaml +++ b/tests/cel-validation/testdata/aigatewayroutes/unknown_schema.yaml @@ -15,7 +15,7 @@ spec: - matches: - headers: - type: Exact - name: x-envoy-ai-gateway-model + name: x-ai-eg-model value: llama3-70b backendRefs: - name: kserve diff --git 
a/tests/cel-validation/testdata/aigatewayroutes/unsupported_match.yaml b/tests/cel-validation/testdata/aigatewayroutes/unsupported_match.yaml index 32a0bb843..48905a028 100644 --- a/tests/cel-validation/testdata/aigatewayroutes/unsupported_match.yaml +++ b/tests/cel-validation/testdata/aigatewayroutes/unsupported_match.yaml @@ -14,7 +14,7 @@ spec: - matches: - headers: - type: RegularExpression - name: x-envoy-ai-gateway-model + name: x-ai-eg-model value: llama3-70b backendRefs: - name: kserve diff --git a/tests/controller/controller_test.go b/tests/controller/controller_test.go index cb14fa620..dd1493f4c 100644 --- a/tests/controller/controller_test.go +++ b/tests/controller/controller_test.go @@ -35,7 +35,7 @@ import ( var defaultSchema = aigv1a1.VersionedAPISchema{Name: aigv1a1.APISchemaOpenAI, Version: "v1"} func extProcName(aiGatewayRouteName string) string { - return fmt.Sprintf("eaig-route-extproc-%s", aiGatewayRouteName) + return fmt.Sprintf("ai-eg-route-extproc-%s", aiGatewayRouteName) } // TestStartControllers tests the [controller.StartControllers] function. @@ -205,11 +205,11 @@ func TestStartControllers(t *testing.T) { require.Len(t, httpRoute.Spec.Rules, 3) // 2 for backends, 1 for the default backend. 
require.Len(t, httpRoute.Spec.Rules[0].Matches, 1) require.Len(t, httpRoute.Spec.Rules[0].Matches[0].Headers, 1) - require.Equal(t, "x-envoy-ai-gateway-selected-backend", string(httpRoute.Spec.Rules[0].Matches[0].Headers[0].Name)) + require.Equal(t, "x-ai-eg-selected-backend", string(httpRoute.Spec.Rules[0].Matches[0].Headers[0].Name)) require.Equal(t, "backend1.default", httpRoute.Spec.Rules[0].Matches[0].Headers[0].Value) require.Len(t, httpRoute.Spec.Rules[1].Matches, 1) require.Len(t, httpRoute.Spec.Rules[1].Matches[0].Headers, 1) - require.Equal(t, "x-envoy-ai-gateway-selected-backend", string(httpRoute.Spec.Rules[1].Matches[0].Headers[0].Name)) + require.Equal(t, "x-ai-eg-selected-backend", string(httpRoute.Spec.Rules[1].Matches[0].Headers[0].Name)) require.Equal(t, "backend2.default", httpRoute.Spec.Rules[1].Matches[0].Headers[0].Value) return true }, 30*time.Second, 200*time.Millisecond) diff --git a/tests/e2e/testdata/translation_testupstream.yaml b/tests/e2e/testdata/translation_testupstream.yaml index 09d58889a..3da3264f3 100644 --- a/tests/e2e/testdata/translation_testupstream.yaml +++ b/tests/e2e/testdata/translation_testupstream.yaml @@ -33,7 +33,7 @@ spec: - matches: - headers: - type: Exact - name: x-envoy-ai-gateway-model + name: x-ai-eg-model value: some-cool-model backendRefs: - name: translation-testupstream-cool-model-backend @@ -41,7 +41,7 @@ spec: - matches: - headers: - type: Exact - name: x-envoy-ai-gateway-model + name: x-ai-eg-model value: another-cool-model backendRefs: - name: translation-testupstream-another-cool-model-backend diff --git a/tests/e2e/translation_testupstream_test.go b/tests/e2e/translation_testupstream_test.go index 46e37aa9f..38b28882b 100644 --- a/tests/e2e/translation_testupstream_test.go +++ b/tests/e2e/translation_testupstream_test.go @@ -32,22 +32,25 @@ func TestTranslationWithTestUpstream(t *testing.T) { t.Run("/chat/completions", func(t *testing.T) { for _, tc := range []struct { - name string - modelName string 
- expPath string - fakeResponseBody string + name string + modelName string + expTestUpstreamID string + expPath string + fakeResponseBody string }{ { - name: "openai", - modelName: "some-cool-model", - expPath: "/v1/chat/completions", - fakeResponseBody: `{"choices":[{"message":{"content":"This is a test."}}]}`, + name: "openai", + modelName: "some-cool-model", + expTestUpstreamID: "primary", + expPath: "/v1/chat/completions", + fakeResponseBody: `{"choices":[{"message":{"content":"This is a test."}}]}`, }, { - name: "aws-bedrock", - modelName: "another-cool-model", - expPath: "/model/another-cool-model/converse", - fakeResponseBody: `{"output":{"message":{"content":[{"text":"response"},{"text":"from"},{"text":"assistant"}],"role":"assistant"}},"stopReason":null,"usage":{"inputTokens":10,"outputTokens":20,"totalTokens":30}}`, + name: "aws-bedrock", + modelName: "another-cool-model", + expTestUpstreamID: "canary", + expPath: "/model/another-cool-model/converse", + fakeResponseBody: `{"output":{"message":{"content":[{"text":"response"},{"text":"from"},{"text":"assistant"}],"role":"assistant"}},"stopReason":null,"usage":{"inputTokens":10,"outputTokens":20,"totalTokens":30}}`, }, } { t.Run(tc.name, func(t *testing.T) { @@ -58,7 +61,7 @@ func TestTranslationWithTestUpstream(t *testing.T) { t.Logf("modelName: %s", tc.modelName) client := openai.NewClient(option.WithBaseURL(fwd.address()+"/v1/"), option.WithHeader( - "x-test-case-name", tc.name), + "x-expected-testupstream-id", tc.expTestUpstreamID), option.WithHeader( "x-expected-path", base64.StdEncoding.EncodeToString([]byte(tc.expPath))), option.WithHeader("x-response-body", diff --git a/tests/extproc/envoy.yaml b/tests/extproc/envoy.yaml index 83ed68e75..ea7c90510 100644 --- a/tests/extproc/envoy.yaml +++ b/tests/extproc/envoy.yaml @@ -59,8 +59,6 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor allow_mode_override: true - mutation_rules: - 
allow_all_routing: true processing_mode: request_header_mode: "SEND" response_header_mode: "SEND" diff --git a/tests/testupstream/main.go b/tests/testupstream/main.go index 198cddda9..d888a6d4f 100644 --- a/tests/testupstream/main.go +++ b/tests/testupstream/main.go @@ -37,6 +37,11 @@ const ( // nonExpectedHeadersKey is the key for the non-expected request headers. // The value is a base64 encoded string of comma separated header keys expected to be absent. nonExpectedRequestHeadersKey = "x-non-expected-request-headers" + // expectedTestUpstreamIDKey is the key for the expected testupstream-id in the request, + // and the value will be compared with the TESTUPSTREAM_ID environment variable. + // If the values do not match, the request will be rejected, meaning that the request + // was routed to the wrong upstream. + expectedTestUpstreamIDKey = "x-expected-testupstream-id" ) // main starts a server that listens on port 1063 and responds with the expected response body and headers @@ -170,6 +175,19 @@ func handler(w http.ResponseWriter, r *http.Request) { fmt.Println("no non-expected headers in the request") } + if v := r.Header.Get(expectedTestUpstreamIDKey); v != "" { + if os.Getenv("TESTUPSTREAM_ID") != v { + msg := fmt.Sprintf("unexpected testupstream-id: received by '%s' but expected '%s'\n", os.Getenv("TESTUPSTREAM_ID"), v) + fmt.Println(msg) + http.Error(w, msg, http.StatusBadRequest) + return + } else { + fmt.Println("testupstream-id matched:", v) + } + } else { + fmt.Println("no expected testupstream-id") + } + expectedPath, err := base64.StdEncoding.DecodeString(r.Header.Get(expectedPathHeaderKey)) if err != nil { fmt.Println("failed to decode the expected path")