Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 20 additions & 12 deletions api/v1alpha1/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package v1alpha1

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
gwapiv1a2 "sigs.k8s.io/gateway-api/apis/v1alpha2"
)

// +kubebuilder:object:root=true
Expand All @@ -16,6 +15,10 @@ import (
// receive. And then the Gateway will route the traffic to the appropriate LLMBackend based
// on the output schema of the LLMBackend while doing the other necessary jobs like
// upstream authentication, rate limit, etc.
//
// LLMRoute references an HTTPRoute resource as the basis for routing the traffic. The AI Gateway controller
// modifies that HTTPRoute resource to include the filters needed to perform these jobs,
// notably the AI Gateway external processor filter.
type LLMRoute struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
Expand Down Expand Up @@ -43,22 +46,21 @@ type LLMRouteSpec struct {
// +kubebuilder:validation:Required
// +kubebuilder:validation:XValidation:rule="self.schema == 'OpenAI'"
APISchema LLMAPISchema `json:"inputSchema"`
// TargetRefs are the names of the Gateway resources this policy is being attached to.
// HTTPRouteRef is the name of the HTTPRoute resource that the Gateway will use to route the traffic.
// The namespace is "local", i.e. the same namespace as the LLMRoute.
//
// +optional
// +kubebuilder:validation:MaxItems=128
TargetRefs []gwapiv1a2.LocalPolicyTargetReferenceWithSectionName `json:"targetRefs"`
// BackendRefs lists the LLMBackends that this LLMRoute will route traffic to.
// The namespace is "local", i.e. the same namespace as the LLMRoute.
// In the matching configuration of the referenced HTTPRoute, `x-envoy-ai-gateway-llm-model` header
// can be used to describe the routing behavior.
//
// +kubebuilder:validation:MaxItems=128
BackendRefs []LLMBackendLocalRef `json:"backendRefs,omitempty"`
// Currently, only exact header matching is supported; any other configuration will be rejected.
//
// +kubebuilder:validation:Required
HTTPRouteRef HTTPRouteRef `json:"httpRouteRef,omitempty"`
}

// LLMBackendLocalRef is a reference to a LLMBackend resource in the "local" namespace.
type LLMBackendLocalRef struct {
// Name is the name of the LLMBackend in the same namespace as the LLMRoute.
// HTTPRouteRef is a reference to an HTTPRoute resource in the "local" namespace,
// i.e. the same namespace as the LLMRoute that holds the reference.
type HTTPRouteRef struct {
// Name is the name of the HTTPRoute in the same namespace as the LLMRoute.
// It is serialized under the JSON key "name".
Name string `json:"name"`
}

Expand Down Expand Up @@ -123,3 +125,9 @@ const (
// https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Operations_Amazon_Bedrock_Runtime.html
APISchemaAWSBedrock APISchema = "AWSBedrock"
)

// LLMModelHeaderKey is the key of the header whose value the ai-gateway extracts from the request.
// The HTTPRoute referenced by an LLMRoute can use this header to describe its routing behavior.
const LLMModelHeaderKey = "x-envoy-ai-gateway-llm-model"
46 changes: 17 additions & 29 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 19 additions & 16 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,24 @@ module github.com/envoyproxy/ai-gateway

go 1.23.2

replace github.com/imdario/mergo => github.com/imdario/mergo v0.3.16

require (
github.com/aws/aws-sdk-go v1.55.5
github.com/envoyproxy/go-control-plane v0.13.1
github.com/stretchr/testify v1.10.0
k8s.io/apimachinery v0.31.3
sigs.k8s.io/controller-runtime v0.19.3
sigs.k8s.io/gateway-api v1.2.1
)

require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b // indirect
github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/emicklei/go-restful/v3 v3.12.0 // indirect
github.com/emicklei/go-restful/v3 v3.12.1 // indirect
github.com/envoyproxy/protoc-gen-validate v1.1.0 // indirect
github.com/evanphx/json-patch v5.9.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.9.0 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
Expand All @@ -32,43 +34,44 @@ require (
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/imdario/mergo v0.3.16 // indirect
github.com/imdario/mergo v1.0.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.19.1 // indirect
github.com/prometheus/client_golang v1.20.5 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/common v0.60.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/x448/float16 v0.8.4 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.26.0 // indirect
golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f // indirect
go.uber.org/zap v1.27.0 // indirect
golang.org/x/exp v0.0.0-20240904232852-e7e105dedf7e // indirect
golang.org/x/net v0.31.0 // indirect
golang.org/x/oauth2 v0.21.0 // indirect
golang.org/x/oauth2 v0.23.0 // indirect
golang.org/x/sys v0.27.0 // indirect
golang.org/x/term v0.26.0 // indirect
golang.org/x/text v0.20.0 // indirect
golang.org/x/time v0.5.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect
google.golang.org/grpc v1.66.2 // indirect
google.golang.org/protobuf v1.34.2 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20241007155032-5fefd90f89a9 // indirect
google.golang.org/grpc v1.67.1 // indirect
google.golang.org/protobuf v1.35.1 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/api v0.31.1 // indirect
k8s.io/apiextensions-apiserver v0.31.1 // indirect
k8s.io/client-go v0.31.1 // indirect
k8s.io/api v0.31.2 // indirect
k8s.io/apiextensions-apiserver v0.31.2 // indirect
k8s.io/client-go v0.31.2 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20240423202451-8948a665c108 // indirect
k8s.io/kube-openapi v0.0.0-20240521193020-835d969ad83a // indirect
k8s.io/utils v0.0.0-20241104163129-6fe5fd82f078 // indirect
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.3 // indirect
Expand Down
Loading