api: initial skeleton of LLMRoute and LLMBackend (#20)

This adds the skeleton API of LLMRoute and LLMBackend. These two resources will be the foundation for future iterations, such as authn/z, token-based rate limiting, schema transformation, and more advanced features like #10. Note: we may (and likely will) break these APIs whenever necessary until the initial release. Part of #13. --------- Signed-off-by: Takeshi Yoneda <[email protected]>
envoyproxy · Dec 5, 2024 · e95a824 · e95a824
1 parent 8719beb
commit e95a824
Show file tree

Hide file tree

Showing 6 changed files with 553 additions and 4 deletions.
diff --git a/api/v1alpha1/api.go b/api/v1alpha1/api.go
@@ -1,3 +1,126 @@
 package v1alpha1
 
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	gwapiv1a2 "sigs.k8s.io/gateway-api/apis/v1alpha2"
+)
+
+// +kubebuilder:object:root=true
+
+// LLMRoute combines multiple LLMBackends and attaches them to Gateway resource(s).
+//
+// This serves as a way to define a "unified" LLM API for a Gateway which allows downstream
+// clients to use a single schema API to interact with multiple LLM backends.
+//
+// The input schema (LLMRouteSpec.APISchema) describes the structure of the requests that the
+// Gateway will receive. The Gateway then routes the traffic to the appropriate LLMBackend based
+// on the output schema of the LLMBackend while doing the other necessary jobs like
+// upstream authentication, rate limit, etc.
+type LLMRoute struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+	// Spec defines the details of the LLMRoute.
+	Spec LLMRouteSpec `json:"spec,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+
+// LLMRouteList contains a list of LLMRoutes.
+type LLMRouteList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []LLMRoute `json:"items"`
+}
+
+// LLMRouteSpec details the LLMRoute configuration.
+type LLMRouteSpec struct {
+	// APISchema specifies the API schema of the input that the target Gateway(s) will receive.
+	// Based on this schema, the ai-gateway will perform the necessary transformation to the
+	// output schema specified in the selected LLMBackend during the routing process.
+	//
+	// Currently, the only supported schema is OpenAI as the input schema.
+	//
+	// +kubebuilder:validation:Required
+	// +kubebuilder:default={schema: OpenAI}
+	// +kubebuilder:validation:XValidation:rule="self.schema == 'OpenAI'"
+	APISchema LLMAPISchema `json:"inputSchema"`
+	// TargetRefs are the names of the Gateway resources this LLMRoute is being attached to.
+	// The namespace is "local", i.e. the same namespace as the LLMRoute.
+	//
+	// +optional
+	// +kubebuilder:validation:MaxItems=128
+	TargetRefs []gwapiv1a2.LocalPolicyTargetReferenceWithSectionName `json:"targetRefs,omitempty"`
+	// BackendRefs lists the LLMBackends that this LLMRoute will route traffic to.
+	// The namespace is "local", i.e. the same namespace as the LLMRoute.
+	//
+	// +kubebuilder:validation:MaxItems=128
+	BackendRefs []LLMBackendLocalRef `json:"backendRefs,omitempty"`
+}
+
+// LLMBackendLocalRef is a reference to an LLMBackend resource in the "local" namespace.
+type LLMBackendLocalRef struct {
+	// Name is the name of the LLMBackend in the same namespace as the LLMRoute.
+	Name string `json:"name"`
+}
+
 // +kubebuilder:object:root=true
+
+// LLMBackend is a resource that represents a single backend for LLMRoute.
+// A backend is a service that handles traffic with a concrete API specification.
+type LLMBackend struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+	// Spec defines the details of the LLMBackend.
+	Spec LLMBackendSpec `json:"spec,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+
+// LLMBackendList is the list type that holds a collection of LLMBackend resources.
+type LLMBackendList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []LLMBackend `json:"items"`
+}
+
+// LLMBackendSpec details the LLMBackend configuration.
+type LLMBackendSpec struct {
+	// APISchema specifies the API schema of the requests that this LLMBackend accepts,
+	// i.e. the output format of the requests that Envoy sends to this backend.
+	// Based on this schema, the ai-gateway will perform the necessary transformation for
+	// the pair of LLMRouteSpec.APISchema and LLMBackendSpec.APISchema.
+	//
+	// This is required to be set.
+	APISchema LLMAPISchema `json:"outputSchema"`
+}
+
+// LLMAPISchema defines the API schema of either LLMRoute (the input) or LLMBackend (the output).
+//
+// This allows the ai-gateway to understand the input and perform the necessary transformation
+// depending on the API schema pair (input, output).
+//
+// Note that this is vendor specific, and the stability of the API schema is not guaranteed by
+// the ai-gateway, but by the vendor via proper versioning.
+type LLMAPISchema struct {
+	// Schema is the name of the API schema of the LLMRoute or LLMBackend.
+	//
+	// +kubebuilder:validation:Enum=OpenAI;AWSBedrock
+	Schema APISchema `json:"schema"`
+
+	// Version is the version of the API schema; it is left out of the JSON form when empty.
+	Version string `json:"version,omitempty"`
+}
+
+// APISchema is the name of an API schema; the known values are the APISchema* constants.
+type APISchema string
+
+const (
+	// APISchemaOpenAI identifies the OpenAI API schema.
+	//
+	// https://github.com/openai/openai-openapi
+	APISchemaOpenAI APISchema = "OpenAI"
+	// APISchemaAWSBedrock identifies the AWS Bedrock API schema.
+	//
+	// https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Operations_Amazon_Bedrock_Runtime.html
+	APISchemaAWSBedrock APISchema = "AWSBedrock"
+)
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
diff --git a/go.mod b/go.mod
@@ -5,6 +5,7 @@ go 1.23.2
 require (
 	k8s.io/apimachinery v0.31.3
 	sigs.k8s.io/controller-runtime v0.19.3
+	sigs.k8s.io/gateway-api v1.2.1
 )
 
 require (

diff --git a/go.sum b/go.sum
@@ -81,8 +81,8 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
-golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
+golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
+golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -94,8 +94,8 @@ gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
 gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-k8s.io/api v0.31.0 h1:b9LiSjR2ym/SzTOlfMHm1tr7/21aD7fSkqgD/CVJBCo=
-k8s.io/api v0.31.0/go.mod h1:0YiFF+JfFxMM6+1hQei8FY8M7s1Mth+z/q7eF1aJkTE=
+k8s.io/api v0.31.1 h1:Xe1hX/fPW3PXYYv8BlozYqw63ytA92snr96zMW9gWTU=
+k8s.io/api v0.31.1/go.mod h1:sbN1g6eY6XVLeqNsZGLnI5FwVseTrZX7Fv3O26rhAaI=
 k8s.io/apimachinery v0.31.3 h1:6l0WhcYgasZ/wk9ktLq5vLaoXJJr5ts6lkaQzgeYPq4=
 k8s.io/apimachinery v0.31.3/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo=
 k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
@@ -104,6 +104,8 @@ k8s.io/utils v0.0.0-20241104163129-6fe5fd82f078 h1:jGnCPejIetjiy2gqaJ5V0NLwTpF4w
 k8s.io/utils v0.0.0-20241104163129-6fe5fd82f078/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
 sigs.k8s.io/controller-runtime v0.19.3 h1:XO2GvC9OPftRst6xWCpTgBZO04S2cbp0Qqkj8bX1sPw=
 sigs.k8s.io/controller-runtime v0.19.3/go.mod h1:j4j87DqtsThvwTv5/Tc5NFRyyF/RF0ip4+62tbTSIUM=
+sigs.k8s.io/gateway-api v1.2.1 h1:fZZ/+RyRb+Y5tGkwxFKuYuSRQHu9dZtbjenblleOLHM=
+sigs.k8s.io/gateway-api v1.2.1/go.mod h1:EpNfEXNjiYfUJypf0eZ0P5iXA9ekSGWaS1WgPaM42X0=
 sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE=
 sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
 sigs.k8s.io/structured-merge-diff/v4 v4.4.3 h1:sCP7Vv3xx/CWIuTPVN38lUPx0uw0lcLfzaiDa8Ja01A=