Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implements CEL expression API for costs #153

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@ ENABLE_MULTI_PLATFORMS ?= false
HELM_CHART_VERSION ?= v0.0.0-latest

# Arguments for go test. This can be used, for example, to run specific tests via
# `GO_TEST_EXTRA_ARGS="-run TestName/foo/etc"`.
GO_TEST_EXTRA_ARGS ?=
# `GO_TEST_ARGS="-run TestName/foo/etc -v -race"`.
GO_TEST_ARGS ?= -v -race
# Arguments for go test in e2e tests in addition to GO_TEST_ARGS, applicable to test-cel, test-extproc, test-controller, and test-e2e.
GO_TEST_E2E_ARGS ?= -count=1

# This will print out the help message for contributing to the project.
.PHONY: help
Expand Down Expand Up @@ -103,7 +105,7 @@ editorconfig: editorconfig-checker
.PHONY: test
test:
@echo "test => ./..."
@go test -v ./...
@go test $(GO_TEST_ARGS) ./...

ENVTEST_K8S_VERSIONS ?= 1.29.0 1.30.0 1.31.0

Expand All @@ -115,7 +117,7 @@ test-cel: envtest apigen
@for k8sVersion in $(ENVTEST_K8S_VERSIONS); do \
echo "Run CEL Validation on k8s $$k8sVersion"; \
KUBEBUILDER_ASSETS="$$($(ENVTEST) use $$k8sVersion -p path)" \
go test ./tests/cel-validation $(GO_TEST_EXTRA_ARGS) --tags test_cel_validation -v -count=1; \
go test ./tests/cel-validation $(GO_TEST_ARGS) $(GO_TEST_E2E_ARGS) --tags test_cel_validation; \
done

# This runs the end-to-end tests for extproc without controller or k8s at all.
Expand All @@ -127,15 +129,15 @@ test-extproc: build.extproc
@$(MAKE) build.extproc_custom_router CMD_PATH_PREFIX=examples
@$(MAKE) build.testupstream CMD_PATH_PREFIX=tests
@echo "Run ExtProc test"
@go test ./tests/extproc/... $(GO_TEST_EXTRA_ARGS) -tags test_extproc -v -count=1
@go test ./tests/extproc/... $(GO_TEST_ARGS) $(GO_TEST_E2E_ARGS) -tags test_extproc -v

# This runs the end-to-end tests for the controller with EnvTest.
.PHONY: test-controller
test-controller: envtest apigen
@for k8sVersion in $(ENVTEST_K8S_VERSIONS); do \
echo "Run Controller tests on k8s $$k8sVersion"; \
KUBEBUILDER_ASSETS="$$($(ENVTEST) use $$k8sVersion -p path)" \
go test ./tests/controller $(GO_TEST_EXTRA_ARGS) --tags test_controller -v -count=1; \
go test ./tests/controller $(GO_TEST_ARGS) $(GO_TEST_E2E_ARGS) -tags test_controller; \
done

# This runs the end-to-end tests for the controller and extproc with a local kind cluster.
Expand All @@ -146,7 +148,7 @@ test-e2e: kind
@$(MAKE) docker-build DOCKER_BUILD_ARGS="--load"
@$(MAKE) docker-build.testupstream CMD_PATH_PREFIX=tests DOCKER_BUILD_ARGS="--load"
@echo "Run E2E tests"
@go test ./tests/e2e/... $(GO_TEST_EXTRA_ARGS) -tags test_e2e -v -count=1
@go test ./tests/e2e/... $(GO_TEST_ARGS) $(GO_TEST_E2E_ARGS) -tags test_e2e

# This builds a binary for the given command under the internal/cmd directory.
#
Expand Down
27 changes: 21 additions & 6 deletions api/v1alpha1/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -475,17 +475,32 @@ type LLMRequestCost struct {
// +kubebuilder:validation:Required
MetadataKey string `json:"metadataKey"`
// Type specifies the type of the request cost. The default is "OutputToken",
// and it uses "output token" as the cost. The other types are "InputToken" and "TotalToken".
// and it uses "output token" as the cost. The other types are "InputToken", "TotalToken",
// and "CEL".
//
// +kubebuilder:validation:Enum=OutputToken;InputToken;TotalToken
// +kubebuilder:validation:Enum=OutputToken;InputToken;TotalToken;CEL
Type LLMRequestCostType `json:"type"`
// CELExpression is the CEL expression to calculate the cost of the request.
// The CEL expression must return an integer value. The CEL expression should be
// able to access the request headers, model name, backend name, input/output tokens etc.
// The CEL expression must return a signed or unsigned integer. If the
// return value is negative, it is an error.
//
// The expression can use the following variables:
//
// * model: the model name extracted from the request content. Type: string.
// * backend: the backend name in the form of "name.namespace". Type: string.
// * input_tokens: the number of input tokens. Type: unsigned integer.
// * output_tokens: the number of output tokens. Type: unsigned integer.
// * total_tokens: the total number of tokens. Type: unsigned integer.
//
// For example, the following expressions are valid:
//
// * "model == 'llama' ? input_tokens + output_tokens * 0.5 : total_tokens"
// * "backend == 'foo.default' ? input_tokens + output_tokens : total_tokens"
// * "input_tokens + output_tokens + total_tokens"
// * "input_tokens * output_tokens"
//
// +optional
// +notImplementedHide https://github.com/envoyproxy/ai-gateway/issues/97
CELExpression *string `json:"celExpression"`
CELExpression *string `json:"celExpression,omitempty"`
}

// LLMRequestCostType specifies the type of the LLMRequestCost.
Expand Down
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ require (
github.com/envoyproxy/gateway v0.5.0-rc.1.0.20250115172926-8b89dadfbd2c
github.com/envoyproxy/go-control-plane/envoy v1.32.3
github.com/go-logr/logr v1.4.2
github.com/google/cel-go v0.22.1
github.com/google/go-cmp v0.6.0
github.com/openai/openai-go v0.1.0-alpha.46
github.com/stretchr/testify v1.10.0
Expand All @@ -30,6 +31,7 @@ require (

require (
cel.dev/expr v0.18.0 // indirect
github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
github.com/aws/aws-sdk-go-v2/credentials v1.17.54 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.24 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.28 // indirect
Expand Down Expand Up @@ -75,6 +77,7 @@ require (
github.com/prometheus/procfs v0.15.1 // indirect
github.com/rogpeppe/go-internal v1.13.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stoewer/go-strcase v1.3.0 // indirect
github.com/tidwall/gjson v1.14.4 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.1 // indirect
Expand Down
12 changes: 12 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
cel.dev/expr v0.18.0 h1:CJ6drgk+Hf96lkLikr4rFf19WrU0BOWEihyZnI2TAzo=
cel.dev/expr v0.18.0/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw=
github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
github.com/aws/aws-sdk-go-v2 v1.33.0 h1:Evgm4DI9imD81V0WwD+TN4DCwjUMdc94TrduMLbgZJs=
github.com/aws/aws-sdk-go-v2 v1.33.0/go.mod h1:P5WJBrYqqbWVaOxgH0X/FYYD47/nooaPOZPlQdmiN2U=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7 h1:lL7IfaFzngfx0ZwUGOZdsFFnQ5uLvR0hWqqhyE7Q9M8=
Expand Down Expand Up @@ -70,6 +72,8 @@ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/cel-go v0.22.1 h1:AfVXx3chM2qwoSbM7Da8g8hX8OVSkBFwX+rz2+PcK40=
github.com/google/cel-go v0.22.1/go.mod h1:BuznPXXfQDpXKWQ9sPW3TzlAJN5zzFe+i9tIs0yC4s8=
github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw=
github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
Expand Down Expand Up @@ -130,8 +134,15 @@ github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs=
github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
Expand Down Expand Up @@ -224,6 +235,7 @@ gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSP
gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
k8s.io/api v0.32.1 h1:f562zw9cy+GvXzXf0CKlVQ7yHJVYzLfL6JAS4kOAaOc=
Expand Down
8 changes: 8 additions & 0 deletions internal/controller/sink.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (

aigv1a1 "github.com/envoyproxy/ai-gateway/api/v1alpha1"
"github.com/envoyproxy/ai-gateway/filterconfig"
"github.com/envoyproxy/ai-gateway/internal/llmcostcel"
)

const selectedBackendHeaderKey = "x-ai-eg-selected-backend"
Expand Down Expand Up @@ -261,6 +262,13 @@ func (c *configSink) updateExtProcConfigMap(aiGatewayRoute *aigv1a1.AIGatewayRou
fc.Type = filterconfig.LLMRequestCostTypeTotalToken
case aigv1a1.LLMRequestCostTypeCEL:
fc.Type = filterconfig.LLMRequestCostTypeCELExpression
expr := *cost.CELExpression
// Sanity check the CEL expression.
_, err := llmcostcel.NewProgram(expr)
if err != nil {
return fmt.Errorf("invalid CEL expression: %w", err)
}
fc.CELExpression = expr
default:
return fmt.Errorf("unknown request cost type: %s", cost.Type)
}
Expand Down
11 changes: 11 additions & 0 deletions internal/controller/sink_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,15 @@ func Test_updateExtProcConfigMap(t *testing.T) {
Type: aigv1a1.LLMRequestCostTypeInputToken,
MetadataKey: "input-token",
},
{
Type: aigv1a1.LLMRequestCostTypeTotalToken,
MetadataKey: "total-token",
},
{
Type: aigv1a1.LLMRequestCostTypeCEL,
MetadataKey: "cel-token",
CELExpression: ptr.To("model == 'cool_model' ? input_tokens * output_tokens : total_tokens"),
},
},
},
},
Expand Down Expand Up @@ -347,6 +356,8 @@ func Test_updateExtProcConfigMap(t *testing.T) {
LLMRequestCosts: []filterconfig.LLMRequestCost{
{Type: filterconfig.LLMRequestCostTypeOutputToken, MetadataKey: "output-token"},
{Type: filterconfig.LLMRequestCostTypeInputToken, MetadataKey: "input-token"},
{Type: filterconfig.LLMRequestCostTypeTotalToken, MetadataKey: "total-token"},
{Type: filterconfig.LLMRequestCostTypeCELExpression, MetadataKey: "cel-token", CELExpression: "model == 'cool_model' ? input_tokens * output_tokens : total_tokens"},
},
},
},
Expand Down
31 changes: 26 additions & 5 deletions internal/extproc/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,32 @@ import (

corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
"github.com/google/cel-go/cel"
"google.golang.org/protobuf/types/known/structpb"

"github.com/envoyproxy/ai-gateway/extprocapi"
"github.com/envoyproxy/ai-gateway/filterconfig"
"github.com/envoyproxy/ai-gateway/internal/extproc/backendauth"
"github.com/envoyproxy/ai-gateway/internal/extproc/router"
"github.com/envoyproxy/ai-gateway/internal/extproc/translator"
"github.com/envoyproxy/ai-gateway/internal/llmcostcel"
)

// processorConfig is the configuration for the processor.
// This will be created by the server and passed to the processor when it detects a new configuration.
type processorConfig struct {
bodyParser router.RequestBodyParser
router extprocapi.Router
ModelNameHeaderKey, selectedBackendHeaderKey string
modelNameHeaderKey, selectedBackendHeaderKey string
Comment on lines -28 to +30
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note: shouldn't have been exported

factories map[filterconfig.VersionedAPISchema]translator.Factory
backendAuthHandlers map[string]backendauth.Handler
metadataNamespace string
requestCosts []filterconfig.LLMRequestCost
requestCosts []processorConfigRequestCost
}

// processorConfigRequestCost pairs a single request cost entry from the filter
// configuration with the CEL program used to compute it.
type processorConfigRequestCost struct {
// LLMRequestCost is the embedded cost entry; its fields (such as Type and
// MetadataKey) are accessed directly on this struct.
*filterconfig.LLMRequestCost
// celProg is the CEL program evaluated when the cost type is
// LLMRequestCostTypeCELExpression. It is left nil when the entry has no
// CEL expression.
celProg cel.Program
}

// ProcessorIface is the interface for the processor.
Expand Down Expand Up @@ -79,7 +86,7 @@ func (p *Processor) ProcessRequestBody(_ context.Context, rawBody *extprocv3.Htt
}
p.logger.Info("Processing request", "path", path, "model", model)

p.requestHeaders[p.config.ModelNameHeaderKey] = model
p.requestHeaders[p.config.modelNameHeaderKey] = model
b, err := p.config.router.Calculate(p.requestHeaders)
if err != nil {
return nil, fmt.Errorf("failed to calculate route: %w", err)
Expand Down Expand Up @@ -107,7 +114,7 @@ func (p *Processor) ProcessRequestBody(_ context.Context, rawBody *extprocv3.Htt
}
// Set the model name to the request header with the key `x-ai-gateway-llm-model-name`.
headerMutation.SetHeaders = append(headerMutation.SetHeaders, &corev3.HeaderValueOption{
Header: &corev3.HeaderValue{Key: p.config.ModelNameHeaderKey, RawValue: []byte(model)},
Header: &corev3.HeaderValue{Key: p.config.modelNameHeaderKey, RawValue: []byte(model)},
}, &corev3.HeaderValueOption{
Header: &corev3.HeaderValue{Key: p.config.selectedBackendHeaderKey, RawValue: []byte(b.Name)},
})
Expand Down Expand Up @@ -203,7 +210,8 @@ func (p *Processor) ProcessResponseBody(_ context.Context, body *extprocv3.HttpB

func (p *Processor) maybeBuildDynamicMetadata() (*structpb.Struct, error) {
metadata := make(map[string]*structpb.Value, len(p.config.requestCosts))
for _, c := range p.config.requestCosts {
for i := range p.config.requestCosts {
c := &p.config.requestCosts[i]
var cost uint32
switch c.Type {
case filterconfig.LLMRequestCostTypeInputToken:
Expand All @@ -212,6 +220,19 @@ func (p *Processor) maybeBuildDynamicMetadata() (*structpb.Struct, error) {
cost = p.costs.OutputTokens
case filterconfig.LLMRequestCostTypeTotalToken:
cost = p.costs.TotalTokens
case filterconfig.LLMRequestCostTypeCELExpression:
costU64, err := llmcostcel.EvaluateProgram(
c.celProg,
p.requestHeaders[p.config.modelNameHeaderKey],
p.requestHeaders[p.config.selectedBackendHeaderKey],
p.costs.InputTokens,
p.costs.OutputTokens,
p.costs.TotalTokens,
)
if err != nil {
return nil, fmt.Errorf("failed to evaluate CEL expression: %w", err)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity, what does it mean for ext_proc to fail at this stage, when the response headers may have been sent? Does it make sense to make it fail?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will result in connection termination i guess

}
cost = uint32(costU64) //nolint:gosec
default:
return nil, fmt.Errorf("unknown request cost kind: %s", c.Type)
}
Expand Down
26 changes: 22 additions & 4 deletions internal/extproc/processor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/envoyproxy/ai-gateway/filterconfig"
"github.com/envoyproxy/ai-gateway/internal/extproc/router"
"github.com/envoyproxy/ai-gateway/internal/extproc/translator"
"github.com/envoyproxy/ai-gateway/internal/llmcostcel"
)

func TestProcessor_ProcessRequestHeaders(t *testing.T) {
Expand Down Expand Up @@ -65,11 +66,24 @@ func TestProcessor_ProcessResponseBody(t *testing.T) {
retBodyMutation: expBodyMut, retHeaderMutation: expHeadMut,
retUsedToken: translator.LLMTokenUsage{OutputTokens: 123, InputTokens: 1},
}

celProgInt, err := llmcostcel.NewProgram("54321")
require.NoError(t, err)
celProgUint, err := llmcostcel.NewProgram("uint(9999)")
require.NoError(t, err)
p := &Processor{translator: mt, config: &processorConfig{
metadataNamespace: "ai_gateway_llm_ns",
requestCosts: []filterconfig.LLMRequestCost{
{Type: filterconfig.LLMRequestCostTypeOutputToken, MetadataKey: "output_token_usage"},
{Type: filterconfig.LLMRequestCostTypeInputToken, MetadataKey: "input_token_usage"},
requestCosts: []processorConfigRequestCost{
{LLMRequestCost: &filterconfig.LLMRequestCost{Type: filterconfig.LLMRequestCostTypeOutputToken, MetadataKey: "output_token_usage"}},
{LLMRequestCost: &filterconfig.LLMRequestCost{Type: filterconfig.LLMRequestCostTypeInputToken, MetadataKey: "input_token_usage"}},
{
celProg: celProgInt,
LLMRequestCost: &filterconfig.LLMRequestCost{Type: filterconfig.LLMRequestCostTypeCELExpression, MetadataKey: "cel_int"},
},
{
celProg: celProgUint,
LLMRequestCost: &filterconfig.LLMRequestCost{Type: filterconfig.LLMRequestCostTypeCELExpression, MetadataKey: "cel_uint"},
},
},
}}
res, err := p.ProcessResponseBody(context.Background(), inBody)
Expand All @@ -84,6 +98,10 @@ func TestProcessor_ProcessResponseBody(t *testing.T) {
GetStructValue().Fields["output_token_usage"].GetNumberValue())
require.Equal(t, float64(1), md.Fields["ai_gateway_llm_ns"].
GetStructValue().Fields["input_token_usage"].GetNumberValue())
require.Equal(t, float64(54321), md.Fields["ai_gateway_llm_ns"].
GetStructValue().Fields["cel_int"].GetNumberValue())
require.Equal(t, float64(9999), md.Fields["ai_gateway_llm_ns"].
GetStructValue().Fields["cel_uint"].GetNumberValue())
})
}

Expand Down Expand Up @@ -168,7 +186,7 @@ func TestProcessor_ProcessRequestBody(t *testing.T) {
{Name: "some-schema", Version: "v10.0"}: factory.impl,
},
selectedBackendHeaderKey: "x-ai-gateway-backend-key",
ModelNameHeaderKey: "x-ai-gateway-model-key",
modelNameHeaderKey: "x-ai-gateway-model-key",
}, requestHeaders: headers, logger: slog.Default()}
resp, err := p.ProcessRequestBody(context.Background(), &extprocv3.HttpBody{})
require.NoError(t, err)
Expand Down
19 changes: 17 additions & 2 deletions internal/extproc/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"log/slog"

extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
"github.com/google/cel-go/cel"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/health/grpc_health_v1"
"google.golang.org/grpc/status"
Expand All @@ -17,6 +18,7 @@ import (
"github.com/envoyproxy/ai-gateway/internal/extproc/backendauth"
"github.com/envoyproxy/ai-gateway/internal/extproc/router"
"github.com/envoyproxy/ai-gateway/internal/extproc/translator"
"github.com/envoyproxy/ai-gateway/internal/llmcostcel"
)

// Server implements the external process server.
Expand Down Expand Up @@ -64,14 +66,27 @@ func (s *Server[P]) LoadConfig(config *filterconfig.Config) error {
}
}

costs := make([]processorConfigRequestCost, 0, len(config.LLMRequestCosts))
for i := range config.LLMRequestCosts {
c := &config.LLMRequestCosts[i]
var prog cel.Program
if c.CELExpression != "" {
prog, err = llmcostcel.NewProgram(c.CELExpression)
if err != nil {
return fmt.Errorf("cannot create CEL program for cost: %w", err)
}
}
costs = append(costs, processorConfigRequestCost{LLMRequestCost: c, celProg: prog})
}

newConfig := &processorConfig{
bodyParser: bodyParser, router: rt,
selectedBackendHeaderKey: config.SelectedBackendHeaderKey,
ModelNameHeaderKey: config.ModelNameHeaderKey,
modelNameHeaderKey: config.ModelNameHeaderKey,
factories: factories,
backendAuthHandlers: backendAuthHandlers,
metadataNamespace: config.MetadataNamespace,
requestCosts: config.LLMRequestCosts,
requestCosts: costs,
}
s.config = newConfig // This is racey, but we don't care.
return nil
Expand Down
Loading
Loading