-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: implements CEL expression API for costs #153
base: main
Are you sure you want to change the base?
Changes from all commits
3af87de
db900a4
4567b1a
a3a2cd9
569ac5d
5eac389
6cb71ab
7777f5f
f639a68
ad6bff6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,25 +11,32 @@ import ( | |
|
||
corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" | ||
extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" | ||
"github.com/google/cel-go/cel" | ||
"google.golang.org/protobuf/types/known/structpb" | ||
|
||
"github.com/envoyproxy/ai-gateway/extprocapi" | ||
"github.com/envoyproxy/ai-gateway/filterconfig" | ||
"github.com/envoyproxy/ai-gateway/internal/extproc/backendauth" | ||
"github.com/envoyproxy/ai-gateway/internal/extproc/router" | ||
"github.com/envoyproxy/ai-gateway/internal/extproc/translator" | ||
"github.com/envoyproxy/ai-gateway/internal/llmcostcel" | ||
) | ||
|
||
// processorConfig is the configuration for the processor. | ||
// This will be created by the server and passed to the processor when it detects a new configuration. | ||
type processorConfig struct { | ||
bodyParser router.RequestBodyParser | ||
router extprocapi.Router | ||
ModelNameHeaderKey, selectedBackendHeaderKey string | ||
modelNameHeaderKey, selectedBackendHeaderKey string | ||
factories map[filterconfig.VersionedAPISchema]translator.Factory | ||
backendAuthHandlers map[string]backendauth.Handler | ||
metadataNamespace string | ||
requestCosts []filterconfig.LLMRequestCost | ||
requestCosts []processorConfigRequestCost | ||
} | ||
|
||
// processorConfigRequestCost embeds a *filterconfig.LLMRequestCost and carries the CEL program used for that cost entry. | |
type processorConfigRequestCost struct { | |
*filterconfig.LLMRequestCost | |
// celProg is the program handed to llmcostcel.EvaluateProgram when Type is LLMRequestCostTypeCELExpression. | |
celProg cel.Program | |
} | |
|
||
// ProcessorIface is the interface for the processor. | ||
|
@@ -79,7 +86,7 @@ func (p *Processor) ProcessRequestBody(_ context.Context, rawBody *extprocv3.Htt | |
} | ||
p.logger.Info("Processing request", "path", path, "model", model) | ||
|
||
p.requestHeaders[p.config.ModelNameHeaderKey] = model | ||
p.requestHeaders[p.config.modelNameHeaderKey] = model | ||
b, err := p.config.router.Calculate(p.requestHeaders) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to calculate route: %w", err) | ||
|
@@ -107,7 +114,7 @@ func (p *Processor) ProcessRequestBody(_ context.Context, rawBody *extprocv3.Htt | |
} | ||
// Set the model name to the request header with the key `x-ai-gateway-llm-model-name`. | ||
headerMutation.SetHeaders = append(headerMutation.SetHeaders, &corev3.HeaderValueOption{ | ||
Header: &corev3.HeaderValue{Key: p.config.ModelNameHeaderKey, RawValue: []byte(model)}, | ||
Header: &corev3.HeaderValue{Key: p.config.modelNameHeaderKey, RawValue: []byte(model)}, | ||
}, &corev3.HeaderValueOption{ | ||
Header: &corev3.HeaderValue{Key: p.config.selectedBackendHeaderKey, RawValue: []byte(b.Name)}, | ||
}) | ||
|
@@ -203,7 +210,8 @@ func (p *Processor) ProcessResponseBody(_ context.Context, body *extprocv3.HttpB | |
|
||
func (p *Processor) maybeBuildDynamicMetadata() (*structpb.Struct, error) { | ||
metadata := make(map[string]*structpb.Value, len(p.config.requestCosts)) | ||
for _, c := range p.config.requestCosts { | ||
for i := range p.config.requestCosts { | ||
c := &p.config.requestCosts[i] | ||
var cost uint32 | ||
switch c.Type { | ||
case filterconfig.LLMRequestCostTypeInputToken: | ||
|
@@ -212,6 +220,19 @@ func (p *Processor) maybeBuildDynamicMetadata() (*structpb.Struct, error) { | |
cost = p.costs.OutputTokens | ||
case filterconfig.LLMRequestCostTypeTotalToken: | ||
cost = p.costs.TotalTokens | ||
case filterconfig.LLMRequestCostTypeCELExpression: | ||
costU64, err := llmcostcel.EvaluateProgram( | ||
c.celProg, | ||
p.requestHeaders[p.config.modelNameHeaderKey], | ||
p.requestHeaders[p.config.selectedBackendHeaderKey], | ||
p.costs.InputTokens, | ||
p.costs.OutputTokens, | ||
p.costs.TotalTokens, | ||
) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to evaluate CEL expression: %w", err) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Out of curiosity, what does it mean for ext_proc to fail at this stage, when the response headers may have been sent? Does it make sense to make it fail? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This will result in connection termination, I guess. |
||
} | ||
cost = uint32(costU64) //nolint:gosec | ||
default: | ||
return nil, fmt.Errorf("unknown request cost kind: %s", c.Type) | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
note: shouldn't have been exported