Skip to content

Commit f664362

Browse files
committed
Merge remote-tracking branch 'origin/main' into adil/release_0.4.4
2 parents 8bc712a + 062825f commit f664362

5 files changed

Lines changed: 22 additions & 14 deletions

File tree

cli/planoai/utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ def convert_legacy_listeners(
128128

129129
model_provider_set = False
130130
for listener in listeners:
131-
if listener.get("type") == "model_listener":
131+
if listener.get("type") == "model":
132132
if model_provider_set:
133133
raise ValueError(
134134
"Currently only one listener can have model_providers set"

config/arch_config_schema.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,8 @@ properties:
6666
type: string
6767
enum:
6868
- plano_orchestrator_v1
69+
max_retries:
70+
type: integer
6971
type:
7072
type: string
7173
enum:

config/envoy.template.yaml

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -413,7 +413,7 @@ static_resources:
413413
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
414414
path: "/var/log/access_llm.log"
415415
format: |
416-
[%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
416+
[%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
417417
route_config:
418418
name: local_routes
419419
virtual_hosts:
@@ -534,7 +534,7 @@ static_resources:
534534
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
535535
path: "/var/log/access_llm.log"
536536
format: |
537-
[%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
537+
[%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
538538
route_config:
539539
name: local_routes
540540
virtual_hosts:
@@ -559,6 +559,16 @@ static_resources:
559559
auto_host_rewrite: true
560560
cluster: {{ llm_cluster_name }}
561561
timeout: 300s
562+
{% if llm_gateway_listener.max_retries %}
563+
retry_policy:
564+
retry_on: "5xx,connect-failure,refused-stream,reset,retriable-status-codes"
565+
num_retries: {{ llm_gateway_listener.max_retries }}
566+
per_try_timeout: 30s
567+
retriable_status_codes: [429, 500, 502, 503, 504]
568+
retry_back_off:
569+
base_interval: 0.5s
570+
max_interval: 5s
571+
{% endif %}
562572
{% endfor %}
563573
- match:
564574
prefix: "/"

demos/use_cases/llm_routing/config.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ listeners:
55
name: model_1
66
address: 0.0.0.0
77
port: 12000
8+
max_retries: 3
89

910
model_providers:
1011

docs/source/resources/includes/arch_config_full_reference_rendered.yaml

Lines changed: 6 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -37,14 +37,6 @@ listeners:
3737
port: 8001
3838
router: plano_orchestrator_v1
3939
type: agent
40-
- address: 0.0.0.0
41-
name: model_1
42-
port: 12000
43-
type: model
44-
- address: 0.0.0.0
45-
name: prompt_function_listener
46-
port: 10000
47-
type: prompt
4840
- address: 0.0.0.0
4941
model_providers:
5042
- access_key: $OPENAI_API_KEY
@@ -73,10 +65,13 @@ listeners:
7365
port: 443
7466
protocol: https
7567
provider_interface: openai
76-
name: egress_traffic
68+
name: model_1
7769
port: 12000
78-
timeout: 30s
79-
type: model_listener
70+
type: model
71+
- address: 0.0.0.0
72+
name: prompt_function_listener
73+
port: 10000
74+
type: prompt
8075
model_aliases:
8176
fast-llm:
8277
target: gpt-4o-mini

0 commit comments

Comments
 (0)