Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions cli/planoai/config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,15 @@ def validate_and_render_schema():
"upstream_tls_ca_path", "/etc/ssl/certs/ca-certificates.crt"
)

upstream_timeout_ms = overrides.get("upstream_timeout_ms")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure why we have an upstream_timeout_ms field when the model_listener object already has a timeout field. Can you elaborate a bit more?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated to use the existing per-listener timeout.

if upstream_timeout_ms is not None:
timeout_s = f"{int(upstream_timeout_ms) // 1000}s"
llm_gateway["timeout"] = timeout_s
prompt_gateway["timeout"] = timeout_s
for listener in listeners:
if listener.get("type") == "agent" and "timeout" not in listener:
listener["timeout"] = timeout_s

data = {
"prompt_gateway_listener": prompt_gateway,
"llm_gateway_listener": llm_gateway,
Expand Down
4 changes: 2 additions & 2 deletions cli/planoai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def convert_legacy_listeners(
"type": "model_listener",
"port": 12000,
"address": "0.0.0.0",
"timeout": "30s",
"timeout": "300s",
"model_providers": model_providers or [],
}

Expand All @@ -101,7 +101,7 @@ def convert_legacy_listeners(
"type": "prompt_listener",
"port": 10000,
"address": "0.0.0.0",
"timeout": "30s",
"timeout": "300s",
}

# Handle None case
Expand Down
6 changes: 3 additions & 3 deletions config/envoy.template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ static_resources:
auto_host_rewrite: true
prefix_rewrite: "/agents/"
cluster: bright_staff
timeout: {{ listener.timeout | default('30s') }}
timeout: {{ listener.timeout | default('300s') }}
http_filters:
- name: envoy.filters.http.compressor
typed_config:
Expand Down Expand Up @@ -517,12 +517,12 @@ static_resources:
route:
auto_host_rewrite: true
cluster: {{ llm_cluster_name }}
timeout: 300s
timeout: {{ llm_gateway_listener.timeout }}
{% if llm_gateway_listener.max_retries %}
retry_policy:
retry_on: "5xx,connect-failure,refused-stream,reset,retriable-status-codes"
num_retries: {{ llm_gateway_listener.max_retries }}
per_try_timeout: 30s
per_try_timeout: {{ llm_gateway_listener.timeout }}
retriable_status_codes: [429, 500, 502, 503, 504]
retry_back_off:
base_interval: 0.5s
Expand Down
4 changes: 4 additions & 0 deletions config/plano_config_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,10 @@ properties:
type: boolean
use_agent_orchestrator:
type: boolean
upstream_timeout_ms:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above. I don't think we need this field, especially since we already support a timeout field for model_listener objects. Please review more carefully.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

type: integer
minimum: 1000
description: "Timeout in milliseconds for outbound upstream calls from WASM filters (tool endpoints, function calling, default prompt targets). Default is 300000 (300s)."
upstream_connect_timeout:
type: string
description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'."
Expand Down
1 change: 1 addition & 0 deletions crates/common/src/configuration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ pub struct Overrides {
pub prompt_target_intent_matching_threshold: Option<f64>,
pub optimize_context_window: Option<bool>,
pub use_agent_orchestrator: Option<bool>,
pub upstream_timeout_ms: Option<u64>,
}

#[derive(Debug, Clone, Serialize, Deserialize, Default)]
Expand Down
8 changes: 4 additions & 4 deletions crates/common/src/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ pub const SYSTEM_ROLE: &str = "system";
pub const USER_ROLE: &str = "user";
pub const TOOL_ROLE: &str = "tool";
pub const ASSISTANT_ROLE: &str = "assistant";
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 300_000; // 300 seconds
pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 300_000; // 300 seconds
pub const API_REQUEST_TIMEOUT_MS: u64 = 300_000; // 300 seconds
pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 300_000; // 300 seconds
pub const MODEL_SERVER_NAME: &str = "bright_staff";
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
pub const MESSAGES_KEY: &str = "messages";
Expand Down
9 changes: 7 additions & 2 deletions crates/prompt_gateway/src/http_context.rs
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned on the Zoom call, we don't need any changes on the prompt_gateway side of things. The issue described the llm_gateway as the one timing out, and the developer may have had a tool-call scenario that could have taken longer.

Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,12 @@ impl HttpContext for StreamContext {
info!("on_http_request_body: sending request to model server");
debug!("request body: {}", json_data);

let timeout_str = MODEL_SERVER_REQUEST_TIMEOUT_MS.to_string();
let timeout_ms = if let Some(overrides) = self.overrides.as_ref() {
overrides.upstream_timeout_ms.unwrap_or(MODEL_SERVER_REQUEST_TIMEOUT_MS)
} else {
MODEL_SERVER_REQUEST_TIMEOUT_MS
};
let timeout_str = timeout_ms.to_string();

let mut headers = vec![
(ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME),
Expand All @@ -230,7 +235,7 @@ impl HttpContext for StreamContext {
headers,
Some(json_data.as_bytes()),
vec![],
Duration::from_secs(5),
Duration::from_millis(timeout_ms),
);

if let Some(content) = self.user_prompt.as_ref().unwrap().content.as_ref() {
Expand Down
20 changes: 16 additions & 4 deletions crates/prompt_gateway/src/stream_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,14 @@ impl StreamContext {
callout_context.request_body.messages.clone(),
);
let arch_messages_json = serde_json::to_string(&params).unwrap();
let timeout_str = DEFAULT_TARGET_REQUEST_TIMEOUT_MS.to_string();
let timeout_ms = if let Some(overrides) = self.overrides.as_ref() {
overrides
.upstream_timeout_ms
.unwrap_or(DEFAULT_TARGET_REQUEST_TIMEOUT_MS)
} else {
DEFAULT_TARGET_REQUEST_TIMEOUT_MS
};
let timeout_str = timeout_ms.to_string();

let mut headers = vec![
(":method", "POST"),
Expand All @@ -193,7 +200,7 @@ impl StreamContext {
headers,
Some(arch_messages_json.as_bytes()),
vec![],
Duration::from_secs(5),
Duration::from_millis(timeout_ms),
);
callout_context.response_handler_type = ResponseHandlerType::DefaultTarget;
callout_context.prompt_target_name = Some(default_prompt_target.name.clone());
Expand Down Expand Up @@ -422,7 +429,12 @@ impl StreamContext {

debug!("on_http_call_response: api call body {:?}", api_call_body);

let timeout_str = API_REQUEST_TIMEOUT_MS.to_string();
let timeout_ms = if let Some(overrides) = self.overrides.as_ref() {
overrides.upstream_timeout_ms.unwrap_or(API_REQUEST_TIMEOUT_MS)
} else {
API_REQUEST_TIMEOUT_MS
};
let timeout_str = timeout_ms.to_string();

let http_method_str = http_method.to_string();
let mut headers: HashMap<_, _> = [
Expand Down Expand Up @@ -457,7 +469,7 @@ impl StreamContext {
headers.into_iter().collect(),
api_call_body.as_deref().map(|s| s.as_bytes()),
vec![],
Duration::from_secs(5),
Duration::from_millis(timeout_ms),
);

info!(
Expand Down
Loading