From 871b00d1c2f4fd0a45a7ffa1d52cfcffd42ac5cc Mon Sep 17 00:00:00 2001
From: Tom Stoffer
Date: Tue, 14 Apr 2026 23:56:18 +1200
Subject: [PATCH 1/2] Update CONTRIBUTING.md to include helpful build steps

---
 CONTRIBUTING.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 9f9b0b9cc..d6d9ad747 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -100,6 +100,16 @@ For library tests only:
 cargo test --lib
 ```
 
+**Build Rust artifacts with the correct targets:**
+
+```bash
+cd crates
+cargo build --release --target wasm32-wasip1 -p llm_gateway -p prompt_gateway
+cargo build --release -p brightstaff -p hermesllm -p common
+```
+
+Do not run a blanket workspace-native build such as `cargo build --release` from `crates/`. The `llm_gateway` and `prompt_gateway` crates are Proxy-WASM `cdylib`s and must be built for `wasm32-wasip1`, while `brightstaff`, `hermesllm`, and `common` build natively.
+
 **Run Python CLI tests:**
 
 ```bash

From 164fd467a00e6011fdc008489f734389152119f7 Mon Sep 17 00:00:00 2001
From: Tom Stoffer
Date: Wed, 15 Apr 2026 20:32:17 +1200
Subject: [PATCH 2/2] Implement a request adapter for ChatGPT Codex
 subscription endpoints, whose wire format does not match the standard
 OpenAI Responses API

Made-with: Cursor
---
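Notes (placed below the `---` so `git am` drops them on apply): the sketch below is a minimal, self-contained illustration of the wire rewrite this patch performs, written against `serde_json` only. The payload, the `gpt-5.4` model id, and the two-part message are illustrative values mirroring the unit tests, not real Codex traffic; the actual implementation is in crates/hermesllm/src/providers/request_adapter.rs below.

```rust
// Sketch of the two ChatGPT wire fixes applied by the new adapter:
// 1. rename `max_output_tokens` to `maxTokens`
// 2. collapse typed text parts in `input[].content` to one plain string
use serde_json::{json, Value};

fn main() {
    // A normalized Responses API request before adaptation (illustrative).
    let mut request = json!({
        "model": "gpt-5.4",
        "max_output_tokens": 8192,
        "input": [{
            "role": "user",
            "content": [
                { "type": "input_text", "text": "first line" },
                { "type": "input_text", "text": "second line" }
            ]
        }]
    });
    let obj = request.as_object_mut().unwrap();

    // Fix 1: ChatGPT's /responses backend rejects `max_output_tokens`.
    if let Some(v) = obj.remove("max_output_tokens") {
        obj.insert("maxTokens".to_string(), v);
    }

    // Fix 2: flatten each content array of typed text parts.
    for item in obj.get_mut("input").and_then(Value::as_array_mut).unwrap() {
        let content = &mut item["content"];
        if let Some(parts) = content.as_array() {
            let text = parts
                .iter()
                .filter_map(|p| match p["type"].as_str() {
                    Some("input_text") | Some("output_text") => p["text"].as_str(),
                    _ => None,
                })
                .collect::<Vec<_>>()
                .join("\n");
            *content = Value::String(text);
        }
    }

    // {"input":[{"content":"first line\nsecond line","role":"user"}],
    //  "maxTokens":8192,"model":"gpt-5.4"}
    println!("{request}");
}
```

Two differences from this sketch are worth noting for review: the real adapter only collapses a content array when at least one `input_text`/`output_text` part is present, and the whole rewrite is gated on `ProviderId::ChatGPT`, so every other provider keeps the canonical Responses API field names.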
 crates/brightstaff/src/handlers/llm/mod.rs    |  21 +-
 crates/hermesllm/src/apis/openai_responses.rs |  79 +++-
 crates/hermesllm/src/lib.rs                   |   1 +
 crates/hermesllm/src/providers/mod.rs         |   2 +
 crates/hermesllm/src/providers/request.rs     |  48 +++
 .../src/providers/request_adapter.rs          | 407 ++++++++++++++++++
 .../src/transforms/request/from_openai.rs     |  39 +-
 crates/llm_gateway/src/stream_context.rs      |  17 +-
 8 files changed, 577 insertions(+), 37 deletions(-)
 create mode 100644 crates/hermesllm/src/providers/request_adapter.rs

diff --git a/crates/brightstaff/src/handlers/llm/mod.rs b/crates/brightstaff/src/handlers/llm/mod.rs
index 80455cfb7..ace0288a4 100644
--- a/crates/brightstaff/src/handlers/llm/mod.rs
+++ b/crates/brightstaff/src/handlers/llm/mod.rs
@@ -5,7 +5,7 @@ use common::llm_providers::LlmProviders;
 use hermesllm::apis::openai::Message;
 use hermesllm::apis::openai_responses::InputParam;
 use hermesllm::clients::{SupportedAPIsFromClient, SupportedUpstreamAPIs};
-use hermesllm::{ProviderRequest, ProviderRequestType};
+use hermesllm::{serialize_for_upstream, ProviderRequest, ProviderRequestType};
 use http_body_util::combinators::BoxBody;
 use http_body_util::BodyExt;
 use hyper::header::{self};
@@ -248,16 +248,15 @@ async fn llm_chat_inner(
     };
 
     // Serialize request for upstream BEFORE router consumes it
-    let client_request_bytes_for_upstream: Bytes =
-        match ProviderRequestType::to_bytes(&client_request) {
-            Ok(bytes) => bytes.into(),
-            Err(err) => {
-                warn!(error = %err, "failed to serialize request for upstream");
-                let mut r = Response::new(full(format!("Failed to serialize request: {}", err)));
-                *r.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
-                return Ok(r);
-            }
-        };
+    let client_request_bytes_for_upstream: Bytes = match serialize_for_upstream(&client_request, provider_id) {
+        Ok(bytes) => bytes.into(),
+        Err(err) => {
+            warn!(error = %err, "failed to serialize request for upstream");
+            let mut r = Response::new(full(format!("Failed to serialize request: {}", err)));
+            *r.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
+            return Ok(r);
+        }
+    };
 
     // --- Phase 3: Route the request (or use pinned model from session cache) ---
     let resolved_model = if let Some(cached_model) = pinned_model {
diff --git a/crates/hermesllm/src/apis/openai_responses.rs b/crates/hermesllm/src/apis/openai_responses.rs
index eac8a452c..24848a5b0 100644
--- a/crates/hermesllm/src/apis/openai_responses.rs
+++ b/crates/hermesllm/src/apis/openai_responses.rs
@@ -280,16 +280,31 @@ pub struct ConversationParam {
     pub id: Option<String>,
 }
 
-/// Tool definitions
+/// Tool definitions.
+///
+/// Supports both the canonical OpenAI Responses flat tool shape:
+///   { "type": "function", "name": "...", "description": "...", "parameters": {...} }
+/// and the nested chat-completions-compatible shape:
+///   { "type": "function", "function": { "name": "...", "description": "...", "parameters": {...} } }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum Tool {
-    /// Function tool - flat structure in Responses API
+    /// Function tool: accepts both flat and nested `function` object shapes.
     Function {
-        name: String,
+        /// Top-level name (flat shape).
+        name: Option<String>,
+        /// Top-level description (flat shape).
         description: Option<String>,
+        /// Top-level parameters (flat shape).
         parameters: Option<serde_json::Value>,
+        /// Top-level strict flag (flat shape).
         strict: Option<bool>,
+        /// Nested `function` object (nested/compat shape).
+        ///
+        /// When both shapes are populated, the nested `name` takes precedence
+        /// over the outer `name`; see the accessors below for the other fields.
+        #[serde(default, flatten)]
+        function: Option<FunctionDef>,
     },
     /// File search tool
     FileSearch {
@@ -321,6 +336,49 @@
     },
 }
 
+impl Tool {
+    pub fn name(&self) -> Option<&str> {
+        match self {
+            Tool::Function { name, function, .. } => {
+                function
+                    .as_ref()
+                    .and_then(|f| f.name.as_ref())
+                    .map(|s| s.as_str())
+                    .or_else(|| name.as_ref().map(|s| s.as_str()))
+            }
+            Tool::Custom { name, .. } => name.as_deref(),
+            _ => None,
+        }
+    }
+
+    pub fn description(&self) -> Option<&String> {
+        match self {
+            Tool::Function {
+                description,
+                function,
+                ..
+            } => description
+                .as_ref()
+                .or_else(|| function.as_ref().and_then(|f| f.description.as_ref())),
+            Tool::Custom { description, .. } => description.as_ref(),
+            _ => None,
+        }
+    }
+
+    pub fn parameters(&self) -> Option<&serde_json::Value> {
+        match self {
+            Tool::Function {
+                parameters,
+                function,
+                ..
+            } => parameters
+                .as_ref()
+                .or_else(|| function.as_ref().and_then(|f| f.parameters.as_ref())),
+            _ => None,
+        }
+    }
+}
+
 /// Ranking options for file search
 #[skip_serializing_none]
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -343,6 +401,16 @@ pub struct UserLocation {
     pub timezone: Option<String>,
 }
 
+/// Inner function definition used inside the nested `function` object.
+#[skip_serializing_none]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct FunctionDef {
+    pub name: Option<String>,
+    pub description: Option<String>,
+    pub parameters: Option<serde_json::Value>,
+    pub strict: Option<bool>,
+}
+
 /// Tool choice options
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(untagged)]
@@ -1146,7 +1214,10 @@ impl ProviderRequest for ResponsesAPIRequest {
             tools
                 .iter()
                 .filter_map(|tool| match tool {
-                    Tool::Function { name, .. } => Some(name.clone()),
+                    Tool::Function { name, function, .. } => function
+                        .as_ref()
+                        .and_then(|f| f.name.clone())
+                        .or_else(|| name.clone()),
                     Tool::Custom {
                         name: Some(name),
                         ..
                     } => Some(name.clone()),
diff --git a/crates/hermesllm/src/lib.rs b/crates/hermesllm/src/lib.rs
index 3b9611e00..645bb4e94 100644
--- a/crates/hermesllm/src/lib.rs
+++ b/crates/hermesllm/src/lib.rs
@@ -11,6 +11,7 @@ pub use apis::streaming_shapes::sse::{SseEvent, SseStreamIter};
 pub use aws_smithy_eventstream::frame::DecodedFrame;
 pub use providers::id::ProviderId;
 pub use providers::request::{ProviderRequest, ProviderRequestError, ProviderRequestType};
+pub use providers::request_adapter::serialize_for_upstream;
 pub use providers::response::{
     ProviderResponse, ProviderResponseError, ProviderResponseType, TokenUsage,
 };
diff --git a/crates/hermesllm/src/providers/mod.rs b/crates/hermesllm/src/providers/mod.rs
index 4343022f4..59d4605b3 100644
--- a/crates/hermesllm/src/providers/mod.rs
+++ b/crates/hermesllm/src/providers/mod.rs
@@ -5,10 +5,12 @@
 //!
 pub mod id;
 pub mod request;
+pub mod request_adapter;
 pub mod response;
 pub mod streaming_response;
 
 pub use id::ProviderId;
 pub use request::{ProviderRequest, ProviderRequestError, ProviderRequestType};
+pub use request_adapter::serialize_for_upstream;
 pub use response::{ProviderResponse, ProviderResponseType, TokenUsage};
 pub use streaming_response::{ProviderStreamResponse, ProviderStreamResponseType};
diff --git a/crates/hermesllm/src/providers/request.rs b/crates/hermesllm/src/providers/request.rs
index 762020800..bf558e2e9 100644
--- a/crates/hermesllm/src/providers/request.rs
+++ b/crates/hermesllm/src/providers/request.rs
@@ -1004,6 +1004,54 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_normalize_for_upstream_chatgpt_sets_store_stream_and_wraps_input() {
+        use crate::apis::openai::OpenAIApi::Responses;
+        use crate::apis::openai_responses::InputParam;
+        let responses_req = ResponsesAPIRequest {
+            model: "gpt-5.4".to_string(),
+            input: InputParam::Text("Hello, Codex!".to_string()),
+            temperature: None,
+            max_output_tokens: Some(8192),
+            stream: Some(false),
+            metadata: None,
+            tools: None,
+            tool_choice: None,
+            parallel_tool_calls: None,
+            instructions: None,
+            modalities: None,
+            user: None,
+            store: None,
+            reasoning_effort: None,
+            include: None,
+            audio: None,
+            text: None,
+            service_tier: None,
+            top_p: None,
+            top_logprobs: None,
+            stream_options: None,
+            truncation: None,
+            conversation: None,
+            previous_response_id: None,
+            max_tool_calls: None,
+            background: None,
+        };
+
+        let upstream_api = SupportedUpstreamAPIs::OpenAIResponsesAPI(Responses);
+        let mut request = ProviderRequestType::ResponsesAPIRequest(responses_req);
+        request.normalize_for_upstream(ProviderId::ChatGPT, &upstream_api);
+
+        match request {
+            ProviderRequestType::ResponsesAPIRequest(req) => {
+                assert_eq!(req.max_output_tokens, Some(8192));
+                assert_eq!(req.store, Some(false));
+                assert_eq!(req.stream, Some(true));
+                assert!(matches!(req.input, InputParam::Items(_)));
+            }
+            _ => panic!("Expected ResponsesAPIRequest variant"),
+        }
+    }
+
     #[test]
     fn test_chat_completions_to_responses_api_not_supported() {
         use crate::apis::openai::OpenAIApi::Responses;
diff --git a/crates/hermesllm/src/providers/request_adapter.rs b/crates/hermesllm/src/providers/request_adapter.rs
new file mode 100644
index 000000000..0ae70b93b
--- /dev/null
+++ b/crates/hermesllm/src/providers/request_adapter.rs
@@ -0,0 +1,407 @@
+use crate::apis::openai_responses::ResponsesAPIRequest;
+use crate::providers::id::ProviderId;
+use crate::providers::request::{ProviderRequest, ProviderRequestError, ProviderRequestType};
+
+/// Serialize a provider request for the upstream wire format.
+///
+/// For most providers this is plain `to_bytes()`. ChatGPT's native /responses
+/// backend has wire-format quirks that require post-serialization patching:
+/// - `max_output_tokens` must be sent as `maxTokens`
+/// - Structured content arrays (`input_text`/`output_text` typed parts)
+///   must be flattened to plain text strings
+pub fn serialize_for_upstream(
+    request: &ProviderRequestType,
+    provider_id: ProviderId,
+) -> Result<Vec<u8>, ProviderRequestError> {
+    match (provider_id, request) {
+        (ProviderId::ChatGPT, ProviderRequestType::ResponsesAPIRequest(req)) => {
+            adapt_chatgpt_responses_request(req)
+        }
+        _ => request.to_bytes(),
+    }
+}
+
+/// Apply ChatGPT-specific wire-format fixes to a ResponsesAPI request.
+///
+/// Works at the JSON value level so we can rename keys and restructure
+/// content without needing separate serde types for the ChatGPT variant.
+fn adapt_chatgpt_responses_request(
+    req: &ResponsesAPIRequest,
+) -> Result<Vec<u8>, ProviderRequestError> {
+    let mut value = serde_json::to_value(req).map_err(|e| ProviderRequestError {
+        message: format!("Failed to encode ChatGPT responses request as JSON value: {}", e),
+        source: Some(Box::new(e)),
+    })?;
+
+    if let Some(obj) = value.as_object_mut() {
+        // ChatGPT rejects `max_output_tokens`; it expects `maxTokens`
+        if let Some(max_output_tokens) = obj.remove("max_output_tokens") {
+            if !max_output_tokens.is_null() {
+                obj.insert("maxTokens".to_string(), max_output_tokens);
+            }
+        }
+
+        // ChatGPT rejects structured content arrays with typed parts
+        // (input_text, output_text); flatten them to plain text strings
+        flatten_input_content_parts(obj);
+    }
+
+    serde_json::to_vec(&value).map_err(|e| ProviderRequestError {
+        message: format!("Failed to serialize ChatGPT responses request for upstream: {}", e),
+        source: Some(Box::new(e)),
+    })
+}
+
+/// Walk through `input[].content` and collapse typed content-part arrays
+/// into plain text strings that ChatGPT accepts.
+fn flatten_input_content_parts(obj: &mut serde_json::Map<String, serde_json::Value>) {
+    let input = match obj.get_mut("input").and_then(|v| v.as_array_mut()) {
+        Some(arr) => arr,
+        None => return,
+    };
+
+    for item in input {
+        let content = match item
+            .as_object_mut()
+            .and_then(|m| m.get_mut("content"))
+        {
+            Some(c) => c,
+            None => continue,
+        };
+
+        let parts = match content.as_array() {
+            Some(p) => p,
+            None => continue,
+        };
+
+        let mut saw_text_part = false;
+        let text = parts
+            .iter()
+            .filter_map(|part| {
+                let part_obj = part.as_object()?;
+                match part_obj.get("type").and_then(|v| v.as_str()) {
+                    Some("input_text") | Some("output_text") => {
+                        saw_text_part = true;
+                        Some(
+                            part_obj
+                                .get("text")
+                                .and_then(|v| v.as_str())
+                                .unwrap_or_default()
+                                .to_string(),
+                        )
+                    }
+                    _ => None,
+                }
+            })
+            .collect::<Vec<_>>()
+            .join("\n");
+
+        // Even when all text parts are empty, we still need to collapse the array.
+        // Leaving typed parts in-place causes ChatGPT Codex endpoints to reject them.
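+        // Example: [{"type": "input_text", "text": "hi"}, {"type": "input_image", ...}]
+        // collapses to "hi": non-text parts are dropped from the joined string,
+        // and if no text part was seen at all, the array is left untouched.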
+        if saw_text_part {
+            *content = serde_json::Value::String(text);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::apis::openai::OpenAIApi;
+    use crate::apis::openai_responses::{
+        InputContent, InputItem, InputMessage, InputParam, MessageContent, MessageRole,
+        ResponsesAPIRequest,
+    };
+
+    fn make_responses_request(
+        input: InputParam,
+        max_output_tokens: Option<u32>,
+    ) -> ResponsesAPIRequest {
+        ResponsesAPIRequest {
+            model: "gpt-5.4".to_string(),
+            input,
+            temperature: None,
+            max_output_tokens,
+            stream: Some(true),
+            metadata: None,
+            tools: None,
+            tool_choice: None,
+            parallel_tool_calls: None,
+            instructions: Some("You are Codex.".to_string()),
+            modalities: None,
+            user: None,
+            store: Some(false),
+            reasoning_effort: None,
+            include: None,
+            audio: None,
+            text: None,
+            service_tier: None,
+            top_p: None,
+            top_logprobs: None,
+            stream_options: None,
+            truncation: None,
+            conversation: None,
+            previous_response_id: None,
+            max_tool_calls: None,
+            background: None,
+        }
+    }
+
+    // ---------------------------------------------------------------
+    // max_output_tokens → maxTokens rename
+    // ---------------------------------------------------------------
+
+    #[test]
+    fn chatgpt_renames_max_output_tokens_to_max_tokens_on_wire() {
+        let req = make_responses_request(InputParam::Text("Hello".to_string()), Some(8192));
+        let request = ProviderRequestType::ResponsesAPIRequest(req);
+
+        let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
+        let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
+
+        assert!(
+            wire.get("max_output_tokens").is_none(),
+            "max_output_tokens should be absent from wire format"
+        );
+        assert_eq!(
+            wire.get("maxTokens").and_then(|v| v.as_i64()),
+            Some(8192),
+            "maxTokens should be present with the original value"
+        );
+    }
+
+    #[test]
+    fn chatgpt_omits_max_tokens_when_max_output_tokens_is_none() {
+        let req = make_responses_request(InputParam::Text("Hello".to_string()), None);
+        let request = ProviderRequestType::ResponsesAPIRequest(req);
+
+        let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
+        let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
+
+        assert!(wire.get("max_output_tokens").is_none());
+        assert!(
+            wire.get("maxTokens").is_none(),
+            "maxTokens should not appear when original was None"
+        );
+    }
+
+    #[test]
+    fn non_chatgpt_preserves_max_output_tokens_field_name() {
+        let req = make_responses_request(InputParam::Text("Hello".to_string()), Some(4096));
+        let request = ProviderRequestType::ResponsesAPIRequest(req);
+
+        let bytes = serialize_for_upstream(&request, ProviderId::OpenAI).unwrap();
+        let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
+
+        assert_eq!(
+            wire.get("max_output_tokens").and_then(|v| v.as_i64()),
+            Some(4096)
+        );
+        assert!(wire.get("maxTokens").is_none());
+    }
+
+    // ---------------------------------------------------------------
+    // input_text / output_text content flattening
+    // ---------------------------------------------------------------
+
+    #[test]
+    fn chatgpt_flattens_input_text_content_parts_to_plain_string() {
+        let input = InputParam::Items(vec![InputItem::Message(InputMessage {
+            role: MessageRole::User,
+            content: MessageContent::Items(vec![
+                InputContent::InputText { text: "first line".to_string() },
+                InputContent::InputText { text: "second line".to_string() },
+            ]),
+        })]);
+
+        let req = make_responses_request(input, None);
+        let request = ProviderRequestType::ResponsesAPIRequest(req);
+
+        let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
+        let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
+
+        let content = &wire["input"][0]["content"];
+        assert!(
+            content.is_string(),
+            "content should be flattened to a string, got: {}",
+            content
+        );
+        assert_eq!(content.as_str().unwrap(), "first line\nsecond line");
+    }
+
+    #[test]
+    fn chatgpt_flattens_output_text_content_parts() {
+        let input = InputParam::Items(vec![InputItem::Message(InputMessage {
+            role: MessageRole::Assistant,
+            content: MessageContent::Items(vec![InputContent::InputText {
+                text: "assistant reply".to_string(),
+            }]),
+        })]);
+
+        let req = make_responses_request(input, None);
+        let request = ProviderRequestType::ResponsesAPIRequest(req);
+
+        let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
+        let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
+
+        let content = &wire["input"][0]["content"];
+        assert!(content.is_string());
+        assert_eq!(content.as_str().unwrap(), "assistant reply");
+    }
+
+    #[test]
+    fn chatgpt_flattens_empty_input_text_content_parts() {
+        let input = InputParam::Items(vec![InputItem::Message(InputMessage {
+            role: MessageRole::Assistant,
+            content: MessageContent::Items(vec![InputContent::InputText {
+                text: "".to_string(),
+            }]),
+        })]);
+
+        let req = make_responses_request(input, None);
+        let request = ProviderRequestType::ResponsesAPIRequest(req);
+
+        let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
+        let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
+
+        let content = &wire["input"][0]["content"];
+        assert!(
+            content.is_string(),
+            "content should be flattened to a string, got: {}",
+            content
+        );
+        assert_eq!(content.as_str().unwrap(), "");
+    }
+
+    #[test]
+    fn chatgpt_preserves_plain_text_content_unchanged() {
+        let input = InputParam::Items(vec![InputItem::Message(InputMessage {
+            role: MessageRole::User,
+            content: MessageContent::Text("plain text message".to_string()),
+        })]);
+
+        let req = make_responses_request(input, None);
+        let request = ProviderRequestType::ResponsesAPIRequest(req);
+
+        let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
+        let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
+
+        let content = &wire["input"][0]["content"];
+        assert_eq!(content.as_str().unwrap(), "plain text message");
+    }
+
+    #[test]
+    fn non_chatgpt_does_not_flatten_content_parts() {
+        let input = InputParam::Items(vec![InputItem::Message(InputMessage {
+            role: MessageRole::User,
+            content: MessageContent::Items(vec![
+                InputContent::InputText { text: "part one".to_string() },
+                InputContent::InputText { text: "part two".to_string() },
+            ]),
+        })]);
+
+        let req = make_responses_request(input, None);
+        let request = ProviderRequestType::ResponsesAPIRequest(req);
+
+        let bytes = serialize_for_upstream(&request, ProviderId::OpenAI).unwrap();
+        let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
+
+        let content = &wire["input"][0]["content"];
+        assert!(
+            content.is_array(),
+            "OpenAI should preserve array content, got: {}",
+            content
+        );
+    }
+
+    // ---------------------------------------------------------------
+    // Both fixes together (realistic ChatGPT payload)
+    // ---------------------------------------------------------------
+
+    #[test]
+    fn chatgpt_applies_both_fixes_together() {
+        let input = InputParam::Items(vec![
+            InputItem::Message(InputMessage {
+                role: MessageRole::User,
+                content: MessageContent::Items(vec![
InputContent::InputText { text: "Write a function".to_string() }, + ]), + }), + InputItem::Message(InputMessage { + role: MessageRole::Assistant, + content: MessageContent::Items(vec![ + InputContent::InputText { text: "def hello(): pass".to_string() }, + ]), + }), + InputItem::Message(InputMessage { + role: MessageRole::User, + content: MessageContent::Items(vec![ + InputContent::InputText { text: "Add a docstring".to_string() }, + ]), + }), + ]); + + let req = make_responses_request(input, Some(16384)); + let request = ProviderRequestType::ResponsesAPIRequest(req); + + let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap(); + let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap(); + + // max_output_tokens renamed + assert!(wire.get("max_output_tokens").is_none()); + assert_eq!(wire.get("maxTokens").and_then(|v| v.as_i64()), Some(16384)); + + // All content arrays flattened + for (i, item) in wire["input"].as_array().unwrap().iter().enumerate() { + let content = &item["content"]; + assert!(content.is_string(), + "input[{}].content should be a string, got: {}", i, content); + } + } + + // --------------------------------------------------------------- + // Non-ResponsesAPI requests pass through unchanged + // --------------------------------------------------------------- + + #[test] + fn chatgpt_chat_completions_request_passes_through() { + use crate::apis::openai::{ChatCompletionsRequest, Message, MessageContent as MC, Role}; + + let chat_req = ChatCompletionsRequest { + model: "gpt-5.4".to_string(), + messages: vec![Message { + role: Role::User, + content: Some(MC::Text("Hello".to_string())), + name: None, + tool_calls: None, + tool_call_id: None, + }], + max_completion_tokens: Some(1024), + ..Default::default() + }; + let request = ProviderRequestType::ChatCompletionsRequest(chat_req); + + let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap(); + let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap(); + + assert_eq!(wire.get("max_completion_tokens").and_then(|v| v.as_i64()), Some(1024)); + } + + // --------------------------------------------------------------- + // Normalize + serialize round-trip (full pipeline test) + // --------------------------------------------------------------- + + #[test] + fn chatgpt_full_pipeline_normalize_then_serialize() { + let input = InputParam::Text("Hello, Codex!".to_string()); + let req = make_responses_request(input, Some(8192)); + + let upstream_api = crate::clients::endpoints::SupportedUpstreamAPIs::OpenAIResponsesAPI( + OpenAIApi::Responses, + ); + let mut request = ProviderRequestType::ResponsesAPIRequest(req); + + // normalize_for_upstream sets store=false, stream=true, wraps input in Items + request.normalize_for_upstream(ProviderId::ChatGPT, &upstream_api); + + // serialize_for_upstream then renames max_output_tokens and flattens content + let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap(); + let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap(); + + assert!(wire.get("max_output_tokens").is_none()); + assert_eq!(wire.get("maxTokens").and_then(|v| v.as_i64()), Some(8192)); + assert_eq!(wire.get("store"), Some(&serde_json::Value::Bool(false))); + assert_eq!(wire.get("stream"), Some(&serde_json::Value::Bool(true))); + assert!(wire["input"].is_array(), "input should be an array after normalize"); + } +} diff --git a/crates/hermesllm/src/transforms/request/from_openai.rs b/crates/hermesllm/src/transforms/request/from_openai.rs index 
index 70e69cb8d..f624e5252 100644
--- a/crates/hermesllm/src/transforms/request/from_openai.rs
+++ b/crates/hermesllm/src/transforms/request/from_openai.rs
@@ -514,15 +514,27 @@ impl TryFrom<ResponsesAPIRequest> for ChatCompletionsRequest {
                     description,
                     parameters,
                     strict,
-                } => converted_chat_tools.push(Tool {
-                    tool_type: "function".to_string(),
-                    function: crate::apis::openai::Function {
-                        name,
-                        description,
-                        parameters: normalize_function_parameters(parameters, None),
-                        strict,
-                    },
-                }),
+                    function,
+                } => {
+                    let resolved_name = function
+                        .as_ref()
+                        .and_then(|f| f.name.clone())
+                        .or_else(|| name.clone())
+                        .unwrap_or_else(|| "".to_string());
+                    let resolved_description = function
+                        .as_ref()
+                        .and_then(|f| f.description.clone())
+                        .or_else(|| description.clone());
+                    converted_chat_tools.push(Tool {
+                        tool_type: "function".to_string(),
+                        function: crate::apis::openai::Function {
+                            name: resolved_name,
+                            description: resolved_description,
+                            parameters: normalize_function_parameters(parameters, None),
+                            strict,
+                        },
+                    })
+                }
                 ResponsesTool::WebSearchPreview {
                     search_context_size,
                     user_location,
@@ -804,10 +816,10 @@ impl TryFrom<ChatCompletionsRequest> for ConverseRequest {
                 .into_iter()
                 .map(|tool| BedrockTool::ToolSpec {
                     tool_spec: ToolSpecDefinition {
-                        name: tool.function.name,
-                        description: tool.function.description,
+                        name: tool.function.name.clone(),
+                        description: tool.function.description.clone(),
                         input_schema: ToolInputSchema {
-                            json: tool.function.parameters,
+                            json: tool.function.parameters.clone(),
                         },
                     },
                 })
@@ -1350,7 +1362,7 @@ mod tests {
             output: serde_json::json!({"status":"ok","stdout":"hello"}),
         }]),
         tools: Some(vec![ResponsesTool::Function {
-            name: "exec_command".to_string(),
+            name: Some("exec_command".to_string()),
             description: Some("Execute a shell command".to_string()),
             parameters: Some(serde_json::json!({
                 "type": "object",
                 "properties": {
                     "cmd": { "type": "string" }
                 },
                 "required": ["cmd"]
            })),
+            function: None,
             strict: Some(false),
         }]),
         include: None,
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index e8e9b0d7f..19a30aa7a 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -27,7 +27,8 @@ use hermesllm::clients::endpoints::SupportedAPIsFromClient;
 use hermesllm::providers::response::ProviderResponse;
 use hermesllm::providers::streaming_response::ProviderStreamResponse;
 use hermesllm::{
-    DecodedFrame, ProviderId, ProviderRequest, ProviderRequestType, ProviderResponseType,
+    serialize_for_upstream, DecodedFrame, ProviderId, ProviderRequest, ProviderRequestType,
+    ProviderResponseType,
     ProviderStreamResponseType,
 };
 
@@ -1056,14 +1057,10 @@ impl HttpContext for StreamContext {
             match ProviderRequestType::try_from((deserialized_client_request, upstream)) {
                 Ok(mut request) => {
-                    request.normalize_for_upstream(self.get_provider_id(), upstream);
-                    debug!(
-                        "request_id={}: upstream request payload: {}",
-                        self.request_identifier(),
-                        String::from_utf8_lossy(&request.to_bytes().unwrap_or_default())
-                    );
+                    let provider_id = self.get_provider_id();
+                    request.normalize_for_upstream(provider_id, upstream);
 
-                    match request.to_bytes() {
+                    let request_bytes = match serialize_for_upstream(&request, provider_id) {
                         Ok(bytes) => bytes,
                         Err(e) => {
                             warn!(
                                 ...
                             );
                             return Action::Pause;
                         }
-                    }
+                    };
+
+                    request_bytes
                 }
                 Err(e) => {
                     warn!(