10 changes: 10 additions & 0 deletions CONTRIBUTING.md
@@ -100,6 +100,16 @@ For library tests only:
cargo test --lib
```

**Build Rust artifacts with the correct targets:**

```bash
cd crates
cargo build --release --target wasm32-wasip1 -p llm_gateway -p prompt_gateway
cargo build --release -p brightstaff -p hermesllm -p common
```

Do not run a blanket workspace-native build such as `cargo build --release` from `crates/`. The `llm_gateway` and `prompt_gateway` crates are Proxy-WASM `cdylib`s and must be built for `wasm32-wasip1`, while `brightstaff`, `hermesllm`, and `common` build natively.

**Run Python CLI tests:**

```bash
21 changes: 10 additions & 11 deletions crates/brightstaff/src/handlers/llm/mod.rs
@@ -5,7 +5,7 @@ use common::llm_providers::LlmProviders;
use hermesllm::apis::openai::Message;
use hermesllm::apis::openai_responses::InputParam;
use hermesllm::clients::{SupportedAPIsFromClient, SupportedUpstreamAPIs};
use hermesllm::{ProviderRequest, ProviderRequestType};
use hermesllm::{serialize_for_upstream, ProviderRequest, ProviderRequestType};
use http_body_util::combinators::BoxBody;
use http_body_util::BodyExt;
use hyper::header::{self};
@@ -248,16 +248,15 @@ async fn llm_chat_inner(
};

// Serialize request for upstream BEFORE router consumes it
let client_request_bytes_for_upstream: Bytes =
match ProviderRequestType::to_bytes(&client_request) {
Ok(bytes) => bytes.into(),
Err(err) => {
warn!(error = %err, "failed to serialize request for upstream");
let mut r = Response::new(full(format!("Failed to serialize request: {}", err)));
*r.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
return Ok(r);
}
};
let client_request_bytes_for_upstream: Bytes = match serialize_for_upstream(&client_request, provider_id) {
Ok(bytes) => bytes.into(),
Err(err) => {
warn!(error = %err, "failed to serialize request for upstream");
let mut r = Response::new(full(format!("Failed to serialize request: {}", err)));
*r.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
return Ok(r);
}
};

// --- Phase 3: Route the request (or use pinned model from session cache) ---
let resolved_model = if let Some(cached_model) = pinned_model {
79 changes: 75 additions & 4 deletions crates/hermesllm/src/apis/openai_responses.rs
@@ -280,16 +280,31 @@ pub struct ConversationParam {
pub id: Option<String>,
}

/// Tool definitions
/// Tool definitions.
///
/// Supports both the canonical OpenAI Responses flat tool shape:
/// { "type": "function", "name": "...", "description": "...", "parameters": {...} }
/// and the nested chat-completions-compatible shape:
/// { "type": "function", "function": { "name": "...", "description": "...", "parameters": {...} } }
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum Tool {
/// Function tool - flat structure in Responses API
/// Function tool — accepts both flat and nested `function` object shapes.
Function {
name: String,
/// Top-level name (flat shape).
name: Option<String>,
/// Top-level description (flat shape).
description: Option<String>,
/// Top-level parameters (flat shape).
parameters: Option<serde_json::Value>,
/// Top-level strict flag (flat shape).
strict: Option<bool>,
/// Nested `function` object (nested/compat shape).
///
/// When present, `name`/`description`/`parameters` from the outer level are
/// ignored in favour of the values inside this object.
#[serde(default)]
function: Option<FunctionDef>,
},
/// File search tool
FileSearch {
@@ -321,6 +336,49 @@ pub enum Tool {
},
}

impl Tool {
pub fn name(&self) -> Option<&str> {
match self {
Tool::Function { name, function, .. } => {
function
.as_ref()
.and_then(|f| f.name.as_ref())
.map(|s| s.as_str())
.or_else(|| name.as_ref().map(|s| s.as_str()))
}
Tool::Custom { name, .. } => name.as_deref(),
_ => None,
}
}

pub fn description(&self) -> Option<&String> {
match self {
Tool::Function {
description,
function,
..
} => function
.as_ref()
.and_then(|f| f.description.as_ref())
.or_else(|| description.as_ref()),
Tool::Custom { description, .. } => description.as_ref(),
_ => None,
}
}

pub fn parameters(&self) -> Option<&serde_json::Value> {
match self {
Tool::Function {
parameters,
function,
..
} => function
.as_ref()
.and_then(|f| f.parameters.as_ref())
.or_else(|| parameters.as_ref()),
_ => None,
}
}
}
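
The accessor methods above let callers treat the two accepted tool encodings interchangeably. As a minimal sketch (not part of this diff), assuming `Tool` is importable as `hermesllm::apis::openai_responses::Tool`, `serde_json` is available, and using a hypothetical `get_weather` tool, both shapes should resolve to the same effective name:

```rust
use hermesllm::apis::openai_responses::Tool;

fn main() -> Result<(), serde_json::Error> {
    // Canonical flat shape used by the OpenAI Responses API.
    let flat: Tool = serde_json::from_str(
        r#"{"type":"function","name":"get_weather","parameters":{"type":"object"}}"#,
    )?;

    // Nested, chat-completions-compatible shape.
    let nested: Tool = serde_json::from_str(
        r#"{"type":"function","function":{"name":"get_weather","parameters":{"type":"object"}}}"#,
    )?;

    // Both encodings yield the same effective name through the accessor.
    assert_eq!(flat.name(), Some("get_weather"));
    assert_eq!(nested.name(), Some("get_weather"));
    Ok(())
}
```

Callers such as the tool-name extraction further down in this file can then stay agnostic to which encoding the client sent.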

/// Ranking options for file search
#[skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -343,6 +401,16 @@ pub struct UserLocation {
pub timezone: Option<String>,
}

/// Inner function definition — used inside the nested `function` object.
#[skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionDef {
pub name: Option<String>,
pub description: Option<String>,
pub parameters: Option<serde_json::Value>,
pub strict: Option<bool>,
}

/// Tool choice options
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
@@ -1146,7 +1214,10 @@ impl ProviderRequest for ResponsesAPIRequest {
tools
.iter()
.filter_map(|tool| match tool {
Tool::Function { name, .. } => Some(name.clone()),
Tool::Function { name, function, .. } => function
.as_ref()
.and_then(|f| f.name.clone())
.or_else(|| name.clone()),
Tool::Custom {
name: Some(name), ..
} => Some(name.clone()),
1 change: 1 addition & 0 deletions crates/hermesllm/src/lib.rs
@@ -11,6 +11,7 @@ pub use apis::streaming_shapes::sse::{SseEvent, SseStreamIter};
pub use aws_smithy_eventstream::frame::DecodedFrame;
pub use providers::id::ProviderId;
pub use providers::request::{ProviderRequest, ProviderRequestError, ProviderRequestType};
pub use providers::request_adapter::serialize_for_upstream;
pub use providers::response::{
ProviderResponse, ProviderResponseError, ProviderResponseType, TokenUsage,
};
2 changes: 2 additions & 0 deletions crates/hermesllm/src/providers/mod.rs
@@ -5,10 +5,12 @@
//!
pub mod id;
pub mod request;
pub mod request_adapter;
pub mod response;
pub mod streaming_response;

pub use id::ProviderId;
pub use request::{ProviderRequest, ProviderRequestError, ProviderRequestType};
pub use request_adapter::serialize_for_upstream;
pub use response::{ProviderResponse, ProviderResponseType, TokenUsage};
pub use streaming_response::{ProviderStreamResponse, ProviderStreamResponseType};
48 changes: 48 additions & 0 deletions crates/hermesllm/src/providers/request.rs
@@ -1004,6 +1004,54 @@ mod tests {
}
}

#[test]
fn test_normalize_for_upstream_chatgpt_sets_store_stream_and_wraps_input() {
use crate::apis::openai::OpenAIApi::Responses;
use crate::apis::openai_responses::InputParam;

let responses_req = ResponsesAPIRequest {
model: "gpt-5.4".to_string(),
input: InputParam::Text("Hello, Codex!".to_string()),
temperature: None,
max_output_tokens: Some(8192),
stream: Some(false),
metadata: None,
tools: None,
tool_choice: None,
parallel_tool_calls: None,
instructions: None,
modalities: None,
user: None,
store: None,
reasoning_effort: None,
include: None,
audio: None,
text: None,
service_tier: None,
top_p: None,
top_logprobs: None,
stream_options: None,
truncation: None,
conversation: None,
previous_response_id: None,
max_tool_calls: None,
background: None,
};

let upstream_api = SupportedUpstreamAPIs::OpenAIResponsesAPI(Responses);
let mut request = ProviderRequestType::ResponsesAPIRequest(responses_req);
request.normalize_for_upstream(ProviderId::ChatGPT, &upstream_api);

match request {
ProviderRequestType::ResponsesAPIRequest(req) => {
assert_eq!(req.max_output_tokens, Some(8192));
assert_eq!(req.store, Some(false));
assert_eq!(req.stream, Some(true));
assert!(matches!(req.input, InputParam::Items(_)));
}
_ => panic!("Expected ResponsesAPIRequest variant"),
}
}

#[test]
fn test_chat_completions_to_responses_api_not_supported() {
use crate::apis::openai::OpenAIApi::Responses;