Skip to content

Commit 7f90124

Browse files
committed
more updates
1 parent e7eb773 commit 7f90124

29 files changed

Lines changed: 375 additions & 133 deletions

File tree

.gitignore

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,6 @@ venv.bak/
101101
# mypy
102102
.mypy_cache/
103103

104-
# VSCode stuff:
105-
.vscode/
106-
107104
# MacOS Metadata
108105
*.DS_Store
109106

arch/arch_config_schema.yaml

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -72,20 +72,23 @@ properties:
7272
type: string
7373
default:
7474
type: boolean
75-
# endpoint field is deprecated, use base_url instead
76-
endpoint:
77-
type: string
7875
base_url:
7976
type: string
80-
protocol:
81-
type: string
82-
enum:
83-
- http
84-
- https
8577
http_host:
8678
type: string
87-
usage:
88-
type: string
79+
routing_preferences:
80+
type: array
81+
items:
82+
type: object
83+
properties:
84+
name:
85+
type: string
86+
description:
87+
type: string
88+
additionalProperties: false
89+
required:
90+
- name
91+
- description
8992
additionalProperties: false
9093
required:
9194
- model

arch/tools/.vscode/settings.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"cSpell.words": [
3+
"BRIGHTSTAFF"
4+
]
5+
}

arch/tools/cli/config_generator.py

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ def validate_and_render_schema():
9595
updated_llm_providers = []
9696
llm_provider_name_set = set()
9797
llms_with_usage = []
98+
model_name_keys = set()
99+
model_usage_name_keys = set()
98100
for llm_provider in config_yaml["llm_providers"]:
99101
if llm_provider.get("usage", None):
100102
llms_with_usage.append(llm_provider["name"])
@@ -104,6 +106,11 @@ def validate_and_render_schema():
104106
)
105107

106108
model_name = llm_provider.get("model")
109+
if model_name in model_name_keys:
110+
raise Exception(
111+
f"Duplicate model name {model_name}, please provide unique model name for each llm_provider"
112+
)
113+
model_name_keys.add(model_name)
107114
if llm_provider.get("name") is None:
108115
llm_provider["name"] = model_name
109116

@@ -119,6 +126,20 @@ def validate_and_render_schema():
119126
f"Unsupported provider {provider} for model {model_name}. Supported providers are: {', '.join(SUPPORTED_PROVIDERS)}"
120127
)
121128

129+
if model_id in model_name_keys:
130+
raise Exception(
131+
f"Duplicate model_id {model_id}, please provide unique model_id for each llm_provider"
132+
)
133+
model_name_keys.add(model_id)
134+
135+
for routing_preference in llm_provider.get("routing_preferences", []):
136+
if routing_preference.get("name") in model_usage_name_keys:
137+
raise Exception(
138+
f"Duplicate routing preference name \"{routing_preference.get('name')}\", please provide unique name for each routing preference"
139+
)
140+
model_usage_name_keys.add(routing_preference.get("name"))
141+
142+
llm_provider["model"] = model_id
122143
llm_provider["provider_interface"] = provider
123144
llm_provider_name_set.add(llm_provider.get("name"))
124145
provider = None
@@ -132,21 +153,14 @@ def validate_and_render_schema():
132153
del llm_provider["provider"]
133154
updated_llm_providers.append(llm_provider)
134155

135-
if llm_provider.get("endpoint") and llm_provider.get("base_url"):
136-
raise Exception("Please provide either endpoint or base_url, not both")
137-
138-
if llm_provider.get("endpoint", None):
139-
endpoint = llm_provider["endpoint"]
140-
protocol = llm_provider.get("protocol", "http")
141-
llm_provider["endpoint"], llm_provider["port"] = get_endpoint_and_port(
142-
endpoint, protocol
143-
)
144-
llms_with_endpoint.append(llm_provider)
145-
elif llm_provider.get("base_url", None):
156+
if llm_provider.get("base_url", None):
146157
base_url = llm_provider["base_url"]
147158
urlparse_result = urlparse(base_url)
148-
if llm_provider.get("port"):
149-
raise Exception("Please provider port in base_url")
159+
url_path = urlparse_result.path
160+
if url_path and url_path != "/":
161+
raise Exception(
162+
f"Please provide base_url without path, got {base_url}. Use base_url like 'http://example.com' instead of 'http://example.com/path'."
163+
)
150164
if urlparse_result.scheme == "" or urlparse_result.scheme not in [
151165
"http",
152166
"https",

crates/.vscode/launch.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
2+
"version": "0.2.0",
3+
"configurations": [
4+
{
5+
"name": "Debug Brightstaff",
6+
"type": "lldb",
7+
"request": "launch",
8+
"program": "${workspaceFolder}/target/debug/brightstaff",
9+
"args": [],
10+
"cwd": "${workspaceFolder}",
11+
"stopOnEntry": false,
12+
"sourceLanguages": ["rust"],
13+
"env": {
14+
"RUST_LOG": "debug",
15+
"RUST_BACKTRACE": "1",
16+
"ARCH_CONFIG_PATH_RENDERED": "../demos/use_cases/preference_based_routing/arch_config_rendered.yaml"
17+
},
18+
"preLaunchTask": "rust: cargo build"
19+
}
20+
]
21+
}

crates/.vscode/tasks.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
2+
"version": "2.0.0",
3+
"tasks": [
4+
{
5+
"type": "cargo",
6+
"command": "build",
7+
"args": [
8+
"--bin",
9+
"brightstaff"
10+
],
11+
"problemMatcher": [
12+
"$rustc"
13+
],
14+
"group": {
15+
"kind": "build",
16+
"isDefault": true
17+
},
18+
"label": "rust: cargo build"
19+
}
20+
]
21+
}

crates/brightstaff/src/handlers/chat_completions.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use hyper::{Request, Response, StatusCode};
1212
use tokio::sync::mpsc;
1313
use tokio_stream::wrappers::ReceiverStream;
1414
use tokio_stream::StreamExt;
15-
use tracing::{debug, info, trace, warn};
15+
use tracing::{debug, info, warn};
1616

1717
use crate::router::llm_router::RouterService;
1818

@@ -81,8 +81,8 @@ pub async fn chat_completions(
8181
}
8282
}
8383

84-
trace!(
85-
"arch-router request body: {}",
84+
debug!(
85+
"arch-router request received: {}",
8686
&serde_json::to_string(&chat_completion_request).unwrap()
8787
);
8888

@@ -102,7 +102,7 @@ pub async fn chat_completions(
102102
.as_ref()
103103
.and_then(|s| serde_yaml::from_str(s).ok());
104104

105-
debug!("usage preferences: {:?}", usage_preferences);
105+
debug!("usage preferences from request: {:?}", usage_preferences);
106106

107107
let mut determined_route = match router_service
108108
.determine_route(

crates/brightstaff/src/main.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
4444
let _tracer_provider = init_tracer();
4545
let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string());
4646

47+
info!(
48+
"current working directory: {}",
49+
env::current_dir().unwrap().display()
50+
);
4751
// loading arch_config.yaml file
4852
let arch_config_path = env::var("ARCH_CONFIG_PATH_RENDERED")
4953
.unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string());

crates/brightstaff/src/router/llm_router.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use std::sync::Arc;
22

33
use common::{
4-
configuration::{LlmProvider, LlmRoute, ModelUsagePreference},
4+
configuration::{LlmProvider, ModelUsagePreference, RoutingPreference},
55
consts::ARCH_PROVIDER_HINT_HEADER,
66
};
77
use hermesllm::providers::openai::types::{ChatCompletionsResponse, ContentType, Message};
@@ -44,11 +44,14 @@ impl RouterService {
4444
) -> Self {
4545
let providers_with_usage = providers
4646
.iter()
47-
.filter(|provider| provider.usage.is_some())
47+
.filter(|provider| provider.routing_preferences.is_some())
4848
.cloned()
4949
.collect::<Vec<LlmProvider>>();
5050

51-
let llm_routes: Vec<LlmRoute> = providers_with_usage.iter().map(LlmRoute::from).collect();
51+
let llm_routes: Vec<RoutingPreference> = providers_with_usage
52+
.iter()
53+
.flat_map(|provider| provider.routing_preferences.clone().unwrap_or_default())
54+
.collect();
5255

5356
let router_model = Arc::new(router_model_v1::RouterModelV1::new(
5457
llm_routes,
@@ -156,6 +159,12 @@ impl RouterService {
156159
router_response_time.as_millis()
157160
);
158161

162+
if let Some(ref route) = route_name {
163+
if route == "other" {
164+
return Ok(None);
165+
}
166+
}
167+
159168
Ok(route_name)
160169
} else {
161170
Ok(None)

crates/brightstaff/src/router/router_model_v1.rs

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use common::{
2-
configuration::{LlmRoute, ModelUsagePreference},
2+
configuration::{ModelUsagePreference, RoutingPreference},
33
consts::{SYSTEM_ROLE, TOOL_ROLE, USER_ROLE},
44
};
55
use hermesllm::providers::openai::types::{ChatCompletionsRequest, ContentType, Message};
@@ -36,7 +36,11 @@ pub struct RouterModelV1 {
3636
max_token_length: usize,
3737
}
3838
impl RouterModelV1 {
39-
pub fn new(llm_routes: Vec<LlmRoute>, routing_model: String, max_token_length: usize) -> Self {
39+
pub fn new(
40+
llm_routes: Vec<RoutingPreference>,
41+
routing_model: String,
42+
max_token_length: usize,
43+
) -> Self {
4044
let llm_route_json_str =
4145
serde_json::to_string(&llm_routes).unwrap_or_else(|_| "[]".to_string());
4246
RouterModelV1 {
@@ -138,9 +142,9 @@ impl RouterModel for RouterModelV1 {
138142
let llm_route_json = usage_preferences
139143
.as_ref()
140144
.map(|prefs| {
141-
let llm_route: Vec<LlmRoute> = prefs
145+
let llm_route: Vec<RoutingPreference> = prefs
142146
.iter()
143-
.map(|pref| LlmRoute {
147+
.map(|pref| RoutingPreference {
144148
name: pref.name.clone(),
145149
description: pref.usage.clone().unwrap_or_default(),
146150
})
@@ -255,7 +259,7 @@ Based on your analysis, provide your response in the following JSON formats if y
255259
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
256260
]
257261
"#;
258-
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
262+
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
259263
let routing_model = "test-model".to_string();
260264
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
261265

@@ -314,7 +318,7 @@ Based on your analysis, provide your response in the following JSON formats if y
314318
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
315319
]
316320
"#;
317-
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
321+
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
318322
let routing_model = "test-model".to_string();
319323
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
320324

@@ -379,7 +383,7 @@ Based on your analysis, provide your response in the following JSON formats if y
379383
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
380384
]
381385
"#;
382-
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
386+
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
383387
let routing_model = "test-model".to_string();
384388
let router = RouterModelV1::new(llm_routes, routing_model.clone(), 235);
385389

@@ -440,7 +444,7 @@ Based on your analysis, provide your response in the following JSON formats if y
440444
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
441445
]
442446
"#;
443-
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
447+
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
444448
let routing_model = "test-model".to_string();
445449
let router = RouterModelV1::new(llm_routes, routing_model.clone(), 200);
446450

@@ -501,7 +505,7 @@ Based on your analysis, provide your response in the following JSON formats if y
501505
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
502506
]
503507
"#;
504-
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
508+
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
505509
let routing_model = "test-model".to_string();
506510
let router = RouterModelV1::new(llm_routes, routing_model.clone(), 230);
507511

@@ -569,7 +573,7 @@ Based on your analysis, provide your response in the following JSON formats if y
569573
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
570574
]
571575
"#;
572-
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
576+
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
573577
let routing_model = "test-model".to_string();
574578
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
575579

@@ -639,7 +643,7 @@ Based on your analysis, provide your response in the following JSON formats if y
639643
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
640644
]
641645
"#;
642-
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
646+
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
643647
let routing_model = "test-model".to_string();
644648
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
645649

@@ -716,7 +720,7 @@ Based on your analysis, provide your response in the following JSON formats if y
716720
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
717721
]
718722
"#;
719-
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
723+
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
720724

721725
let router = RouterModelV1::new(llm_routes, "test-model".to_string(), 2000);
722726

0 commit comments

Comments
 (0)