diff --git a/deploy/example.config.ts b/deploy/example.config.ts index cabb2d8..225d4d6 100644 --- a/deploy/example.config.ts +++ b/deploy/example.config.ts @@ -2,7 +2,7 @@ import { env } from 'cloudflare:workers' import type { Config } from '@deploy/types' // can be whatever you want, just used to make linking apiKeys to providers typesafe. -type ProviderKeys = 'openai' | 'groq' | 'google-vertex' | 'anthropic' | 'bedrock' +type ProviderKeys = 'openai' | 'anthropic' | 'google-vertex' | 'bedrock' | 'groq' | 'azure' // projects, users and keys must have numeric keys, using constants here to make it easier to understand // of course, keys must be unique within a type (e.g. project ids must be unique) but users and projects can have the same id @@ -72,6 +72,13 @@ export const config: Config = { // credentials are used by the ProviderProxy to authenticate the forwarded request credentials: env.OPENAI_API_KEY, }, + azure: { + providerId: 'azure', + // NOTE: For now, the base URL selects the family of models to use (the '/openai/v1' path means OpenAI-family models); replace the resource host with your own Azure resource. + baseUrl: 'https://marcelo-0665-resource.openai.azure.com/openai/v1', + injectCost: true, + credentials: env.AZURE_API_KEY, + }, groq: { providerId: 'groq', baseUrl: 'https://api.groq.com', injectCost: true, credentials: env.GROQ_API_KEY }, 'google-vertex': { providerId: 'google-vertex', diff --git a/deploy/example.env.local b/deploy/example.env.local index 06f16fb..ba340ab 100644 --- a/deploy/example.env.local +++ b/deploy/example.env.local @@ -6,6 +6,9 @@ LOGFIRE_TOKEN=pylf_v1_... # if you want to use OpenAI, generate an API key (you would use env.OPENAI_API_KEY in config.ts) OPENAI_API_KEY=... +# if you want to use Azure, generate an API key (you would use env.AZURE_API_KEY in config.ts) +AZURE_API_KEY=... + # same for Groq (again you would use env.GROQ_API_KEY in config.ts) GROQ_API_KEY=... 
diff --git a/examples/ex_azure.py b/examples/ex_azure.py new file mode 100644 index 0000000..a7f5a91 --- /dev/null +++ b/examples/ex_azure.py @@ -0,0 +1,32 @@ +import os + +import logfire +from devtools import debug +from openai import OpenAI + +logfire.configure() +logfire.instrument_httpx(capture_all=True) + +api_key = os.getenv('PYDANTIC_AI_GATEWAY_API_KEY') +assert api_key is not None + +client = OpenAI(api_key=api_key, base_url='http://localhost:8787/azure') + +response = client.responses.create( + model='gpt-4.1', + instructions='reply concisely', + input='what color is the sky?', +) + +print(response.output_text) +response.usage + +completion = client.chat.completions.create( + model='gpt-4.1', + messages=[ + {'role': 'developer', 'content': 'You are a helpful assistant.'}, + {'role': 'user', 'content': 'what color is the sky?'}, + ], +) +debug(completion) +completion.usage diff --git a/gateway/src/providers/azure.ts b/gateway/src/providers/azure.ts new file mode 100644 index 0000000..5230e90 --- /dev/null +++ b/gateway/src/providers/azure.ts @@ -0,0 +1,5 @@ +import { OpenAIProvider } from './openai' + +// TODO(Marcelo): The `AzureProvider` should be its own class, not a subclass of `OpenAIProvider`. +export class AzureProvider extends OpenAIProvider {} +// TODO(Marcelo): We should support Anthropic models as well. diff --git a/gateway/src/providers/index.ts b/gateway/src/providers/index.ts index a18a5db..3603e50 100644 --- a/gateway/src/providers/index.ts +++ b/gateway/src/providers/index.ts @@ -18,6 +18,7 @@ along with this program. If not, see . 
import type { ProviderID } from '../types' import { AnthropicProvider } from './anthropic' +import { AzureProvider } from './azure' import { BedrockProvider } from './bedrock' import { DefaultProviderProxy, type ProviderOptions } from './default' import { GoogleVertexProvider } from './google' @@ -31,6 +32,8 @@ export function getProvider(providerId: ProviderID): ProviderSig { switch (providerId) { case 'openai': return OpenAIProvider + case 'azure': + return AzureProvider case 'groq': return GroqProvider case 'google-vertex': diff --git a/gateway/src/types.ts b/gateway/src/types.ts index 0a7e016..99924fd 100644 --- a/gateway/src/types.ts +++ b/gateway/src/types.ts @@ -38,8 +38,8 @@ export interface ApiKeyInfo { otelSettings?: OtelSettings } -export type ProviderID = 'groq' | 'openai' | 'google-vertex' | 'anthropic' | 'test' | 'bedrock' -// TODO | 'azure' | 'fireworks' | 'mistral' | 'cohere' +export type ProviderID = 'groq' | 'openai' | 'google-vertex' | 'anthropic' | 'test' | 'bedrock' | 'azure' +// TODO | 'fireworks' | 'mistral' | 'cohere' const providerIds: Record = { groq: true, @@ -48,6 +48,7 @@ const providerIds: Record = { anthropic: true, test: true, bedrock: true, + azure: true, } export const providerIdsArray = Object.keys(providerIds) as ProviderID[] diff --git a/gateway/test/env.d.ts b/gateway/test/env.d.ts index 0b7b98d..9179add 100644 --- a/gateway/test/env.d.ts +++ b/gateway/test/env.d.ts @@ -2,6 +2,7 @@ interface Env { KV: KVNamespace GITHUB_SHA: string limitsDB: D1Database + AZURE_API_KEY: string OPENAI_API_KEY: string GROQ_API_KEY: string ANTHROPIC_API_KEY: string diff --git a/gateway/test/gateway.spec.ts b/gateway/test/gateway.spec.ts index 119ca1f..82b7f6e 100644 --- a/gateway/test/gateway.spec.ts +++ b/gateway/test/gateway.spec.ts @@ -34,7 +34,7 @@ describe('invalid request', () => { const text = await response.text() expect(response.status, `got ${response.status} response: ${text}`).toBe(404) expect(text).toMatchInlineSnapshot( - `"Route not 
found: wrong. Supported values: anthropic, bedrock, converse, gemini, google-vertex, groq, openai, test"`, + `"Route not found: wrong. Supported values: anthropic, azure, bedrock, converse, gemini, google-vertex, groq, openai, test"`, ) }) }) diff --git a/gateway/test/providers/azure.spec.ts b/gateway/test/providers/azure.spec.ts new file mode 100644 index 0000000..42f0afb --- /dev/null +++ b/gateway/test/providers/azure.spec.ts @@ -0,0 +1,40 @@ +import OpenAI from 'openai' +import { describe, expect } from 'vitest' +import { deserializeRequest } from '../otel' +import { test } from '../setup' + +describe('azure', () => { + test('chat', async ({ gateway }) => { + const { fetch, otelBatch } = gateway + + const client = new OpenAI({ apiKey: 'healthy', baseURL: 'https://example.com/azure', fetch }) + + const completion = await client.chat.completions.create({ + model: 'gpt-4.1', + messages: [ + { role: 'developer', content: 'You are a helpful assistant.' }, + { role: 'user', content: 'What is the capital of France?' 
}, + ], + max_completion_tokens: 1024, + }) + + expect(completion).toMatchSnapshot('llm') + expect(otelBatch, 'otelBatch length not 1').toHaveLength(1) + expect(deserializeRequest(otelBatch[0]!)).toMatchSnapshot('span') + }) + + test('responses', async ({ gateway }) => { + const { fetch, otelBatch } = gateway + + const client = new OpenAI({ apiKey: 'healthy', baseURL: 'https://example.com/azure', fetch }) + + const completion = await client.responses.create({ + model: 'gpt-4.1', + instructions: 'reply concisely', + input: 'what color is the sky?', + }) + expect(completion).toMatchSnapshot('llm') + expect(otelBatch, 'otelBatch length not 1').toHaveLength(1) + expect(deserializeRequest(otelBatch[0]!)).toMatchSnapshot('span') + }) +}) diff --git a/gateway/test/providers/azure.spec.ts.snap b/gateway/test/providers/azure.spec.ts.snap new file mode 100644 index 0000000..f94c238 --- /dev/null +++ b/gateway/test/providers/azure.spec.ts.snap @@ -0,0 +1,336 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`azure > chat > llm 1`] = ` +{ + "choices": [ + { + "content_filter_results": { + "hate": { + "filtered": false, + "severity": "safe", + }, + "protected_material_code": { + "detected": false, + "filtered": false, + }, + "protected_material_text": { + "detected": false, + "filtered": false, + }, + "self_harm": { + "filtered": false, + "severity": "safe", + }, + "sexual": { + "filtered": false, + "severity": "safe", + }, + "violence": { + "filtered": false, + "severity": "safe", + }, + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "annotations": [], + "content": "The capital of France is Paris.", + "refusal": null, + "role": "assistant", + }, + }, + ], + "created": 1763971680, + "id": "chatcmpl-CfLu4vonOKFA67K411wDbi8LbkEJf", + "model": "gpt-4.1-2025-04-14", + "object": "chat.completion", + "prompt_filter_results": [ + { + "content_filter_results": { + "hate": { + "filtered": false, + "severity": "safe", + }, + 
"jailbreak": { + "detected": false, + "filtered": false, + }, + "self_harm": { + "filtered": false, + "severity": "safe", + }, + "sexual": { + "filtered": false, + "severity": "safe", + }, + "violence": { + "filtered": false, + "severity": "safe", + }, + }, + "prompt_index": 0, + }, + ], + "system_fingerprint": "fp_f99638a8d7", + "usage": { + "completion_tokens": 8, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + }, + "prompt_tokens": 24, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + }, + "pydantic_ai_gateway": { + "cost_estimate": 0.000112, + }, + "total_tokens": 32, + }, +} +`; + +exports[`azure > chat > span 1`] = ` +[ + { + "attributes": { + "gen_ai.input.messages": [ + { + "parts": [ + { + "content": "You are a helpful assistant.", + "type": "text", + }, + ], + "role": "system", + }, + { + "parts": [ + { + "content": "What is the capital of France?", + "type": "text", + }, + ], + "role": "user", + }, + ], + "gen_ai.operation.name": "chat", + "gen_ai.output.messages": [ + { + "finish_reason": "stop", + "parts": [ + { + "content": "The capital of France is Paris.", + "type": "text", + }, + ], + "role": "assistant", + }, + ], + "gen_ai.request.max_tokens": 1024, + "gen_ai.request.model": "gpt-4.1", + "gen_ai.request.seed": {}, + "gen_ai.request.stop_sequences": {}, + "gen_ai.request.temperature": {}, + "gen_ai.request.top_k": {}, + "gen_ai.request.top_p": {}, + "gen_ai.response.finish_reasons": [ + "stop", + ], + "gen_ai.response.id": "chatcmpl-CfLu4vonOKFA67K411wDbi8LbkEJf", + "gen_ai.response.model": "gpt-4.1-2025-04-14", + "gen_ai.system": "openai", + "gen_ai.system_instructions": {}, + "gen_ai.usage.cache_audio_read_tokens": {}, + "gen_ai.usage.cache_read_tokens": 0, + "gen_ai.usage.cache_write_tokens": {}, + "gen_ai.usage.input_audio_tokens": 0, + "gen_ai.usage.input_tokens": 24, + "gen_ai.usage.output_audio_tokens": 0, + 
"gen_ai.usage.output_tokens": 8, + "http.request.body.text": "{ + "model": "gpt-4.1", + "messages": [ + { + "role": "developer", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "max_completion_tokens": 1024 +}", + "http.response.body.text": "{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"The capital of France is Paris.","refusal":null,"role":"assistant"}}],"created":1763971680,"id":"chatcmpl-CfLu4vonOKFA67K411wDbi8LbkEJf","model":"gpt-4.1-2025-04-14","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":"fp_f99638a8d7","usage":{"completion_tokens":8,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens":24,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":32,"pydantic_ai_gateway":{"cost_estimate":0.000112}}}", + "http.response.status_code": 200, + "logfire.json_schema": 
"{"type":"object","properties":{"gen_ai.operation.name":{"type":"string"},"gen_ai.request.model":{"type":"string"},"gen_ai.system":{"type":"string"},"gen_ai.request.max_tokens":{"type":"number"},"gen_ai.request.top_k":{},"gen_ai.request.top_p":{},"gen_ai.request.temperature":{},"gen_ai.request.stop_sequences":{},"gen_ai.request.seed":{},"gen_ai.response.finish_reasons":{},"gen_ai.response.id":{"type":"string"},"gen_ai.input.messages":{},"gen_ai.output.messages":{},"gen_ai.system_instructions":{},"http.response.status_code":{"type":"number"},"http.request.body.text":{"type":"string"},"http.response.body.text":{"type":"string"},"gen_ai.response.model":{"type":"string"},"gen_ai.usage.input_tokens":{"type":"number"},"gen_ai.usage.cache_read_tokens":{"type":"number"},"gen_ai.usage.cache_write_tokens":{},"gen_ai.usage.output_tokens":{"type":"number"},"gen_ai.usage.input_audio_tokens":{"type":"number"},"gen_ai.usage.cache_audio_read_tokens":{},"gen_ai.usage.output_audio_tokens":{"type":"number"}}}", + "logfire.level_num": 9, + "logfire.msg": "chat gpt-4.1-2025-04-14", + }, + "events": [], + "kind": 1, + "links": [], + "name": "chat gpt-4.1-2025-04-14", + "parentSpanId": undefined, + "resource": { + "service.name": "PAIG", + "service.version": "test", + }, + "scope": "pydantic-ai-gateway", + "status": { + "code": 1, + }, + }, +] +`; + +exports[`azure > responses > llm 1`] = ` +{ + "background": false, + "content_filters": null, + "created_at": 1763971680, + "error": null, + "id": "resp_0eeed6a040a8611b0069241260f58c81948cd87299c4d31154", + "incomplete_details": null, + "instructions": "reply concisely", + "max_output_tokens": null, + "max_tool_calls": null, + "metadata": {}, + "model": "gpt-4.1", + "object": "response", + "output": [ + { + "content": [ + { + "annotations": [], + "logprobs": [], + "text": "The sky is usually blue during the day, but it can appear red, orange, pink, or purple at sunrise and sunset, and black at night.", + "type": "output_text", + }, + ], + 
"id": "msg_0eeed6a040a8611b006924126129cc819495f20c3473dd468d", + "role": "assistant", + "status": "completed", + "type": "message", + }, + ], + "output_text": "The sky is usually blue during the day, but it can appear red, orange, pink, or purple at sunrise and sunset, and black at night.", + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "reasoning": { + "effort": null, + "summary": null, + }, + "safety_identifier": null, + "service_tier": "default", + "status": "completed", + "store": true, + "temperature": 1, + "text": { + "format": { + "type": "text", + }, + "verbosity": "medium", + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1, + "truncation": "disabled", + "usage": { + "input_tokens": 21, + "input_tokens_details": { + "cached_tokens": 0, + }, + "output_tokens": 32, + "output_tokens_details": { + "reasoning_tokens": 0, + }, + "pydantic_ai_gateway": { + "cost_estimate": 0.000298, + }, + "total_tokens": 53, + }, + "user": null, +} +`; + +exports[`azure > responses > span 1`] = ` +[ + { + "attributes": { + "gen_ai.input.messages": [ + { + "parts": [ + { + "content": "what color is the sky?", + "type": "text", + }, + ], + "role": "user", + }, + ], + "gen_ai.operation.name": "chat", + "gen_ai.output.messages": [ + { + "parts": [ + { + "content": "The sky is usually blue during the day, but it can appear red, orange, pink, or purple at sunrise and sunset, and black at night.", + "type": "text", + }, + ], + "role": "assistant", + }, + ], + "gen_ai.request.max_tokens": {}, + "gen_ai.request.model": "gpt-4.1", + "gen_ai.request.seed": {}, + "gen_ai.request.stop_sequences": {}, + "gen_ai.request.temperature": {}, + "gen_ai.request.top_k": {}, + "gen_ai.request.top_p": {}, + "gen_ai.response.finish_reasons": {}, + "gen_ai.response.id": "resp_0eeed6a040a8611b0069241260f58c81948cd87299c4d31154", + "gen_ai.response.model": "gpt-4.1", + "gen_ai.system": "openai", + "gen_ai.system_instructions": {}, + 
"gen_ai.usage.cache_audio_read_tokens": {}, + "gen_ai.usage.cache_read_tokens": 0, + "gen_ai.usage.cache_write_tokens": {}, + "gen_ai.usage.input_audio_tokens": {}, + "gen_ai.usage.input_tokens": 21, + "gen_ai.usage.output_audio_tokens": {}, + "gen_ai.usage.output_tokens": 32, + "http.request.body.text": "{ + "model": "gpt-4.1", + "instructions": "reply concisely", + "input": "what color is the sky?" +}", + "http.response.body.text": "{"id":"resp_0eeed6a040a8611b0069241260f58c81948cd87299c4d31154","object":"response","created_at":1763971680,"status":"completed","background":false,"content_filters":null,"error":null,"incomplete_details":null,"instructions":"reply concisely","max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1","output":[{"id":"msg_0eeed6a040a8611b006924126129cc819495f20c3473dd468d","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"The sky is usually blue during the day, but it can appear red, orange, pink, or purple at sunrise and sunset, and black at night."}],"role":"assistant"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1,"truncation":"disabled","usage":{"input_tokens":21,"input_tokens_details":{"cached_tokens":0},"output_tokens":32,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":53,"pydantic_ai_gateway":{"cost_estimate":0.000298}},"user":null,"metadata":{}}", + "http.response.status_code": 200, + "logfire.json_schema": 
"{"type":"object","properties":{"gen_ai.operation.name":{"type":"string"},"gen_ai.request.model":{"type":"string"},"gen_ai.system":{"type":"string"},"gen_ai.request.max_tokens":{},"gen_ai.request.top_k":{},"gen_ai.request.top_p":{},"gen_ai.request.temperature":{},"gen_ai.request.stop_sequences":{},"gen_ai.request.seed":{},"gen_ai.response.finish_reasons":{},"gen_ai.response.id":{"type":"string"},"gen_ai.input.messages":{},"gen_ai.output.messages":{},"gen_ai.system_instructions":{},"http.response.status_code":{"type":"number"},"http.request.body.text":{"type":"string"},"http.response.body.text":{"type":"string"},"gen_ai.response.model":{"type":"string"},"gen_ai.usage.input_tokens":{"type":"number"},"gen_ai.usage.cache_read_tokens":{"type":"number"},"gen_ai.usage.cache_write_tokens":{},"gen_ai.usage.output_tokens":{"type":"number"},"gen_ai.usage.input_audio_tokens":{},"gen_ai.usage.cache_audio_read_tokens":{},"gen_ai.usage.output_audio_tokens":{}}}", + "logfire.level_num": 9, + "logfire.msg": "chat gpt-4.1", + }, + "events": [], + "kind": 1, + "links": [], + "name": "chat gpt-4.1", + "parentSpanId": undefined, + "resource": { + "service.name": "PAIG", + "service.version": "test", + }, + "scope": "pydantic-ai-gateway", + "status": { + "code": 1, + }, + }, +] +`; diff --git a/gateway/test/worker.ts b/gateway/test/worker.ts index 32d4c0e..2334f3a 100644 --- a/gateway/test/worker.ts +++ b/gateway/test/worker.ts @@ -72,6 +72,13 @@ class TestKeysDB extends KeysDbD1 { injectCost: true, credentials: 'test', }, + { + key: 'azure', + baseUrl: 'http://localhost:8005/azure', + providerId: 'azure', + injectCost: true, + credentials: env.AZURE_API_KEY, + }, { key: 'openai', // baseUrl decides what URL the request will be forwarded to @@ -219,14 +226,14 @@ class TestKeysDB extends KeysDbD1 { baseUrl: 'http://localhost:8005/anthropic', providerId: 'anthropic', injectCost: true, - credentials: this.allProviders[3]!.credentials, + credentials: this.allProviders[4]!.credentials, }, { 
key: 'google-vertex', baseUrl: 'http://localhost:8005/google-vertex', providerId: 'google-vertex', injectCost: true, - credentials: this.allProviders[5]!.credentials, + credentials: this.allProviders[6]!.credentials, }, ], routingGroups: { anthropic: [{ key: 'anthropic' }, { key: 'google-vertex' }] }, diff --git a/gateway/vitest.config.mts b/gateway/vitest.config.mts index eb402f2..d5eb0be 100644 --- a/gateway/vitest.config.mts +++ b/gateway/vitest.config.mts @@ -49,6 +49,7 @@ export default defineWorkersConfig({ wrangler: { configPath: './test/wrangler.jsonc' }, miniflare: { bindings: { + AZURE_API_KEY: process.env.AZURE_API_KEY ?? 'AZURE_API_KEY-unset', OPENAI_API_KEY: process.env.OPENAI_API_KEY ?? 'OPENAI_API_KEY-unset', GROQ_API_KEY: process.env.GROQ_API_KEY ?? 'GROQ_API_KEY-unset', ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY ?? 'ANTHROPIC_API_KEY-unset', diff --git a/package-lock.json b/package-lock.json index 8a63918..96d48de 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3325,9 +3325,9 @@ "link": true }, "node_modules/@pydantic/genai-prices": { - "version": "0.0.43", - "resolved": "https://registry.npmjs.org/@pydantic/genai-prices/-/genai-prices-0.0.43.tgz", - "integrity": "sha512-NUzUlIvIgsXK3eOVekzynEkwsIHCJOn8Fwwlwzh8u86xgKvTK8WsYeBT4kc+0CKmXn3dRj7sOIxm6uzzLHGyKA==", + "version": "0.0.44", + "resolved": "https://registry.npmjs.org/@pydantic/genai-prices/-/genai-prices-0.0.44.tgz", + "integrity": "sha512-g8bwSR9N55M2cLVsnd2c+NGO/9lSdfUUpuTSIssL4VHlqJ66hVeJH6mxoZExDjHlQ24FgxIJvX9Eqp9GLW4LUg==", "license": "MIT", "dependencies": { "yargs": "^17.7.2" diff --git a/proxy-vcr/proxy_vcr/cassettes/azure-358cc480d17298f1874d3260d806b8b282395bbbe5b44f1408bc0e6fe4625339.yaml b/proxy-vcr/proxy_vcr/cassettes/azure-358cc480d17298f1874d3260d806b8b282395bbbe5b44f1408bc0e6fe4625339.yaml new file mode 100644 index 0000000..6828ac0 --- /dev/null +++ b/proxy-vcr/proxy_vcr/cassettes/azure-358cc480d17298f1874d3260d806b8b282395bbbe5b44f1408bc0e6fe4625339.yaml @@ 
-0,0 +1,68 @@ +interactions: +- request: + body: "{\n \"model\": \"gpt-4.1\",\n \"messages\": [\n {\n \"role\": + \"developer\",\n \"content\": \"You are a helpful assistant.\"\n },\n + \ {\n \"role\": \"user\",\n \"content\": \"What is the capital of + France?\"\n }\n ],\n \"max_completion_tokens\": 1024\n}" + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '250' + content-type: + - application/json + host: + - marcelo-0665-resource.openai.azure.com + user-agent: + - python-httpx/0.28.1 + method: POST + uri: https://marcelo-0665-resource.openai.azure.com/openai/v1/chat/completions + response: + body: + string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"The + capital of France is Paris.","refusal":null,"role":"assistant"}}],"created":1763971680,"id":"chatcmpl-CfLu4vonOKFA67K411wDbi8LbkEJf","model":"gpt-4.1-2025-04-14","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":"fp_f99638a8d7","usage":{"completion_tokens":8,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens":24,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":32}} + + ' + headers: + Content-Length: + - '1237' + Content-Type: + - 
application/json + Date: + - Mon, 24 Nov 2025 08:08:00 GMT + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + apim-request-id: + - 2e417e9e-8f9f-4ac9-8686-4702d6d2effa + azureml-model-session: + - d085-20251105033551 + x-accel-buffering: + - 'no' + x-content-type-options: + - nosniff + x-ms-client-request-id: + - Not-Set + x-ms-deployment-name: + - gpt-4.1 + x-ms-rai-invoked: + - 'true' + x-ms-region: + - Sweden Central + x-ratelimit-limit-requests: + - '50' + x-ratelimit-limit-tokens: + - '50000' + x-ratelimit-remaining-requests: + - '49' + x-ratelimit-remaining-tokens: + - '49984' + x-request-id: + - ba909b14-db02-4651-ae41-7fe4d83d06c8 + status: + code: 200 + message: OK +version: 1 diff --git a/proxy-vcr/proxy_vcr/cassettes/azure-d3a027f470b4f1da6ffb9a81b8306a445928195e90737f13f6948226ba5cad49.yaml b/proxy-vcr/proxy_vcr/cassettes/azure-d3a027f470b4f1da6ffb9a81b8306a445928195e90737f13f6948226ba5cad49.yaml new file mode 100644 index 0000000..d2f8972 --- /dev/null +++ b/proxy-vcr/proxy_vcr/cassettes/azure-d3a027f470b4f1da6ffb9a81b8306a445928195e90737f13f6948226ba5cad49.yaml @@ -0,0 +1,80 @@ +interactions: +- request: + body: "{\n \"model\": \"gpt-4.1\",\n \"instructions\": \"reply concisely\",\n + \ \"input\": \"what color is the sky?\"\n}" + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '98' + content-type: + - application/json + host: + - marcelo-0665-resource.openai.azure.com + user-agent: + - python-httpx/0.28.1 + method: POST + uri: https://marcelo-0665-resource.openai.azure.com/openai/v1/responses + response: + body: + string: "{\n \"id\": \"resp_0eeed6a040a8611b0069241260f58c81948cd87299c4d31154\",\n + \ \"object\": \"response\",\n \"created_at\": 1763971680,\n \"status\": + \"completed\",\n \"background\": false,\n \"content_filters\": null,\n \"error\": + null,\n \"incomplete_details\": null,\n \"instructions\": \"reply concisely\",\n + \ 
\"max_output_tokens\": null,\n \"max_tool_calls\": null,\n \"model\": + \"gpt-4.1\",\n \"output\": [\n {\n \"id\": \"msg_0eeed6a040a8611b006924126129cc819495f20c3473dd468d\",\n + \ \"type\": \"message\",\n \"status\": \"completed\",\n \"content\": + [\n {\n \"type\": \"output_text\",\n \"annotations\": + [],\n \"logprobs\": [],\n \"text\": \"The sky is usually + blue during the day, but it can appear red, orange, pink, or purple at sunrise + and sunset, and black at night.\"\n }\n ],\n \"role\": \"assistant\"\n + \ }\n ],\n \"parallel_tool_calls\": true,\n \"previous_response_id\": + null,\n \"prompt_cache_key\": null,\n \"reasoning\": {\n \"effort\": + null,\n \"summary\": null\n },\n \"safety_identifier\": null,\n \"service_tier\": + \"default\",\n \"store\": true,\n \"temperature\": 1.0,\n \"text\": {\n + \ \"format\": {\n \"type\": \"text\"\n },\n \"verbosity\": \"medium\"\n + \ },\n \"tool_choice\": \"auto\",\n \"tools\": [],\n \"top_logprobs\": + 0,\n \"top_p\": 1.0,\n \"truncation\": \"disabled\",\n \"usage\": {\n \"input_tokens\": + 21,\n \"input_tokens_details\": {\n \"cached_tokens\": 0\n },\n + \ \"output_tokens\": 32,\n \"output_tokens_details\": {\n \"reasoning_tokens\": + 0\n },\n \"total_tokens\": 53\n },\n \"user\": null,\n \"metadata\": + {}\n}" + headers: + Content-Length: + - '1518' + Content-Type: + - application/json + Date: + - Mon, 24 Nov 2025 08:08:01 GMT + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + apim-request-id: + - 05ae1647-c7fb-4da0-ae37-120544ff3501 + azureai-processed-tier: + - default + azureai-requested-tier: + - default + skip-error-remapping: + - 'true' + x-content-type-options: + - nosniff + x-ms-client-request-id: + - Not-Set + x-ms-region: + - Sweden Central + x-ratelimit-limit-tokens: + - '-1' + x-ratelimit-remaining-tokens: + - '-1' + x-ratelimit-reset-tokens: + - '0' + x-request-id: + - da702ab3-476c-4acd-b69f-a1e6fedbaaaa + status: + code: 200 + message: OK +version: 1 diff --git 
a/proxy-vcr/proxy_vcr/main.py b/proxy-vcr/proxy_vcr/main.py index 995e6bb..cb2ce2f 100644 --- a/proxy-vcr/proxy_vcr/main.py +++ b/proxy-vcr/proxy_vcr/main.py @@ -20,6 +20,8 @@ ANTHROPIC_BASE_URL = 'https://api.anthropic.com' BEDROCK_BASE_URL = 'https://bedrock-runtime.us-east-1.amazonaws.com' GOOGLE_BASE_URL = 'https://aiplatform.googleapis.com' +# The Azure URL is not a secret, we can commit it. +AZURE_BASE_URL = 'https://marcelo-0665-resource.openai.azure.com/openai/v1' current_file_dir = pathlib.Path(__file__).parent @@ -55,6 +57,12 @@ async def proxy(request: Request) -> Response: with vcr.use_cassette(cassette_name('openai', vcr_suffix)): # type: ignore[reportUnknownReturnType] headers = {'Authorization': auth_header, 'content-type': 'application/json'} response = await client.post(url, content=body, headers=headers) + elif provider == 'azure': + client = cast(httpx.AsyncClient, request.scope['state']['httpx_client']) + url = AZURE_BASE_URL + request.url.path[len('/azure') :] + with vcr.use_cassette(cassette_name('azure', vcr_suffix)): # type: ignore[reportUnknownReturnType] + headers = {'Authorization': auth_header, 'content-type': 'application/json'} + response = await client.post(url, content=body, headers=headers) elif provider == 'groq': client = cast(httpx.AsyncClient, request.scope['state']['httpx_client']) url = GROQ_BASE_URL + request.url.path[len('/groq') :] @@ -160,5 +168,7 @@ def select_provider(request: Request) -> str: return 'anthropic' elif request.url.path.startswith('/google-vertex'): return 'google-vertex' + elif request.url.path.startswith('/azure'): + return 'azure' else: raise HTTPException(status_code=404, detail=f'Path {request.url.path} not supported')