Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion deploy/example.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { env } from 'cloudflare:workers'
import type { Config } from '@deploy/types'

// These can be whatever you want; they are only used to make linking apiKeys to providers type-safe.
type ProviderKeys = 'a' | 'b' | 'c' | 'd' | 'e'
type ProviderKeys = 'a' | 'b' | 'c' | 'd' | 'e' | 'huggingface'

// projects, users and keys must have numeric keys, using constants here to make it easier to understand
// of course, keys must be unique within a type (e.g. project ids must be unique) but users and projects can have the same id
Expand Down Expand Up @@ -67,6 +67,12 @@ export const config: Config<ProviderKeys> = {
injectCost: true,
credentials: env.AWS_BEARER_TOKEN_BEDROCK,
},
huggingface: {
providerId: 'huggingface',
baseUrl: 'https://api-inference.huggingface.co',

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://api-inference.huggingface.co is deprecated; the base URL for Hugging Face Inference Providers should be https://router.huggingface.co/v1. More details are in the docs here.

Suggested change
baseUrl: 'https://api-inference.huggingface.co',
baseUrl: 'https://router.huggingface.co/v1',

injectCost: true,
credentials: env.HF_TOKEN,
},
},
// routing groups for load balancing and fallback
routingGroups: {
Expand Down
3 changes: 3 additions & 0 deletions deploy/example.env.local
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,8 @@ AWS_BEARER_TOKEN_BEDROCK=...
# python -c "import json;print(json.dumps(json.loads(open(input('Service account JSON file path: ')).read())))"
GOOGLE_SERVICE_ACCOUNT_KEY=full service google service account key...

# same for Hugging Face: generate a token (you would use env.HF_TOKEN in config.ts)
HF_TOKEN=...

# password for viewing /status/
STATUS_AUTH_API_KEY="change-me!"
11 changes: 11 additions & 0 deletions gateway/src/providers/huggingface.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import type { ModelAPI } from '../api'
import { ChatCompletionAPI } from '../api/chat'
import { DefaultProviderProxy } from './default'

export class HuggingFaceProvider extends DefaultProviderProxy {
defaultBaseUrl = 'https://api-inference.huggingface.co'

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

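Same comment as above: https://api-inference.huggingface.co is deprecated, so the default base URL should be https://router.huggingface.co/v1.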

Suggested change
defaultBaseUrl = 'https://api-inference.huggingface.co'
defaultBaseUrl = 'https://router.huggingface.co/v1'


protected modelAPI(): ModelAPI {
return new ChatCompletionAPI('huggingface')
}
}
3 changes: 3 additions & 0 deletions gateway/src/providers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { BedrockProvider } from './bedrock'
import { DefaultProviderProxy, type ProviderOptions } from './default'
import { GoogleVertexProvider } from './google'
import { GroqProvider } from './groq'
import { HuggingFaceProvider } from './huggingface'
import { OpenAIProvider } from './openai'
import { TestProvider } from './test'

Expand All @@ -39,6 +40,8 @@ export function getProvider(providerId: ProviderID): ProviderSig {
return AnthropicProvider
case 'bedrock':
return BedrockProvider
case 'huggingface':
return HuggingFaceProvider
case 'test':
return TestProvider
default:
Expand Down
3 changes: 2 additions & 1 deletion gateway/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ export interface ApiKeyInfo<ProviderKey extends string = string> {
otelSettings?: OtelSettings
}

export type ProviderID = 'groq' | 'openai' | 'google-vertex' | 'anthropic' | 'test' | 'bedrock'
export type ProviderID = 'groq' | 'openai' | 'google-vertex' | 'anthropic' | 'test' | 'bedrock' | 'huggingface'
// TODO | 'azure' | 'fireworks' | 'mistral' | 'cohere'

const providerIds: Record<ProviderID, boolean> = {
Expand All @@ -48,6 +48,7 @@ const providerIds: Record<ProviderID, boolean> = {
anthropic: true,
test: true,
bedrock: true,
huggingface: true,
}

export const providerIdsArray = Object.keys(providerIds) as ProviderID[]
Expand Down