diff --git a/package.json b/package.json index 4049a568..3054d823 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "version": "0.0.1", "private": true, "scripts": { - "dev": "pnpm run update-ctx-length && vite dev", + "dev": "vite dev", "build": "pnpm run update-ctx-length && vite build", "preview": "vite preview", "prepare": "ts-patch install && svelte-kit sync || echo ''", @@ -12,9 +12,8 @@ "lint": "prettier . --check . && eslint src/", "format": "prettier . --write .", "clean": "rm -rf ./node_modules/ && rm -rf ./.svelte-kit/ && ni && echo 'Project cleaned!'", - "update-ctx-length": "jiti scripts/update-ctx-length.ts", - "test:unit": "vitest", - "test": "npm run test:unit -- --run && npm run test:e2e", + "test:unit": "vitest --browser.headless", + "test": "npm run test:unit", "test:e2e": "playwright test" }, "devDependencies": { diff --git a/scripts/update-ctx-length.ts b/scripts/update-ctx-length.ts deleted file mode 100644 index 7f5e8841..00000000 --- a/scripts/update-ctx-length.ts +++ /dev/null @@ -1,55 +0,0 @@ -import dotenv from "dotenv"; -dotenv.config(); // Load .env file into process.env - -import { fetchAllProviderData, type ApiKeys } from "../src/lib/server/providers/index.js"; // Import ApiKeys type -import fs from "fs/promises"; -import path from "path"; - -const CACHE_FILE_PATH = path.resolve("src/lib/data/context_length.json"); - -async function runUpdate() { - console.log("Starting context length cache update..."); - - // Gather API keys from process.env - const apiKeys: ApiKeys = { - COHERE_API_KEY: process.env.COHERE_API_KEY, - TOGETHER_API_KEY: process.env.TOGETHER_API_KEY, - FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY, - HYPERBOLIC_API_KEY: process.env.HYPERBOLIC_API_KEY, - REPLICATE_API_KEY: process.env.REPLICATE_API_KEY, - NEBIUS_API_KEY: process.env.NEBIUS_API_KEY, - NOVITA_API_KEY: process.env.NOVITA_API_KEY, - SAMBANOVA_API_KEY: process.env.SAMBANOVA_API_KEY, - }; - - try { - // Fetch data from all supported providers concurrently, passing keys - const fetchedData = await fetchAllProviderData(apiKeys); - - // Read existing manual/cached data - let existingData = {}; - try { - const currentCache = await fs.readFile(CACHE_FILE_PATH, "utf-8"); - existingData = JSON.parse(currentCache); - } catch { - // Remove unused variable name - console.log("No existing cache file found or error reading, creating new one."); - } - - // Merge fetched data with existing data (fetched data takes precedence) - const combinedData = { ...existingData, ...fetchedData }; - - // Write the combined data back to the file - const tempFilePath = CACHE_FILE_PATH + ".tmp"; - await fs.writeFile(tempFilePath, JSON.stringify(combinedData, null, "\t"), "utf-8"); - await fs.rename(tempFilePath, CACHE_FILE_PATH); - - console.log("Context length cache update complete."); - console.log(`Cache file written to: ${CACHE_FILE_PATH}`); - } catch (error) { - console.error("Error during context length cache update:", error); - process.exit(1); // Exit with error code - } -} - -runUpdate(); diff --git a/src/lib/components/inference-playground/playground.svelte b/src/lib/components/inference-playground/playground.svelte index 68b96fc5..7655dee3 100644 --- a/src/lib/components/inference-playground/playground.svelte +++ b/src/lib/components/inference-playground/playground.svelte @@ -28,6 +28,7 @@ import BillingIndicator from "../billing-indicator.svelte"; import { TEST_IDS } from "$lib/constants.js"; import MessageTextarea from "./message-textarea.svelte"; + import { atLeastNDecimals } from 
"$lib/utils/number.js"; let viewCode = $state(false); let viewSettings = $state(false); @@ -155,7 +156,7 @@
- {#each iterate(conversations.generationStats) as [{ latency, tokens }, isLast]} + {#each iterate(conversations.generationStats) as [{ latency, tokens, cost }, isLast]} {@const baLeft = observed["bottom-actions"].rect.left} {@const tceRight = observed["token-count-end"].offset.right} - {tokens} tokens · Latency {latency}ms + {tokens} tokens · Latency {latency}ms · Cost ${atLeastNDecimals(cost ?? 0, 1)} {/each}
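Note: the cost readout added above uses the atLeastNDecimals helper that this diff introduces in src/lib/utils/number.ts. A minimal sketch of its behavior on representative values; the inputs are illustrative, the helper itself is the one added below.

	atLeastNDecimals(2, 1);        // "2.0"      (padded up to the minimum decimals)
	atLeastNDecimals(0.5, 1);      // "0.5"      (already at the minimum, unchanged)
	atLeastNDecimals(0.000123, 1); // "0.000123" (existing precision is preserved)

This is why very small per-request costs keep their full precision instead of rounding away to "$0.0".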
diff --git a/src/lib/components/inference-playground/provider-select.svelte b/src/lib/components/inference-playground/provider-select.svelte index 9d0bc55b..bd59de10 100644 --- a/src/lib/components/inference-playground/provider-select.svelte +++ b/src/lib/components/inference-playground/provider-select.svelte @@ -1,6 +1,7 @@
@@ -92,9 +100,16 @@
 			classes,
 		)}
 	>
-
+
-		{getProviderName(conversation.data.provider ?? "") ?? "loading"}
+
+			{getProviderName(conversation.data.provider ?? "") ?? "loading"}
+			{#if providerPricing}
+				In: {providerPricing.input} • Out: {providerPricing.output}
+			{/if}
+
 	{#snippet option(provider: string)}
+		{@const providerPricing = getProviderPricing(provider)}
-		{getProviderName(provider)}
+
+			{getProviderName(provider)}
+			{#if providerPricing}
+				In: {providerPricing.input} • Out: {providerPricing.output}
+			{/if}
+
{/snippet} diff --git a/src/lib/data/context_length.json b/src/lib/data/context_length.json deleted file mode 100644 index 951adbbe..00000000 --- a/src/lib/data/context_length.json +++ /dev/null @@ -1,270 +0,0 @@ -{ - "replicate": {}, - "sambanova": { - "DeepSeek-R1-0528": 32768, - "DeepSeek-R1-Distill-Llama-70B": 131072, - "DeepSeek-V3-0324": 32768, - "E5-Mistral-7B-Instruct": 4096, - "Llama-3.3-Swallow-70B-Instruct-v0.4": 131072, - "Llama-4-Maverick-17B-128E-Instruct": 131072, - "Meta-Llama-3.1-8B-Instruct": 16384, - "Meta-Llama-3.3-70B-Instruct": 131072, - "Qwen3-32B": 32768, - "Whisper-Large-v3": 4096 - }, - "nebius": { - "meta-llama/Meta-Llama-3.1-8B-Instruct-fast": 131072, - "meta-llama/Meta-Llama-3.1-8B-Instruct": 131072, - "meta-llama/Meta-Llama-3.1-70B-Instruct": 131072, - "meta-llama/Meta-Llama-3.1-405B-Instruct": 131072, - "meta-llama/Llama-Guard-3-8B": 131072, - "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1": 131072, - "mistralai/Mistral-Nemo-Instruct-2407": 128000, - "google/gemma-2-2b-it": 8192, - "google/gemma-2-9b-it-fast": 8192, - "Qwen/Qwen2.5-Coder-7B-fast": 32768, - "Qwen/Qwen2.5-Coder-7B": 32768, - "Qwen/Qwen2.5-Coder-32B-Instruct-fast": 131072, - "Qwen/Qwen2.5-Coder-32B-Instruct": 131072, - "Qwen/Qwen2.5-32B-Instruct-fast": 131072, - "Qwen/Qwen2.5-32B-Instruct": 131072, - "Qwen/Qwen2.5-72B-Instruct-fast": 131072, - "Qwen/Qwen2.5-72B-Instruct": 131072, - "Qwen/Qwen2-VL-72B-Instruct": 32768, - "aaditya/Llama3-OpenBioLLM-70B": 8192, - "BAAI/bge-en-icl": 32768, - "BAAI/bge-multilingual-gemma2": 8192, - "intfloat/e5-mistral-7b-instruct": 32768, - "meta-llama/Llama-3.3-70B-Instruct": 131072, - "meta-llama/Llama-3.3-70B-Instruct-fast": 131072, - "microsoft/phi-4": 16384, - "deepseek-ai/DeepSeek-V3": 163840, - "deepseek-ai/DeepSeek-R1": 163840, - "deepseek-ai/DeepSeek-R1-0528": 131072, - "NousResearch/Hermes-3-Llama-405B": 131072, - "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072, - "deepseek-ai/DeepSeek-R1-fast": 163840, - "Qwen/QwQ-32B-fast": 131072, - "Qwen/QwQ-32B": 131072, - "Qwen/Qwen3-235B-A22B": 40960, - "Qwen/Qwen3-30B-A3B": 40960, - "Qwen/Qwen3-30B-A3B-fast": 40960, - "Qwen/Qwen3-32B": 40960, - "Qwen/Qwen3-32B-fast": 40960, - "Qwen/Qwen3-14B": 40960, - "Qwen/Qwen3-4B-fast": 40960, - "nvidia/Llama-3_3-Nemotron-Super-49B-v1": 131072, - "mistralai/Mistral-Small-3.1-24B-Instruct-2503": 131072, - "mistralai/Devstral-Small-2505": 128000, - "google/gemma-3-27b-it": 110000, - "google/gemma-3-27b-it-fast": 110000, - "Qwen/Qwen2.5-VL-72B-Instruct": 32000, - "Qwen/Qwen3-Embedding-8B": 40960, - "deepseek-ai/DeepSeek-V3-0324": 163840, - "deepseek-ai/DeepSeek-V3-0324-fast": 163840, - "black-forest-labs/flux-dev": 0, - "black-forest-labs/flux-schnell": 0, - "stability-ai/sdxl": 0 - }, - "novita": { - "deepseek/deepseek-v3-0324": 163840, - "moonshotai/kimi-k2-instruct": 131072, - "deepseek/deepseek-r1-0528": 163840, - "baidu/ernie-4.5-vl-424b-a47b": 123000, - "baidu/ernie-4.5-300b-a47b-paddle": 123000, - "qwen/qwen3-30b-a3b-fp8": 40960, - "minimaxai/minimax-m1-80k": 1000000, - "deepseek/deepseek-r1-0528-qwen3-8b": 128000, - "qwen/qwen3-32b-fp8": 40960, - "qwen/qwen2.5-vl-72b-instruct": 32768, - "qwen/qwen3-235b-a22b-fp8": 40960, - "deepseek/deepseek-v3-turbo": 64000, - "thudm/glm-4.1v-9b-thinking": 65536, - "meta-llama/llama-4-maverick-17b-128e-instruct-fp8": 1048576, - "google/gemma-3-27b-it": 32000, - "deepseek/deepseek-r1-turbo": 64000, - "Sao10K/L3-8B-Stheno-v3.2": 8192, - "gryphe/mythomax-l2-13b": 4096, - "deepseek/deepseek-prover-v2-671b": 160000, - 
"meta-llama/llama-4-scout-17b-16e-instruct": 131072, - "deepseek/deepseek-r1-distill-llama-8b": 32000, - "meta-llama/llama-3.1-8b-instruct": 16384, - "deepseek/deepseek-r1-distill-qwen-14b": 64000, - "meta-llama/llama-3.3-70b-instruct": 131072, - "qwen/qwen-2.5-72b-instruct": 32000, - "mistralai/mistral-nemo": 60288, - "deepseek/deepseek-r1-distill-qwen-32b": 64000, - "meta-llama/llama-3-8b-instruct": 8192, - "microsoft/wizardlm-2-8x22b": 65535, - "deepseek/deepseek-r1-distill-llama-70b": 32000, - "mistralai/mistral-7b-instruct": 32768, - "meta-llama/llama-3-70b-instruct": 8192, - "nousresearch/hermes-2-pro-llama-3-8b": 8192, - "sao10k/l3-70b-euryale-v2.1": 8192, - "cognitivecomputations/dolphin-mixtral-8x22b": 16000, - "sophosympatheia/midnight-rose-70b": 4096, - "sao10k/l3-8b-lunaris": 8192, - "baidu/ernie-4.5-vl-28b-a3b": 30000, - "baidu/ernie-4.5-21B-a3b": 120000, - "baidu/ernie-4.5-0.3b": 120000, - "google/gemma-3-1b-it": 32768, - "qwen/qwen3-8b-fp8": 128000, - "qwen/qwen3-4b-fp8": 128000, - "thudm/glm-4-32b-0414": 32000, - "qwen/qwen2.5-7b-instruct": 32000, - "meta-llama/llama-3.2-1b-instruct": 131000, - "meta-llama/llama-3.2-3b-instruct": 32768, - "sao10k/l31-70b-euryale-v2.2": 8192 - }, - "fal": { - "fal/model-name": 4096 - }, - "cerebras": { - "cerebras/model-name": 8192 - }, - "hf-inference": { - "google/gemma-2-9b-it": 8192, - "meta-llama/Meta-Llama-3-8B-Instruct": 8192 - }, - "hyperbolic": { - "Qwen/Qwen2.5-72B-Instruct": 131072, - "Qwen/Qwen2.5-VL-72B-Instruct": 32768, - "meta-llama/Meta-Llama-3-70B-Instruct": 8192, - "deepseek-ai/DeepSeek-V3": 131072, - "deepseek-ai/DeepSeek-V3-0324": 163840, - "meta-llama/Llama-3.3-70B-Instruct": 131072, - "Qwen/Qwen2.5-Coder-32B-Instruct": 32768, - "meta-llama/Llama-3.2-3B-Instruct": 131072, - "NousResearch/Hermes-3-Llama-3.1-70B": 12288, - "meta-llama/Meta-Llama-3.1-405B-Instruct": 131000, - "meta-llama/Meta-Llama-3.1-70B-Instruct": 131072, - "meta-llama/Meta-Llama-3.1-8B-Instruct": 131072, - "mistralai/Pixtral-12B-2409": 32768, - "Qwen/Qwen2.5-VL-7B-Instruct": 32768, - "meta-llama/Meta-Llama-3.1-405B-FP8": 32768, - "deepseek-ai/DeepSeek-R1": 163840, - "Qwen/QwQ-32B": 131072 - }, - "cohere": { - "embed-english-light-v3.0": 512, - "embed-multilingual-v2.0": 256, - "rerank-v3.5": 4096, - "embed-v4.0": 8192, - "rerank-english-v3.0": 4096, - "command-r-08-2024": 132096, - "embed-english-light-v3.0-image": 0, - "embed-english-v3.0-image": 0, - "command-nightly": 288000, - "command-a-03-2025": 288000, - "command-r-plus-08-2024": 132096, - "c4ai-aya-vision-32b": 16384, - "command-r": 132096, - "command-r7b-12-2024": 132000, - "command-a-vision": 128000, - "command-r7b-arabic-02-2025": 128000, - "command-light-nightly": 4096, - "embed-english-v3.0": 512, - "embed-multilingual-light-v3.0-image": 0, - "embed-multilingual-v3.0-image": 0, - "c4ai-aya-expanse-32b": 128000 - }, - "together": { - "cartesia/sonic": 0, - "black-forest-labs/FLUX.1-kontext-pro": 0, - "Alibaba-NLP/gte-modernbert-base": 8192, - "mistralai/Mistral-7B-Instruct-v0.3": 32768, - "cartesia/sonic-2": 0, - "togethercomputer/MoA-1": 32768, - "meta-llama/Meta-Llama-Guard-3-8B": 8192, - "togethercomputer/m2-bert-80M-32k-retrieval": 32768, - "deepseek-ai/DeepSeek-V3": 131072, - "moonshotai/Kimi-K2-Instruct": 131072, - "Qwen/Qwen2.5-7B-Instruct-Turbo": 32768, - "meta-llama/Llama-3-8b-chat-hf": 8192, - "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": 130815, - "togethercomputer/MoA-1-Turbo": 32768, - "eddiehou/meta-llama/Llama-3.1-405B": 12000, - "mistralai/Mistral-7B-Instruct-v0.2": 32768, 
- "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": 131072, - "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": 131072, - "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": 131072, - "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo": 131072, - "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo": 131072, - "meta-llama/Meta-Llama-3-70B-Instruct-Turbo": 8192, - "meta-llama/Llama-3.3-70B-Instruct-Turbo": 131072, - "deepseek-ai/DeepSeek-R1": 163840, - "Qwen/Qwen2.5-VL-72B-Instruct": 32768, - "google/gemma-3n-E4B-it": 32768, - "arcee-ai/AFM-4.5B-Preview": 65536, - "lgai/exaone-3-5-32b-instruct": 32768, - "meta-llama/Llama-3-70b-chat-hf": 8192, - "mistralai/Mixtral-8x7B-Instruct-v0.1": 32768, - "google/gemma-2-27b-it": 8192, - "Qwen/Qwen2-72B-Instruct": 32768, - "meta-llama/Llama-2-70b-hf": 4096, - "Qwen/Qwen3-235B-A22B-fp8-tput": 40960, - "Salesforce/Llama-Rank-V1": 8192, - "mistralai/Mistral-Small-24B-Instruct-2501": 32768, - "Qwen/Qwen2-VL-72B-Instruct": 32768, - "mixedbread-ai/Mxbai-Rerank-Large-V2": 32768, - "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": 131072, - "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": 32768, - "meta-llama/Llama-Vision-Free": 131072, - "perplexity-ai/r1-1776": 163840, - "scb10x/scb10x-llama3-1-typhoon2-70b-instruct": 8192, - "meta-llama/Llama-Guard-3-11B-Vision-Turbo": 131072, - "arcee-ai/maestro-reasoning": 131072, - "togethercomputer/Refuel-Llm-V2-Small": 8192, - "meta-llama/Llama-3.2-3B-Instruct-Turbo": 131072, - "Qwen/Qwen2.5-Coder-32B-Instruct": 16384, - "arcee-ai/coder-large": 32768, - "Qwen/QwQ-32B": 131072, - "arcee-ai/virtuoso-large": 131072, - "arcee_ai/arcee-spotlight": 131072, - "arcee-ai/arcee-blitz": 32768, - "deepseek-ai/DeepSeek-R1-0528-tput": 163840, - "arcee-ai/virtuoso-medium-v2": 131072, - "arcee-ai/caller": 32768, - "marin-community/marin-8b-instruct": 4096, - "lgai/exaone-deep-32b": 32768, - "google/gemma-3-27b-it": 65536, - "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072, - "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768, - "mistralai/Mistral-7B-Instruct-v0.1": 32768, - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": 1048576, - "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": 131072, - "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free": 8192, - "scb10x/scb10x-typhoon-2-1-gemma3-12b": 131072, - "meta-llama/Llama-Guard-4-12B": 1048576, - "togethercomputer/Refuel-Llm-V2": 16384, - "Qwen/Qwen2.5-72B-Instruct-Turbo": 131072, - "meta-llama/LlamaGuard-2-8b": 8192, - "meta-llama/Meta-Llama-3-8B-Instruct-Lite": 8192, - "intfloat/multilingual-e5-large-instruct": 514, - "meta-llama/Llama-4-Scout-17B-16E-Instruct": 1048576, - "yan/deepseek-ai-deepseek-v3": 163839, - "black-forest-labs/FLUX.1-kontext-max": 0 - }, - "fireworks-ai": { - "accounts/fireworks/models/deepseek-r1-0528": 163840, - "accounts/perplexity/models/r1-1776": 163840, - "accounts/fireworks/models/qwen3-30b-a3b": 131072, - "accounts/fireworks/models/llama4-scout-instruct-basic": 10485760, - "accounts/fireworks/models/llama4-maverick-instruct-basic": 1048576, - "accounts/fireworks/models/llama-v3p1-8b-instruct": 131072, - "accounts/fireworks/models/firesearch-ocr-v6": 8192, - "accounts/fireworks/models/llama-v3p1-405b-instruct": 131072, - "accounts/fireworks/models/mixtral-8x22b-instruct": 65536, - "accounts/fireworks/models/deepseek-r1-basic": 163840, - "accounts/fireworks/models/kimi-k2-instruct": 131072, - "accounts/fireworks/models/llama-v3p1-70b-instruct": 131072, - "accounts/fireworks/models/qwen3-235b-a22b": 131072, - "accounts/fireworks/models/llama-v3p3-70b-instruct": 131072, - 
"accounts/fireworks/models/deepseek-r1": 163840, - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new": 131072, - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b": 131072, - "accounts/fireworks/models/deepseek-v3": 131072, - "accounts/fireworks/models/deepseek-v3-0324": 163840, - "accounts/fireworks/models/qwen2p5-vl-32b-instruct": 128000 - } -} \ No newline at end of file diff --git a/src/lib/server/providers/cohere.ts b/src/lib/server/providers/cohere.ts deleted file mode 100644 index da0e7cd1..00000000 --- a/src/lib/server/providers/cohere.ts +++ /dev/null @@ -1,35 +0,0 @@ -import type { MaxTokensCache } from "./index.js"; - -const COHERE_API_URL = "https://api.cohere.ai/v1/models"; - -// Accept apiKey as an argument -export async function fetchCohereData(apiKey: string | undefined): Promise { - if (!apiKey) { - console.warn("Cohere API key not provided. Skipping Cohere fetch."); - return {}; - } - try { - const response = await fetch(COHERE_API_URL, { - headers: { - Authorization: `Bearer ${apiKey}`, // Use passed-in apiKey - }, - }); - if (!response.ok) { - throw new Error(`Cohere API request failed: ${response.status} ${response.statusText}`); - } - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const data: any = await response.json(); - const modelsData: MaxTokensCache["cohere"] = {}; - if (data?.models && Array.isArray(data.models)) { - for (const model of data.models) { - if (model.name && typeof model.context_length === "number") { - modelsData[model.name] = model.context_length; - } - } - } - return modelsData; - } catch (error) { - console.error("Error fetching Cohere data:", error); - return {}; - } -} diff --git a/src/lib/server/providers/fireworks.ts b/src/lib/server/providers/fireworks.ts deleted file mode 100644 index edd7b526..00000000 --- a/src/lib/server/providers/fireworks.ts +++ /dev/null @@ -1,41 +0,0 @@ -import type { MaxTokensCache } from "./index.js"; - -const FIREWORKS_API_URL = "https://api.fireworks.ai/inference/v1/models"; // Assumed - -export async function fetchFireworksData(apiKey: string | undefined): Promise { - if (!apiKey) { - console.warn("Fireworks AI API key not provided. Skipping Fireworks AI fetch."); - return {}; - } - try { - const response = await fetch(FIREWORKS_API_URL, { - headers: { - Authorization: `Bearer ${apiKey}`, - }, - }); - if (!response.ok) { - throw new Error(`Fireworks AI API request failed: ${response.status} ${response.statusText}`); - } - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const data: any = await response.json(); // Assuming OpenAI structure { data: [ { id: string, ... } ] } - const modelsData: MaxTokensCache["fireworks-ai"] = {}; - - // Check if data and data.data exist and are an array - if (data?.data && Array.isArray(data.data)) { - for (const model of data.data) { - // Check for common context length fields (OpenAI uses context_window) - const contextLength = model.context_length ?? model.context_window ?? model.config?.max_tokens ?? 
null; - // Fireworks uses model.id - if (model.id && typeof contextLength === "number") { - modelsData[model.id] = contextLength; - } - } - } else { - console.warn("Unexpected response structure from Fireworks AI API:", data); - } - return modelsData; - } catch (error) { - console.error("Error fetching Fireworks AI data:", error); - return {}; // Return empty on error - } -} diff --git a/src/lib/server/providers/hyperbolic.ts b/src/lib/server/providers/hyperbolic.ts deleted file mode 100644 index b06e11c9..00000000 --- a/src/lib/server/providers/hyperbolic.ts +++ /dev/null @@ -1,41 +0,0 @@ -import type { MaxTokensCache } from "./index.js"; - -const HYPERBOLIC_API_URL = "https://api.hyperbolic.xyz/v1/models"; // Assumed - -export async function fetchHyperbolicData(apiKey: string | undefined): Promise { - if (!apiKey) { - console.warn("Hyperbolic API key not provided. Skipping Hyperbolic fetch."); - return {}; - } - try { - const response = await fetch(HYPERBOLIC_API_URL, { - headers: { - Authorization: `Bearer ${apiKey}`, - }, - }); - if (!response.ok) { - throw new Error(`Hyperbolic API request failed: ${response.status} ${response.statusText}`); - } - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const data: any = await response.json(); // Assuming OpenAI structure { data: [ { id: string, ... } ] } - const modelsData: MaxTokensCache["hyperbolic"] = {}; - - // Check if data and data.data exist and are an array - if (data?.data && Array.isArray(data.data)) { - for (const model of data.data) { - // Check for common context length fields (OpenAI uses context_window) - const contextLength = model.context_length ?? model.context_window ?? model.config?.max_tokens ?? null; - // Assuming Hyperbolic uses model.id - if (model.id && typeof contextLength === "number") { - modelsData[model.id] = contextLength; - } - } - } else { - console.warn("Unexpected response structure from Hyperbolic API:", data); - } - return modelsData; - } catch (error) { - console.error("Error fetching Hyperbolic data:", error); - return {}; // Return empty on error - } -} diff --git a/src/lib/server/providers/index.ts b/src/lib/server/providers/index.ts deleted file mode 100644 index 3fba58ec..00000000 --- a/src/lib/server/providers/index.ts +++ /dev/null @@ -1,224 +0,0 @@ -import fs from "fs/promises"; -import path from "path"; -import { fetchCohereData } from "./cohere.js"; -import { fetchTogetherData } from "./together.js"; -import { fetchFireworksData } from "./fireworks.js"; -import { fetchHyperbolicData } from "./hyperbolic.js"; -import { fetchReplicateData } from "./replicate.js"; -import { fetchNebiusData } from "./nebius.js"; -import { fetchNovitaData } from "./novita.js"; -import { fetchSambanovaData } from "./sambanova.js"; - -// --- Constants --- -const CACHE_FILE_PATH = path.resolve("src/lib/server/data/context_length.json"); - -// --- Types --- -export interface MaxTokensCache { - [provider: string]: { - [modelId: string]: number; - }; -} - -// Type for API keys object passed to fetchAllProviderData -export interface ApiKeys { - COHERE_API_KEY?: string; - TOGETHER_API_KEY?: string; - FIREWORKS_API_KEY?: string; - HYPERBOLIC_API_KEY?: string; - REPLICATE_API_KEY?: string; - NEBIUS_API_KEY?: string; - NOVITA_API_KEY?: string; - SAMBANOVA_API_KEY?: string; -} - -// --- Cache Handling --- -// (readCache and updateCache remain the same) -let memoryCache: MaxTokensCache | null = null; -let cacheReadPromise: Promise | null = null; - -async function readCache(): Promise { - if (memoryCache) { - 
return memoryCache; - } - if (cacheReadPromise) { - return cacheReadPromise; - } - cacheReadPromise = (async () => { - try { - const data = await fs.readFile(CACHE_FILE_PATH, "utf-8"); - memoryCache = JSON.parse(data) as MaxTokensCache; - return memoryCache!; - } catch (error: unknown) { - if (typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT") { - console.warn(`Cache file not found at ${CACHE_FILE_PATH}, starting with empty cache.`); - memoryCache = {}; - return {}; - } - console.error("Error reading context length cache file:", error); - memoryCache = {}; - return {}; - } finally { - cacheReadPromise = null; - } - })(); - return cacheReadPromise; -} - -const isBrowser = typeof window !== "undefined"; - -function serverLog(...txt: unknown[]) { - if (isBrowser) return; - console.log(...txt); -} - -function serverError(...txt: unknown[]) { - if (isBrowser) return; - console.error(...txt); -} - -async function updateCache(provider: string, modelId: string, maxTokens: number): Promise { - try { - let cache: MaxTokensCache; - try { - const data = await fs.readFile(CACHE_FILE_PATH, "utf-8"); - cache = JSON.parse(data) as MaxTokensCache; - } catch (readError: unknown) { - if (typeof readError === "object" && readError !== null && "code" in readError && readError.code === "ENOENT") { - cache = {}; - } else { - throw readError; - } - } - if (!cache[provider]) { - cache[provider] = {}; - } - cache[provider][modelId] = maxTokens; - const tempFilePath = CACHE_FILE_PATH + ".tmp"; - await fs.writeFile(tempFilePath, JSON.stringify(cache, null, "\t"), "utf-8"); - await fs.rename(tempFilePath, CACHE_FILE_PATH); - memoryCache = cache; - serverLog(`Cache updated for ${provider} - ${modelId}: ${maxTokens}`); - } catch (error) { - serverError(`Error updating context length cache for ${provider} - ${modelId}:`, error); - memoryCache = null; - } -} - -// --- Main Exported Function --- -// Now accepts apiKey as the third argument -export async function getMaxTokens( - provider: string, - modelId: string, - apiKey: string | undefined, -): Promise { - const cache = await readCache(); - const cachedValue = cache[provider]?.[modelId]; - - if (cachedValue !== undefined) { - return cachedValue; - } - - serverLog(`Cache miss for ${provider} - ${modelId}. Attempting live fetch...`); - - let liveData: number | null = null; - let fetchedProviderData: MaxTokensCache[string] | null = null; - - try { - // Pass the received apiKey to the fetcher functions - switch (provider) { - case "cohere": - fetchedProviderData = await fetchCohereData(apiKey); // Pass apiKey - liveData = fetchedProviderData?.[modelId] ?? null; - break; - case "together": - fetchedProviderData = await fetchTogetherData(apiKey); // Pass apiKey - liveData = fetchedProviderData?.[modelId] ?? null; - break; - case "fireworks-ai": - fetchedProviderData = await fetchFireworksData(apiKey); // Pass apiKey - liveData = fetchedProviderData?.[modelId] ?? null; - break; - case "hyperbolic": - fetchedProviderData = await fetchHyperbolicData(apiKey); // Pass apiKey - liveData = fetchedProviderData?.[modelId] ?? null; - break; - case "replicate": - fetchedProviderData = await fetchReplicateData(apiKey); - liveData = fetchedProviderData?.[modelId] ?? null; - break; - case "nebius": - fetchedProviderData = await fetchNebiusData(apiKey); - liveData = fetchedProviderData?.[modelId] ?? null; - break; - case "novita": - fetchedProviderData = await fetchNovitaData(apiKey); - liveData = fetchedProviderData?.[modelId] ?? 
null; - break; - case "sambanova": - fetchedProviderData = await fetchSambanovaData(apiKey); - liveData = fetchedProviderData?.[modelId] ?? null; - break; - default: - serverLog(`Live fetch not supported or implemented for provider: ${provider}`); - return null; - } - - if (liveData !== null) { - serverLog(`Live fetch successful for ${provider} - ${modelId}: ${liveData}`); - updateCache(provider, modelId, liveData).catch(err => { - serverError(`Async cache update failed for ${provider} - ${modelId}:`, err); - }); - return liveData; - } else { - serverLog(`Live fetch for ${provider} did not return data for model ${modelId}.`); - return null; - } - } catch (error) { - serverError(`Error during live fetch for ${provider} - ${modelId}:`, error); - return null; - } -} - -// --- Helper for Build Script --- -// Now accepts an apiKeys object -export async function fetchAllProviderData(apiKeys: ApiKeys): Promise { - serverLog("Fetching data for all providers..."); - const results: MaxTokensCache = {}; - - // Define fetchers, passing the specific key from the apiKeys object - const providerFetchers = [ - { name: "cohere", fetcher: () => fetchCohereData(apiKeys.COHERE_API_KEY) }, - { name: "together", fetcher: () => fetchTogetherData(apiKeys.TOGETHER_API_KEY) }, - { name: "fireworks-ai", fetcher: () => fetchFireworksData(apiKeys.FIREWORKS_API_KEY) }, - { name: "hyperbolic", fetcher: () => fetchHyperbolicData(apiKeys.HYPERBOLIC_API_KEY) }, - { name: "replicate", fetcher: () => fetchReplicateData(apiKeys.REPLICATE_API_KEY) }, - { name: "nebius", fetcher: () => fetchNebiusData(apiKeys.NEBIUS_API_KEY) }, - { name: "novita", fetcher: () => fetchNovitaData(apiKeys.NOVITA_API_KEY) }, - { name: "sambanova", fetcher: () => fetchSambanovaData(apiKeys.SAMBANOVA_API_KEY) }, - ]; - - const settledResults = await Promise.allSettled(providerFetchers.map(p => p.fetcher())); - - settledResults.forEach((result, index) => { - const providerInfo = providerFetchers[index]; - if (!providerInfo) { - serverError(`Error: No provider info found for index ${index}`); - return; - } - const providerName = providerInfo.name; - - if (result.status === "fulfilled" && result.value) { - if (Object.keys(result.value).length > 0) { - results[providerName] = result.value; - serverLog(`Successfully fetched data for ${providerName}`); - } else { - serverLog(`No data returned for ${providerName}.`); - } - } else if (result.status === "rejected") { - serverError(`Error fetching ${providerName} data:`, result.reason); - } - }); - - serverLog("Finished fetching provider data."); - return results; -} diff --git a/src/lib/server/providers/nebius.ts b/src/lib/server/providers/nebius.ts deleted file mode 100644 index 9d62b2c0..00000000 --- a/src/lib/server/providers/nebius.ts +++ /dev/null @@ -1,49 +0,0 @@ -import type { MaxTokensCache } from "./index.js"; - -interface NebiusModel { - id: string; - config?: { - max_tokens?: number; - }; - context_length?: number; -} - -interface NebiusResponse { - data?: NebiusModel[]; -} - -const NEBIUS_API_URL = "https://api.studio.nebius.com/v1/models?verbose=true"; - -export async function fetchNebiusData(apiKey: string | undefined): Promise { - if (!apiKey) { - console.warn("Nebius API key not provided. 
Skipping Nebius fetch."); - return {}; - } - try { - const response = await fetch(NEBIUS_API_URL, { - headers: { - Authorization: `Bearer ${apiKey}`, - }, - }); - if (!response.ok) { - throw new Error(`Nebius API request failed: ${response.status} ${response.statusText}`); - } - const data: NebiusResponse = await response.json(); - const modelsData: MaxTokensCache["nebius"] = {}; - - if (data?.data && Array.isArray(data.data)) { - for (const model of data.data) { - const contextLength = model.context_length ?? model.config?.max_tokens ?? null; - if (model.id && typeof contextLength === "number") { - modelsData[model.id] = contextLength; - } - } - } else { - console.warn("Unexpected response structure from Nebius API:", data); - } - return modelsData; - } catch (error) { - console.error("Error fetching Nebius data:", error); - return {}; - } -} diff --git a/src/lib/server/providers/novita.ts b/src/lib/server/providers/novita.ts deleted file mode 100644 index e5f74f49..00000000 --- a/src/lib/server/providers/novita.ts +++ /dev/null @@ -1,46 +0,0 @@ -import type { MaxTokensCache } from "./index.js"; - -const NOVITA_API_URL = "https://api.novita.ai/v3/openai/models"; - -interface NovitaModel { - id: string; - object: string; - context_size: number; -} - -interface NovitaResponse { - data: NovitaModel[]; -} - -export async function fetchNovitaData(apiKey: string | undefined): Promise { - if (!apiKey) { - console.warn("Novita API key not provided. Skipping Novita fetch."); - return {}; - } - try { - const response = await fetch(NOVITA_API_URL, { - headers: { - Authorization: `Bearer ${apiKey}`, - }, - }); - if (!response.ok) { - throw new Error(`Novita API request failed: ${response.status} ${response.statusText}`); - } - const data: NovitaResponse = await response.json(); - const modelsData: MaxTokensCache["novita"] = {}; - - if (data?.data && Array.isArray(data.data)) { - for (const model of data.data) { - if (model.id && typeof model.context_size === "number") { - modelsData[model.id] = model.context_size; - } - } - } else { - console.warn("Unexpected response structure from Novita API:", data); - } - return modelsData; - } catch (error) { - console.error("Error fetching Novita data:", error); - return {}; - } -} diff --git a/src/lib/server/providers/replicate.ts b/src/lib/server/providers/replicate.ts deleted file mode 100644 index 931a87a6..00000000 --- a/src/lib/server/providers/replicate.ts +++ /dev/null @@ -1,37 +0,0 @@ -import type { MaxTokensCache } from "./index.js"; - -const REPLICATE_API_URL = "https://api.replicate.com/v1/models"; - -export async function fetchReplicateData(apiKey: string | undefined): Promise { - if (!apiKey) { - console.warn("Replicate API key not provided. Skipping Replicate fetch."); - return {}; - } - try { - const response = await fetch(REPLICATE_API_URL, { - headers: { - Authorization: `Token ${apiKey}`, - }, - }); - if (!response.ok) { - throw new Error(`Replicate API request failed: ${response.status} ${response.statusText}`); - } - const data = await response.json(); - const modelsData: MaxTokensCache["replicate"] = {}; - - if (data?.results && Array.isArray(data.results)) { - for (const model of data.results) { - const contextLength = model.context_length ?? model.config?.max_tokens ?? 
null; - if (model.id && typeof contextLength === "number") { - modelsData[model.id] = contextLength; - } - } - } else { - console.warn("Unexpected response structure from Replicate API:", data); - } - return modelsData; - } catch (error) { - console.error("Error fetching Replicate data:", error); - return {}; - } -} diff --git a/src/lib/server/providers/sambanova.ts b/src/lib/server/providers/sambanova.ts deleted file mode 100644 index 02ec2a3e..00000000 --- a/src/lib/server/providers/sambanova.ts +++ /dev/null @@ -1,52 +0,0 @@ -import type { MaxTokensCache } from "./index.js"; - -const SAMBANOVA_API_URL = "https://api.sambanova.ai/v1/models"; - -interface SambanovaModel { - id: string; - object: string; - context_length: number; - max_completion_tokens?: number; - pricing?: { - prompt: string; - completion: string; - }; -} - -interface SambanovaResponse { - data: SambanovaModel[]; - object: string; -} - -export async function fetchSambanovaData(apiKey: string | undefined): Promise { - if (!apiKey) { - console.warn("SambaNova API key not provided. Skipping SambaNova fetch."); - return {}; - } - try { - const response = await fetch(SAMBANOVA_API_URL, { - headers: { - Authorization: `Bearer ${apiKey}`, - }, - }); - if (!response.ok) { - throw new Error(`SambaNova API request failed: ${response.status} ${response.statusText}`); - } - const data: SambanovaResponse = await response.json(); - const modelsData: MaxTokensCache["sambanova"] = {}; - - if (data?.data && Array.isArray(data.data)) { - for (const model of data.data) { - if (model.id && typeof model.context_length === "number") { - modelsData[model.id] = model.context_length; - } - } - } else { - console.warn("Unexpected response structure from SambaNova API:", data); - } - return modelsData; - } catch (error) { - console.error("Error fetching SambaNova data:", error); - return {}; - } -} diff --git a/src/lib/server/providers/together.ts b/src/lib/server/providers/together.ts deleted file mode 100644 index 775af686..00000000 --- a/src/lib/server/providers/together.ts +++ /dev/null @@ -1,37 +0,0 @@ -import type { MaxTokensCache } from "./index.js"; - -const TOGETHER_API_URL = "https://api.together.xyz/v1/models"; - -// Accept apiKey as an argument -export async function fetchTogetherData(apiKey: string | undefined): Promise { - if (!apiKey) { - console.warn("Together AI API key not provided. Skipping Together AI fetch."); - return {}; - } - try { - const response = await fetch(TOGETHER_API_URL, { - headers: { - Authorization: `Bearer ${apiKey}`, // Use passed-in apiKey - }, - }); - if (!response.ok) { - throw new Error(`Together AI API request failed: ${response.status} ${response.statusText}`); - } - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const data: any[] = await response.json(); - const modelsData: MaxTokensCache["together"] = {}; - - if (Array.isArray(data)) { - for (const model of data) { - const contextLength = model.context_length ?? model.config?.max_tokens ?? 
null; - if (model.id && typeof contextLength === "number") { - modelsData[model.id] = contextLength; - } - } - } - return modelsData; - } catch (error) { - console.error("Error fetching Together AI data:", error); - return {}; - } -} diff --git a/src/lib/state/conversations.svelte.ts b/src/lib/state/conversations.svelte.ts index d6f23759..ab234414 100644 --- a/src/lib/state/conversations.svelte.ts +++ b/src/lib/state/conversations.svelte.ts @@ -5,9 +5,10 @@ import { import { addToast } from "$lib/components/toaster.svelte.js"; import { AbortManager } from "$lib/spells/abort-manager.svelte"; import { PipelineTag, Provider, type ConversationMessage, type GenerationStatistics, type Model } from "$lib/types.js"; -import { handleNonStreamingResponse, handleStreamingResponse } from "$lib/utils/business.svelte.js"; +import { handleNonStreamingResponse, handleStreamingResponse, estimateTokens } from "$lib/utils/business.svelte.js"; import { omit, snapshot } from "$lib/utils/object.svelte"; import { models, structuredForbiddenProviders } from "./models.svelte"; +import { pricing } from "./pricing.svelte.js"; import { DEFAULT_PROJECT_ID, ProjectEntity, projects } from "./projects.svelte"; import { token } from "./token.svelte"; // eslint-disable-next-line @typescript-eslint/ban-ts-comment @@ -87,7 +88,7 @@ export class ConversationClass { readonly model = $derived(models.all.find(m => m.id === this.data.modelId) ?? emptyModel); abortManager = new AbortManager(); - generationStats = $state({ latency: 0, tokens: 0 }) as GenerationStatistics; + generationStats = $state({ latency: 0, tokens: 0, cost: 0 }) as GenerationStatistics; generating = $state(false); constructor(data: ConversationEntityMembers) { @@ -232,6 +233,17 @@ export class ConversationClass { const endTime = performance.now(); this.generationStats.latency = Math.round(endTime - startTime); + + // Calculate cost if we have pricing data + if (this.data.provider && this.data.provider !== "auto") { + const inputTokens = estimateTokens(this); + const outputTokens = this.generationStats.tokens; + const costEstimate = pricing.estimateCost(this.model.id, this.data.provider, inputTokens, outputTokens); + if (costEstimate) { + this.generationStats.cost = costEstimate.total; + } + } + this.generating = false; }; diff --git a/src/lib/state/pricing.svelte.ts b/src/lib/state/pricing.svelte.ts new file mode 100644 index 00000000..81503d3c --- /dev/null +++ b/src/lib/state/pricing.svelte.ts @@ -0,0 +1,78 @@ +import { page } from "$app/state"; +import { atLeastNDecimals } from "$lib/utils/number.js"; +import type { PageData } from "../../routes/$types.js"; + +interface RouterProvider { + provider: string; + status: string; + context_length?: number; + pricing?: { + input: number; + output: number; + }; + supports_tools?: boolean; + supports_structured_output?: boolean; +} + +interface RouterModel { + id: string; + providers: RouterProvider[]; +} + +interface RouterData { + data: RouterModel[]; +} + +const pageData = $derived(page.data as PageData & { routerData: RouterData }); + +class Pricing { + routerData = $derived(pageData.routerData as RouterData); + + getPricing(modelId: string, provider: string) { + const model = this.routerData?.data?.find((m: RouterModel) => m.id === modelId); + if (!model) return null; + + const providerData = model.providers.find((p: RouterProvider) => p.provider === provider); + return providerData?.pricing || null; + } + + getContextLength(modelId: string, provider: string) { + const model = this.routerData?.data?.find((m: 
RouterModel) => m.id === modelId); + if (!model) return null; + + const providerData = model.providers.find((p: RouterProvider) => p.provider === provider); + return providerData?.context_length || null; + } + + formatPricing(pricing: { input: number; output: number } | null) { + if (!pricing) return null; + + const inputCost = atLeastNDecimals(pricing.input, 2); + const outputCost = atLeastNDecimals(pricing.output, 2); + + return { + input: `$${inputCost}/1M`, + output: `$${outputCost}/1M`, + inputRaw: pricing.input, + outputRaw: pricing.output, + }; + } + + estimateCost(modelId: string, provider: string, inputTokens: number, outputTokens: number = 0) { + const pricing = this.getPricing(modelId, provider); + if (!pricing) return null; + + const inputCost = (inputTokens / 1000000) * pricing.input; + const outputCost = (outputTokens / 1000000) * pricing.output; + const totalCost = inputCost + outputCost; + + return { + input: inputCost, + output: outputCost, + total: totalCost, + formatted: `$${totalCost.toFixed(6)}`, + }; + } +} + +export const pricing = new Pricing(); diff --git a/src/lib/types.ts b/src/lib/types.ts index 66ad5330..f690d502 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -197,6 +197,7 @@ export type ValueOf = T[keyof T]; export interface GenerationStatistics { latency: number; tokens: number; + cost?: number; } export type ModelsJson = { diff --git a/src/lib/utils/business.svelte.ts b/src/lib/utils/business.svelte.ts index 719397a2..e03bc703 100644 --- a/src/lib/utils/business.svelte.ts +++ b/src/lib/utils/business.svelte.ts @@ -6,7 +6,7 @@ * **/ -import ctxLengthData from "$lib/data/context_length.json"; +import { pricing } from "$lib/state/pricing.svelte.js"; import { InferenceClient, snippets } from "@huggingface/inference"; import { ConversationClass, type ConversationEntityMembers } from "$lib/state/conversations.svelte"; import { token } from "$lib/state/token.svelte"; @@ -21,7 +21,7 @@ import { type Model, } from "$lib/types.js"; import { safeParse } from "$lib/utils/json.js"; -import { omit, tryGet } from "$lib/utils/object.svelte.js"; +import { omit } from "$lib/utils/object.svelte.js"; import type { ChatCompletionInputMessage, InferenceSnippet } from "@huggingface/tasks"; import { type ChatCompletionOutputMessage } from "@huggingface/tasks"; import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers"; @@ -71,20 +71,15 @@ type OpenAICompletionMetadata = { type CompletionMetadata = HFCompletionMetadata | OpenAICompletionMetadata; export function maxAllowedTokens(conversation: ConversationClass) { - const ctxLength = (() => { - const model = conversation.model; - const { provider } = conversation.data; - - if (!provider || !isHFModel(model)) return; - - const idOnProvider = model.inferenceProviderMapping.find(data => data.provider === provider)?.providerId; - if (!idOnProvider) return; + const model = conversation.model; + const { provider } = conversation.data; - const models = tryGet(ctxLengthData, provider); - if (!models) return; + if (!provider || !isHFModel(model)) { + return customMaxTokens[conversation.model.id] ?? 100000; + } - return tryGet(models, idOnProvider) as number | undefined; - })(); + // Try to get context length from router data + const ctxLength = pricing.getContextLength(model.id, provider); if (!ctxLength) return customMaxTokens[conversation.model.id] ?? 
100000; return ctxLength; @@ -387,15 +382,16 @@ export async function getTokenizer(model: Model) { } // When you don't have access to a tokenizer, guesstimate -export function estimateTokens(conversation: Conversation) { - const content = conversation.messages.reduce((acc, curr) => { +export function estimateTokens(conversation: ConversationClass) { + if (!conversation.data.messages) return 0; + const content = conversation.data.messages?.reduce((acc, curr) => { return acc + (curr?.content ?? ""); }, ""); return content.length / 4; // 1 token ~ 4 characters } -export async function getTokens(conversation: Conversation): Promise { +export async function getTokens(conversation: ConversationClass): Promise { const model = conversation.model; if (isCustomModel(model)) return estimateTokens(conversation); const tokenizer = await getTokenizer(model); @@ -404,7 +400,7 @@ export async function getTokens(conversation: Conversation): Promise { // This is a simplified version - you might need to adjust based on your exact needs let formattedText = ""; - conversation.messages.forEach((message, index) => { + conversation.data.messages?.forEach((message, index) => { let content = `<|start_header_id|>${message.role}<|end_header_id|>\n\n${message.content?.trim()}<|eot_id|>`; // Add BOS token to the first message diff --git a/src/lib/utils/number.ts b/src/lib/utils/number.ts new file mode 100644 index 00000000..ae927fe1 --- /dev/null +++ b/src/lib/utils/number.ts @@ -0,0 +1,9 @@ +export function atLeastNDecimals(num: number, minDecimals: number): string { + return num.toFixed(Math.max(minDecimals, getDecimalPlaces(num))); +} + +function getDecimalPlaces(num: number): number { + const str = num.toString(); + const decimalIndex = str.indexOf("."); + return decimalIndex === -1 ? 0 : str.length - decimalIndex - 1; +} diff --git a/src/routes/+page.ts b/src/routes/+page.ts index 63885194..e02d798e 100644 --- a/src/routes/+page.ts +++ b/src/routes/+page.ts @@ -2,7 +2,16 @@ import type { PageLoad } from "./$types.js"; import type { ApiModelsResponse } from "./api/models/+server.js"; export const load: PageLoad = async ({ fetch }) => { - const res = await fetch("/api/models"); - const json: ApiModelsResponse = await res.json(); - return json; + const [modelsRes, routerRes] = await Promise.all([ + fetch("/api/models"), + fetch("https://router.huggingface.co/v1/models"), + ]); + + const models: ApiModelsResponse = await modelsRes.json(); + const routerData = await routerRes.json(); + + return { + ...models, + routerData, + }; };
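For context, the cost path this diff wires up is: estimateTokens approximates prompt tokens at roughly 4 characters per token, generationStats.tokens supplies the output token count, and Pricing.estimateCost converts both using the router's per-1M-token prices. A minimal worked sketch with hypothetical numbers; the model id, provider, and rates below are illustrative, not real router data:

	import { pricing } from "$lib/state/pricing.svelte.js";

	// Suppose the router reported $0.28/1M input and $1.14/1M output for this pair:
	//   input:  (1200 / 1_000_000) * 0.28 = $0.000336
	//   output: ( 350 / 1_000_000) * 1.14 = $0.000399
	//   total:                              $0.000735
	const est = pricing.estimateCost("some-org/some-model", "some-provider", 1200, 350);
	if (est) console.log(est.formatted); // "$0.000735" under the assumed rates

estimateCost returns null when the router has no pricing for the model/provider pair, which is why both the sketch above and the playground code guard before displaying a cost.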