diff --git a/docs.json b/docs.json new file mode 100644 index 0000000..d15a57c --- /dev/null +++ b/docs.json @@ -0,0 +1,1914 @@ +{ + "$schema": "https://mintlify.com/docs.json", + "theme": "mint", + "name": "Portkey Docs", + "colors": { + "primary": "#0891B2", + "light": "#0891B2", + "dark": "#0891B2" + }, + "favicon": "/favicon.png", + "navigation": { + "tabs": [ + { + "tab": "Documentation", + "groups": [ + { + "group": "Introduction", + "pages": [ + "introduction/what-is-portkey", + "introduction/make-your-first-request", + "introduction/feature-overview" + ] + }, + { + "group": "Product", + "pages": [ + { + "group": "Observability", + "pages": [ + "product/observability", + "product/observability/logs", + "product/observability/traces", + "product/observability/analytics", + "product/observability/feedback", + "product/observability/metadata", + "product/observability/filters", + "product/observability/logs-export", + "product/observability/budget-limits" + ] + }, + { + "group": "AI Gateway", + "pages": [ + "product/ai-gateway", + "product/ai-gateway/universal-api", + "product/ai-gateway/configs", + "product/ai-gateway/conditional-routing", + { + "group": "Multimodal Capabilities", + "pages": [ + "product/ai-gateway/multimodal-capabilities", + "product/ai-gateway/multimodal-capabilities/image-generation", + "product/ai-gateway/multimodal-capabilities/function-calling", + "product/ai-gateway/multimodal-capabilities/vision", + "product/ai-gateway/multimodal-capabilities/text-to-speech", + "product/ai-gateway/multimodal-capabilities/speech-to-text" + ] + }, + "product/ai-gateway/cache-simple-and-semantic", + "product/ai-gateway/fallbacks", + "product/ai-gateway/automatic-retries", + "product/ai-gateway/realtime-api", + "product/ai-gateway/load-balancing", + "product/ai-gateway/canary-testing", + "product/ai-gateway/strict-open-ai-compliance", + { + "group": "Virtual Keys", + "pages": [ + "product/ai-gateway/virtual-keys", + 
"product/ai-gateway/virtual-keys/budget-limits", + "product/ai-gateway/virtual-keys/bedrock-amazon-assumed-role" + ] + }, + "product/ai-gateway/request-timeouts", + "product/ai-gateway/files", + "product/ai-gateway/batches" + ] + }, + { + "group": "Prompt Library", + "pages": [ + "product/prompt-library", + "product/prompt-library/prompt-templates", + "product/prompt-library/prompt-partials", + "product/prompt-library/retrieve-prompts", + "product/prompt-library/advanced-prompting-with-json-mode" + ] + }, + { + "group": "Guardrails", + "pages": [ + "product/guardrails", + "product/guardrails/list-of-guardrail-checks", + "product/guardrails/pii-redaction", + "product/guardrails/patronus-ai", + "product/guardrails/aporia", + "product/guardrails/pillar", + "product/guardrails/pangea", + "product/guardrails/bring-your-own-guardrails", + "product/guardrails/creating-raw-guardrails-in-json" + ] + }, + { + "group": "Security", + "pages": ["product/security/pii"] + }, + "product/autonomous-fine-tuning", + { + "group": "Enterprise Offering", + "pages": [ + "product/enterprise-offering", + { + "group": "Org Management", + "pages": [ + "product/enterprise-offering/org-management", + "product/enterprise-offering/org-management/organizations", + "product/enterprise-offering/org-management/workspaces", + "product/enterprise-offering/org-management/user-roles-and-permissions", + "product/enterprise-offering/org-management/api-keys-authn-and-authz", + "product/enterprise-offering/org-management/scim", + "product/enterprise-offering/org-management/sso" + ] + }, + "product/enterprise-offering/access-control-management", + "product/enterprise-offering/budget-limits", + "product/enterprise-offering/security-portkey", + "product/enterprise-offering/logs-export", + { + "group": "Private Cloud Deployments", + "pages": [ + "product/enterprise-offering/private-cloud-deployments", + "product/enterprise-offering/private-cloud-deployments/architecture", + 
"product/enterprise-offering/private-cloud-deployments/aws", + "product/enterprise-offering/private-cloud-deployments/gcp", + "product/enterprise-offering/private-cloud-deployments/azure", + "product/enterprise-offering/private-cloud-deployments/cloudflare-workers", + "product/enterprise-offering/private-cloud-deployments/f5-app-stack" + ] + }, + "product/enterprise-offering/components" + ] + }, + "product/open-source", + "product/product-feature-comparison" + ] + }, + { + "group": "Support", + "pages": [ + "support/contact-us", + "support/developer-forum", + "support/common-errors-and-resolutions", + "support/portkeys-december-migration" + ] + } + ] + }, + { + "tab": "Integrations", + "groups": [ + { + "group": "Ecosystem", + "pages": ["integrations/ecosystem", "integrations/partner"] + }, + { + "group": "LLMs", + "pages": [ + "integrations/llms", + { + "group": "OpenAI", + "pages": [ + "integrations/llms/openai", + "integrations/llms/openai/structured-outputs", + "integrations/llms/openai/prompt-caching-openai", + "integrations/llms/openai/files", + "integrations/llms/openai/batches" + ] + }, + { + "group": "Anthropic", + "pages": [ + "integrations/llms/anthropic", + "integrations/llms/anthropic/prompt-caching" + ] + }, + "integrations/llms/gemini", + { + "group": "Google Vertex AI", + "pages": [ + "integrations/llms/vertex-ai", + "integrations/llms/vertex-ai/controlled-generations" + ] + }, + "integrations/llms/azure-openai", + { + "group": "Bedrock", + "pages": [ + "integrations/llms/bedrock/aws-bedrock", + "integrations/llms/bedrock/files", + "integrations/llms/bedrock/batches" + ] + }, + "integrations/llms/aws-sagemaker", + "integrations/llms/ollama", + { + "group": "More", + "pages": [ + "integrations/llms/local-ai", + "integrations/llms/vllm", + "integrations/llms/triton", + "integrations/llms/ai21", + "integrations/llms/anyscale-llama2-mistral-zephyr", + "integrations/llms/cerebras", + "integrations/llms/cohere", + "integrations/llms/fireworks", + 
"integrations/llms/dashscope", + "integrations/llms/deepinfra", + "integrations/llms/deepbricks", + "integrations/llms/deepgram", + "integrations/llms/deepseek", + "integrations/llms/github", + "integrations/llms/groq", + "integrations/llms/huggingface", + "integrations/llms/inference.net", + "integrations/llms/jina-ai", + "integrations/llms/lambda", + "integrations/llms/lemon-fox", + "integrations/llms/lingyi-01.ai", + "integrations/llms/mistral-ai", + "integrations/llms/monster-api", + "integrations/llms/moonshot", + "integrations/llms/nomic", + "integrations/llms/novita-ai", + "integrations/llms/openrouter", + "integrations/llms/perplexity-ai", + "integrations/llms/predibase", + "integrations/llms/reka-ai", + "integrations/llms/sambanova", + "integrations/llms/segmind", + "integrations/llms/stability-ai", + "integrations/llms/siliconflow", + "integrations/llms/together-ai", + "integrations/llms/upstage", + "integrations/llms/voyage-ai", + "integrations/llms/workers-ai", + "integrations/llms/x-ai", + "integrations/llms/zhipu", + "integrations/llms/replicate", + "integrations/llms/suggest-a-new-integration" + ] + }, + "integrations/llms/byollm" + ] + }, + { + "group": "Vector Databases", + "pages": ["integrations/vector-databases/milvus"] + }, + { + "group": "Agents", + "pages": [ + "integrations/agents", + "integrations/agents/autogen", + "integrations/agents/control-flow", + "integrations/agents/crewai", + "integrations/agents/langchain-agents", + "integrations/agents/langgraph", + "integrations/agents/llama-agents", + "integrations/agents/openai-swarm", + "integrations/agents/phidata", + "integrations/agents/bring-your-own-agents" + ] + }, + { + "group": "Libraries", + "pages": [ + "integrations/libraries", + "integrations/libraries/autogen", + "integrations/libraries/dspy", + "integrations/libraries/instructor", + "integrations/libraries/langchain-python", + "integrations/libraries/langchain-js", + "integrations/libraries/librechat", + 
"integrations/libraries/openwebui", + "integrations/libraries/llama-index-python", + "integrations/libraries/promptfoo", + "integrations/libraries/vercel", + "integrations/libraries/mindsdb", + "integrations/libraries/tooljet", + "integrations/libraries/mongodb", + "integrations/libraries/supabase" + ] + } + ] + }, + { + "tab": "Inference API", + "groups": [ + { + "group": "API Reference", + "pages": [ + "api-reference/inference-api/introduction", + "api-reference/inference-api/authentication", + "api-reference/inference-api/headers", + "api-reference/inference-api/supported-providers", + { + "group": "SDKs", + "pages": [ + "api-reference/inference-api/sdks/supported-sdks", + "api-reference/inference-api/portkey-sdk-client", + "api-reference/inference-api/sdks/c-sharp" + ] + }, + { + "group": "API Details", + "pages": [ + "api-reference/inference-api/response-schema", + "api-reference/inference-api/config-object", + "api-reference/inference-api/open-api-specification" + ] + } + ] + }, + { + "group": "Chat Completions", + "pages": ["api-reference/inference-api/chat"] + }, + { + "group": "Portkey Prompts", + "pages": [ + "api-reference/inference-api/prompts/prompt-completion", + "api-reference/inference-api/prompts/render" + ] + }, + { + "group": "Embeddings", + "pages": ["api-reference/inference-api/embeddings"] + }, + { + "group": "Images", + "pages": [ + "api-reference/inference-api/images/create-image", + "api-reference/inference-api/images/create-image-edit", + "api-reference/inference-api/images/create-image-variation" + ] + }, + { + "group": "Audio", + "pages": [ + "api-reference/inference-api/audio/create-speech", + "api-reference/inference-api/audio/create-transcription", + "api-reference/inference-api/audio/create-translation" + ] + }, + { + "group": "Realtime", + "pages": ["api-reference/inference-api/realtime-create-session"] + }, + { + "group": "Other APIs", + "pages": ["api-reference/inference-api/gateway-for-other-apis"] + }, + { + "group": 
"Completions", + "pages": ["api-reference/inference-api/completions"] + }, + { + "group": "Moderations", + "pages": ["api-reference/inference-api/moderations"] + }, + { + "group": "Fine-tuning", + "pages": [ + "api-reference/inference-api/fine-tuning/create-fine-tuning-job", + "api-reference/inference-api/fine-tuning/list-fine-tuning-jobs", + "api-reference/inference-api/fine-tuning/retrieve-fine-tuning-job", + "api-reference/inference-api/fine-tuning/list-fine-tuning-events", + "api-reference/inference-api/fine-tuning/list-fine-tuning-checkpoints", + "api-reference/inference-api/fine-tuning/cancel-fine-tuning" + ] + }, + { + "group": "Batch", + "pages": [ + "api-reference/inference-api/batch/create-batch", + "api-reference/inference-api/batch/list-batch", + "api-reference/inference-api/batch/retrieve-batch", + "api-reference/inference-api/batch/cancel-batch" + ] + }, + { + "group": "Files", + "pages": [ + "api-reference/inference-api/files/upload-file", + "api-reference/inference-api/files/list-files", + "api-reference/inference-api/files/retrieve-file", + "api-reference/inference-api/files/retrieve-file-content", + "api-reference/inference-api/files/delete-file" + ] + }, + { + "group": "Assistants", + "pages": [ + { + "group": "Assistants", + "pages": [ + "api-reference/inference-api/assistants-api/assistants/create-assistant", + "api-reference/inference-api/assistants-api/assistants/list-assistants", + "api-reference/inference-api/assistants-api/assistants/retrieve-assistant", + "api-reference/inference-api/assistants-api/assistants/modify-assistant", + "api-reference/inference-api/assistants-api/assistants/delete-assistant" + ] + }, + { + "group": "Threads", + "pages": [ + "api-reference/inference-api/assistants-api/threads/create-thread", + "api-reference/inference-api/assistants-api/threads/retrieve-thread", + "api-reference/inference-api/assistants-api/threads/modify-thread", + "api-reference/inference-api/assistants-api/threads/delete-thread" + ] + }, + { + 
"group": "Messages", + "pages": [ + "api-reference/inference-api/assistants-api/messages/create-message", + "api-reference/inference-api/assistants-api/messages/list-messages", + "api-reference/inference-api/assistants-api/messages/retrieve-message", + "api-reference/inference-api/assistants-api/messages/modify-message", + "api-reference/inference-api/assistants-api/messages/delete-message" + ] + }, + { + "group": "Runs", + "pages": [ + "api-reference/inference-api/assistants-api/runs/create-run", + "api-reference/inference-api/assistants-api/runs/create-thread-and-run", + "api-reference/inference-api/assistants-api/runs/list-runs", + "api-reference/inference-api/assistants-api/runs/retrieve-run", + "api-reference/inference-api/assistants-api/runs/modify-run", + "api-reference/inference-api/assistants-api/runs/submit-tool-outputs-to-run", + "api-reference/inference-api/assistants-api/runs/cancel-run" + ] + }, + { + "group": "Run Steps", + "pages": [ + "api-reference/inference-api/assistants-api/run-steps/list-run-steps", + "api-reference/inference-api/assistants-api/run-steps/retrieve-run-steps" + ] + } + ] + } + ] + }, + { + "tab": "Admin API", + "groups": [ + { + "group": "Control Plane", + "pages": [ + { + "group": "Configs", + "pages": [ + "api-reference/admin-api/control-plane/configs/create-config", + "api-reference/admin-api/control-plane/configs/list-configs", + "api-reference/admin-api/control-plane/configs/retrieve-config", + "api-reference/admin-api/control-plane/configs/update-config" + ] + }, + { + "group": "Virtual Keys", + "pages": [ + "api-reference/admin-api/control-plane/virtual-keys/create-virtual-key", + "api-reference/admin-api/control-plane/virtual-keys/list-virtual-keys", + "api-reference/admin-api/control-plane/virtual-keys/retrieve-virtual-key", + "api-reference/admin-api/control-plane/virtual-keys/update-virtual-key", + "api-reference/admin-api/control-plane/virtual-keys/delete-virtual-key" + ] + }, + { + "group": "API Keys", + "pages": [ 
+ "api-reference/admin-api/control-plane/api-keys/update-api-key", + "api-reference/admin-api/control-plane/api-keys/create-api-key", + "api-reference/admin-api/control-plane/api-keys/delete-an-api-key", + "api-reference/admin-api/control-plane/api-keys/retrieve-an-api-key", + "api-reference/admin-api/control-plane/api-keys/list-api-keys" + ] + }, + { + "group": "Analytics [BETA]", + "pages": [ + { + "group": "Summary", + "pages": [ + "api-reference/admin-api/control-plane/analytics/summary/get-all-cache-data" + ] + }, + { + "group": "Groups Paginated Data", + "pages": [ + "api-reference/admin-api/control-plane/analytics/groups-paginated-data/get-metadata-grouped-data", + "api-reference/admin-api/control-plane/analytics/groups-paginated-data/get-model-grouped-data", + "api-reference/admin-api/control-plane/analytics/groups-paginated-data/get-user-grouped-data" + ] + }, + { + "group": "Graphs - Time Series Data", + "pages": [ + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-cache-hit-latency-data", + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-cache-hit-rate-data", + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-cost-data", + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-error-rate-data", + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-errors-data", + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-feedback-data", + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-feedback-per-ai-models-data", + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-feedback-score-distribution-data", + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-latency-data", + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-requests-data", + 
"api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-requests-per-user-data", + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-rescued-requests-data", + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-status-code-data", + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-tokens-data", + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-unique-status-code-data", + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-users-data", + "api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-weighted-feedback-data" + ] + } + ] + }, + { + "group": "Users", + "pages": [ + "api-reference/admin-api/control-plane/users/retrieve-a-user", + "api-reference/admin-api/control-plane/users/retrieve-all-users", + "api-reference/admin-api/control-plane/users/update-a-user", + "api-reference/admin-api/control-plane/users/remove-a-user" + ] + }, + { + "group": "User Invites", + "pages": [ + "api-reference/admin-api/control-plane/user-invites/invite-a-user", + "api-reference/admin-api/control-plane/user-invites/retrieve-an-invite", + "api-reference/admin-api/control-plane/user-invites/retrieve-all-user-invites", + "api-reference/admin-api/control-plane/user-invites/delete-a-user-invite", + "api-reference/admin-api/control-plane/user-invites/resend-a-user-invite" + ] + }, + { + "group": "Workspaces", + "pages": [ + "api-reference/admin-api/control-plane/workspaces/create-workspace", + "api-reference/admin-api/control-plane/workspaces/retrieve-all-workspaces", + "api-reference/admin-api/control-plane/workspaces/retrieve-a-workspace", + "api-reference/admin-api/control-plane/workspaces/update-workspace", + "api-reference/admin-api/control-plane/workspaces/delete-a-workspace" + ] + }, + { + "group": "Workspace Members", + "pages": [ + 
"api-reference/admin-api/control-plane/workspace-members/add-a-workspace-member", + "api-reference/admin-api/control-plane/workspace-members/retrieve-all-workspace-members", + "api-reference/admin-api/control-plane/workspace-members/retrieve-a-workspace-member", + "api-reference/admin-api/control-plane/workspace-members/update-workspace-member", + "api-reference/admin-api/control-plane/workspace-members/remove-workspace-member" + ] + } + ] + }, + { + "group": "Data Plane", + "pages": [ + { + "group": "Feedback", + "pages": [ + "api-reference/admin-api/data-plane/feedback", + "api-reference/admin-api/data-plane/feedback/create-feedback", + "api-reference/admin-api/data-plane/feedback/update-feedback" + ] + }, + "api-reference/admin-api/data-plane/guardrails", + { + "group": "Logs", + "pages": [ + "api-reference/admin-api/data-plane/logs/insert-a-log", + { + "group": "Log Exports [BETA]", + "pages": [ + "api-reference/admin-api/data-plane/logs/log-exports-beta/retrieve-a-log-export", + "api-reference/admin-api/data-plane/logs/log-exports-beta/update-a-log-export", + "api-reference/admin-api/data-plane/logs/log-exports-beta/list-log-exports", + "api-reference/admin-api/data-plane/logs/log-exports-beta/create-a-log-export", + "api-reference/admin-api/data-plane/logs/log-exports-beta/start-a-log-export", + "api-reference/admin-api/data-plane/logs/log-exports-beta/cancel-a-log-export", + "api-reference/admin-api/data-plane/logs/log-exports-beta/download-a-log-export" + ] + } + ] + } + ] + }, + { + "group": "OpenAPI", + "pages": ["api-reference/admin-api/open-api-specification"] + } + ] + }, + { + "tab": "Cookbook", + "groups": [ + { + "group": "Getting Started", + "pages": [ + "guides/getting-started", + "guides/getting-started/a-b-test-prompts-and-models", + "guides/getting-started/tackling-rate-limiting", + "guides/getting-started/function-calling", + "guides/getting-started/image-generation", + "guides/getting-started/getting-started-with-ai-gateway", + 
"guides/getting-started/llama-3-on-groq", + "guides/getting-started/return-repeat-requests-from-cache", + "guides/getting-started/trigger-automatic-retries-on-llm-failures", + "guides/getting-started/101-on-portkey-s-gateway-configs" + ] + }, + { + "group": "Integrations", + "pages": [ + "guides/integrations", + "guides/integrations/llama-3-on-portkey-+-together-ai", + "guides/integrations/introduction-to-gpt-4o", + "guides/integrations/anyscale", + "guides/integrations/mistral", + "guides/integrations/vercel-ai", + "guides/integrations/deepinfra", + "guides/integrations/groq", + "guides/integrations/langchain", + "guides/integrations/mixtral-8x22b", + "guides/integrations/segmind" + ] + }, + { + "group": "Use Cases", + "pages": [ + "guides/use-cases", + "guides/use-cases/few-shot-prompting", + "guides/use-cases/enforcing-json-schema-with-anyscale-and-together", + "guides/use-cases/emotions-with-gpt-4o", + "guides/use-cases/build-an-article-suggestion-app-with-supabase-pgvector-and-portkey", + "guides/use-cases/setting-up-resilient-load-balancers-with-failure-mitigating-fallbacks", + "guides/use-cases/run-portkey-on-prompts-from-langchain-hub", + "guides/use-cases/smart-fallback-with-model-optimized-prompts", + "guides/use-cases/how-to-use-openai-sdk-with-portkey-prompt-templates", + "guides/use-cases/setup-openai-greater-than-azure-openai-fallback", + "guides/use-cases/fallback-from-sdxl-to-dall-e-3", + "guides/use-cases/comparing-top10-lmsys-models-with-portkey", + "guides/use-cases/build-a-chatbot-using-portkeys-prompt-templates" + ] + } + ] + }, + { + "tab": "Changelog", + "groups": [ + { + "group": "Platform", + "pages": [ + { + "group": "2025", + "pages": ["changelog/2025/jan"] + }, + { + "group": "2024", + "pages": [ + "changelog/2024/dec", + "changelog/2024/nov", + "changelog/2024/oct" + ] + } + ] + }, + { + "group": "Enterprise Releases", + "pages": ["changelog/enterprise", "changelog/helm-chart"] + }, + { + "group": "Open Source", + "pages": 
["changelog/open-source", "changelog/office-hour"] + }, + { + "group": "What's New", + "pages": ["changelog/product"] + } + ] + } + ] + }, + "styling": { + "codeblocks": "system" + }, + "logo": { + "light": "/logo-white.png", + "dark": "/logo-black.png" + }, + "api": { + "openapi": "https://raw.githubusercontent.com/Portkey-AI/openapi/refs/heads/master/openapi.yaml" + }, + "background": { + "decoration": "gradient" + }, + "navbar": { + "links": [ + { + "label": "Support", + "href": "https://portkey.ai/community" + }, + { + "label": "Get Demo", + "href": "https://portkey.sh/demo-18" + }, + { + "label": "LLMs in Prod '25", + "href": "https://portkey.sh/docs-banner" + } + ], + "primary": { + "type": "button", + "label": "Sign in →", + "href": "https://app.portkey.ai/login" + } + }, + "footer": { + "socials": { + "x": "https://twitter.com/PortkeyAI", + "linkedin": "https://www.linkedin.com/company/portkey-ai/", + "discord": "https://portkey.wiki/community", + "github": "https://git.new/portkey" + } + }, + "integrations": { + "ga4": { + "measurementId": "G-JJMXYE4DRL" + } + }, + "redirects": [ + { + "source": "/changelog/enterprise/oct-24", + "destination": "/changelog/enterprise" + }, + { + "source": "/changelog/product/oct-24", + "destination": "/changelog/2024/oct" + }, + { + "source": "/api-reference", + "destination": "/api-reference/inference-api" + }, + { + "source": "/api-reference/introduction", + "destination": "/api-reference/inference-api/introduction" + }, + { + "source": "/api-reference/authentication", + "destination": "/api-reference/inference-api/authentication" + }, + { + "source": "/api-reference/headers", + "destination": "/api-reference/inference-api/headers" + }, + { + "source": "/api-reference/config-object", + "destination": "/api-reference/inference-api/config-object" + }, + { + "source": "/api-reference/portkey-sdk-client", + "destination": "/api-reference/inference-api/portkey-sdk-client" + }, + { + "source": 
"/provider-endpoints/supported-providers", + "destination": "/api-reference/inference-api/supported-providers" + }, + { + "source": "/api-reference/response-schema", + "destination": "/api-reference/inference-api/response-schema" + }, + { + "source": "/provider-endpoints", + "destination": "/api-reference/inference-api" + }, + { + "source": "/provider-endpoints/assistants-api/assistants/create-assistant", + "destination": "/api-reference/inference-api/assistants-api/assistants/create-assistant" + }, + { + "source": "/provider-endpoints/assistants-api/assistants/delete-assistant", + "destination": "/api-reference/inference-api/assistants-api/assistants/delete-assistant" + }, + { + "source": "/provider-endpoints/assistants-api/assistants/list-assistants", + "destination": "/api-reference/inference-api/assistants-api/assistants/list-assistants" + }, + { + "source": "/provider-endpoints/assistants-api/assistants/modify-assistant", + "destination": "/api-reference/inference-api/assistants-api/assistants/modify-assistant" + }, + { + "source": "/provider-endpoints/assistants-api/assistants/retrieve-assistant", + "destination": "/api-reference/inference-api/assistants-api/assistants/retrieve-assistant" + }, + { + "source": "/provider-endpoints/assistants-api/assistants", + "destination": "/api-reference/inference-api/assistants-api/assistants" + }, + { + "source": "/provider-endpoints/assistants-api/messages/create-message", + "destination": "/api-reference/inference-api/assistants-api/messages/create-message" + }, + { + "source": "/provider-endpoints/assistants-api/messages/delete-message", + "destination": "/api-reference/inference-api/assistants-api/messages/delete-message" + }, + { + "source": "/provider-endpoints/assistants-api/messages/list-messages", + "destination": "/api-reference/inference-api/assistants-api/messages/list-messages" + }, + { + "source": "/provider-endpoints/assistants-api/messages/modify-message", + "destination": 
"/api-reference/inference-api/assistants-api/messages/modify-message" + }, + { + "source": "/provider-endpoints/assistants-api/messages/retrieve-message", + "destination": "/api-reference/inference-api/assistants-api/messages/retrieve-message" + }, + { + "source": "/provider-endpoints/assistants-api/messages", + "destination": "/api-reference/inference-api/assistants-api/messages" + }, + { + "source": "/provider-endpoints/assistants-api/run-steps/list-run-steps", + "destination": "/api-reference/inference-api/assistants-api/run-steps/list-run-steps" + }, + { + "source": "/provider-endpoints/assistants-api/run-steps/retrieve-run-steps", + "destination": "/api-reference/inference-api/assistants-api/run-steps/retrieve-run-steps" + }, + { + "source": "/provider-endpoints/assistants-api/run-steps", + "destination": "/api-reference/inference-api/assistants-api/run-steps" + }, + { + "source": "/provider-endpoints/assistants-api/runs/cancel-run", + "destination": "/api-reference/inference-api/assistants-api/runs/cancel-run" + }, + { + "source": "/provider-endpoints/assistants-api/runs/create-run", + "destination": "/api-reference/inference-api/assistants-api/runs/create-run" + }, + { + "source": "/provider-endpoints/assistants-api/runs/create-thread-and-run", + "destination": "/api-reference/inference-api/assistants-api/runs/create-thread-and-run" + }, + { + "source": "/provider-endpoints/assistants-api/runs/list-runs", + "destination": "/api-reference/inference-api/assistants-api/runs/list-runs" + }, + { + "source": "/provider-endpoints/assistants-api/runs/modify-run", + "destination": "/api-reference/inference-api/assistants-api/runs/modify-run" + }, + { + "source": "/provider-endpoints/assistants-api/runs/retrieve-run", + "destination": "/api-reference/inference-api/assistants-api/runs/retrieve-run" + }, + { + "source": "/provider-endpoints/assistants-api/runs/submit-tool-outputs-to-run", + "destination": 
"/api-reference/inference-api/assistants-api/runs/submit-tool-outputs-to-run" + }, + { + "source": "/provider-endpoints/assistants-api/runs", + "destination": "/api-reference/inference-api/assistants-api/runs" + }, + { + "source": "/provider-endpoints/assistants-api/threads/create-thread", + "destination": "/api-reference/inference-api/assistants-api/threads/create-thread" + }, + { + "source": "/provider-endpoints/assistants-api/threads/delete-thread", + "destination": "/api-reference/inference-api/assistants-api/threads/delete-thread" + }, + { + "source": "/provider-endpoints/assistants-api/threads/modify-thread", + "destination": "/api-reference/inference-api/assistants-api/threads/modify-thread" + }, + { + "source": "/provider-endpoints/assistants-api/threads/retrieve-thread", + "destination": "/api-reference/inference-api/assistants-api/threads/retrieve-thread" + }, + { + "source": "/provider-endpoints/assistants-api/threads", + "destination": "/api-reference/inference-api/assistants-api/threads" + }, + { + "source": "/provider-endpoints/assistants-api", + "destination": "/api-reference/inference-api/assistants-api" + }, + { + "source": "/provider-endpoints/audio/create-speech", + "destination": "/api-reference/inference-api/audio/create-speech" + }, + { + "source": "/provider-endpoints/audio/create-transcription", + "destination": "/api-reference/inference-api/audio/create-transcription" + }, + { + "source": "/provider-endpoints/audio/create-translation", + "destination": "/api-reference/inference-api/audio/create-translation" + }, + { + "source": "/provider-endpoints/audio", + "destination": "/api-reference/inference-api/audio" + }, + { + "source": "/provider-endpoints/batch/cancel-batch", + "destination": "/api-reference/inference-api/batch/cancel-batch" + }, + { + "source": "/provider-endpoints/batch/create-batch", + "destination": "/api-reference/inference-api/batch/create-batch" + }, + { + "source": "/provider-endpoints/batch/list-batch", + "destination": 
"/api-reference/inference-api/batch/list-batch" + }, + { + "source": "/provider-endpoints/batch/retrieve-batch", + "destination": "/api-reference/inference-api/batch/retrieve-batch" + }, + { + "source": "/provider-endpoints/batch", + "destination": "/api-reference/inference-api/batch" + }, + { + "source": "/provider-endpoints/chat", + "destination": "/api-reference/inference-api/chat" + }, + { + "source": "/provider-endpoints/completions", + "destination": "/api-reference/inference-api/completions" + }, + { + "source": "/provider-endpoints/embeddings", + "destination": "/api-reference/inference-api/embeddings" + }, + { + "source": "/provider-endpoints/files/delete-file", + "destination": "/api-reference/inference-api/files/delete-file" + }, + { + "source": "/provider-endpoints/files/list-files", + "destination": "/api-reference/inference-api/files/list-files" + }, + { + "source": "/provider-endpoints/files/retrieve-file-content", + "destination": "/api-reference/inference-api/files/retrieve-file-content" + }, + { + "source": "/provider-endpoints/files/retrieve-file", + "destination": "/api-reference/inference-api/files/retrieve-file" + }, + { + "source": "/provider-endpoints/files/upload-file", + "destination": "/api-reference/inference-api/files/upload-file" + }, + { + "source": "/provider-endpoints/files", + "destination": "/api-reference/inference-api/files" + }, + { + "source": "/provider-endpoints/fine-tuning/cancel-fine-tuning", + "destination": "/api-reference/inference-api/fine-tuning/cancel-fine-tuning" + }, + { + "source": "/provider-endpoints/fine-tuning/create-fine-tuning-job", + "destination": "/api-reference/inference-api/fine-tuning/create-fine-tuning-job" + }, + { + "source": "/provider-endpoints/fine-tuning/list-fine-tuning-checkpoints", + "destination": "/api-reference/inference-api/fine-tuning/list-fine-tuning-checkpoints" + }, + { + "source": "/provider-endpoints/fine-tuning/list-fine-tuning-events", + "destination": 
"/api-reference/inference-api/fine-tuning/list-fine-tuning-events" + }, + { + "source": "/provider-endpoints/fine-tuning/list-fine-tuning-jobs", + "destination": "/api-reference/inference-api/fine-tuning/list-fine-tuning-jobs" + }, + { + "source": "/provider-endpoints/fine-tuning/retrieve-fine-tuning-job", + "destination": "/api-reference/inference-api/fine-tuning/retrieve-fine-tuning-job" + }, + { + "source": "/provider-endpoints/fine-tuning", + "destination": "/api-reference/inference-api/fine-tuning" + }, + { + "source": "/provider-endpoints/gateway-for-other-apis", + "destination": "/api-reference/inference-api/gateway-for-other-apis" + }, + { + "source": "/provider-endpoints/images/create-image-edit", + "destination": "/api-reference/inference-api/images/create-image-edit" + }, + { + "source": "/provider-endpoints/images/create-image-variation", + "destination": "/api-reference/inference-api/images/create-image-variation" + }, + { + "source": "/provider-endpoints/images/create-image", + "destination": "/api-reference/inference-api/images/create-image" + }, + { + "source": "/provider-endpoints/images", + "destination": "/api-reference/inference-api/images" + }, + { + "source": "/provider-endpoints/moderations", + "destination": "/api-reference/inference-api/moderations" + }, + { + "source": "/portkey-endpoints", + "destination": "/api-reference/admin-api/control-plane" + }, + { + "source": "/portkey-endpoints/admin/user-invites/delete-a-user-invite", + "destination": "/api-reference/admin-api/control-plane/user-invites/delete-a-user-invite" + }, + { + "source": "/portkey-endpoints/admin/user-invites/invite-a-user", + "destination": "/api-reference/admin-api/control-plane/user-invites/invite-a-user" + }, + { + "source": "/portkey-endpoints/admin/user-invites/retrieve-all-user-invites", + "destination": "/api-reference/admin-api/control-plane/user-invites/retrieve-all-user-invites" + }, + { + "source": "/portkey-endpoints/admin/user-invites/retrieve-an-invite", + 
"destination": "/api-reference/admin-api/control-plane/user-invites/retrieve-an-invite" + }, + { + "source": "/portkey-endpoints/admin/users/remove-a-user", + "destination": "/api-reference/admin-api/control-plane/users/remove-a-user" + }, + { + "source": "/portkey-endpoints/admin/users/retrieve-a-user", + "destination": "/api-reference/admin-api/control-plane/users/retrieve-a-user" + }, + { + "source": "/portkey-endpoints/admin/users/retrieve-all-users", + "destination": "/api-reference/admin-api/control-plane/users/retrieve-all-users" + }, + { + "source": "/portkey-endpoints/admin/users/update-a-user", + "destination": "/api-reference/admin-api/control-plane/users/update-a-user" + }, + { + "source": "/portkey-endpoints/admin/workspace-members/add-a-workspace-member", + "destination": "/api-reference/admin-api/control-plane/workspace-members/add-a-workspace-member" + }, + { + "source": "/portkey-endpoints/admin/workspace-members/remove-workspace-member", + "destination": "/api-reference/admin-api/control-plane/workspace-members/remove-workspace-member" + }, + { + "source": "/portkey-endpoints/admin/workspace-members/retrieve-a-workspace-member", + "destination": "/api-reference/admin-api/control-plane/workspace-members/retrieve-a-workspace-member" + }, + { + "source": "/portkey-endpoints/admin/workspace-members/retrieve-all-workspace-members", + "destination": "/api-reference/admin-api/control-plane/workspace-members/retrieve-all-workspace-members" + }, + { + "source": "/portkey-endpoints/admin/workspace-members/update-workspace-member", + "destination": "/api-reference/admin-api/control-plane/workspace-members/update-workspace-member" + }, + { + "source": "/portkey-endpoints/admin/workspaces/create-workspace", + "destination": "/api-reference/admin-api/control-plane/workspaces/create-workspace" + }, + { + "source": "/portkey-endpoints/admin/workspaces/delete-a-workspace", + "destination": "/api-reference/admin-api/control-plane/workspaces/delete-a-workspace" + }, 
+ { + "source": "/portkey-endpoints/admin/workspaces/retrieve-a-workspace", + "destination": "/api-reference/admin-api/control-plane/workspaces/retrieve-a-workspace" + }, + { + "source": "/portkey-endpoints/admin/workspaces/retrieve-all-workspaces", + "destination": "/api-reference/admin-api/control-plane/workspaces/retrieve-all-workspaces" + }, + { + "source": "/portkey-endpoints/admin/workspaces/update-workspace", + "destination": "/api-reference/admin-api/control-plane/workspaces/update-workspace" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data/get-cache-hit-latency-data", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-cache-hit-latency-data" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data/get-cache-hit-rate-data", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-cache-hit-rate-data" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data/get-cost-data", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-cost-data" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data/get-error-rate-data", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-error-rate-data" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data/get-errors-data", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-errors-data" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data/get-feedback-data", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-feedback-data" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data/get-feedback-per-ai-models", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-feedback-per-ai-models" + }, + { + 
"source": "/portkey-endpoints/analytics/graphs-time-series-data/get-feedback-score-distribution-data", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-feedback-score-distribution-data" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data/get-latency-data", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-latency-data" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data/get-requests-data", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-requests-data" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data/get-requests-per-user", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-requests-per-user" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data/get-rescued-requests-data", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-rescued-requests-data" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data/get-status-code-data", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-status-code-data" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data/get-tokens-data", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-tokens-data" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data/get-unique-status-code-data", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-unique-status-code-data" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data/get-users-data", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-users-data" + }, + { + "source": 
"/portkey-endpoints/analytics/graphs-time-series-data/get-weighted-feeback-data", + "destination": "/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-weighted-feeback-data" + }, + { + "source": "/portkey-endpoints/analytics/groups-paginated-data/get-metadata-grouped-data", + "destination": "/api-reference/admin-api/control-plane/analytics/groups-paginated-data/get-metadata-grouped-data" + }, + { + "source": "/portkey-endpoints/analytics/groups-paginated-data/get-model-grouped-data", + "destination": "/api-reference/admin-api/control-plane/analytics/groups-paginated-data/get-model-grouped-data" + }, + { + "source": "/portkey-endpoints/analytics/groups-paginated-data/get-user-grouped-data", + "destination": "/api-reference/admin-api/control-plane/analytics/groups-paginated-data/get-user-grouped-data" + }, + { + "source": "/portkey-endpoints/analytics/summary/get-all-cache-data", + "destination": "/api-reference/admin-api/control-plane/analytics/summary/get-all-cache-data" + }, + { + "source": "/portkey-endpoints/api-keys/create-api-key", + "destination": "/api-reference/admin-api/control-plane/api-keys/create-api-key" + }, + { + "source": "/portkey-endpoints/api-keys/delete-an-api-key", + "destination": "/api-reference/admin-api/control-plane/api-keys/delete-an-api-key" + }, + { + "source": "/portkey-endpoints/api-keys/list-api-keys", + "destination": "/api-reference/admin-api/control-plane/api-keys/list-api-keys" + }, + { + "source": "/portkey-endpoints/api-keys/retrieve-an-api-key", + "destination": "/api-reference/admin-api/control-plane/api-keys/retrieve-an-api-key" + }, + { + "source": "/portkey-endpoints/api-keys/update-api-key", + "destination": "/api-reference/admin-api/control-plane/api-keys/update-api-key" + }, + { + "source": "/portkey-endpoints/configs/create-config", + "destination": "/api-reference/admin-api/control-plane/configs/create-config" + }, + { + "source": "/portkey-endpoints/configs/list-configs", + "destination": 
"/api-reference/admin-api/control-plane/configs/list-configs" + }, + { + "source": "/portkey-endpoints/configs/retrieve-config", + "destination": "/api-reference/admin-api/control-plane/configs/retrieve-config" + }, + { + "source": "/portkey-endpoints/configs/update-config", + "destination": "/api-reference/admin-api/control-plane/configs/update-config" + }, + { + "source": "/portkey-endpoints/feedback/create-feedback", + "destination": "/api-reference/admin-api/data-plane/feedback/create-feedback" + }, + { + "source": "/docs/portkey-endpoints/feedback/create-feedback", + "destination": "/api-reference/admin-api/data-plane/feedback/create-feedback" + }, + { + "source": "/portkey-endpoints/feedback/update-feedback", + "destination": "/api-reference/admin-api/data-plane/feedback/update-feedback" + }, + { + "source": "/docs/portkey-endpoints/feedback/update-feedback", + "destination": "/api-reference/admin-api/data-plane/feedback/update-feedback" + }, + { + "source": "/portkey-endpoints/guardrails", + "destination": "/api-reference/admin-api/data-plane/guardrails" + }, + { + "source": "/docs/portkey-endpoints/logs/insert-a-log", + "destination": "/api-reference/admin-api/data-plane/logs/insert-a-log" + }, + { + "source": "/portkey-endpoints/logs/insert-a-log", + "destination": "/api-reference/admin-api/data-plane/logs/insert-a-log" + }, + { + "source": "/portkey-endpoints/logs/log-exports-beta/cancel-a-log-export", + "destination": "/api-reference/admin-api/data-plane/logs/log-exports-beta/cancel-a-log-export" + }, + { + "source": "/portkey-endpoints/logs/log-exports-beta/create-a-log-export", + "destination": "/api-reference/admin-api/data-plane/logs/log-exports-beta/create-a-log-export" + }, + { + "source": "/portkey-endpoints/logs/log-exports-beta/download-a-log-export", + "destination": "/api-reference/admin-api/data-plane/logs/log-exports-beta/download-a-log-export" + }, + { + "source": "/portkey-endpoints/logs/log-exports-beta/list-log-exports", + "destination": 
"/api-reference/admin-api/data-plane/logs/log-exports-beta/list-log-exports" + }, + { + "source": "/portkey-endpoints/logs/log-exports-beta/retrieve-a-log-export", + "destination": "/api-reference/admin-api/data-plane/logs/log-exports-beta/retrieve-a-log-export" + }, + { + "source": "/portkey-endpoints/logs/log-exports-beta/start-a-log-export", + "destination": "/api-reference/admin-api/data-plane/logs/log-exports-beta/start-a-log-export" + }, + { + "source": "/portkey-endpoints/logs/log-exports-beta/update-a-log-export", + "destination": "/api-reference/admin-api/data-plane/logs/log-exports-beta/update-a-log-export" + }, + { + "source": "/portkey-endpoints/prompts/prompt-completion", + "destination": "/api-reference/inference-api/prompts/prompt-completion" + }, + { + "source": "/portkey-endpoints/prompts/render", + "destination": "/api-reference/inference-api/prompts/render" + }, + { + "source": "/portkey-endpoints/virtual-keys/create-virtual-key", + "destination": "/api-reference/admin-api/control-plane/virtual-keys/create-virtual-key" + }, + { + "source": "/portkey-endpoints/virtual-keys/delete-virtual-key", + "destination": "/api-reference/admin-api/control-plane/virtual-keys/delete-virtual-key" + }, + { + "source": "/portkey-endpoints/virtual-keys/list-virtual-keys", + "destination": "/api-reference/admin-api/control-plane/virtual-keys/list-virtual-keys" + }, + { + "source": "/portkey-endpoints/virtual-keys/retrieve-virtual-key", + "destination": "/api-reference/admin-api/control-plane/virtual-keys/retrieve-virtual-key" + }, + { + "source": "/portkey-endpoints/virtual-keys/update-virtual-key", + "destination": "/api-reference/admin-api/control-plane/virtual-keys/update-virtual-key" + }, + { + "source": "/product/observability-modern-monitoring-for-llms", + "destination": "/product/observability" + }, + { + "source": "/product/observability-modern-monitoring-for-llms/:slug*", + "destination": "/product/observability/:slug*" + }, + { + "source": 
"/product/guardrails/list-of-guardrail-checks/:slug*", + "destination": "/product/guardrails/:slug*" + }, + { + "source": "/product/ai-gateway-streamline-llm-integrations", + "destination": "/product/ai-gateway" + }, + { + "source": "/product/ai-gateway-streamline-llm-integrations/:slug*", + "destination": "/product/ai-gateway/:slug*" + }, + { + "source": "/welcome/agents", + "destination": "/integrations/agents" + }, + { + "source": "/welcome/agents/:slug*", + "destination": "/integrations/agents/:slug*" + }, + { + "source": "/welcome/integration-guides", + "destination": "/integrations" + }, + { + "source": "/welcome/integration-guides/:slug*", + "destination": "/integrations/llms/:slug*" + }, + { + "source": "/key-features/prompt-management#versioning-of-prompts", + "destination": "/product/prompt-library/prompt-templates#versioning-prompts" + }, + { + "source": "/introduction/welcome-to-portkey", + "destination": "/introduction/what-is-portkey" + }, + { + "source": "/introduction/getting-started", + "destination": "/introduction/make-your-first-request" + }, + { + "source": "/introduction/features", + "destination": "/introduction/feature-overview" + }, + { + "source": "/introduction/portkey-headers", + "destination": "/api-reference/inference-api/headers" + }, + { + "source": "/portkey-features/ai-gateway", + "destination": "/product/ai-gateway" + }, + { + "source": "/portkey-features/ai-gateway/:slug*", + "destination": "/product/ai-gateway/:slug*" + }, + { + "source": "/portkey-features/ai-gateway/configs", + "destination": "/product/ai-gateway/configs" + }, + { + "source": "/portkey-features/ai-gateway/simple-and-semantic-cache", + "destination": "/product/ai-gateway/cache-simple-and-semantic" + }, + { + "source": "/portkey-features/ai-gateway/fallbacks-on-llms", + "destination": "/product/ai-gateway/fallbacks" + }, + { + "source": "/portkey-features/observability", + "destination": "/product/observability" + }, + { + "source": 
"/portkey-features/observability/logs-and-analytics", + "destination": "/product/observability/logs" + }, + { + "source": "/portkey-features/observability/request-tracing", + "destination": "/product/observability/traces" + }, + { + "source": "/portkey-features/observability/custom-metadata", + "destination": "/product/observability/metadata" + }, + { + "source": "/portkey-features/model-management", + "destination": "/product/prompt-library" + }, + { + "source": "/portkey-features/feedback", + "destination": "/product/observability/feedback" + }, + { + "source": "/portkey-features/security-and-compliance", + "destination": "/product/enterprise-offering/security-portkey" + }, + { + "source": "/portkey-features/security-and-compliance/virtual-keys", + "destination": "/product/ai-gateway/virtual-keys" + }, + { + "source": "/portkey-features/continuous-fine-tuning-beta", + "destination": "/product/autonomous-fine-tuning" + }, + { + "source": "/sdk", + "destination": "/api-reference/inference-api/portkey-sdk-client" + }, + { + "source": "/sdk/python", + "destination": "/api-reference/inference-api/portkey-sdk-client" + }, + { + "source": "/sdk/node", + "destination": "/api-reference/inference-api/portkey-sdk-client" + }, + { + "source": "/integrations", + "destination": "/integrations/llms" + }, + { + "source": "/integrations/llm-providers", + "destination": "/integrations/llms" + }, + { + "source": "/integrations/llm-providers/open-ai-sdk", + "destination": "/integrations/llms/openai" + }, + { + "source": "/integrations/llm-providers/anthropic-sdk", + "destination": "/integrations/llms/anthropic" + }, + { + "source": "/integrations/llm-providers/whisper", + "destination": "/integrations/llms/openai" + }, + { + "source": "/integrations/llm-providers/anyscale-llama-2-and-mistral-7b", + "destination": "/integrations/llms/anyscale-llama2-mistral-zephyr" + }, + { + "source": "/integrations/llm-providers/anyscale-llama-2-and-mistral-7b/fallback-to-llama2-70b-in-llamaindex", 
+ "destination": "/integrations/llms/anyscale-llama2-mistral-zephyr" + }, + { + "source": "/integrations/llm-providers/cohere", + "destination": "/integrations/llms/cohere" + }, + { + "source": "/integrations/llamaindex", + "destination": "/integrations/libraries/llama-index-python" + }, + { + "source": "/integrations/llamaindex/implementing-fallbacks-and-retries", + "destination": "/integrations/libraries/llama-index-python" + }, + { + "source": "/integrations/llamaindex/implementing-load-balancing", + "destination": "/integrations/libraries/llama-index-python" + }, + { + "source": "/integrations/llamaindex/implementing-semantic-caching", + "destination": "/integrations/libraries/llama-index-python" + }, + { + "source": "/integrations/llamaindex/observability", + "destination": "/integrations/libraries/llama-index-python" + }, + { + "source": "/integrations/langchain", + "destination": "/integrations/libraries/langchain-python" + }, + { + "source": "/integrations/rest-api", + "destination": "/api-reference/inference-api/introduction" + }, + { + "source": "/integrations/rest-api/portkey-headers", + "destination": "/api-reference/inference-api/headers" + }, + { + "source": "/cookbook", + "destination": "/guides/getting-started" + }, + { + "source": "/cookbook/few-shot-prompting", + "destination": "/guides/use-cases/few-shot-prompting" + }, + { + "source": "/support-and-faq/why-portkey", + "destination": "/introduction/what-is-portkey" + }, + { + "source": "/support-and-faq/why-portkey/observability", + "destination": "/product/observability" + }, + { + "source": "/support-and-faq/why-portkey/lower-cost-and-latency", + "destination": "/introduction/feature-overview" + }, + { + "source": "/support-and-faq/why-portkey/improve-llm-success-rate", + "destination": "/introduction/feature-overview" + }, + { + "source": "/support-and-faq/why-portkey/optimise-generation-quality", + "destination": "/introduction/feature-overview" + }, + { + "source": 
"/support-and-faq/why-portkey/hide-pii-from-llms", + "destination": "/introduction/feature-overview" + }, + { + "source": "/support-and-faq/common-errors-and-resolutions", + "destination": "/support/common-errors-and-resolutions" + }, + { + "source": "/overview/introduction", + "destination": "/introduction/what-is-portkey" + }, + { + "source": "/overview/features-overview", + "destination": "/introduction/feature-overview" + }, + { + "source": "/getting-started/setup-your-account", + "destination": "/introduction/make-your-first-request" + }, + { + "source": "/getting-started/quick-integration", + "destination": "/introduction/make-your-first-request" + }, + { + "source": "/how-portkey-works/portkey-modes", + "destination": "/introduction/feature-overview" + }, + { + "source": "/how-portkey-works/portkey-headers", + "destination": "/api-reference/inference-api/headers" + }, + { + "source": "/how-portkey-works/supported-llms", + "destination": "/integrations/llms" + }, + { + "source": "/key-features/request-caching", + "destination": "/product/ai-gateway/cache-simple-and-semantic" + }, + { + "source": "/key-features/request-tracing", + "destination": "/product/observability/traces" + }, + { + "source": "/key-features/automatic-retries", + "destination": "/product/ai-gateway/automatic-retries" + }, + { + "source": "/key-features/custom-metadata", + "destination": "/product/observability/metadata" + }, + { + "source": "/key-features/fallbacks-on-llms", + "destination": "/product/ai-gateway/fallbacks" + }, + { + "source": "/key-features/load-balancing", + "destination": "/product/ai-gateway/load-balancing" + }, + { + "source": "/key-features/feedback-api", + "destination": "/product/observability/feedback" + }, + { + "source": "/key-features/logs-and-analytics", + "destination": "/product/observability/analytics" + }, + { + "source": "/key-features/ai-provider-keys", + "destination": "/product/ai-gateway/virtual-keys" + }, + { + "source": 
"/key-features/prompt-management", + "destination": "/product/prompt-library" + }, + { + "source": "/key-features/prompt-management/few-shot-prompting", + "destination": "/guides/use-cases/few-shot-prompting" + }, + { + "source": "/integration-guides/open-ai-sdk", + "destination": "/integrations/llms/openai" + }, + { + "source": "/integration-guides/llamaindex", + "destination": "/integrations/libraries/llama-index-python" + }, + { + "source": "/integration-guides/llamaindex/implementing-fallbacks-and-retries", + "destination": "/integrations/libraries/llama-index-python" + }, + { + "source": "/integration-guides/llamaindex/implementing-load-balancing", + "destination": "/integrations/libraries/llama-index-python" + }, + { + "source": "/integration-guides/llamaindex/implementing-semantic-caching", + "destination": "/integrations/libraries/llama-index-python" + }, + { + "source": "/integration-guides/llamaindex/observability", + "destination": "/integrations/libraries/llama-index-python" + }, + { + "source": "/integration-guides/langchain", + "destination": "/integrations/libraries/langchain-python" + }, + { + "source": "/integration-guides/cohere", + "destination": "/integrations/llms/cohere" + }, + { + "source": "/integration-guides/anthropic-sdk", + "destination": "/integrations/llms/anthropic" + }, + { + "source": "/integration-guides/microsoft-guidance", + "destination": "/integrations/llms/suggest-a-new-integration" + }, + { + "source": "/integration-guides/rest-api", + "destination": "/api-reference/inference-api/introduction" + }, + { + "source": "/why-portkey/observability", + "destination": "/product/observability" + }, + { + "source": "/why-portkey/lower-cost-and-latency", + "destination": "/introduction/feature-overview" + }, + { + "source": "/why-portkey/improve-llm-success-rate", + "destination": "/introduction/feature-overview" + }, + { + "source": "/why-portkey/optimise-generation-quality", + "destination": "/introduction/feature-overview" + }, + { + 
"source": "/why-portkey/hide-pii-from-llms", + "destination": "/introduction/feature-overview" + }, + { + "source": "/troubleshooting-and-support/common-errors-and-resolutions", + "destination": "/support/common-errors-and-resolutions" + }, + { + "source": "/open-source", + "destination": "/product/open-source" + }, + { + "source": "/open-source/rubeus-by-portkey", + "destination": "/product/open-source" + }, + { + "source": "/support-and-faq/join-portkey-on-discord", + "destination": "/support/developer-forum" + }, + { + "source": "/support-and-faq/contacting-support", + "destination": "/support/contact-us" + }, + { + "source": "/support-and-faq/reporting-issues", + "destination": "/support/developer-forum" + }, + { + "source": "/troubleshooting-and-support/reporting-issues", + "destination": "/support/developer-forum" + }, + { + "source": "/troubleshooting-and-support/contacting-support", + "destination": "/support/contact-us" + }, + { + "source": "/troubleshooting-and-support/join-portkey-on-discord", + "destination": "/support/developer-forum" + }, + { + "source": "/welcome/what-is-portkey", + "destination": "/introduction/what-is-portkey" + }, + { + "source": "/welcome/readme", + "destination": "/introduction/what-is-portkey" + }, + { + "source": "/welcome/integration-guides/openai", + "destination": "/integrations/llms/openai" + }, + { + "source": "/welcome/make-your-first-request", + "destination": "/introduction/make-your-first-request" + }, + { + "source": "/product/ai-gateway-streamline-llm-integrations/cache-simple-and-semantic", + "destination": "/product/ai-gateway/cache-simple-and-semantic" + }, + { + "source": "/welcome/integration-guides/langchain-python", + "destination": "/integrations/libraries/langchain-python" + }, + { + "source": "/welcome/integration-guides/azure-openai", + "destination": "/integrations/llms/azure-openai" + }, + { + "source": "/api-reference/chat-completions", + "destination": "/api-reference/inference-api/chat" + }, + { + 
"source": "/product/ai-gateway-streamline-llm-integrations/virtual-keys", + "destination": "/product/ai-gateway/virtual-keys" + }, + { + "source": "/welcome/integration-guides/llama-index-python", + "destination": "/integrations/libraries/llama-index-python" + }, + { + "source": "/welcome/integration-guides/anthropic", + "destination": "/integrations/llms/anthropic" + }, + { + "source": "/api-reference/gateway-for-other-apis", + "destination": "/api-reference/inference-api/gateway-for-other-apis" + }, + { + "source": "/welcome/integration-guides/ollama", + "destination": "/integrations/llms/ollama" + }, + { + "source": "/cookbooks/few-shot-prompting", + "destination": "/guides/use-cases/few-shot-prompting" + }, + { + "source": "/welcome/integration-guides/gemini", + "destination": "/integrations/llms/gemini" + }, + { + "source": "/api-reference/prompts", + "destination": "/api-reference/inference-api/prompts" + }, + { + "source": "/welcome/integration-guides/aws-bedrock", + "destination": "/integrations/llms/bedrock/aws-bedrock" + }, + { + "source": "/product/prompt-library/retrieve-prompt-templates", + "destination": "/api-reference/inference-api/prompts/render" + }, + { + "source": "/api-reference/embeddings", + "destination": "/api-reference/inference-api/embeddings" + }, + { + "source": "/api-reference/prompts/prompt-completion", + "destination": "/api-reference/inference-api/prompts/prompt-completion" + }, + { + "source": "/api-reference/completions", + "destination": "/api-reference/inference-api/completions" + }, + { + "source": "/welcome/integration-guides/anyscale-llama2-mistral-zephyr", + "destination": "/integrations/llms/anyscale-llama2-mistral-zephyr" + }, + { + "source": "/cookbooks/a-b-test-prompts-and-models", + "destination": "/guides/getting-started/a-b-test-prompts-and-models" + }, + { + "source": "/changelog/portkeys-december-migration", + "destination": "/support/portkeys-december-migration" + }, + { + "source": "/portkey-endpoints/configs", + 
"destination": "/api-reference/admin-api/control-plane/configs/create-config" + }, + { + "source": "/welcome/integration-guides/autogen", + "destination": "/integrations/agents/autogen" + }, + { + "source": "/api-reference/evals", + "destination": "/product/guardrails" + }, + { + "source": "/api-reference/feedback", + "destination": "/api-reference/admin-api/data-plane/feedback" + }, + { + "source": "/cookbooks/function-calling", + "destination": "/guides/getting-started/function-calling" + }, + { + "source": "/portkey-endpoints/virtual-keys", + "destination": "/api-reference/admin-api/control-plane/virtual-keys/create-virtual-key" + }, + { + "source": "/product/ai-gateway-streamline-llm-integrations/virtual-keys/budget-limits-enterprise-feature", + "destination": "/product/ai-gateway/virtual-keys/budget-limits" + }, + { + "source": "/portkey-endpoints/prompts", + "destination": "/api-reference/inference-api/prompts" + }, + { + "source": "/api-reference/completions-1", + "destination": "/api-reference/inference-api/completions" + }, + { + "source": "/portkey-endpoints/admin", + "destination": "/api-reference/admin-api/control-plane/users/retrieve-a-user" + }, + { + "source": "/cookbooks/enforcing-json-schema-with-anyscale-and-together", + "destination": "/guides/use-cases/enforcing-json-schema-with-anyscale-and-together" + }, + { + "source": "/api-reference/logs", + "destination": "/api-reference/admin-api/data-plane/logs/insert-a-log" + }, + { + "source": "/cookbooks/emotions-with-gpt-4o", + "destination": "/guides/use-cases/emotions-with-gpt-4o" + }, + { + "source": "/product/ai-gateway-streamline-llm-integrations/multimodal-capabilities/vision-1", + "destination": "/product/ai-gateway/multimodal-capabilities/speech-to-text" + }, + { + "source": "/portkey-endpoints/api-keys", + "destination": "/api-reference/admin-api/control-plane/api-keys/update-api-key" + }, + { + "source": "/portkey-endpoints/analytics/graphs-time-series-data", + "destination": 
"/api-reference/admin-api/control-plane/analytics/graphs-time-series-data/get-cache-hit-latency-data" + }, + { + "source": "/api-reference/virtual-keys", + "destination": "/api-reference/admin-api/control-plane/virtual-keys/create-virtual-key" + }, + { + "source": "/guides/practitioners-cookbooks/quickstarts/a-b-test-prompts-and-models", + "destination": "/guides/getting-started/a-b-test-prompts-and-models" + }, + { + "source": "/portkey-endpoints/logs", + "destination": "/api-reference/admin-api/data-plane/logs/insert-a-log" + }, + { + "source": "/integrations/llms/openai/structued-outputs", + "destination": "/integrations/llms/openai/structured-outputs" + }, + { + "source": "/portkey-endpoints/analytics", + "destination": "/api-reference/admin-api/control-plane/analytics/summary/get-all-cache-data" + }, + { + "source": "/portkey-endpoints/analytics/groups-paginated-data", + "destination": "/api-reference/admin-api/control-plane/analytics/groups-paginated-data/get-metadata-grouped-data" + }, + { + "source": "/portkey-endpoints/admin/workspaces", + "destination": "/api-reference/admin-api/control-plane/workspaces/create-workspace" + }, + { + "source": "/product/enterprise-offering/components/log-store/mongodb", + "destination": "/integrations/libraries/mongodb" + }, + { + "source": "/portkey-docs/introduction/getting-started", + "destination": "/introduction/what-is-portkey" + }, + { + "source": "/api-reference/admin-api/control-plane/admin/:slug*", + "destination": "/api-reference/admin-api/control-plane/:slug*" + }, + { + "source": "/integrations/llms/aws-bedrock", + "destination": "/integrations/llms/bedrock/aws-bedrock" + } + ] +} diff --git a/guides/getting-started/function-calling.mdx b/guides/getting-started/function-calling.mdx index 5cb65db..65c54dc 100644 --- a/guides/getting-started/function-calling.mdx +++ b/guides/getting-started/function-calling.mdx @@ -15,8 +15,7 @@ Function calling is currently supported on select models on 
**Anyscale**, **Toge We want the LLM to tell what's the temperature in Delhi today. We'll use a "Weather API" to fetch the weather: -```Node Node - +```js import Portkey from "portkey-ai"; const portkey = new Portkey({ @@ -25,9 +24,7 @@ const portkey = new Portkey({ }); // Describing what the Weather API does and expects - let tools = [ - { "type": "function", "function": { @@ -49,7 +46,6 @@ let tools = [ ]; let response = await portkey.chat.completions.create({ - model: "mistralai/Mixtral-8x7B-Instruct-v0.1", messages: [ {"role": "system", "content": "You are helpful assistant."}, @@ -57,7 +53,6 @@ let response = await portkey.chat.completions.create({ ], tools, tool_choice: "auto", // auto is default, yet explicit - }); console.log(response.choices[0].finish_reason) @@ -66,7 +61,7 @@ console.log(response.choices[0].finish_reason) Here, we've defined what the Weather API expects for its requests in the `tool` param, and set `tool_choice` to auto. So, based on the user messages, the LLM will decide if it should do a function call to fulfill the request. Here, it will choose to do that, and we'll see the following output: -```Node +```json { "role": "assistant", @@ -85,50 +80,38 @@ Here, we've defined what the Weather API expects for its requests in the `tool` We can just take the `tool_call` made by the LLM, and pass it to our `getWeather` function - it should return a proper response to our query. We then take that response and send it to our LLM to complete the loop: -```Node +```js /** - * getWeather(..) 
is a utility to call external weather service APIs - * Responds with: {"temperature": 20, "unit": "celsius"} - **/ let weatherData = await getWeather(JSON.parse(arguments)); - let content = JSON.stringify(weatherData); // Push assistant and tool message from earlier generated function arguments - messages.push(assistantMessage); // - messages.push({ - role: "tool", content: content, toolCallId: "call_x8we3xx" name: "getWeather" - }); let response = await portkey.chat.completions.create({ - model: "mistralai/Mixtral-8x7B-Instruct-v0.1", tools:tools, messages:messages, tool_choice: "auto", - }); - ``` We should see this final output: -```Node +```json { "role": "assistant", "content": "It's 30 degrees celsius in Delhi, India.", - } ``` @@ -145,17 +128,8 @@ Function Calling Workflow ## Supporting Models -While most providers have standard function calling as illustrated above, models on Together AI & select new models on OpenAI (`gpt-4-turbo-preview`, `gpt-4-0125-preview`, `gpt-4-1106-preview`, `gpt-3.5-turbo-0125`, and `gpt-3.5-turbo-1106`) also support **parallel function calling** \- here, you can pass multiple requests in a single query, the model will pick the relevant tool for each query, and return an array of `tool_calls` each with a unique ID. 
([Read here for more info](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling)) - -| Model/Provider | Standard Function Calling | Parallel Function Calling | -| --------------------------------------------------------------- | ----------------------------------------------------- | ------------------------- | -| mistralai/Mistral-7B-Instruct-v0.1 Anyscale | | | -| mistralai/Mixtral-8x7B-Instruct-v0.1Anyscale | | | -| mistralai/Mixtral-8x7B-Instruct-v0.1Together AI | | | -| mistralai/Mistral-7B-Instruct-v0.1Together AI | | | -| togethercomputer/CodeLlama-34b-InstructTogether AI | | | -| gpt-4 and previous releases OpenAI / Azure OpenAI | | (some) | -| gpt-3.5-turbo and previous releases OpenAI / Azure OpenAI | | (some) | -| firefunction-v1Fireworks | | | -| fw-function-call-34b-v0Fireworks | [](https://github.com/Portkey-AI/gateway/issues/335) | | -| gemini-1.0-progemini-1.0-pro-001gemini-1.5-pro-lates **Google** | [](https://github.com/Portkey-AI/gateway/issues/335) | | +Portkey's AI Gateway provides native function calling (also known as tool calling) support across our entire ecosystem of AI providers, including OpenAI, Anthropic, Google, Together AI, Fireworks AI, and many more. If you discover a function-calling capable LLM that isn't working with Portkey, please let us know [on Discord](https://portkey.wiki/community). + + + Portkey also supports parallel tool calling when available from the provider. This feature allows you to submit multiple requests in a single query. The model automatically selects the appropriate tool for each request and returns an array of `tool_calls`, each with a unique identifier. For more details, see the [parallel function calling documentation](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling). 
+ diff --git a/images/private-llm.png b/images/private-llm.png new file mode 100644 index 0000000..356fa83 Binary files /dev/null and b/images/private-llm.png differ diff --git a/integrations/llms/byollm.mdx b/integrations/llms/byollm.mdx index 94418a7..a099e06 100644 --- a/integrations/llms/byollm.mdx +++ b/integrations/llms/byollm.mdx @@ -2,101 +2,243 @@ title: "Bring Your Own LLM" --- -Portkey provides a robust and secure platform to observe, integrate, and manage your **locally or privately hosted custom models.** +Portkey supports integration with privately hosted LLMs, allowing you to seamlessly incorporate your private deployments into your AI infrastructure. This feature enables you to manage both private and commercial LLMs through a unified interface while leveraging Portkey's full suite of management and reliability features. -## Integrating Custom Models with Portkey SDK +## Key Benefits +- Unified API access across private and commercial LLMs +- Full compatibility with Portkey's reliability features +- Advanced load balancing capabilities +- Comprehensive monitoring and analytics +- Team-specific access controls +- Secure credential management -You can integrate any custom LLM with Portkey as long as it's API is compliant with any of the **15+** providers Portkey already supports. +## Integrate -### 1\. Install the Portkey SDK + +**Prerequisites** - - - ```sh - npm install --save portkey-ai - ``` +Your private LLM should be following the API specification of any of the existing Portkey-supported providers (e.g., OpenAI's `/chat/completions`, Anthropic's `/messages`, etc.). + - - - ```sh - pip install portkey-ai - ``` - +#### Add Deployment Details + + + +Navigate to the Virtual Keys section in your Portkey dashboard and create a new Virtual Key. Here, enable the "Local/Privately hosted provider" toggle. 
- +Now, configure your deployment: + - Select the matching provider API specification (typically, this may be `OpenAI`) + - Enter your model's base URL in the `Custom Host` field + - Add required authentication headers and their values +That's it! Portkey will generate a virtual key for your private LLM deployment that you can use anywhere. + +If you do not want to add your private LLM details to Portkey vault, you can also pass them while instantiating the Portkey client. +**[More on that here](#making-requests-without-virtual-keys).** + -### 2\. Initialize Portkey with your Custom URL -Instead of using a `provider` \+ `Authorization` pair or a `virtualKey` referring to the provider, you can specify a `provider` \+ `custom_host` pair while instantiating the Portkey client. -`custom_host` here refers to the URL where your custom model is hosted, including the API version identifier. +#### Sample Request +Now, you can make requests to Portkey using the newly generated virtual key: - - - ```js - import Portkey from 'portkey-ai' + - const portkey = new Portkey({ - apiKey: "PORTKEY_API_KEY", - provider: "PROVIDER_NAME", // This can be mistral-ai, openai, or anything else - customHost: "http://MODEL_URL/v1/", // Your custom URL with version identifier - Authorization: "AUTH_KEY", // If you need to pass auth - }) - ``` - - - ```python - from portkey_ai import Portkey - - portkey = Portkey( - api_key="PORTKEY_API_KEY", - provider="PROVIDER_NAME", # This can be mistral-ai, openai, or anything else - custom_host="http://MODEL_URL/v1/", # Your custom URL with version identifier - Authorization="AUTH_KEY", # If you need to pass auth - ) - ``` - +```js NodeJS +import Portkey from 'portkey-ai' - +const portkey = new Portkey({ + apiKey: "PORTKEY_API_KEY", + virtualKey: "VIRTUAL_KEY" +}) + +async function main() { + const response = await portkey.chat.completions.create({ + messages: [{ role: "user", content: "You are a helpful assistant." 
}], + model: "MODEL_NAME", + }); + + console.log(response.choices[0]); +} + +main(); +``` + +```py Python +from portkey_ai import Portkey + +portkey = Portkey( + api_key="PORTKEY_API_KEY", + virtual_key="VIRTUAL_KEY" +) + +response = portkey.chat.completions.create( + model="MODEL_NAME", + messages=[ + {"role": "user", "content": "Hello!"} + ] +) + +print(response.choices[0].message) +``` + +```sh cURL +curl https://api.portkey.ai/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "x-portkey-api-key: $PORTKEY_API_KEY" \ + -H "x-portkey-virtual-key: $PORTKEY_PROVIDER_VIRTUAL_KEY" \ + -d '{ + "model": "MODEL_NAME", + "messages": [ + { "role": "user", "content": "Hello!" } + ] + }' +``` + + + +## Advanced Features + +### Load Balancing +Deploy your private LLM across multiple servers and let Portkey handle load distribution: +- Automatic request distribution +- Health checking +- Failover management + +### Reliability Features +Private LLMs are fully compatible with Portkey's reliability suite: +- Automatic retries +- Request timeouts +- Fallback configurations +- Conditional routing +- Rate limiting +- Budget management + +### Access Control +Manage access to your private LLMs: +- Team-specific provisioning +- Role-based access control +- Usage monitoring per team/user +## Monitoring & Analytics +### Available Metrics +- Request volume +- Response latency +- Success rates +- User feedback +- Cost tracking (based on target provider specifications) +- Custom pricing support (coming soon) +### Usage Tracking +Monitor your private LLM usage alongside commercial providers: +- Unified dashboard +- Comparative analytics +- Cost optimization insights + +## Security + +### Credential Management +- Authentication credentials stored securely in Portkey vault +- Access managed exclusively through virtual keys +- No direct exposure of private LLM credentials + +### Network Security +- No special network configuration required +- Secure request routing +- 
End-to-end request encryption + + +## Best Practices + +1. **Testing & Validation** + - Test your private LLM integration in a staging environment + - Verify API compatibility before full deployment + - Monitor initial requests for expected behavior + +2. **Load Balancing** + - Start with conservative load distribution + - Monitor server performance + - Adjust balancing rules based on usage patterns + +3. **Access Control** + - Implement principle of least privilege + - Regularly review team access permissions + - Monitor usage patterns for security anomalies + +## Making Requests Without Virtual Keys + +You can also pass your private LLM details directly without using virtual keys. + +Instead of using a `virtualKey` referring to the deployment, you can specify a `provider` \+ `custom_host` pair while instantiating the Portkey client. + + +`custom_host` here refers to the URL where your custom model is hosted, including the API version identifier. More on `custom_host` [here](/product/ai-gateway/universal-api#integrating-local-or-private-models). + -### 3\. Invoke Chat Completions + -Use the Portkey SDK to invoke chat completions from your model, just as you would with any other provider. +```js NodeJS +import Portkey from 'portkey-ai' +const portkey = new Portkey({ + apiKey: "PORTKEY_API_KEY", + provider: "PROVIDER_NAME", // This can be openai, mistral-ai, or anything else + customHost: "http://MODEL_URL/v1/", // Your custom URL with version identifier + Authorization: "AUTH_KEY", + xApiKey: "API_KEY", + forwardHeaders: ["Authorization","xApiKey"] // Directly forward these headers +}) - - - ```js - const chatCompletion = await portkey.chat.completions.create({ - messages: [{ role: 'user', content: 'Say this is a test' }] - }); +async function main() { + const response = await portkey.chat.completions.create({ + messages: [{ role: "user", content: "You are a helpful assistant."
}] + }); - console.log(chatCompletion.choices); - ``` - - + console.log(response.choices[0]); +} -```python -completion = portkey.chat.completions.create( - messages= [{ "role": 'user', "content": 'Say this is a test' }] +main(); +``` + +```py Python +from portkey_ai import Portkey + +portkey = Portkey( + api_key="PORTKEY_API_KEY", + provider="PROVIDER_NAME", # This can be mistral-ai, openai, or anything else + custom_host="http://MODEL_URL/v1/", # Your custom URL with version identifier + Authorization="AUTH_KEY", + x_api_key="API_KEY", + forward_headers=["Authorization","xApiKey"] # Directly forward these headers ) -print(completion) +response = portkey.chat.completions.create( + messages=[ {"role": "user", "content": "Hello!"} ] +) ``` - - - +```sh cURL +curl https://api.portkey.ai/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "x-portkey-api-key: $PORTKEY_API_KEY" \ + -H "x-portkey-custom-host: $YOUR_DEPLOYMENT_URL" \ + -H "x-portkey-provider: openai" \ + -H "x-api-key: $YOUR_AUTH_DETAILS" \ + -H "x-portkey-forward-headers: ['x-api-key']" \ + -d '{ + "model": "MODEL_NAME", + "messages": [ + { "role": "user", "content": "Hello!" } + ] + }' +``` ## Forward Sensitive Headers Securely @@ -168,10 +310,7 @@ Example: If you have a header of the format `X-My-Custom-Header`, it should be s - - - -### Forward Headers in the Config Object +#### Forward Headers in Portkey Config You can also define `forward_headers` in your Config object and then pass the headers directly while making a request. @@ -205,12 +344,44 @@ You can also define `forward_headers` in your Config object and then pass the he } ``` -## Next Steps -Explore the complete list of features supported in the SDK: + + +## Troubleshooting + +Common issues and solutions: + +1. **Connection Issues** + - Verify base URL configuration + - Check authentication header format + - Confirm network connectivity + +2. 
**Performance Problems** + - Review load balancing settings + - Check server resources + - Monitor request timeouts + +3. **Authentication Errors** + - Verify credential configuration + - Check virtual key status + - Confirm team access permissions + +## FAQs + +**Q: Can I use any private LLM with Portkey?** +A: Yes, as long as it implements a supported provider's API specification. + +**Q: How do I handle multiple deployment endpoints?** +A: Configure multiple URLs in the load balancing settings for automatic distribution. + +**Q: Are there any request volume limitations?** +A: No specific limitations beyond your private LLM's capabilities. + +**Q: Can I use different models with the same private deployment?** +A: Yes, following the target provider's model specification format. - - +**Q: How are costs calculated for private LLMs?** +A: Currently based on target provider specifications, with custom pricing coming soon. --- diff --git a/mint.json b/mint.json index 4ea8c72..b29b730 100644 --- a/mint.json +++ b/mint.json @@ -211,6 +211,7 @@ "integrations/libraries/langchain-js", "integrations/libraries/librechat", "integrations/libraries/openwebui", + "integrations/libraries/janhq", "integrations/libraries/anythingllm", "integrations/libraries/llama-index-python", "integrations/libraries/promptfoo",