diff --git a/examples/_config.yaml b/examples/_config.yaml index 12f84a16..01a626db 100644 --- a/examples/_config.yaml +++ b/examples/_config.yaml @@ -17,6 +17,11 @@ providers: max_tokens: 200000 input_token_cost: 0.000003 output_token_cost: 0.000015 + claude-3-sonnet: + mode: chat + max_tokens: 200000 + input_token_cost: 0.000003 + output_token_cost: 0.000015 claude-3-haiku-20240307: mode: chat max_tokens: 200000 @@ -66,18 +71,134 @@ providers: min: 0 max: 500 step: 1 - ollama: - id: ollama - name: Ollama + bedrock: + id: bedrock + name: Bedrock ConverseAPI chat: true embed: true keys: + - BEDROCK_SECRET_KEY + - BEDROCK_ACCESS_KEY + - BEDROCK_REGION models: - llama2: + anthropic.claude-3-sonnet-20240229-v1:0: + mode: chat + max_tokens: 200000 + input_token_cost: 0.000003 + output_token_cost: 0.000015 + anthropic.claude-3-5-sonnet-20240620-v1:0: + mode: chat + max_tokens: 200000 + input_token_cost: 0.000003 + output_token_cost: 0.000015 + anthropic.claude-3-5-sonnet-20241022-v2:0: + mode: chat + max_tokens: 200000 + input_token_cost: 0.000003 + output_token_cost: 0.000015 + anthropic.claude-3-haiku-20240307-v1:0: + mode: chat + max_tokens: 200000 + input_token_cost: 0.00000025 + output_token_cost: 0.00000125 + anthropic.claude-3-5-haiku-20241022-v1:0: + mode: chat + max_tokens: 200000 + input_token_cost: 0.000001 + output_token_cost: 0.000005 + anthropic.claude-3-opus-20240229-v1:0: + mode: chat + max_tokens: 200000 + input_token_cost: 0.000015 + output_token_cost: 0.000075 + anthropic.claude-instant-v1: mode: chat - max_tokens: 0 + max_tokens: 100000 + input_token_cost: 0.0000008 + output_token_cost: 0.000024 + anthropic.claude-v2: + mode: chat + max_tokens: 100000 + input_token_cost: 0.000008 + output_token_cost: 0.000024 + anthropic.claude-v2:1: + mode: chat + max_tokens: 100000 + input_token_cost: 0.000008 + output_token_cost: 0.000024 + us.amazon.nova-pro-v1:0: + mode: chat + max_tokens: 300000 + input_token_cost: 0.0000008 + output_token_cost: 0.0000016 + us.amazon.nova-lite-v1:0: + mode: chat + max_tokens: 300000 + input_token_cost: 0.00000006 + output_token_cost: 0.00000012 + us.amazon.nova-micro-v1:0: + mode: chat + max_tokens: 128000 + input_token_cost: 0.000000035 + output_token_cost: 0.00000007 + + parameters: + temperature: + name: "Temperature" + type: float + default: 1 + min: 0 + max: 1 + step: 0.01 + max_tokens: + name: "Maximum tokens" + type: float + default: 256 + min: 1 + max: 4096 + step: 0.01 + top_p: + name: "Top P" + type: float + default: 1 + min: 0 + max: 1 + step: 0.01 + top_k: + name: "Top K" + type: float + default: 5 + min: 0 + max: 500 + step: 1 + self-hosted: + id: self-hosted + name: Self Hosted + chat: true + embed: true + keys: + models: + deepseek-r1:1.5b: + mode: chat + max_tokens: 200000 input_token_cost: 0 output_token_cost: 0 + + deepseek-r1-tool-calling: + mode: chat + max_tokens: 128000 + input_token_cost: 0 + output_token_cost: 0 + llama3.2: + mode: chat + max_tokens: 200000 + input_token_cost: 0 + output_token_cost: 0 + Llama-3-3-70B-Instruct-llmstudio: + mode: chat + max_tokens: 200000 + input_token_cost: 0.00000071 + output_token_cost: 0.00000071 parameters: temperature: name: "Temperature" @@ -115,6 +236,24 @@ providers: keys: - OPENAI_API_KEY models: + o1-preview: + mode: chat + max_completion_tokens: 128000 + input_token_cost: 0.000015 + cached_token_cost: 0.0000075 + output_token_cost: 0.000060 + o1-mini: + mode: chat + max_completion_tokens: 128000 + input_token_cost: 0.0000011 + cached_token_cost: 0.00000055 + output_token_cost: 
0.0000044 + o3-mini: + mode: chat + max_completion_tokens: 200000 + input_token_cost: 0.0000011 + cached_token_cost: 0.00000055 + output_token_cost: 0.0000044 o1-preview: mode: chat max_completion_tokens: 128000 @@ -204,6 +343,18 @@ providers: - AZURE_API_ENDPOINT - AZURE_API_VERSION models: + o1-preview: + mode: chat + max_completion_tokens: 128000 + input_token_cost: 0.0000165 + cached_token_cost: 0.00000825 + output_token_cost: 0.000066 + o1-mini: + mode: chat + max_completion_tokens: 128000 + input_token_cost: 0.0000033 + cached_token_cost: 0.00000165 + output_token_cost: 0.0000132 gpt-4o-mini: mode: chat max_tokens: 128000 @@ -212,8 +363,9 @@ gpt-4o: mode: chat max_tokens: 128000 - input_token_cost: 0.000005 - output_token_cost: 0.000015 + input_token_cost: 0.0000025 + cached_token_cost: 0.00000125 + output_token_cost: 0.00001 gpt-4-turbo: mode: chat max_tokens: 128000 diff --git a/examples/core.py b/examples/core.py index 1eab6c81..b8da2d87 100644 --- a/examples/core.py +++ b/examples/core.py @@ -5,10 +5,12 @@ from pprint import pprint import os import asyncio +import asyncio from dotenv import load_dotenv load_dotenv() -def run_provider(provider, model, api_key=None, **kwargs): +def run_provider(provider, model, api_key=None, **kwargs): + print(f"\n\n###RUNNING for <{provider}>, <{model}> ###") print(f"\n\n###RUNNING for <{provider}>, <{model}> ###") llm = LLMCore(provider=provider, api_key=api_key, **kwargs) @@ -58,7 +60,7 @@ def run_provider(provider, model, api_key=None, **kwargs): print("\nAsync Stream") async def async_stream(): - chat_request = build_chat_request(model, chat_input="Hello, my name is Tom Json", is_stream=True) + chat_request = build_chat_request(model, chat_input="Hello, my name is Tom", is_stream=True) response_async = await llm.achat(**chat_request) async for p in response_async: @@ -74,7 +76,7 @@ async def async_stream(): print("\nSync Non-Stream") - chat_request = build_chat_request(model, chat_input="Hello, my name is Alice Json", is_stream=False) + chat_request = build_chat_request(model, chat_input="Hello, my name is Alice", is_stream=False) response_sync = llm.chat(**chat_request) pprint(response_sync) @@ -82,7 +84,7 @@ async def async_stream(): print("\nSync Stream") - chat_request = build_chat_request(model, chat_input="Hello, my name is Mary Json", is_stream=True) + chat_request = build_chat_request(model, chat_input="Hello, my name is Mary", is_stream=True) response_sync_stream = llm.chat(**chat_request) for p in response_sync_stream: @@ -126,7 +128,6 @@ def build_chat_request(model: str, chat_input: str, is_stream: bool, max_tokens: "parameters": { "temperature": 0, "max_tokens": max_tokens, - "response_format": {"type": "json_object"}, "functions": None, } } @@ -138,29 +139,75 @@ def multiple_provider_runs(provider:str, model:str, num_runs:int, api_key:str, * latencies = run_provider(provider=provider, model=model, api_key=api_key, **kwargs) pprint(latencies) - +def run_chat_all_providers(): + # OpenAI + multiple_provider_runs(provider="openai", model="gpt-4o-mini", api_key=os.environ["OPENAI_API_KEY"], num_runs=1) + multiple_provider_runs(provider="openai", model="o3-mini", api_key=os.environ["OPENAI_API_KEY"], num_runs=1) + #multiple_provider_runs(provider="openai", model="o1-preview", api_key=os.environ["OPENAI_API_KEY"], num_runs=1) + -# OpenAI -multiple_provider_runs(provider="openai", model="gpt-4o-mini", api_key=os.environ["OPENAI_API_KEY"], num_runs=1) -multiple_provider_runs(provider="openai", model="o3-mini",
api_key=os.environ["OPENAI_API_KEY"], num_runs=1) -#multiple_provider_runs(provider="openai", model="o1-preview", api_key=os.environ["OPENAI_API_KEY"], num_runs=1) + # Azure + multiple_provider_runs(provider="azure", model="gpt-4o-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="azure", model="gpt-4o", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) -# Azure -multiple_provider_runs(provider="azure", model="gpt-4o-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) -#multiple_provider_runs(provider="azure", model="gpt-4o", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) -#multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) -#multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="anthropic", model="claude-3-opus-20240229", num_runs=1, api_key=os.environ["ANTHROPIC_API_KEY"]) + #multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + #multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) -#multiple_provider_runs(provider="anthropic", model="claude-3-opus-20240229", num_runs=1, api_key=os.environ["ANTHROPIC_API_KEY"]) -#multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) -#multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"]) + multiple_provider_runs(provider="vertexai", model="gemini-1.5-flash", num_runs=1, api_key=os.environ["GOOGLE_API_KEY"]) + # Bedrock + multiple_provider_runs(provider="bedrock", model="us.amazon.nova-lite-v1:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) + #multiple_provider_runs(provider="bedrock", model="anthropic.claude-3-5-sonnet-20241022-v2:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) -multiple_provider_runs(provider="vertexai", 
model="gemini-1.5-flash", num_runs=1, api_key=os.environ["GOOGLE_API_KEY"]) +run_chat_all_providers() -# Bedrock -multiple_provider_runs(provider="bedrock", model="us.amazon.nova-lite-v1:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) -#multiple_provider_runs(provider="bedrock", model="anthropic.claude-3-5-sonnet-20241022-v2:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) + +import base64 + +def messages(img_path): + """ + Creates a message payload with both text and image. + Adapts format based on the provider. + """ + with open(img_path, "rb") as f: + image_bytes = f.read() + + base64_image = base64.b64encode(image_bytes).decode("utf-8") + return [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}, + }, + { + "type": "image_url", + "image_url": {"url": "https://awsmp-logos.s3.amazonaws.com/seller-zx4pk43qpmxoa/53d235806f343cec94aac3c577d81c13.png"}, + }, + ], + } + ] + +def run_send_imgs(): + provider="bedrock" + model="us.amazon.nova-lite-v1:0" + chat_input=messages(img_path="./libs/llmstudio/tests/integration_tests/test_data/llmstudio-logo.jpeg") + chat_request = build_chat_request(model=model, chat_input=chat_input, is_stream=False) + llm = LLMCore(provider=provider, api_key=os.environ["OPENAI_API_KEY"], region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) + response_sync = llm.chat(**chat_request) + #print(response_sync) + response_sync.clean_print() + + #for p in response_sync: + # if p.metrics: + # p.clean_print() + +run_send_imgs() \ No newline at end of file diff --git a/examples/llm_proxy.py b/examples/llm_proxy.py index aae2091c..8d98a0a4 100644 --- a/examples/llm_proxy.py +++ b/examples/llm_proxy.py @@ -4,23 +4,146 @@ from llmstudio.providers import LLM from llmstudio_proxy.provider import ProxyConfig +from llmstudio_tracker.prompt_manager.manager import PromptManager +from llmstudio_tracker.prompt_manager.schemas import PromptDefault +# from llmstudio_core.agents import AgentManagerCore +import os +# from llmstudio_core.agents.data_models import ToolOuput -# from llmstudio_core.providers import LLMCore as LLM -# from llmstudio.providers import LLM +# instructions = "You are a weather bot. Use the provided functions to answer questions." +# agent_manager = AgentManagerCore("openai",api_key=os.environ["OPENAI_API_KEY"]) -llm = LLM(provider="openai", +tools=[ + { + "type": "function", + "function": { + "name": "get_current_temperature", + "description": "Get the current temperature for a specific location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g., San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": ["Celsius", "Fahrenheit"], + "description": "The temperature unit to use. Infer this from the user's location." 
+ } + }, + "required": ["location", "unit"] + } + } + }, + { + "type": "function", + "function": { + "name": "get_rain_probability", + "description": "Get the probability of rain for a specific location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g., San Francisco, CA" + } + }, + "required": ["location"] + } + } + } + ] + +# file_id = agent_manager.upload_file("/Users/brunoalho/Downloads/Tensor87/dfs/original_df.csv") + +# params = { +# "name":"Math tutor", +# "model":"gpt-4o", +# "tools":tools, +# "instructions":instructions, +# } + +# agent = agent_manager.create_agent(params) + +# messages =[{"role":"user","content":"What's the weather in San Francisco today and the likelihood it'll rain?"}] +# # messages =[{"role":"user","content":"How much is 5+5?"}] + +# run = agent_manager.run_agent(agent,messages) + +# result = agent_manager.retrieve_result(run) +# print(result) +# tool_outputs = [] +# for tool in result.required_action.submit_tools_outputs: +# if tool.function.name == "get_current_temperature": +# tool_outputs.append( +# ToolOuput(tool_call_id=tool.id, +# output="57") +# ) +# elif tool.function.name == "get_rain_probability": +# tool_outputs.append( +# ToolOuput(tool_call_id=tool.id, +# output="0.06")) + +# run.tool_outputs = tool_outputs +# result = agent_manager.submit_tool_outputs(run) +# print(result) + + + +from llmstudio_core.providers import LLMCore as LLM +from llmstudio.providers import LLM + +llm = LLM(provider="azure", proxy_config=ProxyConfig(host="0.0.0.0", port="8001"), tracking_config=TrackingConfig(host="0.0.0.0", port="8002"), session_id="sync") -result = llm.chat("Write a paragfraph about space", model="gpt-4o",) +messages =[{"role":"user","content":"What's the weather in San Francisco today and the likelihood it'll rain?"}] + + +# import asyncio + +# prompt_object = PromptDefault(prompt="textststts", +# name="bruni", +# label="production", +# model="bruno", +# provider="bruno", +# is_active=True) + +# prompt_manager = PromptManager(tracking_config=TrackingConfig(host="0.0.0.0", port="8002")) + +# # result = prompt_manager.add_prompt(prompt_object) +# # print(result.text) + +import json +# prompt_object = PromptDefault(**json.loads(result.text)) +# result = prompt_manager.get_prompt(prompt_id="test") +# print(result.text) + +# # result = prompt_manager.get_prompt(name="bruno", +# # model=prompt_object.model, +# # provider=prompt_object.provider) +# # print(result.text) + +# prompt_object.prompt="ola teste" +# result = prompt_manager.update_prompt(prompt_object) +# print(result.text) + +# result = prompt_manager.delete_prompt(prompt_object) +# print(result) + +result = llm.chat("What's the weather in San Francisco today and the likelihood it'll rain?", + model="gpt-4o-mini", + parameters={"tools":tools} + ) print(result) -llm = LLM(provider="openai", +llm = LLM(provider="self-hosted", proxy_config=ProxyConfig(host="0.0.0.0", port="8001"), tracking_config=TrackingConfig(host="0.0.0.0", port="8002"), session_id="sync stream") -response = llm.chat("Write a paragfraph about space", model="gpt-4o", is_stream=True) +response = llm.chat("Write a paragfraph about space", model="gpt-4o-mini", is_stream=True) for i, chunk in enumerate(response): if i%20==0: print("\n") diff --git a/libs/core/llmstudio_core/config.yaml b/libs/core/llmstudio_core/config.yaml index 46813bd6..486466a8 100644 --- a/libs/core/llmstudio_core/config.yaml +++ b/libs/core/llmstudio_core/config.yaml @@ -171,18 
+171,34 @@ providers: min: 0 max: 500 step: 1 - ollama: - id: ollama - name: Ollama + self-hosted: + id: self-hosted + name: Self Hosted chat: true embed: true keys: models: - llama2: + deepseek-r1:1.5b: mode: chat - max_tokens: 0 + max_tokens: 128000 + input_token_cost: 0 + output_token_cost: 0 + + deepseek-r1-tool-calling: + mode: chat + max_tokens: 128000 input_token_cost: 0 output_token_cost: 0 + llama3.2: + mode: chat + max_tokens: 200000 + input_token_cost: 0 + output_token_cost: 0 + Llama-3-3-70B-Instruct-llmstudio: + mode: chat + max_tokens: 200000 + input_token_cost: 0.00000071 + output_token_cost: 0.00000071 parameters: temperature: name: "Temperature" diff --git a/libs/core/llmstudio_core/providers/__init__.py b/libs/core/llmstudio_core/providers/__init__.py index 330fe48e..1ba7b2e8 100644 --- a/libs/core/llmstudio_core/providers/__init__.py +++ b/libs/core/llmstudio_core/providers/__init__.py @@ -4,7 +4,7 @@ from llmstudio_core.providers.azure import AzureProvider from llmstudio_core.providers.bedrock_converse import BedrockConverseProvider -# from llmstudio_core.providers.ollama import OllamaProvider #TODO: adapt it +from llmstudio_core.providers.self_hosted import SelfHostedProvider from llmstudio_core.providers.openai import OpenAIProvider from llmstudio_core.providers.provider import ProviderCore, provider_registry from llmstudio_core.providers.vertexai import VertexAIProvider diff --git a/libs/core/llmstudio_core/providers/azure.py b/libs/core/llmstudio_core/providers/azure.py index f558f9d6..95f3b9ca 100644 --- a/libs/core/llmstudio_core/providers/azure.py +++ b/libs/core/llmstudio_core/providers/azure.py @@ -1,22 +1,10 @@ -import ast -import json import os -import time -import uuid from typing import Any, AsyncGenerator, Generator, Union import openai from llmstudio_core.exceptions import ProviderError from llmstudio_core.providers.provider import ChatRequest, ProviderCore, provider -from openai import AzureOpenAI, OpenAI -from openai.types.chat import ChatCompletionChunk -from openai.types.chat.chat_completion_chunk import ( - Choice, - ChoiceDelta, - ChoiceDeltaFunctionCall, - ChoiceDeltaToolCall, - ChoiceDeltaToolCallFunction, -) +from openai import AzureOpenAI @provider @@ -27,28 +15,18 @@ def __init__( api_key=None, api_endpoint=None, api_version=None, - base_url=None, **kwargs, ): super().__init__(config, **kwargs) self.API_KEY = api_key or os.getenv("AZURE_API_KEY") self.API_ENDPOINT = api_endpoint self.API_VERSION = api_version or os.getenv("AZURE_API_VERSION") - self.BASE_URL = base_url - self.is_llama = False - self.has_tools_functions = False - - if self.BASE_URL and (self.API_ENDPOINT is None): - self._client = OpenAI( - api_key=self.API_KEY, - base_url=self.BASE_URL, - ) - else: - self._client = AzureOpenAI( - api_key=self.API_KEY, - azure_endpoint=self.API_ENDPOINT, - api_version=self.API_VERSION, - ) + + self._client = AzureOpenAI( + api_key=self.API_KEY, + azure_endpoint=self.API_ENDPOINT, + api_version=self.API_VERSION, + ) @staticmethod def _provider_config_name(): @@ -82,68 +60,20 @@ def generate_client(self, request: ChatRequest) -> Any: returned from the API. 
""" - self.is_llama = "llama" in request.model.lower() - self.is_openai = "gpt" in request.model.lower() - self.has_tools = request.parameters.get("tools") is not None - self.has_functions = request.parameters.get("functions") is not None - try: - messages = self.prepare_messages(request) - - tool_args = {} - if not self.is_llama and self.has_tools and self.is_openai: - tool_args = { - "tools": request.parameters.get("tools"), - "tool_choice": "auto" if request.parameters.get("tools") else None, - } - - function_args = {} - if not self.is_llama and self.has_functions and self.is_openai: - function_args = { - "functions": request.parameters.get("functions"), - "function_call": "auto" - if request.parameters.get("functions") - else None, - } - - base_args = { - "model": request.model, - "messages": messages, - "stream": True, - } - - combined_args = { - **base_args, - **tool_args, - **function_args, + return self._client.chat.completions.create( + model=request.model, + messages=( + [{"role": "user", "content": request.chat_input}] + if isinstance(request.chat_input, str) + else request.chat_input + ), + stream=True, + stream_options={"include_usage": True}, **request.parameters, - } - return self._client.chat.completions.create(**combined_args) - - except openai._exceptions.APIConnectionError as e: - raise ProviderError(f"There was an error reaching the endpoint: {e}") - - except openai._exceptions.APIStatusError as e: - raise ProviderError(e.response.json()) - - def prepare_messages(self, request: ChatRequest): - if self.is_llama and (self.has_tools or self.has_functions): - user_message = self.convert_to_openai_format(request.chat_input) - content = "<|begin_of_text|>" - content = self.build_llama_system_message( - user_message, - content, - request.parameters.get("tools"), - request.parameters.get("functions"), - ) - content = self.build_llama_conversation(user_message, content) - return [{"role": "user", "content": content}] - else: - return ( - [{"role": "user", "content": request.chat_input}] - if isinstance(request.chat_input, str) - else request.chat_input ) + except openai._exceptions.APIError as e: + raise ProviderError(str(e)) async def aparse_response( self, response: AsyncGenerator, **kwargs diff --git a/libs/core/llmstudio_core/providers/bedrock_converse.py b/libs/core/llmstudio_core/providers/bedrock_converse.py index dc756e0a..abc990f7 100644 --- a/libs/core/llmstudio_core/providers/bedrock_converse.py +++ b/libs/core/llmstudio_core/providers/bedrock_converse.py @@ -1,5 +1,7 @@ +import base64 import json import os +import re import time import uuid from typing import ( @@ -14,6 +16,7 @@ ) import boto3 +import requests from llmstudio_core.exceptions import ProviderError from llmstudio_core.providers.provider import ChatRequest, ProviderCore, provider from llmstudio_core.utils import OpenAIToolFunction @@ -276,6 +279,34 @@ def _process_messages( } ) messages.append(tool_use) + elif isinstance(message.get("content"), list): + converse_content_list = [] + for content in message.get("content"): + converse_content = {} + if content.get("type") == "text": + converse_content["text"] = content.get("text") + elif content.get("type") == "image_url": + image_url = content.get("image_url")["url"] + bytes_image = BedrockConverseProvider._get_image_bytes( + image_url + ) + format = ( + BedrockConverseProvider._get_img_format_from_bytes( + bytes_image + ) + ) + converse_content["image"] = { + "format": format, + "source": {"bytes": bytes_image}, + } + 
converse_content_list.append(converse_content) + + messages.append( + { + "role": message.get("role"), + "content": converse_content_list, + } + ) else: messages.append( { @@ -303,6 +334,62 @@ def _process_messages( return messages, system_prompt + @staticmethod + def _base64_to_bytes(image_url: str) -> bytes: + """ + Extracts and decodes Base64 image data from a 'data:image/...;base64,...' URL. + Returns the raw image bytes. + """ + if not image_url.startswith("data:image/"): + raise ValueError("Invalid Base64 image URL") + + base64_data = re.sub(r"^data:image/[^;]+;base64,", "", image_url) + + return base64.b64decode(base64_data) + + @staticmethod + def _get_img_format_from_bytes(image_bytes: bytes) -> str: + """ + Determines the image format from raw image bytes using file signatures (magic numbers). + """ + if image_bytes.startswith(b"\xFF\xD8\xFF"): + return "jpeg" + elif image_bytes.startswith(b"\x89PNG\r\n\x1A\n"): + return "png" + elif image_bytes.startswith(b"GIF87a") or image_bytes.startswith(b"GIF89a"): + return "gif" + elif ( + image_bytes.startswith(b"\x52\x49\x46\x46") and image_bytes[8:12] == b"WEBP" + ): + return "webp" + elif image_bytes.startswith(b"\x49\x49\x2A\x00") or image_bytes.startswith( + b"\x4D\x4D\x00\x2A" + ): + return "tiff" + else: + raise ValueError("Unknown image format") + + @staticmethod + def _get_image_bytes(image_url: str) -> bytes: + """ + Converts an image URL to a Base64-encoded string. + - If already in 'data:image/...;base64,...' format, it returns as-is. + - If it's a normal URL, downloads and encodes the image in Base64. + """ + if image_url.startswith("data:image/"): + return BedrockConverseProvider._base64_to_bytes(image_url) + + elif image_url.startswith(("http://", "https://")): + response = requests.get(image_url) + if response.status_code != 200: + raise ValueError(f"Failed to download image: {response.status_code}") + + image_bytes = response.content + return image_bytes + + else: + raise ValueError("Invalid image URL format") + @staticmethod def _process_tools(parameters: dict) -> Optional[Dict]: if parameters.get("tools") is None and parameters.get("functions") is None: diff --git a/libs/core/llmstudio_core/providers/data_structures.py b/libs/core/llmstudio_core/providers/data_structures.py index 374ad6eb..85c9482e 100644 --- a/libs/core/llmstudio_core/providers/data_structures.py +++ b/libs/core/llmstudio_core/providers/data_structures.py @@ -1,4 +1,5 @@ -from typing import Any, List, Optional +import copy +from typing import Any, List, Optional, Union from openai.types.chat import ChatCompletion, ChatCompletionChunk from pydantic import BaseModel @@ -90,8 +91,68 @@ def items(self): return self.model_dump().items() -class ChatCompletionLLMstudio(ChatCompletion): - chat_input: str +class ChatCompletionLLMstudioBase: + """ + Base class to share the methods between different ChatCompletionLLMstudio classes. + """ + + def clean_print(self): + """ + Custom representation of the class to prevent large fields from bloating the output. + Ensures missing fields are handled gracefully without errors. + """ + data = copy.deepcopy(self.model_dump()) + + def clean_large_fields(d): + """ + Recursively traverses the dictionary to replace large image Base64 data + with a placeholder while ensuring missing fields do not cause errors. 
+ """ + for key, value in d.items(): + if isinstance(value, list): + for item in value: + if isinstance(item, dict): + # Handle image_url directly under chat_input or context + if "image_url" in item and isinstance( + item["image_url"], dict + ): + if "url" in item["image_url"] and isinstance( + item["image_url"]["url"], str + ): + if item["image_url"]["url"].startswith( + "data:image/" + ): + item["image_url"][ + "url" + ] = "