Commit
Showing 10 changed files with 485 additions and 0 deletions.
@@ -0,0 +1,15 @@
# Copyright (c) Microsoft. All rights reserved.

from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
    NVIDIAEmbeddingPromptExecutionSettings,
    NvidiaPromptExecutionSettings,
)
from semantic_kernel.connectors.ai.nvidia.services.nvidia_text_embedding import NvidiaTextEmbedding
from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings

__all__ = [
    "NVIDIAEmbeddingPromptExecutionSettings",
    "NvidiaPromptExecutionSettings",
    "NvidiaSettings",
    "NvidiaTextEmbedding",
]
1 change: 1 addition & 0 deletions
python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/__init__.py
@@ -0,0 +1 @@
# Copyright (c) Microsoft. All rights reserved.
43 changes: 43 additions & 0 deletions
...kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py
@@ -0,0 +1,43 @@
# Copyright (c) Microsoft. All rights reserved.

from typing import Annotated, Any, Literal

from pydantic import Field

from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings


class NvidiaPromptExecutionSettings(PromptExecutionSettings):
    """Settings for NVIDIA prompt execution."""

    format: Literal["json"] | None = None
    options: dict[str, Any] | None = None

    def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
        """Prepare the settings as a dictionary for sending to the AI service.

        By default, this method excludes the service_id and extension_data fields,
        as well as any fields whose value is None.
        """
        return self.model_dump(
            exclude={"service_id", "extension_data", "structured_json_response", "input_type"},
            exclude_none=True,
            by_alias=True,
        )


class NVIDIAEmbeddingPromptExecutionSettings(NvidiaPromptExecutionSettings):
    """Settings for NVIDIA embedding prompt execution.

    Specific settings for the text embedding endpoint.
    """

    input: str | list[str] | list[int] | list[list[int]] | None = None
    model: str | None = None
    encoding_format: Literal["float", "base64"] | None = "float"  # defaults to float
    truncate: Literal[None, "START", "END"] | None = None
    input_type: Literal["passage", "query"] | None = "passage"  # defaults to passage
    user: str | None = None
    extra_headers: dict | None = None
    extra_body: dict | None = None
    timeout: float | None = None
    dimensions: Annotated[int | None, Field(gt=0, le=3072)] = None
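For orientation, a small sketch of how these settings serialize before they are sent. The model ID and input values are illustrative assumptions, and the exact dict may vary slightly with the Semantic Kernel version:

```python
from semantic_kernel.connectors.ai.nvidia import NVIDIAEmbeddingPromptExecutionSettings

# Illustrative values; not taken from the diff.
settings = NVIDIAEmbeddingPromptExecutionSettings(
    input=["hello world"],
    model="nvidia/nv-embedqa-e5-v5",
    input_type="query",
    truncate="END",
)

# None-valued fields and the excluded keys (service_id, extension_data,
# structured_json_response, input_type) are dropped from the request body.
print(settings.prepare_settings_dict())
# Expected shape (approximate):
# {'input': ['hello world'], 'model': 'nvidia/nv-embedqa-e5-v5',
#  'encoding_format': 'float', 'truncate': 'END'}
```

Note that input_type is deliberately excluded here; the embedding service forwards it via extra_body instead.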
1 change: 1 addition & 0 deletions
python/semantic_kernel/connectors/ai/nvidia/services/__init__.py
@@ -0,0 +1 @@
# Copyright (c) Microsoft. All rights reserved.
92 changes: 92 additions & 0 deletions
python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py
@@ -0,0 +1,92 @@
# Copyright (c) Microsoft. All rights reserved.

import logging
from abc import ABC
from typing import Any, ClassVar, Union

from openai import AsyncOpenAI, AsyncStream
from openai.types import CreateEmbeddingResponse

from semantic_kernel.connectors.ai.nvidia import (
    NvidiaPromptExecutionSettings,
)
from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.const import USER_AGENT
from semantic_kernel.exceptions import ServiceResponseException
from semantic_kernel.kernel_pydantic import KernelBaseModel

logger: logging.Logger = logging.getLogger(__name__)

# Currently only embedding responses (lists of vectors) are supported; kept as a Union
# so additional response types can be added alongside chat/text completion support.
RESPONSE_TYPE = Union[list[Any],]


class NvidiaHandler(KernelBaseModel, ABC):
    """Internal class for calls to the NVIDIA APIs."""

    MODEL_PROVIDER_NAME: ClassVar[str] = "nvidia"
    client: AsyncOpenAI
    ai_model_type: NvidiaModelTypes = (
        NvidiaModelTypes.EMBEDDING
    )  # TODO: revert this to chat after adding support for chat completion  # noqa: TD002
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0

    async def _send_request(self, settings: PromptExecutionSettings) -> RESPONSE_TYPE:
        """Send a request to the NVIDIA API."""
        if self.ai_model_type == NvidiaModelTypes.EMBEDDING:
            assert isinstance(settings, NvidiaPromptExecutionSettings)  # nosec
            return await self._send_embedding_request(settings)

        raise NotImplementedError(f"Model type {self.ai_model_type} is not supported")

    async def _send_embedding_request(self, settings: NvidiaPromptExecutionSettings) -> list[Any]:
        """Send a request to the OpenAI-compatible embeddings endpoint."""
        try:
            # input_type is excluded from the main body by prepare_settings_dict
            # and is forwarded via extra_body instead.
            response = await self.client.embeddings.create(**settings.prepare_settings_dict(exclude="input_type"))

            self.store_usage(response)
            return [x.embedding for x in response.data]
        except Exception as ex:
            raise ServiceResponseException(
                f"{type(self)} service failed to generate embeddings",
                ex,
            ) from ex

    def store_usage(
        self,
        response: CreateEmbeddingResponse,
    ) -> None:
        """Store the usage information from the response."""
        if not isinstance(response, AsyncStream) and response.usage:
            logger.info(f"OpenAI usage: {response.usage}")
            self.prompt_tokens += response.usage.prompt_tokens
            self.total_tokens += response.usage.total_tokens
            if hasattr(response.usage, "completion_tokens"):
                self.completion_tokens += response.usage.completion_tokens

    def to_dict(self) -> dict[str, str]:
        """Create a dict of the service settings."""
        client_settings = {
            "api_key": self.client.api_key,
            "default_headers": {k: v for k, v in self.client.default_headers.items() if k != USER_AGENT},
        }
        if self.client.organization:
            client_settings["org_id"] = self.client.organization
        base = self.model_dump(
            exclude={
                "prompt_tokens",
                "completion_tokens",
                "total_tokens",
                "api_type",
                "ai_model_type",
                "service_id",
                "client",
            },
            by_alias=True,
            exclude_none=True,
        )
        base.update(client_settings)
        return base
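As a rough mental model of what `_send_embedding_request` ends up sending, here is a conceptual sketch based on the code above. The field values are assumptions rather than values from the diff, and the OpenAI client adds its own headers:

```python
# Conceptual sketch only; this dict mirrors what prepare_settings_dict() produces
# for a one-item batch and is passed to client.embeddings.create(**request_kwargs).
request_kwargs = {
    "input": ["What is Semantic Kernel?"],
    "model": "nvidia/nv-embedqa-e5-v5",      # assumed model ID
    "encoding_format": "float",
    "extra_body": {"input_type": "query"},   # NVIDIA-specific field, kept out of the main body
}
# The response is a CreateEmbeddingResponse: response.data[i].embedding holds the
# vector for input[i], and store_usage() accumulates response.usage.prompt_tokens
# and response.usage.total_tokens on the handler.
```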
12 changes: 12 additions & 0 deletions
python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py
@@ -0,0 +1,12 @@
# Copyright (c) Microsoft. All rights reserved.

from enum import Enum


class NvidiaModelTypes(Enum):
    """Nvidia model types, can be text, chat, embedding or text-to-image."""

    TEXT = "text"
    CHAT = "chat"
    EMBEDDING = "embedding"
    TEXT_TO_IMAGE = "text-to-image"
148 changes: 148 additions & 0 deletions
python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py
@@ -0,0 +1,148 @@
# Copyright (c) Microsoft. All rights reserved.

import logging
import sys
from typing import Any

if sys.version_info >= (3, 12):
    from typing import override  # pragma: no cover
else:
    from typing_extensions import override  # pragma: no cover

from numpy import array, ndarray
from openai import AsyncOpenAI
from pydantic import ValidationError

from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase
from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
    NVIDIAEmbeddingPromptExecutionSettings,
)
from semantic_kernel.connectors.ai.nvidia.services.nvidia_handler import NvidiaHandler
from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes
from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError
from semantic_kernel.utils.experimental_decorator import experimental_class

logger: logging.Logger = logging.getLogger(__name__)


@experimental_class
class NvidiaTextEmbedding(NvidiaHandler, EmbeddingGeneratorBase):
    """NVIDIA text embedding service."""

    def __init__(
        self,
        ai_model_id: str | None = None,
        api_key: str | None = None,
        base_url: str | None = None,
        client: AsyncOpenAI | None = None,
        env_file_path: str | None = None,
        service_id: str | None = None,
    ) -> None:
        """Initializes a new instance of the NvidiaTextEmbedding class.

        Args:
            ai_model_id (str): NVIDIA model card string, see
                https://Nvidia.co/sentence-transformers
            api_key: NVIDIA API key, see https://console.NVIDIA.com/settings/keys
                (Env var NVIDIA_API_KEY)
            base_url: The URL of the NVIDIA endpoint; for more information see
                https://docs.api.nvidia.com/nim/reference/. (Env var NVIDIA_BASE_URL)
            client (Optional[AsyncOpenAI]): An existing client to use. (Optional)
            env_file_path (str | None): Use the environment settings file as
                a fallback to environment variables. (Optional)
            service_id (str): Service ID for the model. (Optional)
        """
        try:
            nvidia_settings = NvidiaSettings.create(api_key=api_key, base_url=base_url, embedding_model_id=ai_model_id)
        except ValidationError as ex:
            raise ServiceInitializationError("Failed to create NVIDIA settings.", ex) from ex
        if not nvidia_settings.embedding_model_id:
            raise ServiceInitializationError("The NVIDIA embedding model ID is required.")
        if not (api_key or nvidia_settings.api_key):
            raise ServiceInitializationError("Please provide an api_key")
        if not client:
            client = AsyncOpenAI(api_key=nvidia_settings.api_key.get_secret_value(), base_url=nvidia_settings.base_url)
        super().__init__(
            ai_model_id=nvidia_settings.embedding_model_id,
            api_key=nvidia_settings.api_key.get_secret_value() if nvidia_settings.api_key else None,
            ai_model_type=NvidiaModelTypes.EMBEDDING,
            service_id=service_id or nvidia_settings.embedding_model_id,
            env_file_path=env_file_path,
            client=client,
        )

    @override
    async def generate_embeddings(
        self,
        texts: list[str],
        settings: "PromptExecutionSettings | None" = None,
        batch_size: int | None = None,
        **kwargs: Any,
    ) -> ndarray:
        raw_embeddings = await self.generate_raw_embeddings(texts, settings, batch_size, **kwargs)
        return array(raw_embeddings)

    @override
    async def generate_raw_embeddings(
        self,
        texts: list[str],
        settings: "NVIDIAEmbeddingPromptExecutionSettings | None" = None,
        batch_size: int | None = None,
        **kwargs: Any,
    ) -> Any:
        """Returns embeddings for the given texts in the unedited format.

        Args:
            texts (list[str]): The texts to generate embeddings for.
            settings (NVIDIAEmbeddingPromptExecutionSettings): The settings to use for the request.
            batch_size (int): The batch size to use for the request.
            kwargs (dict[str, Any]): Additional arguments to pass to the request.
        """
        if not settings:
            settings = NVIDIAEmbeddingPromptExecutionSettings(ai_model_id=self.ai_model_id)
        else:
            if not isinstance(settings, NVIDIAEmbeddingPromptExecutionSettings):
                settings = self.get_prompt_execution_settings_from_settings(settings)
        assert isinstance(settings, NVIDIAEmbeddingPromptExecutionSettings)  # nosec
        if settings.model is None:
            settings.model = self.ai_model_id
        for key, value in kwargs.items():
            setattr(settings, key, value)

        # move input_type to extra_body
        if not settings.extra_body:
            settings.extra_body = {}
        settings.extra_body.setdefault("input_type", settings.input_type)
        raw_embeddings = []
        batch_size = batch_size or len(texts)
        for i in range(0, len(texts), batch_size):
            batch = texts[i : i + batch_size]
            settings.input = batch
            raw_embedding = await self._send_request(settings=settings)
            assert isinstance(raw_embedding, list)  # nosec
            raw_embeddings.extend(raw_embedding)
        return raw_embeddings

    def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]:
        """Get the request settings class."""
        return NVIDIAEmbeddingPromptExecutionSettings

    @classmethod
    def from_dict(cls: type["NvidiaTextEmbedding"], settings: dict[str, Any]) -> "NvidiaTextEmbedding":
        """Initialize an NVIDIA service from a dictionary of settings.

        Args:
            settings: A dictionary of settings for the service.
        """
        return cls(
            ai_model_id=settings.get("ai_model_id"),
            api_key=settings.get("api_key"),
            env_file_path=settings.get("env_file_path"),
            service_id=settings.get("service_id"),
        )
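A minimal end-to-end sketch of the service, assuming NVIDIA_API_KEY is set in the environment and the default embedding model from NvidiaSettings is reachable; the service_id and query strings are illustrative:

```python
import asyncio

from semantic_kernel.connectors.ai.nvidia import (
    NVIDIAEmbeddingPromptExecutionSettings,
    NvidiaTextEmbedding,
)


async def main() -> None:
    # Falls back to NVIDIA_API_KEY / NVIDIA_EMBEDDING_MODEL_ID env vars when
    # api_key / ai_model_id are not passed explicitly.
    service = NvidiaTextEmbedding(service_id="nvidia-embeddings")
    settings = NVIDIAEmbeddingPromptExecutionSettings(input_type="query")
    vectors = await service.generate_embeddings(
        ["What is Semantic Kernel?", "How do I call a NIM endpoint?"],
        settings=settings,
        batch_size=2,
    )
    print(vectors.shape)  # (2, <embedding dimension>)


if __name__ == "__main__":
    asyncio.run(main())
```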
1 change: 1 addition & 0 deletions
python/semantic_kernel/connectors/ai/nvidia/settings/__init__.py
@@ -0,0 +1 @@
# Copyright (c) Microsoft. All rights reserved.
43 changes: 43 additions & 0 deletions
python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py
@@ -0,0 +1,43 @@
# Copyright (c) Microsoft. All rights reserved.

from typing import ClassVar

from pydantic import SecretStr

from semantic_kernel.kernel_pydantic import KernelBaseSettings


class NvidiaSettings(KernelBaseSettings):
    """Nvidia model settings.

    The settings are first loaded from environment variables with the prefix 'NVIDIA_'. If the
    environment variables are not found, the settings can be loaded from a .env file with the
    encoding 'utf-8'. If the settings are not found in the .env file, the settings are ignored;
    however, validation will fail alerting that the settings are missing.

    Optional settings for prefix 'NVIDIA_' are:
    - api_key: NVIDIA API key, see https://console.NVIDIA.com/settings/keys
        (Env var NVIDIA_API_KEY)
    - base_url: str | None - The URL of the NVIDIA endpoint; for more information see
        https://docs.api.nvidia.com/nim/reference/. (Env var NVIDIA_BASE_URL)
    - chat_model_id: The NVIDIA chat model ID to use, see https://docs.api.nvidia.com/nim/reference/llm-apis.
        (Env var NVIDIA_CHAT_MODEL_ID)
    - text_model_id: str | None - The NVIDIA text model ID to use, for example, nvidia/nemotron-4-340b-reward.
        (Env var NVIDIA_TEXT_MODEL_ID)
    - embedding_model_id: str | None - The NVIDIA embedding model ID to use, for example, nvidia/nv-embed-v1.
        (Env var NVIDIA_EMBEDDING_MODEL_ID)
    - text_to_image_model_id: str | None - The NVIDIA text-to-image model ID to use, for example, nvidia/neva-22b.
        (Env var NVIDIA_TEXT_TO_IMAGE_MODEL_ID)
    - env_file_path: if provided, the .env settings are read from this file path location.
    """

    env_prefix: ClassVar[str] = "NVIDIA_"

    api_key: SecretStr | None = None
    base_url: str | None = None
    chat_model_id: str | None = None
    text_model_id: str | None = None
    embedding_model_id: str | None = "nvidia/nv-embedqa-e5-v5"
    text_to_image_model_id: str | None = None
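Configuration can also be supplied programmatically rather than through `NVIDIA_*` environment variables or a .env file. A small sketch, where the key value is a placeholder and explicit arguments typically take precedence over the environment:

```python
from semantic_kernel.connectors.ai.nvidia import NvidiaSettings

# Placeholder API key; real keys should come from the environment or a .env file.
settings = NvidiaSettings.create(
    api_key="nvapi-...",
    embedding_model_id="nvidia/nv-embedqa-e5-v5",
)
print(settings.embedding_model_id)      # nvidia/nv-embedqa-e5-v5
print(type(settings.api_key).__name__)  # SecretStr; the raw key is not printed
```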