Skip to content

Commit

Permalink
nvidia embedding connector code
Browse files Browse the repository at this point in the history
  • Loading branch information
raspawar committed Feb 5, 2025
1 parent 4d2b2be commit 5b82ab1
Show file tree
Hide file tree
Showing 10 changed files with 485 additions and 0 deletions.
15 changes: 15 additions & 0 deletions python/semantic_kernel/connectors/ai/nvidia/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) Microsoft. All rights reserved.

"""Public API of the NVIDIA AI connector.

Re-exports the prompt-execution-settings classes, the text-embedding
service, and the environment-backed settings class so callers can import
them directly from ``semantic_kernel.connectors.ai.nvidia``.
"""

from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
    NVIDIAEmbeddingPromptExecutionSettings,
    NvidiaPromptExecutionSettings,
)
from semantic_kernel.connectors.ai.nvidia.services.nvidia_text_embedding import NvidiaTextEmbedding
from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings

__all__ = [
    "NVIDIAEmbeddingPromptExecutionSettings",
    "NvidiaPromptExecutionSettings",
    "NvidiaSettings",
    "NvidiaTextEmbedding",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Copyright (c) Microsoft. All rights reserved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright (c) Microsoft. All rights reserved.

from typing import Annotated, Any, Literal

from pydantic import Field

from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings


class NvidiaPromptExecutionSettings(PromptExecutionSettings):
    """Settings for NVIDIA prompt execution."""

    format: Literal["json"] | None = None
    options: dict[str, Any] | None = None

    def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
        """Serialize these settings into the request body for the AI service.

        Drops the bookkeeping fields (``service_id``, ``extension_data``),
        ``structured_json_response``, and ``input_type`` (sent out of band),
        plus any field whose value is ``None``.
        """
        excluded_fields = {
            "service_id",
            "extension_data",
            "structured_json_response",
            "input_type",
        }
        return self.model_dump(
            exclude=excluded_fields,
            exclude_none=True,
            by_alias=True,
        )


class NVIDIAEmbeddingPromptExecutionSettings(NvidiaPromptExecutionSettings):
    """Settings specific to the NVIDIA text-embedding endpoint."""

    # Text(s)/token(s) to embed; filled in per batch by the embedding service.
    input: str | list[str] | list[int] | list[list[int]] | None = None
    # Fixed: was `model: str = None`, a None default on a non-optional str.
    model: str | None = None
    encoding_format: Literal["float", "base64"] | None = "float"  # default to float
    # Fixed: `Literal[None, "START", "END"] | None` redundantly doubled None.
    truncate: Literal["START", "END"] | None = None
    input_type: Literal["passage", "query"] | None = "passage"  # default to passage
    user: str | None = None
    extra_headers: dict | None = None
    # Extra request-body fields; the service moves input_type in here.
    extra_body: dict | None = None
    timeout: float | None = None
    # Output vector size; the upper bound matches current NVIDIA embedding models.
    dimensions: Annotated[int | None, Field(gt=0, le=3072)] = None
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Copyright (c) Microsoft. All rights reserved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Copyright (c) Microsoft. All rights reserved.

import logging
from abc import ABC
from typing import Any, ClassVar, Union

from openai import AsyncOpenAI, AsyncStream
from openai.types import CreateEmbeddingResponse

from semantic_kernel.connectors.ai.nvidia import (
NvidiaPromptExecutionSettings,
)
from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.const import USER_AGENT
from semantic_kernel.exceptions import ServiceResponseException
from semantic_kernel.kernel_pydantic import KernelBaseModel

logger: logging.Logger = logging.getLogger(__name__)

# Response payload type for NVIDIA service calls. Only the embedding endpoint
# is supported today, which yields a list of embedding vectors; the original
# single-member `Union[list[Any],]` collapses to exactly this type.
RESPONSE_TYPE = list[Any]


class NvidiaHandler(KernelBaseModel, ABC):
    """Internal base class that performs the HTTP calls to NVIDIA's API.

    Uses an OpenAI-compatible async client pointed at the NVIDIA endpoint and
    accumulates token-usage counters across requests.
    """

    MODEL_PROVIDER_NAME: ClassVar[str] = "nvidia"
    # OpenAI-compatible async client configured with the NVIDIA base_url.
    client: AsyncOpenAI
    ai_model_type: NvidiaModelTypes = (
        NvidiaModelTypes.EMBEDDING
    )  # TODO: revert this to chat after adding support for chat-compl # noqa: TD002
    # Running usage counters, updated by store_usage() after each response.
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0

    async def _send_request(self, settings: PromptExecutionSettings) -> RESPONSE_TYPE:
        """Dispatch a request to the NVIDIA API based on the configured model type.

        Raises:
            NotImplementedError: for any model type other than EMBEDDING.
        """
        if self.ai_model_type == NvidiaModelTypes.EMBEDDING:
            assert isinstance(settings, NvidiaPromptExecutionSettings)  # nosec
            return await self._send_embedding_request(settings)

        raise NotImplementedError(f"Model type {self.ai_model_type} is not supported")

    async def _send_embedding_request(self, settings: NvidiaPromptExecutionSettings) -> list[Any]:
        """Call the embeddings endpoint and return one embedding per input.

        Raises:
            ServiceResponseException: wrapping any error from the client call.
        """
        try:
            # exclude input-type from main body
            # NOTE(review): prepare_settings_dict accepts **kwargs but ignores
            # them, and "input_type" is already in its hard-coded exclude set,
            # so this `exclude=` argument is a no-op — confirm and remove.
            response = await self.client.embeddings.create(**settings.prepare_settings_dict(exclude="input_type"))

            self.store_usage(response)
            # One embedding vector per input item, in request order.
            return [x.embedding for x in response.data]
        except Exception as ex:
            raise ServiceResponseException(
                f"{type(self)} service failed to generate embeddings",
                ex,
            ) from ex

    def store_usage(
        self,
        response: CreateEmbeddingResponse,
    ):
        """Accumulate token-usage counters from a (non-streaming) response."""
        # Streaming responses carry no usage payload; embedding responses are
        # never streams, so the isinstance guard is presumably defensive for
        # future model types — TODO confirm.
        if not isinstance(response, AsyncStream) and response.usage:
            logger.info(f"OpenAI usage: {response.usage}")
            self.prompt_tokens += response.usage.prompt_tokens
            self.total_tokens += response.usage.total_tokens
            # Embedding usage has no completion_tokens; only add when present.
            if hasattr(response.usage, "completion_tokens"):
                self.completion_tokens += response.usage.completion_tokens

    def to_dict(self) -> dict[str, Any]:
        """Create a dict of the service settings (e.g. for from_dict round-trips).

        Excludes the live client object and usage counters; re-adds the
        client's api_key, non-user-agent headers, and organization.
        """
        client_settings = {
            "api_key": self.client.api_key,
            "default_headers": {k: v for k, v in self.client.default_headers.items() if k != USER_AGENT},
        }
        if self.client.organization:
            client_settings["org_id"] = self.client.organization
        base = self.model_dump(
            exclude={
                "prompt_tokens",
                "completion_tokens",
                "total_tokens",
                "api_type",
                "ai_model_type",
                "service_id",
                "client",
            },
            by_alias=True,
            exclude_none=True,
        )
        base.update(client_settings)
        return base
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright (c) Microsoft. All rights reserved.

from enum import Enum


class NvidiaModelTypes(Enum):
    """Kinds of NVIDIA models the connector can address.

    The string values are the wire-level identifiers used when routing
    requests to the matching endpoint.
    """

    TEXT = "text"  # plain text completion models
    CHAT = "chat"  # chat-completion models
    EMBEDDING = "embedding"  # text-embedding models
    TEXT_TO_IMAGE = "text-to-image"  # image-generation models
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Copyright (c) Microsoft. All rights reserved.

import logging
import sys
from typing import Any

if sys.version_info >= (3, 12):
from typing import override # pragma: no cover
else:
from typing_extensions import override # pragma: no cover

from numpy import array, ndarray
from openai import AsyncOpenAI
from pydantic import ValidationError

from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase
from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
NVIDIAEmbeddingPromptExecutionSettings,
)
from semantic_kernel.connectors.ai.nvidia.services.nvidia_handler import NvidiaHandler
from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes
from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError
from semantic_kernel.utils.experimental_decorator import experimental_class

logger: logging.Logger = logging.getLogger(__name__)


@experimental_class
class NvidiaTextEmbedding(NvidiaHandler, EmbeddingGeneratorBase):
"""Nvidia text embedding service."""

def __init__(
self,
ai_model_id: str | None = None,
api_key: str | None = None,
base_url: str | None = None,
client: AsyncOpenAI | None = None,
env_file_path: str | None = None,
service_id: str | None = None,
) -> None:
"""Initializes a new instance of the NvidiaTextEmbedding class.
Args:
ai_model_id (str): NVIDIA model card string, see
https://Nvidia.co/sentence-transformers
api_key: NVIDIA API key, see https://console.NVIDIA.com/settings/keys
(Env var NVIDIA_API_KEY)
base_url: HttpsUrl | None - base_url: The url of the NVIDIA endpoint. The base_url consists of the endpoint,
and more information refer https://docs.api.nvidia.com/nim/reference/
use endpoint if you only want to supply the endpoint.
(Env var NVIDIA_BASE_URL)
client (Optional[AsyncOpenAI]): An existing client to use. (Optional)
env_file_path (str | None): Use the environment settings file as
a fallback to environment variables. (Optional)
service_id (str): Service ID for the model. (optional)
Note that this model will be downloaded from the NVIDIA model hub.
"""
try:
nvidia_settings = NvidiaSettings.create(api_key=api_key, base_url=base_url, embedding_model_id=ai_model_id)
except ValidationError as ex:
raise ServiceInitializationError("Failed to create NVIDIA settings.", ex) from ex
if not nvidia_settings.embedding_model_id:
raise ServiceInitializationError("The NVIDIA embedding model ID is required.")
if not (api_key or nvidia_settings.api_key):
raise ServiceInitializationError("Please provide an api_key")
if not client:
client = AsyncOpenAI(api_key=nvidia_settings.api_key.get_secret_value(), base_url=nvidia_settings.base_url)
super().__init__(
ai_model_id=nvidia_settings.embedding_model_id,
api_key=nvidia_settings.api_key.get_secret_value() if nvidia_settings.api_key else None,
ai_model_type=NvidiaModelTypes.EMBEDDING,
service_id=service_id or nvidia_settings.embedding_model_id,
env_file_path=env_file_path,
client=client,
)

@override
async def generate_embeddings(
self,
texts: list[str],
settings: "PromptExecutionSettings | None" = None,
batch_size: int | None = None,
**kwargs: Any,
) -> ndarray:
raw_embeddings = await self.generate_raw_embeddings(texts, settings, batch_size, **kwargs)
return array(raw_embeddings)

@override
async def generate_raw_embeddings(
self,
texts: list[str],
settings: "NVIDIAEmbeddingPromptExecutionSettings | None" = None,
batch_size: int | None = None,
**kwargs: Any,
) -> Any:
"""Returns embeddings for the given texts in the unedited format.
Args:
texts (List[str]): The texts to generate embeddings for.
settings (NVIDIAEmbeddingPromptExecutionSettings): The settings to use for the request.
batch_size (int): The batch size to use for the request.
kwargs (Dict[str, Any]): Additional arguments to pass to the request.
"""
if not settings:
settings = NVIDIAEmbeddingPromptExecutionSettings(ai_model_id=self.ai_model_id)
else:
if not isinstance(settings, NVIDIAEmbeddingPromptExecutionSettings):
settings = self.get_prompt_execution_settings_from_settings(settings)
assert isinstance(settings, NVIDIAEmbeddingPromptExecutionSettings) # nosec
if settings.model is None:
settings.model = self.ai_model_id
for key, value in kwargs.items():
setattr(settings, key, value)

# move input_type to extra-body
if not settings.extra_body:
settings.extra_body = {}
settings.extra_body.setdefault("input_type", settings.input_type)
raw_embeddings = []
batch_size = batch_size or len(texts)
for i in range(0, len(texts), batch_size):
batch = texts[i : i + batch_size]
settings.input = batch
raw_embedding = await self._send_request(settings=settings)
assert isinstance(raw_embedding, list) # nosec
raw_embeddings.extend(raw_embedding)
return raw_embeddings

def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]:
"""Get the request settings class."""
return NVIDIAEmbeddingPromptExecutionSettings

@classmethod
def from_dict(cls: type["NvidiaTextEmbedding"], settings: dict[str, Any]) -> "NvidiaTextEmbedding":
"""Initialize an Open AI service from a dictionary of settings.
Args:
settings: A dictionary of settings for the service.
"""
return cls(
ai_model_id=settings.get("ai_model_id"),
api_key=settings.get("api_key"),
env_file_path=settings.get("env_file_path"),
service_id=settings.get("service_id"),
)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Copyright (c) Microsoft. All rights reserved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright (c) Microsoft. All rights reserved.

from typing import ClassVar

from pydantic import SecretStr

from semantic_kernel.kernel_pydantic import KernelBaseSettings


class NvidiaSettings(KernelBaseSettings):
    """NVIDIA model settings.

    The settings are first loaded from environment variables with the prefix 'NVIDIA_'. If the
    environment variables are not found, the settings can be loaded from a .env file with the
    encoding 'utf-8'. If the settings are not found in the .env file, the settings are ignored;
    however, validation will fail alerting that the settings are missing.

    Optional settings for prefix 'NVIDIA_' are:
    - api_key: NVIDIA API key. (Env var NVIDIA_API_KEY)
    - base_url: str | None - The url of the NVIDIA endpoint; for more information refer to
        https://docs.api.nvidia.com/nim/reference/
        (Env var NVIDIA_BASE_URL)
    - chat_model_id: The NVIDIA chat model ID to use, see https://docs.api.nvidia.com/nim/reference/llm-apis.
        (Env var NVIDIA_CHAT_MODEL_ID)
    - text_model_id: str | None - The NVIDIA text model ID to use, for example, nvidia/nemotron-4-340b-reward.
        (Env var NVIDIA_TEXT_MODEL_ID)
    - embedding_model_id: str | None - The NVIDIA embedding model ID to use, for example, nvidia/nv-embed-v1.
        (Env var NVIDIA_EMBEDDING_MODEL_ID)
    - text_to_image_model_id: str | None - The NVIDIA text to image model ID to use, for example, nvidia/neva-22b.
        (Env var NVIDIA_TEXT_TO_IMAGE_MODEL_ID)
    - env_file_path: if provided, the .env settings are read from this file path location
    """

    env_prefix: ClassVar[str] = "NVIDIA_"

    api_key: SecretStr | None = None
    base_url: str | None = None
    chat_model_id: str | None = None
    text_model_id: str | None = None
    # Only the embedding model has a default; it is the only type currently wired up.
    embedding_model_id: str | None = "nvidia/nv-embedqa-e5-v5"
    text_to_image_model_id: str | None = None
Loading

0 comments on commit 5b82ab1

Please sign in to comment.