diff --git a/pinecone/__init__.py b/pinecone/__init__.py index 69f1d84f..7b435e9e 100644 --- a/pinecone/__init__.py +++ b/pinecone/__init__.py @@ -98,6 +98,11 @@ "RestoreJobList": ("pinecone.db_control.models", "RestoreJobList"), "BackupModel": ("pinecone.db_control.models", "BackupModel"), "BackupList": ("pinecone.db_control.models", "BackupList"), + "ConfigureIndexEmbed": ("pinecone.db_control.types", "ConfigureIndexEmbed"), + "CreateIndexForModelEmbedTypedDict": ( + "pinecone.db_control.types", + "CreateIndexForModelEmbedTypedDict", + ), } _config_lazy_imports = { diff --git a/pinecone/__init__.pyi b/pinecone/__init__.pyi index cf4cc0b7..e4af2ca1 100644 --- a/pinecone/__init__.pyi +++ b/pinecone/__init__.pyi @@ -78,6 +78,10 @@ from pinecone.db_control.models import ( PodSpec, PodSpecDefinition, ) +from pinecone.db_control.types import ( + ConfigureIndexEmbed, + CreateIndexForModelEmbedTypedDict, +) from pinecone.pinecone import Pinecone from pinecone.pinecone_asyncio import PineconeAsyncio @@ -160,4 +164,7 @@ __all__ = [ "ServerlessSpecDefinition", "PodSpec", "PodSpecDefinition", + # Control plane types + "ConfigureIndexEmbed", + "CreateIndexForModelEmbedTypedDict", ] diff --git a/pinecone/db_control/__init__.py b/pinecone/db_control/__init__.py index 74c82cd8..7ac45251 100644 --- a/pinecone/db_control/__init__.py +++ b/pinecone/db_control/__init__.py @@ -1,5 +1,6 @@ from .enums import * from .models import * +from .types import * from .db_control import DBControl from .db_control_asyncio import DBControlAsyncio from .repr_overrides import install_repr_overrides @@ -30,6 +31,9 @@ "BackupList", "RestoreJobModel", "RestoreJobList", + # from .types + "ConfigureIndexEmbed", + "CreateIndexForModelEmbedTypedDict", # direct imports "DBControl", "DBControlAsyncio", diff --git a/pinecone/db_control/request_factory.py b/pinecone/db_control/request_factory.py index 3d6a3735..070185e9 100644 --- a/pinecone/db_control/request_factory.py +++ 
b/pinecone/db_control/request_factory.py @@ -19,6 +19,9 @@ from pinecone.core.openapi.db_control.model.configure_index_request_spec_pod import ( ConfigureIndexRequestSpecPod, ) +from pinecone.core.openapi.db_control.model.configure_index_request_embed import ( + ConfigureIndexRequestEmbed, +) from pinecone.core.openapi.db_control.model.deletion_protection import ( DeletionProtection as DeletionProtectionModel, ) @@ -45,7 +48,7 @@ GcpRegion, AzureRegion, ) -from .types import CreateIndexForModelEmbedTypedDict +from .types import CreateIndexForModelEmbedTypedDict, ConfigureIndexEmbed logger = logging.getLogger(__name__) @@ -241,6 +244,7 @@ def configure_index_request( pod_type: Optional[Union[PodType, str]] = None, deletion_protection: Optional[Union[DeletionProtection, str]] = None, tags: Optional[Dict[str, str]] = None, + embed: Optional[Union[ConfigureIndexEmbed, Dict]] = None, ): if deletion_protection is None: dp = DeletionProtectionModel(description.deletion_protection) @@ -271,13 +275,24 @@ def configure_index_request( if replicas: pod_config_args.update(replicas=replicas) - if pod_config_args != {}: + embed_config = None + if embed is not None: + embed_config = ConfigureIndexRequestEmbed(**dict(embed)) + + spec = None + if pod_config_args: spec = ConfigureIndexRequestSpec(pod=ConfigureIndexRequestSpecPod(**pod_config_args)) - req = ConfigureIndexRequest(deletion_protection=dp, spec=spec, tags=IndexTags(**tags)) - else: - req = ConfigureIndexRequest(deletion_protection=dp, tags=IndexTags(**tags)) - return req + args_dict = parse_non_empty_args( + [ + ("deletion_protection", dp), + ("tags", IndexTags(**tags)), + ("spec", spec), + ("embed", embed_config), + ] + ) + + return ConfigureIndexRequest(**args_dict) @staticmethod def create_collection_request(name: str, source: str) -> CreateCollectionRequest: diff --git a/pinecone/db_control/resources/asyncio/index.py b/pinecone/db_control/resources/asyncio/index.py index cb233bc4..5a844b5a 100644 --- 
a/pinecone/db_control/resources/asyncio/index.py +++ b/pinecone/db_control/resources/asyncio/index.py @@ -27,6 +27,7 @@ from pinecone.db_control.request_factory import PineconeDBControlRequestFactory from pinecone.core.openapi.db_control import API_VERSION from pinecone.utils import require_kwargs +from pinecone.db_control.types.configure_index_embed import ConfigureIndexEmbed logger = logging.getLogger(__name__) """ :meta private: """ @@ -183,6 +184,7 @@ async def configure( pod_type: Optional[Union[PodType, str]] = None, deletion_protection: Optional[Union[DeletionProtection, str]] = None, tags: Optional[Dict[str, str]] = None, + embed: Optional[Union[ConfigureIndexEmbed, Dict]] = None, ): description = await self.describe(name=name) @@ -192,5 +194,6 @@ async def configure( pod_type=pod_type, deletion_protection=deletion_protection, tags=tags, + embed=embed, ) await self._index_api.configure_index(name, configure_index_request=req) diff --git a/pinecone/db_control/resources/sync/index.py b/pinecone/db_control/resources/sync/index.py index cf255ddf..faf5f983 100644 --- a/pinecone/db_control/resources/sync/index.py +++ b/pinecone/db_control/resources/sync/index.py @@ -10,6 +10,7 @@ from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict from pinecone.db_control.request_factory import PineconeDBControlRequestFactory from pinecone.core.openapi.db_control import API_VERSION +from pinecone.db_control.types.configure_index_embed import ConfigureIndexEmbed logger = logging.getLogger(__name__) """ :meta private: """ @@ -224,6 +225,7 @@ def configure( pod_type: Optional[Union["PodType", str]] = None, deletion_protection: Optional[Union["DeletionProtection", str]] = None, tags: Optional[Dict[str, str]] = None, + embed: Optional[Union["ConfigureIndexEmbed", Dict]] = None, ) -> None: api_instance = self._index_api description = self.describe(name=name) @@ -234,6 +236,7 @@ def configure( pod_type=pod_type, deletion_protection=deletion_protection, tags=tags, + 
class _ConfigureIndexEmbedRequired(TypedDict):
    """Keys that every ``ConfigureIndexEmbed`` mapping must provide."""

    # Name of the embedding model to associate with the index,
    # e.g. "multilingual-e5-large".
    model: str
    # String-to-string mapping handed to the embed configuration,
    # e.g. {"text": "chunk_text"}.
    # NOTE(review): presumably maps the model's input field to the record
    # field holding the text to embed -- confirm against the API reference.
    field_map: Dict[str, str]


class ConfigureIndexEmbed(_ConfigureIndexEmbedRequired, total=False):
    """Typed shape of the ``embed`` argument accepted by ``configure_index``.

    ``model`` and ``field_map`` are required keys. ``read_parameters`` and
    ``write_parameters`` may be left out of the mapping entirely.

    The optional keys live on a ``total=False`` subclass because
    ``Optional[...]`` only allows a key's *value* to be ``None``; it does not
    allow the key to be absent. Declaring all four keys on a single
    ``total=True`` TypedDict would make type checkers reject
    ``{"model": ..., "field_map": ...}``. The base/subclass split is used
    instead of ``typing.NotRequired`` so the module stays importable on
    Python 3.9.
    """

    # Parameters applied when the model embeds queries (read path),
    # e.g. {"input_type": "query", "truncate": "END"}.
    read_parameters: Optional[Dict[str, Any]]
    # Parameters applied when the model embeds records being written,
    # e.g. {"input_type": "passage", "truncate": "END"}.
    write_parameters: Optional[Dict[str, Any]]
Tags are key-value pairs that can be used to organize and manage indexes. To remove a tag, set the value to "". Tags passed to configure_index will be merged with existing tags and any with the value empty string will be removed. :type tags: Dict[str, str], optional + :param embed: configures the integrated inference embedding settings for the index. You can convert an existing index to an integrated index by specifying the embedding model and field_map. + The index vector type and dimension must match the model vector type and dimension, and the index similarity metric must be supported by the model. + You can later change the embedding configuration to update the field_map, read_parameters, or write_parameters. Once set, the model cannot be changed. + :type embed: Optional[Union[ConfigureIndexEmbed, Dict]], optional This method is used to modify an index's configuration. It can be used to: diff --git a/pinecone/pinecone.py b/pinecone/pinecone.py index 202bed3b..d8c8a1b4 100644 --- a/pinecone/pinecone.py +++ b/pinecone/pinecone.py @@ -18,7 +18,7 @@ from pinecone.db_data import _Index as Index, _IndexAsyncio as IndexAsyncio from pinecone.db_control.index_host_store import IndexHostStore from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi - from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict + from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict, ConfigureIndexEmbed from pinecone.db_control.enums import ( Metric, VectorType, @@ -399,6 +399,7 @@ def configure_index( pod_type: Optional[Union["PodType", str]] = None, deletion_protection: Optional[Union["DeletionProtection", str]] = None, tags: Optional[Dict[str, str]] = None, + embed: Optional[Union["ConfigureIndexEmbed", Dict]] = None, ): return self.db.index.configure( name=name, @@ -406,6 +407,7 @@ def configure_index( pod_type=pod_type, deletion_protection=deletion_protection, tags=tags, + embed=embed, ) def create_collection(self, name: str, 
source: str) -> None: diff --git a/pinecone/pinecone_asyncio.py b/pinecone/pinecone_asyncio.py index 36d86495..425eb776 100644 --- a/pinecone/pinecone_asyncio.py +++ b/pinecone/pinecone_asyncio.py @@ -10,7 +10,7 @@ from .pinecone import check_realistic_host if TYPE_CHECKING: - from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict + from pinecone.db_control.types import ConfigureIndexEmbed, CreateIndexForModelEmbedTypedDict from pinecone.db_data import _IndexAsyncio from pinecone.db_control.enums import ( Metric, @@ -273,6 +273,7 @@ async def configure_index( pod_type: Optional[Union["PodType", str]] = None, deletion_protection: Optional[Union["DeletionProtection", str]] = None, tags: Optional[Dict[str, str]] = None, + embed: Optional[Union["ConfigureIndexEmbed", Dict]] = None, ): return await self.db.index.configure( name=name, @@ -280,6 +281,7 @@ async def configure_index( pod_type=pod_type, deletion_protection=deletion_protection, tags=tags, + embed=embed, ) async def create_collection(self, name: str, source: str): diff --git a/pinecone/pinecone_interface_asyncio.py b/pinecone/pinecone_interface_asyncio.py index 843ee83a..0d544f10 100644 --- a/pinecone/pinecone_interface_asyncio.py +++ b/pinecone/pinecone_interface_asyncio.py @@ -30,7 +30,7 @@ GcpRegion, AzureRegion, ) - from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict + from pinecone.db_control.types import ConfigureIndexEmbed, CreateIndexForModelEmbedTypedDict class PineconeAsyncioDBControlInterface(ABC): @@ -711,6 +711,7 @@ async def configure_index( pod_type: Optional[Union["PodType", str]] = None, deletion_protection: Optional[Union["DeletionProtection", str]] = None, tags: Optional[Dict[str, str]] = None, + embed: Optional[Union["ConfigureIndexEmbed", Dict]] = None, ): """ :param: name: the name of the Index @@ -719,6 +720,10 @@ async def configure_index( available pod types, please see `Understanding Indexes `_ :param: deletion_protection: If set to 'enabled', the 
index cannot be deleted. If 'disabled', the index can be deleted. :param: tags: A dictionary of tags to apply to the index. Tags are key-value pairs that can be used to organize and manage indexes. To remove a tag, set the value to "". Tags passed to configure_index will be merged with existing tags and any with the value empty string will be removed. + :param embed: configures the integrated inference embedding settings for the index. You can convert an existing index to an integrated index by specifying the embedding model and field_map. + The index vector type and dimension must match the model vector type and dimension, and the index similarity metric must be supported by the model. + You can later change the embedding configuration to update the field_map, read_parameters, or write_parameters. Once set, the model cannot be changed. + :type embed: Optional[Union[ConfigureIndexEmbed, Dict]], optional This method is used to modify an index's configuration. It can be used to: diff --git a/pyproject.toml b/pyproject.toml index ce982784..3852f355 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -166,3 +166,7 @@ docstring-code-line-length = "dynamic" # E712 Allow == comparison to True/False "tests/**" = ["E712"] + +[tool.black] +line-length = 100 +target-version = ["py39"] diff --git a/tests/integration/control/resources/index/test_configure.py b/tests/integration/control/resources/index/test_configure.py index f4c73094..96203e1a 100644 --- a/tests/integration/control/resources/index/test_configure.py +++ b/tests/integration/control/resources/index/test_configure.py @@ -41,3 +41,30 @@ def test_remove_multiple_tags(self, pc, ready_sl_index): assert found_tags is not None assert found_tags.get("foo", None) is None, "foo should be removed" assert found_tags.get("bar", None) is None, "bar should be removed" + + def test_configure_index_embed(self, pc, create_index_params): + name = create_index_params["name"] + create_index_params["dimension"] = 1024 + 
class TestConfigureIndexEmbed:
    """Integration test: convert a plain serverless index to an integrated index."""

    def test_convert_index_to_integrated(self, client, create_sl_index_params):
        """Create an index, attach an embedding model, and verify the result.

        :param client: Pinecone client fixture used for all control-plane calls.
        :param create_sl_index_params: keyword arguments for ``create_index``;
            ``dimension`` is overridden to 1024 before the index is created.
        """
        name = create_sl_index_params["name"]
        # The final assertions expect the embed config to report dimension
        # 1024, so the index is created with that dimension up front.
        create_sl_index_params["dimension"] = 1024
        client.create_index(**create_sl_index_params)

        # Everything after creation runs under try/finally so that a failing
        # assertion or API error does not leave the index behind.
        try:
            desc = client.describe_index(name)
            assert desc.embed is None

            embed_config = {
                "model": "multilingual-e5-large",
                "field_map": {"text": "chunk_text"},
            }
            client.configure_index(name, embed=embed_config)

            desc = client.describe_index(name)
            assert desc.embed.model == "multilingual-e5-large"
            assert desc.embed.field_map == {"text": "chunk_text"}
            # read/write parameters were not supplied above; these are the
            # values the describe call is expected to report for them.
            assert desc.embed.read_parameters == {"input_type": "query", "truncate": "END"}
            assert desc.embed.write_parameters == {
                "input_type": "passage",
                "truncate": "END",
            }
            assert desc.embed.vector_type == "dense"
            assert desc.embed.dimension == 1024
            assert desc.embed.metric == "cosine"
        finally:
            client.delete_index(name)
class TestConfigureIndexEmbed:
    """Asyncio integration test: convert a plain serverless index to an integrated index."""

    async def test_convert_index_to_integrated(self, create_sl_index_params):
        """Create an index, attach an embedding model, and verify the result.

        :param create_sl_index_params: keyword arguments for ``create_index``;
            ``dimension`` is overridden to 1024 before the index is created.
        """
        name = create_sl_index_params["name"]
        # The final assertions expect the embed config to report dimension
        # 1024, so the index is created with that dimension up front.
        create_sl_index_params["dimension"] = 1024

        # Use the client as an async context manager so it is closed even
        # when the test fails, rather than leaving it open.
        async with PineconeAsyncio() as pc:
            await pc.create_index(**create_sl_index_params)

            # Everything after creation runs under try/finally so that a
            # failing assertion or API error does not leave the index behind.
            try:
                desc = await pc.describe_index(name)
                assert desc.embed is None

                embed_config = {
                    "model": "multilingual-e5-large",
                    "field_map": {"text": "chunk_text"},
                }
                await pc.configure_index(name, embed=embed_config)

                desc = await pc.describe_index(name)
                assert desc.embed.model == "multilingual-e5-large"
                assert desc.embed.field_map == {"text": "chunk_text"}
                # read/write parameters were not supplied above; these are
                # the values the describe call is expected to report.
                assert desc.embed.read_parameters == {"input_type": "query", "truncate": "END"}
                assert desc.embed.write_parameters == {
                    "input_type": "passage",
                    "truncate": "END",
                }
                assert desc.embed.vector_type == "dense"
                assert desc.embed.dimension == 1024
                assert desc.embed.metric == "cosine"
            finally:
                await pc.delete_index(name)