Skip to content

Add embed to Index configure calls #515

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Jun 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pinecone/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@
"RestoreJobList": ("pinecone.db_control.models", "RestoreJobList"),
"BackupModel": ("pinecone.db_control.models", "BackupModel"),
"BackupList": ("pinecone.db_control.models", "BackupList"),
"ConfigureIndexEmbed": ("pinecone.db_control.types", "ConfigureIndexEmbed"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason for adding ConfigureIndexEmbed and CreateIndexForModelEmbedTypedDict here?

Copy link
Contributor Author

@austin-denoble austin-denoble Jun 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding the classes to _db_control_lazy_imports, which we seem to do for all of our custom models and types. It seemed like CreateIndexForModelEmbedTypedDict was also not included here, so I added it.

This is to allow lazy loaded imports to be exported from the top level of the package here:

*list(_LAZY_IMPORTS.keys()),

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I was wondering why we added types here, but this comment explains the reason:
#507 (comment)

"CreateIndexForModelEmbedTypedDict": (
"pinecone.db_control.types",
"CreateIndexForModelEmbedTypedDict",
),
}

_config_lazy_imports = {
Expand Down
7 changes: 7 additions & 0 deletions pinecone/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ from pinecone.db_control.models import (
PodSpec,
PodSpecDefinition,
)
from pinecone.db_control.types import (
ConfigureIndexEmbed,
CreateIndexForModelEmbedTypedDict,
)
from pinecone.pinecone import Pinecone
from pinecone.pinecone_asyncio import PineconeAsyncio

Expand Down Expand Up @@ -160,4 +164,7 @@ __all__ = [
"ServerlessSpecDefinition",
"PodSpec",
"PodSpecDefinition",
# Control plane types
"ConfigureIndexEmbed",
"CreateIndexForModelEmbedTypedDict",
]
4 changes: 4 additions & 0 deletions pinecone/db_control/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .enums import *
from .models import *
from .types import *
from .db_control import DBControl
from .db_control_asyncio import DBControlAsyncio
from .repr_overrides import install_repr_overrides
Expand Down Expand Up @@ -30,6 +31,9 @@
"BackupList",
"RestoreJobModel",
"RestoreJobList",
# from .types
"ConfigureIndexEmbed",
"CreateIndexForModelEmbedTypedDict",
# direct imports
"DBControl",
"DBControlAsyncio",
Expand Down
27 changes: 21 additions & 6 deletions pinecone/db_control/request_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
from pinecone.core.openapi.db_control.model.configure_index_request_spec_pod import (
ConfigureIndexRequestSpecPod,
)
from pinecone.core.openapi.db_control.model.configure_index_request_embed import (
ConfigureIndexRequestEmbed,
)
from pinecone.core.openapi.db_control.model.deletion_protection import (
DeletionProtection as DeletionProtectionModel,
)
Expand All @@ -45,7 +48,7 @@
GcpRegion,
AzureRegion,
)
from .types import CreateIndexForModelEmbedTypedDict
from .types import CreateIndexForModelEmbedTypedDict, ConfigureIndexEmbed


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -241,6 +244,7 @@ def configure_index_request(
pod_type: Optional[Union[PodType, str]] = None,
deletion_protection: Optional[Union[DeletionProtection, str]] = None,
tags: Optional[Dict[str, str]] = None,
embed: Optional[Union[ConfigureIndexEmbed, Dict]] = None,
):
if deletion_protection is None:
dp = DeletionProtectionModel(description.deletion_protection)
Expand Down Expand Up @@ -271,13 +275,24 @@ def configure_index_request(
if replicas:
pod_config_args.update(replicas=replicas)

if pod_config_args != {}:
embed_config = None
if embed is not None:
embed_config = ConfigureIndexRequestEmbed(**dict(embed))

spec = None
if pod_config_args:
spec = ConfigureIndexRequestSpec(pod=ConfigureIndexRequestSpecPod(**pod_config_args))
req = ConfigureIndexRequest(deletion_protection=dp, spec=spec, tags=IndexTags(**tags))
else:
req = ConfigureIndexRequest(deletion_protection=dp, tags=IndexTags(**tags))

return req
args_dict = parse_non_empty_args(
[
("deletion_protection", dp),
("tags", IndexTags(**tags)),
("spec", spec),
("embed", embed_config),
]
)

return ConfigureIndexRequest(**args_dict)

@staticmethod
def create_collection_request(name: str, source: str) -> CreateCollectionRequest:
Expand Down
3 changes: 3 additions & 0 deletions pinecone/db_control/resources/asyncio/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from pinecone.db_control.request_factory import PineconeDBControlRequestFactory
from pinecone.core.openapi.db_control import API_VERSION
from pinecone.utils import require_kwargs
from pinecone.db_control.types.configure_index_embed import ConfigureIndexEmbed

logger = logging.getLogger(__name__)
""" :meta private: """
Expand Down Expand Up @@ -183,6 +184,7 @@ async def configure(
pod_type: Optional[Union[PodType, str]] = None,
deletion_protection: Optional[Union[DeletionProtection, str]] = None,
tags: Optional[Dict[str, str]] = None,
embed: Optional[Union[ConfigureIndexEmbed, Dict]] = None,
):
description = await self.describe(name=name)

Expand All @@ -192,5 +194,6 @@ async def configure(
pod_type=pod_type,
deletion_protection=deletion_protection,
tags=tags,
embed=embed,
)
await self._index_api.configure_index(name, configure_index_request=req)
3 changes: 3 additions & 0 deletions pinecone/db_control/resources/sync/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict
from pinecone.db_control.request_factory import PineconeDBControlRequestFactory
from pinecone.core.openapi.db_control import API_VERSION
from pinecone.db_control.types.configure_index_embed import ConfigureIndexEmbed

logger = logging.getLogger(__name__)
""" :meta private: """
Expand Down Expand Up @@ -224,6 +225,7 @@ def configure(
pod_type: Optional[Union["PodType", str]] = None,
deletion_protection: Optional[Union["DeletionProtection", str]] = None,
tags: Optional[Dict[str, str]] = None,
embed: Optional[Union["ConfigureIndexEmbed", Dict]] = None,
) -> None:
api_instance = self._index_api
description = self.describe(name=name)
Expand All @@ -234,6 +236,7 @@ def configure(
pod_type=pod_type,
deletion_protection=deletion_protection,
tags=tags,
embed=embed,
)
api_instance.configure_index(name, configure_index_request=req)

Expand Down
3 changes: 2 additions & 1 deletion pinecone/db_control/types/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .create_index_for_model_embed import CreateIndexForModelEmbedTypedDict
from .configure_index_embed import ConfigureIndexEmbed

__all__ = ["CreateIndexForModelEmbedTypedDict"]
__all__ = ["CreateIndexForModelEmbedTypedDict", "ConfigureIndexEmbed"]
8 changes: 8 additions & 0 deletions pinecone/db_control/types/configure_index_embed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from typing import TypedDict, Dict, Any, Optional


class _ConfigureIndexEmbedRequired(TypedDict):
    # Keys every embed configuration must carry. Kept in a separate, private
    # base class so the public type can mark the remaining keys as omittable
    # without relying on ``NotRequired`` (unavailable in ``typing`` on 3.9).
    model: str
    field_map: Dict[str, str]


class ConfigureIndexEmbed(_ConfigureIndexEmbedRequired, total=False):
    """Typed dictionary describing the ``embed`` argument of an index configure call.

    Required keys:
        model: name of the embedding model.
        field_map: mapping of string field names to string field names
            (presumably model input field -> record field; confirm against the API docs).

    Optional keys (may be left out entirely, or set to ``None``):
        read_parameters: read-side parameters for the embedding model.
        write_parameters: write-side parameters for the embedding model.

    ``Optional[...]`` alone only permits ``None`` as a *value*; it does not make
    the *key* omittable. Declaring these two keys under ``total=False`` lets a
    plain ``{"model": ..., "field_map": ...}`` dict satisfy the type.
    """

    read_parameters: Optional[Dict[str, Any]]
    write_parameters: Optional[Dict[str, Any]]
7 changes: 6 additions & 1 deletion pinecone/legacy_pinecone_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
GcpRegion,
AzureRegion,
)
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict, ConfigureIndexEmbed


class LegacyPineconeDBControlInterface(ABC):
Expand Down Expand Up @@ -438,6 +438,7 @@ def configure_index(
pod_type: Optional[Union["PodType", str]] = None,
deletion_protection: Optional[Union["DeletionProtection", str]] = None,
tags: Optional[Dict[str, str]] = None,
embed: Optional[Union["ConfigureIndexEmbed", Dict]] = None,
):
"""
:param name: the name of the Index
Expand All @@ -452,6 +453,10 @@ def configure_index(
:type deletion_protection: str or DeletionProtection, optional
:param tags: A dictionary of tags to apply to the index. Tags are key-value pairs that can be used to organize and manage indexes. To remove a tag, set the value to "". Tags passed to configure_index will be merged with existing tags and any with the value empty string will be removed.
:type tags: Dict[str, str], optional
:param embed: configures the integrated inference embedding settings for the index. You can convert an existing index to an integrated index by specifying the embedding model and field_map.
The index vector type and dimension must match the model vector type and dimension, and the index similarity metric must be supported by the model.
You can later change the embedding configuration to update the field_map, read_parameters, or write_parameters. Once set, the model cannot be changed.
:type embed: Optional[Union[ConfigureIndexEmbed, Dict]], optional

This method is used to modify an index's configuration. It can be used to:

Expand Down
4 changes: 3 additions & 1 deletion pinecone/pinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from pinecone.db_data import _Index as Index, _IndexAsyncio as IndexAsyncio
from pinecone.db_control.index_host_store import IndexHostStore
from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict, ConfigureIndexEmbed
from pinecone.db_control.enums import (
Metric,
VectorType,
Expand Down Expand Up @@ -399,13 +399,15 @@ def configure_index(
pod_type: Optional[Union["PodType", str]] = None,
deletion_protection: Optional[Union["DeletionProtection", str]] = None,
tags: Optional[Dict[str, str]] = None,
embed: Optional[Union["ConfigureIndexEmbed", Dict]] = None,
):
return self.db.index.configure(
name=name,
replicas=replicas,
pod_type=pod_type,
deletion_protection=deletion_protection,
tags=tags,
embed=embed,
)

def create_collection(self, name: str, source: str) -> None:
Expand Down
4 changes: 3 additions & 1 deletion pinecone/pinecone_asyncio.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .pinecone import check_realistic_host

if TYPE_CHECKING:
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict
from pinecone.db_control.types import ConfigureIndexEmbed, CreateIndexForModelEmbedTypedDict
from pinecone.db_data import _IndexAsyncio
from pinecone.db_control.enums import (
Metric,
Expand Down Expand Up @@ -273,13 +273,15 @@ async def configure_index(
pod_type: Optional[Union["PodType", str]] = None,
deletion_protection: Optional[Union["DeletionProtection", str]] = None,
tags: Optional[Dict[str, str]] = None,
embed: Optional[Union["ConfigureIndexEmbed", Dict]] = None,
):
return await self.db.index.configure(
name=name,
replicas=replicas,
pod_type=pod_type,
deletion_protection=deletion_protection,
tags=tags,
embed=embed,
)

async def create_collection(self, name: str, source: str):
Expand Down
7 changes: 6 additions & 1 deletion pinecone/pinecone_interface_asyncio.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
GcpRegion,
AzureRegion,
)
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict
from pinecone.db_control.types import ConfigureIndexEmbed, CreateIndexForModelEmbedTypedDict


class PineconeAsyncioDBControlInterface(ABC):
Expand Down Expand Up @@ -711,6 +711,7 @@ async def configure_index(
pod_type: Optional[Union["PodType", str]] = None,
deletion_protection: Optional[Union["DeletionProtection", str]] = None,
tags: Optional[Dict[str, str]] = None,
embed: Optional[Union["ConfigureIndexEmbed", Dict]] = None,
):
"""
:param: name: the name of the Index
Expand All @@ -719,6 +720,10 @@ async def configure_index(
available pod types, please see `Understanding Indexes <https://docs.pinecone.io/docs/indexes>`_
:param: deletion_protection: If set to 'enabled', the index cannot be deleted. If 'disabled', the index can be deleted.
:param: tags: A dictionary of tags to apply to the index. Tags are key-value pairs that can be used to organize and manage indexes. To remove a tag, set the value to "". Tags passed to configure_index will be merged with existing tags and any with the value empty string will be removed.
:param embed: configures the integrated inference embedding settings for the index. You can convert an existing index to an integrated index by specifying the embedding model and field_map.
The index vector type and dimension must match the model vector type and dimension, and the index similarity metric must be supported by the model.
You can later change the embedding configuration to update the field_map, read_parameters, or write_parameters. Once set, the model cannot be changed.
:type embed: Optional[Union[ConfigureIndexEmbed, Dict]], optional

This method is used to modify an index's configuration. It can be used to:

Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,7 @@ docstring-code-line-length = "dynamic"

# E712 Allow == comparison to True/False
"tests/**" = ["E712"]

[tool.black]
line-length = 100
target-version = ["py39"]
27 changes: 27 additions & 0 deletions tests/integration/control/resources/index/test_configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,30 @@ def test_remove_multiple_tags(self, pc, ready_sl_index):
assert found_tags is not None
assert found_tags.get("foo", None) is None, "foo should be removed"
assert found_tags.get("bar", None) is None, "bar should be removed"

def test_configure_index_embed(self, pc, create_index_params):
    """Configure ``embed`` on an existing index and check the resulting description.

    Creates an index with no embed settings, applies an embed configuration via
    ``pc.db.index.configure``, then asserts on the ``embed`` section returned by
    ``describe``. The index is deleted in a ``finally`` block so that a failed
    assertion does not leave the index behind.
    """
    name = create_index_params["name"]
    # The description is later asserted to report dimension 1024, so the index
    # is created with that dimension up front.
    create_index_params["dimension"] = 1024
    pc.db.index.create(**create_index_params)

    try:
        desc = pc.db.index.describe(name=name)
        assert desc.embed is None

        # Only model and field_map are supplied; read/write parameters are
        # expected to come back populated in the description below.
        embed_config = {
            "model": "multilingual-e5-large",
            "field_map": {"text": "chunk_text"},
        }
        pc.db.index.configure(name=name, embed=embed_config)

        desc = pc.db.index.describe(name=name)
        assert desc.embed.model == "multilingual-e5-large"
        assert desc.embed.field_map == {"text": "chunk_text"}
        assert desc.embed.read_parameters == {"input_type": "query", "truncate": "END"}
        assert desc.embed.write_parameters == {
            "input_type": "passage",
            "truncate": "END",
        }
        assert desc.embed.vector_type == "dense"
        assert desc.embed.dimension == 1024
        assert desc.embed.metric == "cosine"
    finally:
        # Always clean up, even when an assertion above fails.
        pc.db.index.delete(name=name)
27 changes: 27 additions & 0 deletions tests/integration/control/serverless/test_configure_index_embed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
class TestConfigureIndexEmbed:
    """Integration test for setting ``embed`` through ``configure_index``."""

    def test_convert_index_to_integrated(self, client, create_sl_index_params):
        """Create an index without embed settings, configure ``embed``, verify, delete.

        The delete runs in a ``finally`` block so that a failed assertion does
        not leave the index behind.
        """
        name = create_sl_index_params["name"]
        # The description is later asserted to report dimension 1024, so the
        # index is created with that dimension up front.
        create_sl_index_params["dimension"] = 1024
        client.create_index(**create_sl_index_params)

        try:
            desc = client.describe_index(name)
            assert desc.embed is None

            # Only model and field_map are supplied; read/write parameters are
            # expected to come back populated in the description below.
            embed_config = {
                "model": "multilingual-e5-large",
                "field_map": {"text": "chunk_text"},
            }
            client.configure_index(name, embed=embed_config)

            desc = client.describe_index(name)
            assert desc.embed.model == "multilingual-e5-large"
            assert desc.embed.field_map == {"text": "chunk_text"}
            assert desc.embed.read_parameters == {"input_type": "query", "truncate": "END"}
            assert desc.embed.write_parameters == {
                "input_type": "passage",
                "truncate": "END",
            }
            assert desc.embed.vector_type == "dense"
            assert desc.embed.dimension == 1024
            assert desc.embed.metric == "cosine"
        finally:
            # Always clean up, even when an assertion above fails.
            client.delete_index(name)
31 changes: 31 additions & 0 deletions tests/integration/control_asyncio/test_configure_index_embed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from pinecone import PineconeAsyncio


class TestConfigureIndexEmbed:
    """Asyncio integration test for setting ``embed`` through ``configure_index``."""

    async def test_convert_index_to_integrated(self, create_sl_index_params):
        """Create an index without embed settings, configure ``embed``, verify, delete.

        The client is used as an async context manager so it is closed when the
        test ends, and the index is deleted in a ``finally`` block so that a
        failed assertion does not leave the index behind.
        """
        name = create_sl_index_params["name"]
        # The description is later asserted to report dimension 1024, so the
        # index is created with that dimension up front.
        create_sl_index_params["dimension"] = 1024

        async with PineconeAsyncio() as pc:
            await pc.create_index(**create_sl_index_params)

            try:
                desc = await pc.describe_index(name)
                assert desc.embed is None

                # Only model and field_map are supplied; read/write parameters
                # are expected to come back populated in the description below.
                embed_config = {
                    "model": "multilingual-e5-large",
                    "field_map": {"text": "chunk_text"},
                }
                await pc.configure_index(name, embed=embed_config)

                desc = await pc.describe_index(name)
                assert desc.embed.model == "multilingual-e5-large"
                assert desc.embed.field_map == {"text": "chunk_text"}
                assert desc.embed.read_parameters == {"input_type": "query", "truncate": "END"}
                assert desc.embed.write_parameters == {
                    "input_type": "passage",
                    "truncate": "END",
                }
                assert desc.embed.vector_type == "dense"
                assert desc.embed.dimension == 1024
                assert desc.embed.metric == "cosine"
            finally:
                # Always clean up, even when an assertion above fails.
                await pc.delete_index(name)