Skip to content

Commit 55f6380

Browse files
committed
Batching logic
Reformatting; adding VoyageAI to enum
1 parent 2ebcb71 commit 55f6380

File tree

3 files changed

+22
-2
lines changed

3 files changed

+22
-2
lines changed

redisvl/utils/vectorize/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,5 @@ def vectorizer_from_dict(vectorizer: dict) -> BaseVectorizer:
3636
return MistralAITextVectorizer(model)
3737
elif vectorizer_type == Vectorizers.vertexai:
3838
return VertexAITextVectorizer(model)
39+
elif vectorizer_type == Vectorizers.voyageai:
40+
return VoyageAITextVectorizer(model)

redisvl/utils/vectorize/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ class Vectorizers(Enum):
1414
mistral = "mistral"
1515
vertexai = "vertexai"
1616
hf = "hf"
17+
voyageai = "voyageai"
1718

1819

1920
class BaseVectorizer(BaseModel, ABC):

redisvl/utils/vectorize/text/voyageai.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from redisvl.utils.vectorize.base import BaseVectorizer
99

10+
1011
# ignore that voyageai isn't imported
1112
# mypy: disable-error-code="name-defined"
1213

@@ -207,7 +208,15 @@ def embed_many(
207208
raise TypeError("Truncation (optional) parameter is a bool.")
208209

209210
if batch_size is None:
210-
batch_size = 72 if self.model in ["voyage-2", "voyage-02"] else 7
211+
batch_size = (
212+
72
213+
if self.model in ["voyage-2", "voyage-02"]
214+
else (
215+
30
216+
if self.model == "voyage-3-lite"
217+
else (10 if self.model == "voyage-3" else 7)
218+
)
219+
)
211220

212221
embeddings: List = []
213222
for batch in self.batchify(texts, batch_size, preprocess):
@@ -277,7 +286,15 @@ async def aembed_many(
277286
raise TypeError("Truncation (optional) parameter is a bool.")
278287

279288
if batch_size is None:
280-
batch_size = 72 if self.model in ["voyage-2", "voyage-02"] else 7
289+
batch_size = (
290+
72
291+
if self.model in ["voyage-2", "voyage-02"]
292+
else (
293+
30
294+
if self.model == "voyage-3-lite"
295+
else (10 if self.model == "voyage-3" else 7)
296+
)
297+
)
281298

282299
embeddings: List = []
283300
for batch in self.batchify(texts, batch_size, preprocess):

0 commit comments

Comments
 (0)