18 changes: 10 additions & 8 deletions semantic_router/encoders/huggingface.py
@@ -211,18 +211,19 @@ def __call__(self, docs: List[str]) -> List[List[float]]:
         Raises:
             ValueError: If no embeddings are returned for a document.
         """
+        batch_size = 50
+

ValueError: No embeddings returned for batch. Error: Query failed with status 413: {"error":"batch size 50 > maximum allowed batch size 32","error_type":"Validation"}

Hi @joaomsimoes, what HuggingFace TEI model were you using when you encountered this error?

Sorry for the late answer @Siraj-Aizlewood

I was using Alibaba-NLP/gte-large-en-v1.5
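For context on the 413 above: the TEI deployment rejects any batch larger than its configured maximum (32 in this case), so a hardcoded batch_size of 50 will always fail against it. Below is a minimal sketch of one way to stay under that limit, assuming an encoder exposing the same query method as in this diff; the batched_embed helper and max_batch_size parameter are illustrative, not part of the current encoder.

```python
from typing import List


def batched_embed(encoder, docs: List[str], max_batch_size: int = 32) -> List[List[float]]:
    """Illustrative only: embed docs in chunks no larger than the server's limit."""
    embeddings: List[List[float]] = []
    for i in range(0, len(docs), max_batch_size):
        batch = docs[i : i + max_batch_size]
        # Same call shape as in the diff: self.query({"inputs": batch, "parameters": {}})
        outputs = encoder.query({"inputs": batch, "parameters": {}})
        if not outputs:
            raise ValueError("No embeddings returned for batch.")
        embeddings.extend(outputs)
    return embeddings
```

Making the chunk size a constructor argument (or at least reading it from config) would avoid hardcoding a value that a given TEI deployment may reject.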

         embeddings = []
-        for d in docs:
+        for i in range(0, len(docs), batch_size):
+            batch = docs[i : i + batch_size]
             try:
-                output = self.query({"inputs": d, "parameters": {}})
-                if not output or len(output) == 0:
+                outputs = self.query({"inputs": batch, "parameters": {}})
+                if not outputs or len(outputs) == 0:
                     raise ValueError("No embeddings returned from the query.")
-                embeddings.append(output)
+                embeddings = embeddings + outputs
             except Exception as e:
-                raise ValueError(
-                    f"No embeddings returned for document. Error: {e}"
-                ) from e
+                raise ValueError(f"No embeddings returned for batch. Error: {e}") from e
 
         return embeddings
 
     def query(self, payload, max_retries=3, retry_interval=5):
@@ -261,6 +262,7 @@ def query(self, payload, max_retries=3, retry_interval=5):
                         continue
                 else:
                     response.raise_for_status()
+                    break
 
             except requests.exceptions.RequestException:
                 if attempt < max_retries - 1:
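On the second hunk: without the new break, a successful response would not exit the retry loop, so the same payload could be re-posted on remaining attempts. Here is a standalone sketch of that retry shape, with illustrative URL and 503 handling rather than the encoder's exact code.

```python
import time

import requests


def post_with_retries(url: str, payload: dict, max_retries: int = 3, retry_interval: int = 5) -> dict:
    """Illustrative retry loop mirroring the patched query method's control flow."""
    response = None
    for attempt in range(max_retries):
        try:
            response = requests.post(url, json=payload)
            if response.status_code == 503:
                # Endpoint still loading the model: wait and retry.
                time.sleep(retry_interval)
                continue
            else:
                response.raise_for_status()
                break  # success: stop retrying instead of re-sending the request
        except requests.exceptions.RequestException:
            if attempt < max_retries - 1:
                time.sleep(retry_interval)
            else:
                raise
    # Note: a production version should also handle exhausting all retries on 503.
    return response.json()
```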