Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 34 additions & 9 deletions openviking/models/rerank/openai_rerank.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,26 @@ def rerank_batch(self, query: str, documents: List[str]) -> Optional[List[float]
if not documents:
return []

# Filter out empty documents — rerank providers (e.g. DashScope) reject
# empty strings with HTTP 400.
valid_indices = [i for i, d in enumerate(documents) if d and d.strip()]
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Suggestion] (non-blocking) Please add regression tests for the new filtering behavior. The current suite covers neither a mixed batch such as ['doc', '', ' '] (which exercises the index remapping) nor the all-empty batch path, so the rerank/fallback semantics introduced here are not locked down.

if not valid_indices:
return [0.0] * len(documents)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Bug] (blocking) When every input document is empty or whitespace, this returns an all-zero score list instead of signaling rerank failure. HierarchicalRetriever._rerank_scores() treats any numeric list with the expected length as a successful rerank, so this path bypasses fallback to vector scores and can filter out otherwise retrievable results at the rerank threshold. Mixed batches should keep the current behavior, but an all-empty batch should return None or otherwise trigger the existing fallback path.


if len(valid_indices) < len(documents):
filtered_docs = [documents[i] for i in valid_indices]
logger.debug(
"[OpenAIRerankClient] Filtered %d empty documents from %d total",
len(documents) - len(valid_indices),
len(documents),
)
else:
filtered_docs = documents

req_body = {
"model": self.model_name,
"query": query,
"documents": documents,
"documents": filtered_docs,
}

try:
Expand All @@ -75,34 +91,43 @@ def rerank_batch(self, query: str, documents: List[str]) -> Optional[List[float]
result = response.json()

# Update token usage tracking (estimate, OpenAI rerank doesn't provide token info)
self._extract_and_update_token_usage(result, query, documents)
self._extract_and_update_token_usage(result, query, filtered_docs)

# Standard OpenAI/Cohere rerank format: results[].{index, relevance_score}
results = result.get("results")
if not results:
logger.warning(f"[OpenAIRerankClient] Unexpected response format: {result}")
return None

if len(results) != len(documents):
if len(results) != len(filtered_docs):
logger.warning(
"[OpenAIRerankClient] Unexpected rerank result length: expected=%s actual=%s",
len(documents),
len(filtered_docs),
len(results),
)
return None

# Results may not be in original order — sort by index
scores = [0.0] * len(documents)
# Map scores back to original document indices.
# Empty documents get a score of 0.0.
filtered_scores = [0.0] * len(filtered_docs)
for item in results:
idx = item.get("index")
if idx is None or not (0 <= idx < len(documents)):
if idx is None or not (0 <= idx < len(filtered_docs)):
logger.warning(
"[OpenAIRerankClient] Out-of-bounds or missing index in result: %s", item
)
return None
scores[idx] = item.get("relevance_score", 0.0)
filtered_scores[idx] = item.get("relevance_score", 0.0)

logger.debug(f"[OpenAIRerankClient] Reranked {len(documents)} documents")
scores = [0.0] * len(documents)
for fi, oi in enumerate(valid_indices):
scores[oi] = filtered_scores[fi]

logger.debug(
"[OpenAIRerankClient] Reranked %d documents (%d non-empty)",
len(documents),
len(valid_indices),
)
return scores

except Exception as e:
Expand Down
Loading