Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions openviking/utils/embedding_utils.py
Original file line number Diff line number Diff line change
@@ -333,6 +333,7 @@ async def index_resource(
``"resource"``.
"""
viking_fs = get_viking_fs()
+ vector_store = viking_fs.vector_store
context_type = get_context_type_for_uri(uri)

# 1. Index Directory Metadata
@@ -373,11 +374,21 @@ async def index_resource(

file_uri = file_info.get("uri") or f"{uri}/{file_name}"

- # For direct indexing, we might not have summaries.
- # We pass empty summary_dict, vectorize_file will try to read content for text files.
+ # Preserve existing abstract from vector index during reindex.
+ # Without this, reindex overwrites VLM-generated abstracts with empty
+ # strings, causing rerank to lose document differentiation ability.
+ existing_abstract = ""
+ if vector_store:
+ try:
+ existing = await vector_store.fetch_by_uri(file_uri, ctx=ctx)
+ if existing:
+ existing_abstract = existing.get("abstract", "") or ""
+ except Exception:
+ pass
+
await vectorize_file(
file_path=file_uri,
- summary_dict={"name": file_name},
+ summary_dict={"name": file_name, "summary": existing_abstract},
parent_uri=uri,
context_type=context_type,
ctx=ctx,
Expand Down
Loading