diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index f48756d793..7c0a743ff5 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -445,7 +445,7 @@ def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=Fal if len(comment_body) < 8000 or \ self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]: comment_record = Record( - id=issue_key + ".comment_" + str(j + 1), + id=issue_key + ".comment_" + str(j), text=comment_body, metadata=Metadata(repo=repo_name_for_index, username=username, # use issue username for all comments @@ -541,7 +541,7 @@ def _update_table_with_issues(self, issues_list, repo_name_for_index, ingest=Fal if len(comment_body) < 8000 or \ self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]: comment_record = Record( - id=issue_key + ".comment_" + str(j + 1), + id=issue_key + ".comment_" + str(j), text=comment_body, metadata=Metadata(repo=repo_name_for_index, username=username, # use issue username for all comments @@ -639,7 +639,7 @@ def _update_qdrant_with_issues(self, issues_list, repo_name_for_index, ingest=Fa if len(comment_body) < 8000 or \ self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]: comment_record = Record( - id=issue_key + ".comment_" + str(j + 1), + id=issue_key + ".comment_" + str(j), text=comment_body, metadata=Metadata(repo=repo_name_for_index, username=username, @@ -672,8 +672,20 @@ def _update_qdrant_with_issues(self, issues_list, repo_name_for_index, ingest=Fa get_logger().info('Upserting into Qdrant...') points = [] for row in df.to_dict(orient="records"): + point_uuid = uuid.uuid5( + uuid.NAMESPACE_DNS, + f"{repo_name_for_index}:{row['id']}", + ).hex points.append( - PointStruct(id=uuid.uuid5(uuid.NAMESPACE_DNS, row["id"]).hex, vector=row["vector"], payload={"id": row["id"], "text": row["text"], "metadata": row["metadata"]}) + PointStruct( + id=point_uuid, + vector=row["vector"], + payload={ + "id": row["id"], + "text": row["text"], + "metadata": row["metadata"], + }, + ) ) self.qdrant.upsert(collection_name=self.index_name, points=points) get_logger().info('Done') diff --git a/requirements.txt b/requirements.txt index 9ef63beb97..04e844f047 100644 --- a/requirements.txt +++ b/requirements.txt @@ -39,6 +39,7 @@ giteapy==1.0.8 # pinecone-datasets @ git+https://github.com/mrT23/pinecone-datasets.git@main # lancedb==0.5.1 # qdrant-client==1.15.1 +# pandas # required by qdrant and pinecone indexing paths # uncomment this to support language LangChainOpenAIHandler # langchain==0.2.0 # langchain-core==0.2.28