From cae644cfe79b237114c34a3e94b7d063e80c1e96 Mon Sep 17 00:00:00 2001 From: Mridula Date: Tue, 27 May 2025 14:46:56 +0100 Subject: [PATCH 1/3] Modified the code to include retrievers instead of rank --- notebooks/search/02-hybrid-search.ipynb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/notebooks/search/02-hybrid-search.ipynb b/notebooks/search/02-hybrid-search.ipynb index 4d7e7a87..6aa6163c 100644 --- a/notebooks/search/02-hybrid-search.ipynb +++ b/notebooks/search/02-hybrid-search.ipynb @@ -196,7 +196,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -204,13 +204,13 @@ " if len(response[\"hits\"][\"hits\"]) == 0:\n", " print(\"Your search returned no results.\")\n", " else:\n", - " for hit in response[\"hits\"][\"hits\"]:\n", + " for idx, hit in enumerate(response[\"hits\"][\"hits\"], start=1):\n", " id = hit[\"_id\"]\n", " publication_date = hit[\"_source\"][\"publish_date\"]\n", - " rank = hit[\"_rank\"]\n", + " score = hit[\"_score\"]\n", " title = hit[\"_source\"][\"title\"]\n", " summary = hit[\"_source\"][\"summary\"]\n", - " pretty_output = f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nRank: {rank}\"\n", + " pretty_output = f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nRank: {idx}\\nScore: {score}\"\n", " print(pretty_output)" ] }, @@ -231,12 +231,12 @@ "\n", "We then use [Reciprocal Rank Fusion (RRF)](https://www.elastic.co/guide/en/elasticsearch/reference/current/rrf.html) to balance the scores to provide a final list of documents, ranked in order of relevance. RRF is a ranking algorithm for combining results from different information retrieval strategies.\n", "\n", - "Note that _score is null, and we instead use _rank to show our top-ranked documents." 
+ "Note: With the retriever API, `_score` contains each document’s relevance score as computed by RRF, and a document’s rank is simply its position in the results (the first result is rank 1, the second is rank 2, and so on)." ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -289,7 +289,7 @@ " \"k\": 5,\n", " \"num_candidates\": 10,\n", " },\n", - " rank={\"rrf\": {}},\n", + " retriever={\"rrf\": {}},\n", ")\n", "\n", "pretty_response(response)" From 06f84bb8317d80cab753ff1d36f9142b48f2ad04 Mon Sep 17 00:00:00 2001 From: Mridula Date: Tue, 27 May 2025 17:39:21 +0100 Subject: [PATCH 2/3] Corrected the json response --- notebooks/search/02-hybrid-search.ipynb | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/notebooks/search/02-hybrid-search.ipynb b/notebooks/search/02-hybrid-search.ipynb index 6aa6163c..275fa739 100644 --- a/notebooks/search/02-hybrid-search.ipynb +++ b/notebooks/search/02-hybrid-search.ipynb @@ -280,18 +280,20 @@ "response = client.search(\n", " index=\"book_index\",\n", " size=5,\n", - " query={\"match\": {\"summary\": \"python programming\"}},\n", - " knn={\n", - " \"field\": \"title_vector\",\n", - " \"query_vector\": model.encode(\n", - " \"python programming\"\n", - " ).tolist(), # generate embedding for query so it can be compared to `title_vector`\n", - " \"k\": 5,\n", - " \"num_candidates\": 10,\n", - " },\n", - " retriever={\"rrf\": {}},\n", + " retriever={\n", + " \"rrf\": {\n", + " \"retrievers\": [\n", + " {\"standard\": {\"query\": {\"match\": {\"summary\": \"python programming\"}}}},\n", + " {\"knn\": {\n", + " \"field\": \"title_vector\",\n", + " \"query_vector\": model.encode(\"python programming\").tolist(),\n", + " \"k\": 5,\n", + " \"num_candidates\": 10\n", + " }}\n", + " ]\n", + " }\n", + " }\n", ")\n", - "\n", "pretty_response(response)" ] } From 2327dc710aedf5badfdc6d7ee62fc90817f85a05 Mon Sep 17 00:00:00 2001 From: Mridula Date: Tue, 27 May 2025 17:56:23 +0100 Subject: [PATCH 3/3] 
Format notebook with black-jupyter pre-commit hook --- notebooks/search/02-hybrid-search.ipynb | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/notebooks/search/02-hybrid-search.ipynb b/notebooks/search/02-hybrid-search.ipynb index 275fa739..5516974d 100644 --- a/notebooks/search/02-hybrid-search.ipynb +++ b/notebooks/search/02-hybrid-search.ipynb @@ -284,15 +284,17 @@ " \"rrf\": {\n", " \"retrievers\": [\n", " {\"standard\": {\"query\": {\"match\": {\"summary\": \"python programming\"}}}},\n", - " {\"knn\": {\n", - " \"field\": \"title_vector\",\n", - " \"query_vector\": model.encode(\"python programming\").tolist(),\n", - " \"k\": 5,\n", - " \"num_candidates\": 10\n", - " }}\n", + " {\n", + " \"knn\": {\n", + " \"field\": \"title_vector\",\n", + " \"query_vector\": model.encode(\"python programming\").tolist(),\n", + " \"k\": 5,\n", + " \"num_candidates\": 10,\n", + " }\n", + " },\n", " ]\n", " }\n", - " }\n", + " },\n", ")\n", "pretty_response(response)" ]