diff --git a/notebooks/search/02-hybrid-search.ipynb b/notebooks/search/02-hybrid-search.ipynb
index 4d7e7a87..5516974d 100644
--- a/notebooks/search/02-hybrid-search.ipynb
+++ b/notebooks/search/02-hybrid-search.ipynb
@@ -196,7 +196,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -204,13 +204,13 @@
     "    if len(response[\"hits\"][\"hits\"]) == 0:\n",
     "        print(\"Your search returned no results.\")\n",
     "    else:\n",
-    "        for hit in response[\"hits\"][\"hits\"]:\n",
+    "        for idx, hit in enumerate(response[\"hits\"][\"hits\"], start=1):\n",
     "            id = hit[\"_id\"]\n",
     "            publication_date = hit[\"_source\"][\"publish_date\"]\n",
-    "            rank = hit[\"_rank\"]\n",
+    "            score = hit[\"_score\"]\n",
     "            title = hit[\"_source\"][\"title\"]\n",
     "            summary = hit[\"_source\"][\"summary\"]\n",
-    "            pretty_output = f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nRank: {rank}\"\n",
+    "            pretty_output = f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nRank: {idx}\\nScore: {score}\"\n",
     "            print(pretty_output)"
    ]
   },
@@ -231,12 +231,12 @@
    "\n",
    "We then use [Reciprocal Rank Fusion (RRF)](https://www.elastic.co/guide/en/elasticsearch/reference/current/rrf.html) to balance the scores to provide a final list of documents, ranked in order of relevance. RRF is a ranking algorithm for combining results from different information retrieval strategies.\n",
    "\n",
-    "Note that _score is null, and we instead use _rank to show our top-ranked documents."
+    "Note: With the retriever API, _score contains the document’s relevance score, and the rank is simply the position in the results (first result is rank 1, etc.)."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -280,18 +280,22 @@
    "response = client.search(\n",
    "    index=\"book_index\",\n",
    "    size=5,\n",
-    "    query={\"match\": {\"summary\": \"python programming\"}},\n",
-    "    knn={\n",
-    "        \"field\": \"title_vector\",\n",
-    "        \"query_vector\": model.encode(\n",
-    "            \"python programming\"\n",
-    "        ).tolist(),  # generate embedding for query so it can be compared to `title_vector`\n",
-    "        \"k\": 5,\n",
-    "        \"num_candidates\": 10,\n",
+    "    retriever={\n",
+    "        \"rrf\": {\n",
+    "            \"retrievers\": [\n",
+    "                {\"standard\": {\"query\": {\"match\": {\"summary\": \"python programming\"}}}},\n",
+    "                {\n",
+    "                    \"knn\": {\n",
+    "                        \"field\": \"title_vector\",\n",
+    "                        \"query_vector\": model.encode(\"python programming\").tolist(),\n",
+    "                        \"k\": 5,\n",
+    "                        \"num_candidates\": 10,\n",
+    "                    }\n",
+    "                },\n",
+    "            ]\n",
+    "        }\n",
    "    },\n",
-    "    rank={\"rrf\": {}},\n",
    ")\n",
-    "\n",
    "pretty_response(response)"
   ]
  }
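
For reference, here is a minimal standalone sketch of the hybrid search pattern the updated cells use: a `standard` match retriever and a `knn` retriever combined under `rrf` via the retriever API, with results printed by position and `_score`. The retriever body is taken from the diff above; the connection details, the embedding model name (`all-MiniLM-L6-v2`), and the `book_index` mapping with a `title_vector` field are assumptions from the surrounding notebook, not shown in these hunks.

```python
# Sketch only: client URL and model name are assumptions, not part of this diff.
from elasticsearch import Elasticsearch
from sentence_transformers import SentenceTransformer

client = Elasticsearch("http://localhost:9200")  # assumed local cluster for illustration
model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed to match the indexed title_vector embeddings

query_text = "python programming"

response = client.search(
    index="book_index",
    size=5,
    retriever={
        "rrf": {
            "retrievers": [
                # Lexical leg: BM25 match on the summary field
                {"standard": {"query": {"match": {"summary": query_text}}}},
                # Semantic leg: kNN search over the title_vector embeddings
                {
                    "knn": {
                        "field": "title_vector",
                        "query_vector": model.encode(query_text).tolist(),
                        "k": 5,
                        "num_candidates": 10,
                    }
                },
            ]
        }
    },
)

# With retrievers, each hit carries the fused relevance score in _score;
# the rank is simply the hit's position in the returned list.
for rank, hit in enumerate(response["hits"]["hits"], start=1):
    print(rank, round(hit["_score"], 4), hit["_source"]["title"])
```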