mitodl
diff --git a/learning_resources_search/api.py b/learning_resources_search/api.py
Lines changed: 30 additions & 48 deletions
diff --git a/learning_resources_search/api_test.py b/learning_resources_search/api_test.py
Lines changed: 9 additions & 33 deletions
diff --git a/learning_resources_search/connection.py b/learning_resources_search/connection.py
Lines changed: 0 additions & 30 deletions
diff --git a/learning_resources_search/constants.py b/learning_resources_search/constants.py
Lines changed: 4 additions & 25 deletions
@@ -13,14 +13,13 @@
 from learning_resources.models import LearningResource
 from learning_resources_search.connection import (
     get_default_alias_name,
-    get_vector_model_id,
 )
 from learning_resources_search.constants import (
-    COMBINED_INDEX,
     CONTENT_FILE_TYPE,
     COURSE_QUERY_FIELDS,
     COURSE_TYPE,
     DEPARTMENT_QUERY_FIELDS,
+    HYBRID_COMBINED_INDEX,
     HYBRID_SEARCH_MODE,
     LEARNING_RESOURCE,
     LEARNING_RESOURCE_QUERY_FIELDS,
@@ -55,6 +54,23 @@
     "-created_on",
 ]
 
+HYBRID_SEARCH_KNN_K_VALUE = 5
+HYBRID_SEARCH_PAGINATION_DEPTH = 10
+HYBRID_SEARCH_POST_PROCESSOR = {
+    "description": "Post processor for hybrid search",
+    "phase_results_processors": [
+        {
+            "normalization-processor": {
+                "normalization": {"technique": "min_max"},
+                "combination": {
+                    "technique": "arithmetic_mean",
+                    "parameters": {"weights": [0.8, 0.2]},
+                },
+            }
+        }
+    ],
+}
+
 
 def gen_content_file_id(content_file_id):
     """
@@ -86,7 +102,7 @@ def relevant_indexes(resource_types, aggregations, endpoint, use_hybrid_search):
     if endpoint == CONTENT_FILE_TYPE:
         return [get_default_alias_name(COURSE_TYPE)]
     elif use_hybrid_search:
-        return [get_default_alias_name(COMBINED_INDEX)]
+        return [get_default_alias_name(HYBRID_COMBINED_INDEX)]
 
     if aggregations and "resource_type" in aggregations:
         return map(get_default_alias_name, LEARNING_RESOURCE_TYPES)
@@ -652,41 +668,22 @@ def add_text_query_to_search(
         text_query = {"bool": {"must": [text_query], "filter": query_type_query}}
 
     if use_hybrid_search:
-        vector_model_id = get_vector_model_id()
-        if not vector_model_id:
-            log.error("Vector model not found. Cannot perform hybrid search.")
-            error_message = "Vector model not found."
-            raise ValueError(error_message)
-
-        vector_query_description = {
-            "neural": {
-                "description_embedding": {
-                    "query_text": text,
-                    "model_id": vector_model_id,
-                    "min_score": 0.015,
-                },
-            }
-        }
-
-        vector_query_title = {
-            "neural": {
-                "title_embedding": {
-                    "query_text": text,
-                    "model_id": vector_model_id,
-                    "min_score": 0.015,
-                },
+        encoder = dense_encoder()
+        query_vector = encoder.embed_query(text)
+        vector_query = {
+            "knn": {
+                "vector_embedding": {
+                    "vector": query_vector,
+                    "k": HYBRID_SEARCH_KNN_K_VALUE,
+                }
             }
         }
 
         search = search.extra(
             query={
                 "hybrid": {
-                    "pagination_depth": 10,
-                    "queries": [
-                        text_query,
-                        vector_query_description,
-                        vector_query_title,
-                    ],
+                    "pagination_depth": HYBRID_SEARCH_PAGINATION_DEPTH,
+                    "queries": [text_query, vector_query],
                 }
             }
         )
@@ -803,22 +800,7 @@ def execute_learn_search(search_params):
     search = construct_search(search_params)
 
     if search_params.get("search_mode") == HYBRID_SEARCH_MODE:
-        search = search.extra(
-            search_pipeline={
-                "description": "Post processor for hybrid search",
-                "phase_results_processors": [
-                    {
-                        "normalization-processor": {
-                            "normalization": {"technique": "min_max"},
-                            "combination": {
-                                "technique": "arithmetic_mean",
-                                "parameters": {"weights": [0.6, 0.2, 0.2]},
-                            },
-                        }
-                    }
-                ],
-            }
-        )
+        search = search.extra(search_pipeline=HYBRID_SEARCH_POST_PROCESSOR)
 
     results = search.execute().to_dict()
     if results.get("_shards", {}).get("failures"):
 
@@ -1946,8 +1946,7 @@ def test_execute_learn_search_for_learning_resource_query(opensearch):
                 "content",
                 "summary",
                 "flashcards",
-                "description_embedding",
-                "title_embedding",
+                "vector_embedding",
             ]
         },
     }
@@ -2395,8 +2394,7 @@ def test_execute_learn_search_with_script_score(
                 "content",
                 "summary",
                 "flashcards",
-                "description_embedding",
-                "title_embedding",
+                "vector_embedding",
             ]
         },
     }
@@ -2417,10 +2415,8 @@ def test_execute_learn_search_with_hybrid_search(mocker, settings, opensearch):
 
     settings.DEFAULT_SEARCH_MODE = "best_fields"
 
-    mocker.patch(
-        "learning_resources_search.api.get_vector_model_id",
-        return_value="vector_model_id",
-    )
+    mock_encoder = mocker.patch("learning_resources_search.api.dense_encoder")()
+    mock_encoder.embed_query.return_value = [0.1, 0.2, 0.3]
 
     search_params = {
         "aggregations": ["offered_by"],
@@ -2727,24 +2723,7 @@ def test_execute_learn_search_with_hybrid_search(mocker, settings, opensearch):
                             "filter": {"exists": {"field": "resource_type"}},
                         }
                     },
-                    {
-                        "neural": {
-                            "description_embedding": {
-                                "query_text": "math",
-                                "model_id": "vector_model_id",
-                                "min_score": 0.015,
-                            }
-                        }
-                    },
-                    {
-                        "neural": {
-                            "title_embedding": {
-                                "query_text": "math",
-                                "model_id": "vector_model_id",
-                                "min_score": 0.015,
-                            }
-                        }
-                    },
+                    {"knn": {"vector_embedding": {"vector": [0.1, 0.2, 0.3], "k": 5}}},
                 ],
             }
         },
@@ -2805,7 +2784,7 @@ def test_execute_learn_search_with_hybrid_search(mocker, settings, opensearch):
                         "normalization": {"technique": "min_max"},
                         "combination": {
                             "technique": "arithmetic_mean",
-                            "parameters": {"weights": [0.6, 0.2, 0.2]},
+                            "parameters": {"weights": [0.8, 0.2]},
                         },
                     }
                 }
@@ -2824,8 +2803,7 @@ def test_execute_learn_search_with_hybrid_search(mocker, settings, opensearch):
                 "content",
                 "summary",
                 "flashcards",
-                "description_embedding",
-                "title_embedding",
+                "vector_embedding",
             ]
         },
     }
@@ -3217,8 +3195,7 @@ def test_execute_learn_search_with_min_score(mocker, settings, opensearch):
                 "content",
                 "summary",
                 "flashcards",
-                "description_embedding",
-                "title_embedding",
+                "vector_embedding",
             ]
         },
     }
@@ -3396,8 +3373,7 @@ def test_execute_learn_search_for_content_file_query(opensearch):
                 "content",
                 "summary",
                 "flashcards",
-                "description_embedding",
-                "title_embedding",
+                "vector_embedding",
             ]
         },
     }
 
@@ -135,33 +135,3 @@ def refresh_index(index):
     """
     conn = get_conn()
     conn.indices.refresh(index)
-
-
-def get_vector_model_id():
-    """
-    Get the model ID for the currently loaded vector model
-    """
-    conn = get_conn()
-    model_name = settings.OPENSEARCH_VECTOR_MODEL_NAME
-    body = {"query": {"term": {"name.keyword": model_name}}}
-    models = conn.transport.perform_request(
-        "GET", "/_plugins/_ml/models/_search", body=body
-    )
-
-    if len(models.get("hits", {}).get("hits", [])) > 0:
-        return models["hits"]["hits"][0]["_source"]["model_id"]
-
-    return None
-
-
-def get_vector_model_info():
-    """
-    Get information about the currently loaded vector model
-    """
-
-    conn = get_conn()
-    model_id = get_vector_model_id()
-    if not model_id:
-        return None
-
-    return conn.transport.perform_request("GET", f"/_plugins/_ml/models/{model_id}")
@@ -22,8 +22,7 @@
 CURRENT_INDEX = "current_index"
 REINDEXING_INDEX = "reindexing_index"
 BOTH_INDEXES = "all_indexes"
-COMBINED_INDEX = "combined_hybrid"
-
+HYBRID_COMBINED_INDEX = "combined_hybrid"
 LEARNING_RESOURCE = "learning_resource"
 HYBRID_SEARCH_MODE = "hybrid"
 
@@ -49,7 +48,7 @@ class IndexestoUpdate(Enum):
 )
 
 
-BASE_INDEXES = (PERCOLATE_INDEX_TYPE, COMBINED_INDEX)
+BASE_INDEXES = (PERCOLATE_INDEX_TYPE, HYBRID_COMBINED_INDEX)
 
 ALL_INDEX_TYPES = BASE_INDEXES + LEARNING_RESOURCE_TYPES
 
@@ -323,26 +322,7 @@ class FilterConfig:
     "max_weekly_hours": {"type": "integer"},
 }
 
-EMBEDDING_FIELDS = {
-    "title_embedding": {
-        "type": "knn_vector",
-        "method": {
-            "engine": "lucene",
-            "space_type": "l2",
-            "name": "hnsw",
-            "parameters": {},
-        },
-    },
-    "description_embedding": {
-        "type": "knn_vector",
-        "method": {
-            "engine": "lucene",
-            "space_type": "l2",
-            "name": "hnsw",
-            "parameters": {},
-        },
-    },
-}
+EMBEDDING_FIELDS = {"vector_embedding": {"type": "knn_vector"}}
 
 
 CONTENT_FILE_MAP = {
@@ -471,8 +451,7 @@ class FilterConfig:
     "content",
     "summary",
     "flashcards",
-    "description_embedding",
-    "title_embedding",
+    "vector_embedding",
 ]
 
 LEARNING_RESOURCE_SEARCH_SORTBY_OPTIONS = {