Skip to content

Commit 3461ed6

Browse files
Python: Fixes to Cosmos DB NoSQL query syntax generation. (#10373)
### Motivation and Context Review information: 1. Why is this change required? There were several SQL query syntax generation issues for both the text_search() and vectorized_search() methods. 2. What problem does it solve? a) In `_build_where_clauses_from_filter()` the WHERE clause creation did not quote string values and did not properly handle list[] data model attributes, b) in the `_build_vector_query()` method the WHERE clause was placed after the ORDER BY clause, causing query syntax errors, c) in the `_build_search_text_query()` method the data_model_definition items were not being interrogated; this change interrogates them and thus adds `CONTAINS()` in the WHERE clause. 3. What scenario does it contribute to? The ability to use Azure Cosmos DB NoSQL for `text_search()` and `vectorized_search()`. 4. Issue resolution: #10368 ### Description The errors noted in the Issue and the bug fixes noted above correct the SQL query syntax generation for the `text_search()` and `vectorized_search()` methods, which now produce accurate results for performing both types of queries with and without filters (aka WHERE clause). ### Contribution Checklist - ✅ The code builds clean without any errors or warnings - ✅ The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - ✅ All unit tests pass, and I have added new tests where possible - 🙏 I didn't break anyone 😄 --------- Co-authored-by: Evan Mattson <[email protected]>
1 parent c445f5c commit 3461ed6

File tree

1 file changed

+24
-9
lines changed

1 file changed

+24
-9
lines changed

python/semantic_kernel/connectors/memory/azure_cosmos_db/azure_cosmos_db_no_sql_collection.py

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -178,23 +178,25 @@ def _build_search_text_query(self, options: VectorSearchOptions) -> str:
178178
where_clauses = self._build_where_clauses_from_filter(options.filter)
179179
contains_clauses = " OR ".join(
180180
f"CONTAINS(c.{field}, @search_text)"
181-
for field in self.data_model_definition.fields
182-
if isinstance(field, VectorStoreRecordDataField) and field.is_full_text_searchable
181+
for field, field_def in self.data_model_definition.fields.items()
182+
if isinstance(field_def, VectorStoreRecordDataField) and field_def.is_full_text_searchable
183183
)
184+
if where_clauses:
185+
where_clauses = f" {where_clauses} AND"
184186
return (
185187
f"SELECT TOP @top {self._build_select_clause(options.include_vectors)} " # nosec: B608
186-
f"FROM c WHERE ({contains_clauses}) AND {where_clauses}" # nosec: B608
188+
f"FROM c WHERE{where_clauses} ({contains_clauses})" # nosec: B608
187189
)
188190

189191
def _build_vector_query(self, options: VectorSearchOptions) -> str:
190192
where_clauses = self._build_where_clauses_from_filter(options.filter)
191193
if where_clauses:
192-
where_clauses = f"WHERE {where_clauses}"
194+
where_clauses = f"WHERE {where_clauses} "
193195
vector_field_name: str = self.data_model_definition.try_get_vector_field(options.vector_field_name).name # type: ignore
194196
return (
195-
f"SELECT TOP @top {self._build_select_clause(options.include_vectors)}," # nosec: B608
196-
f" VectorDistance(c.{vector_field_name}, @vector) AS distance FROM c ORDER " # nosec: B608
197-
f"BY VectorDistance(c.{vector_field_name}, @vector) {where_clauses}" # nosec: B608
197+
f"SELECT TOP @top {self._build_select_clause(options.include_vectors)}, " # nosec: B608
198+
f"VectorDistance(c.{vector_field_name}, @vector) AS distance FROM c " # nosec: B608
199+
f"{where_clauses}ORDER BY VectorDistance(c.{vector_field_name}, @vector)" # nosec: B608
198200
)
199201

200202
def _build_select_clause(self, include_vectors: bool) -> str:
@@ -218,11 +220,24 @@ def _build_where_clauses_from_filter(self, filters: VectorSearchFilter | None) -
218220
return ""
219221
clauses = []
220222
for filter in filters.filters:
223+
field_def = self.data_model_definition.fields[filter.field_name]
221224
match filter:
222225
case EqualTo():
223-
clauses.append(f"c.{filter.field_name} = {filter.value}")
226+
clause = ""
227+
if field_def.property_type in ["int", "float"]:
228+
clause = f"c.{filter.field_name} = {filter.value}"
229+
if field_def.property_type == "str":
230+
clause = f"c.{filter.field_name} = '{filter.value}'"
231+
if field_def.property_type == "list[str]":
232+
filter_value = f"ARRAY_CONTAINS(c.{filter.field_name}, '{filter.value}')"
233+
if field_def.property_type in ["list[int]", "list[float]"]:
234+
filter_value = f"ARRAY_CONTAINS(c.{filter.field_name}, {filter.value})"
235+
clauses.append(clause)
224236
case AnyTagsEqualTo():
225-
clauses.append(f"{filter.value} IN c.{filter.field_name}")
237+
filter_value = filter.value
238+
if field_def.property_type == "list[str]":
239+
filter_value = f"'{filter.value}'"
240+
clauses.append(f"{filter_value} IN c.{filter.field_name}")
226241
case _:
227242
raise ValueError(f"Unsupported filter: {filter}")
228243
return " AND ".join(clauses)

0 commit comments

Comments
 (0)