Skip to content

Commit

Permalink
Merge pull request #50 from FREVA-CLINT/contains-not
Browse files Browse the repository at this point in the history
Introduce exclusive search
  • Loading branch information
antarcticrainforest authored Jul 29, 2024
2 parents 802acd6 + 284f9c1 commit 785a2b2
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 24 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Changelog

All notable changes to this project will be documented in this file.
## [v2407.0.0]

### Changed
- Add ability to perform search that should *not* contain certain values

## [v2403.0.3]

### Changed
Expand Down
22 changes: 12 additions & 10 deletions docs/source/databrowser/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,24 +96,26 @@ There are many more options for defining a value for a given key:
| | expression to find |
| | what you want.) |
+-------------------------------------------------+------------------------+
| ``attribute=value1 attribute=value2`` | Search for files |
| | containing either |
| OR: | value1 OR value2 for |
| | the given attribute |
| ``attribute={value1,value2}`` | (note that's the same |
| | attribute twice!) |
| ``attribute=value1 [...] attribute=valueN`` | Search for files |
| | containing *any* of N |
| OR: | given values of same |
| | attribute. |
| ``attribute={value1,..,valueN}`` | |
| | |
+-------------------------------------------------+------------------------+
| ``attribute1=value1 attribute2=value2`` | Search for files |
| | containing value1 for |
| | attribute1 AND value2 |
| | for attribute2 |
+-------------------------------------------------+------------------------+
| ``attribute_not_=value`` | Search for files NOT |
| ``attribute=-value`` ``attribute=not value`` | Search for files NOT |
| | containing value |
+-------------------------------------------------+------------------------+
| ``attribute_not_=value1 attribute_not_=value2`` | Search for files |
| | containing neither |
| | value1 nor value2 |
| ``attribute=-value1 attribute=not value2`` | Search for files |
| | *not* containing given |
| OR | values. You can also |
| | combine search |
| ``attribute1_not_=value _not_attribute2=value`` | attributes |
+-------------------------------------------------+------------------------+

.. note::
Expand Down
62 changes: 49 additions & 13 deletions freva-rest/src/databrowser_api/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,9 @@ def primary_keys(self) -> list[str]:
if v == "primary"
]
else:
_keys = [k for (k, v) in self._freva_facets.items() if v == "primary"]
_keys = [
k for (k, v) in self._freva_facets.items() if v == "primary"
]
if self.flavour in ("cordex",):
for key in self.cordex_keys:
_keys.append(key)
Expand Down Expand Up @@ -473,11 +475,14 @@ async def validate_parameters(
"""
translator = Translator(flavour, translate)
for key in query:
key = key.lower().replace("_not_", "")
if (
key not in translator.valid_facets
and key not in ("time_select",) + cls.uniq_keys
):
raise HTTPException(status_code=422, detail="Could not validate input.")
raise HTTPException(
status_code=422, detail="Could not validate input."
)
return SolrSearch(
config,
flavour=flavour,
Expand Down Expand Up @@ -535,7 +540,9 @@ def adjust_time_string(
raise ValueError(f"Choose `time_select` from {methods}") from exc
start, _, end = time.lower().partition("to")
try:
start = parse(start or "1", default=datetime(1, 1, 1, 0, 0, 0)).isoformat()
start = parse(
start or "1", default=datetime(1, 1, 1, 0, 0, 0)
).isoformat()
end = parse(
end or "9999", default=datetime(9999, 12, 31, 23, 59, 59)
).isoformat()
Expand Down Expand Up @@ -598,7 +605,9 @@ async def init_intake_catalogue(self) -> Tuple[int, IntakeCatalogue]:
source[k] = result[k][0]
elif result.get(k):
source[k] = result[k]
catalogue["catalog_dict"].append(self.translator.translate_query(source))
catalogue["catalog_dict"].append(
self.translator.translate_query(source)
)

return search_status, IntakeCatalogue(
catalogue=catalogue, total_count=total_count
Expand Down Expand Up @@ -641,7 +650,8 @@ async def _iterintake(self) -> AsyncIterator[str]:
source = {
k: (
out[k][0]
if isinstance(out.get(k), list) and len(out.get(k)) == 1
if isinstance(out.get(k), list)
and len(out.get(k)) == 1
else out.get(k)
)
for k in [self.uniq_key] + self.translator.facet_hierachy
Expand All @@ -652,7 +662,9 @@ async def _iterintake(self) -> AsyncIterator[str]:
for line in list(encoder.iterencode(entry)):
yield line

async def intake_catalogue(self, search: IntakeCatalogue) -> AsyncIterator[str]:
async def intake_catalogue(
self, search: IntakeCatalogue
) -> AsyncIterator[str]:
"""Create an intake catalogue from the solr search."""
iteritems = tuple(
range(self.batch_size + 1, search.total_count, self.batch_size)
Expand Down Expand Up @@ -753,6 +765,31 @@ async def init_stream(self) -> Tuple[int, SearchResult]:
primary_facets=[],
)

def _join_facet_queries(
    self, key: str, facets: List[str]
) -> Tuple[str, str]:
    """Build the positive and the negated lucene expression for one facet.

    A value is treated as negated (must NOT match) when it starts with
    ``not `` (checked case-insensitively), when it starts with ``!`` or
    ``-``, or when the facet key itself carries the ``_not_`` marker.
    Every other value is treated as a positive match. The negation
    prefix is removed from the value before it is used.

    Parameters
    ----------
    key:
        Facet name as given in the query, optionally carrying ``_not_``.
    facets:
        Search values given for this facet.

    Returns
    -------
    Tuple[str, str]
        The ``" OR "``-joined positive values and the ``" OR "``-joined
        negated values, in that order. Each occurrence of an entry of
        ``self.escape_chars`` is prefixed with a backslash. A string is
        empty when there is no value of that kind.
    """
    # Normalise the key before inspecting it, so that ``File`` or
    # ``file_not_`` are still recognised as unique keys (whose values
    # must keep their case) and ``_NOT_`` is recognised as the marker.
    norm_key = key.lower()
    negate_all = "_not_" in norm_key
    keep_case = norm_key.replace("_not_", "") in self.uniq_keys

    negative: List[str] = []
    positive: List[str] = []
    for search_value in facets:
        if not keep_case:
            search_value = search_value.lower()
        if search_value.lower().startswith("not "):
            term, negated = search_value[len("not "):], True
        elif search_value.startswith(("!", "-")):
            # ``startswith`` is safe for empty strings, indexing is not.
            term, negated = search_value[1:], True
        else:
            term, negated = search_value, negate_all
        if not term:
            # An empty term would leave a dangling ``a OR `` expression.
            continue
        (negative if negated else positive).append(term)

    search_value_pos = " OR ".join(positive)
    search_value_neg = " OR ".join(negative)
    for char in self.escape_chars:
        search_value_pos = search_value_pos.replace(char, "\\" + char)
        search_value_neg = search_value_neg.replace(char, "\\" + char)
    return search_value_pos, search_value_neg

def _get_url(self) -> tuple[str, Dict[str, Any]]:
"""Get the url for the solr query."""
core = {
Expand All @@ -762,13 +799,12 @@ def _get_url(self) -> tuple[str, Dict[str, Any]]:
url = f"{self._config.get_core_url(core)}/select/"
query = []
for key, value in self.facets.items():
if key in self.uniq_keys:
search_value = " OR ".join(map(str, value))
else:
search_value = " OR ".join(map(str.lower, value))
for char in self.escape_chars:
search_value = search_value.replace(char, "\\" + char)
query.append(f"{key.lower()}:({search_value})")
query_pos, query_neg = self._join_facet_queries(key, value)
key = key.lower().replace("_not_", "")
if query_pos:
query.append(f"{key}:({query_pos})")
if query_neg:
query.append(f"-{key}:({query_neg})")
return url, {
"fq": self.time + ["", " AND ".join(query) or "*:*"],
"q": "*:*",
Expand Down
18 changes: 18 additions & 0 deletions freva-rest/src/databrowser_api/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,24 @@ def test_metadata_search(client: TestClient) -> None:
).json()
assert len(res6["facets"].keys()) == 1

res7 = client.get(
"api/databrowser/metadata_search/freva/file",
params={
"dataset": ["-cmip6-swift", "not cmip6-fs"],
"project": "cmip6",
},
).json()
assert "cmip6-swift" not in res7["facets"]["dataset"]
assert "cmip6-fs" not in res7["facets"]["dataset"]
assert "cmip6-hsm" in res7["facets"]["dataset"]
res8 = client.get(
"api/databrowser/metadata_search/freva/file",
params={
"project_not_": "cmip6",
},
).json()
assert "cmip6" not in res8["facets"]["project"]


def test_intake_search(client: TestClient) -> None:
"""Test the creation of intake catalogues."""
Expand Down
2 changes: 1 addition & 1 deletion freva-rest/src/freva_rest/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "2403.0.3"
__version__ = "2407.0.0"
__all__ = ["__version__"]


Expand Down

0 comments on commit 785a2b2

Please sign in to comment.