diff --git a/tests/share/search/__init__.py b/tests/share/search/__init__.py index 871256d44..76b608261 100644 --- a/tests/share/search/__init__.py +++ b/tests/share/search/__init__.py @@ -3,11 +3,10 @@ from typing import Iterable from unittest import mock -from share.search import index_strategy - @contextlib.contextmanager -def patch_index_strategies(strategies: Iterable[index_strategy.IndexStrategy]): +def patch_index_strategies(strategies: Iterable): + from share.search import index_strategy with mock.patch.object(index_strategy, '_AvailableStrategies', new=enum.Enum( '_AvailableStrategies', [ (_strategy.strategy_name, _strategy) @@ -15,3 +14,14 @@ def patch_index_strategies(strategies: Iterable[index_strategy.IndexStrategy]): ], )): yield + + +@contextlib.contextmanager +def patch_index_strategy(strategy): + from share.search import index_strategy as _module_to_patch + with ( + mock.patch.object(_module_to_patch, 'all_strategy_names', return_value=frozenset([strategy.strategy_name])), + mock.patch.object(_module_to_patch, 'each_strategy', return_value=[strategy]), + mock.patch.object(_module_to_patch, 'get_strategy', return_value=strategy), + ): + yield diff --git a/tests/share/search/end_to_end/__init__.py b/tests/share/search/end_to_end/__init__.py new file mode 100644 index 000000000..ea9b78354 --- /dev/null +++ b/tests/share/search/end_to_end/__init__.py @@ -0,0 +1 @@ +__all__ = () diff --git a/tests/share/search/end_to_end/_common.py b/tests/share/search/end_to_end/_common.py new file mode 100644 index 000000000..5501a07ab --- /dev/null +++ b/tests/share/search/end_to_end/_common.py @@ -0,0 +1,254 @@ +import datetime +import itertools +from urllib.parse import urlencode +from typing import Iterator + +from primitive_metadata import primitive_rdf as rdf + +from trove.vocab import mediatypes +from trove.vocab.namespaces import RDF, DCTERMS, OWL, FOAF, DCAT, BLARG, OSFMAP, TROVE +from tests.share.search.index_strategy._with_real_services import RealElasticTestCase +from tests.trove.factories import ( + create_indexcard, + index_indexcards, +) + + +# abstract base class -- subclasses need to implement RealElasticTestCase.get_index_strategy +class End2EndSearchTestCase(RealElasticTestCase): + MEDIATYPES = (mediatypes.JSONAPI,) # TODO: more + + def setUp(self): + super().setUp() + _indexcards = self._create_test_cards() + index_indexcards(self.index_strategy, _indexcards) + + ### + # test methods + + def test_like_osfsearch(self): + # cardsearch + for _queryparams, _expected_focus_iris in self._cardsearch_cases(): + self._test_get_for_each_mediatype( + url_path='/trove/index-card-search', + queryparams=_queryparams, + actual_getter=self._get_cardsearch_focus_iris, + expected=_expected_focus_iris, + ) + # valuesearch + for _queryparams, _expected_values in self._valuesearch_cases(): + self._test_get_for_each_mediatype( + url_path='/trove/index-value-search', + queryparams=_queryparams, + actual_getter=self._get_valuesearch_values, + expected=_expected_values, + ) + + ### + # internals + + def _test_get_for_each_mediatype( + self, + url_path, + queryparams, + actual_getter, + expected, + ): + for _mediatype in self.MEDIATYPES: + _response = self._send_get(url_path, queryparams, _mediatype) + _actual = actual_getter(_response) + self.assertEqual(_actual, expected) + + def _create_test_cards(self): + self.all_card_focus_iris = { + BLARG.myproj, + BLARG.mypreprint, + } + self.card__myproj = create_indexcard(BLARG.myproj, { + RDF.type: {OSFMAP.Project}, + DCTERMS.title: {rdf.literal('my project', language='en')}, + DCTERMS.description: {rdf.literal('this project sure is.', language='en')}, + OWL.sameAs: {'https://doi.example/13.618/7', 'http://raid.example/whatever'}, + DCTERMS.creator: {BLARG.a_person, BLARG.nother_person}, + OSFMAP.keyword: {rdf.literal('keyword', language='en')}, + DCAT.accessService: {BLARG.anOsfOrSomething}, + DCTERMS.created: {rdf.literal(datetime.date(2020, 2, 2))}, + }, rdf_tripledict={ + BLARG.a_person: { + RDF.type: {DCTERMS.Agent, FOAF.Person}, + FOAF.name: {rdf.literal('peerrr sssssooo oooonnn nnnnnnnn')}, + }, + BLARG.nother_person: { + RDF.type: {DCTERMS.Agent, FOAF.Person}, + FOAF.name: {rdf.literal('nootthhh eeerrrppp peeeerrrrssssooooonnnnn')}, + OSFMAP.affiliation: {BLARG.an_institution}, + }, + BLARG.an_institution: { + RDF.type: {DCTERMS.Agent, FOAF.Organization}, + FOAF.name: {rdf.literal('innssttt iiitttuuuu ttttiiiioooonnnnn')}, + OSFMAP.affiliation: {BLARG.an_institution}, + }, + }, deriver_iris=(TROVE['derive/osfmap_json'],)) + self.card__mypreprint = create_indexcard(BLARG.mypreprint, { + RDF.type: {OSFMAP.Preprint}, + DCTERMS.title: {rdf.literal('my preprint', language='en')}, + DCTERMS.description: {rdf.literal('this preprint sure is that.', language='en')}, + OWL.sameAs: {'https://doi.example/13.618/11', 'http://raid.example/whateverz'}, + DCTERMS.creator: {BLARG.nother_person, BLARG.third_person}, + OSFMAP.keyword: { + rdf.literal('keyword', language='en'), + rdf.literal('lockword', language='en'), + }, + DCAT.accessService: {BLARG.anOsfOrSomething}, + DCTERMS.created: {rdf.literal(datetime.date(2022, 2, 2))}, + }, rdf_tripledict={ + BLARG.nother_person: { + RDF.type: {DCTERMS.Agent, FOAF.Person}, + FOAF.name: {rdf.literal('nootthhh eeerrrppp peeeerrrrssssooooonnnnn')}, + }, + BLARG.third_person: { + RDF.type: {DCTERMS.Agent, FOAF.Person}, + FOAF.name: {rdf.literal('⚞33οΈβƒ£πŸ•’πŸ₯‰ ☘️🎢 Β³β‘Άβž‚ βšžπŸ‘©β€πŸ‘©β€πŸ‘§βšŸ γ›β¬±βšŸ')}, + }, + BLARG.an_institution: { + RDF.type: {DCTERMS.Agent, FOAF.Organization}, + FOAF.name: {rdf.literal('innssttt iiitttuuuu ttttiiiioooonnnnn')}, + }, + }, deriver_iris=(TROVE['derive/osfmap_json'],)) + return [ + self.card__myproj, + self.card__mypreprint, + ] + + def _send_get(self, base_url, queryparams, mediatype): + assert '?' not in base_url + queryparams['acceptMediatype'] = mediatype + _url = f'{base_url}?{urlencode(queryparams)}' + return self.client.get(_url) + + def _get_cardsearch_focus_iris(self, response): + if response.headers['Content-Type'] != mediatypes.JSONAPI: + raise NotImplementedError('TODO: more mediatypes') + _response_json = response.json() + return set(itertools.chain.from_iterable( + _json_resource['attributes']['resourceIdentifier'] + for _json_resource in _response_json['included'] + if _json_resource['type'] == 'index-card' + )) + + def _get_valuesearch_values(self, response): + if response.headers['Content-Type'] != mediatypes.JSONAPI: + raise NotImplementedError('TODO: more mediatypes') + _response_json = response.json() + return set(itertools.chain.from_iterable( + _json_resource['attributes']['resourceIdentifier'] + for _json_resource in _response_json['included'] + if _json_resource['type'] == 'index-card' + )) + + def _cardsearch_cases(self) -> Iterator[tuple[dict[str, str], set[str] | list[str]]]: + yield ( # empty baseline + {}, # no query params + self.all_card_focus_iris, + ) + yield ( # osf-search "all types" tab + { + 'cardSearchFilter[resourceType]': 'Registration,RegistrationComponent,Project,ProjectComponent,Preprint,Agent,File', + 'cardSearchFilter[accessService]': BLARG.anOsfOrSomething, + 'cardSearchText[*,creator.name,isContainedBy.creator.name]': '', + 'sort': '-relevance', + }, + self.all_card_focus_iris, + ) + yield ( # osf-search "all types" tab (with cardSearchText) + { + 'cardSearchFilter[resourceType]': 'Registration,RegistrationComponent,Project,ProjectComponent,Preprint,Agent,File', + 'cardSearchFilter[accessService]': BLARG.anOsfOrSomething, + 'cardSearchText[*,creator.name,isContainedBy.creator.name]': 'βšžπŸ‘©β€πŸ‘©β€πŸ‘§βšŸ', + 'sort': '-relevance', + }, + {BLARG.mypreprint}, + ) + yield ( # osf-search "projects" tab + { + 'cardSearchFilter[resourceType]': 'Project,ProjectComponent', + 'cardSearchFilter[accessService]': BLARG.anOsfOrSomething, + 'cardSearchText[*,creator.name,isContainedBy.creator.name]': '', + 'sort': '-relevance', + }, + {BLARG.myproj}, + ) + yield ( # osf-search "preprints" tab + { + 'cardSearchFilter[resourceType]': 'Preprint', + 'cardSearchFilter[accessService]': BLARG.anOsfOrSomething, + 'cardSearchText[*,creator.name,isContainedBy.creator.name]': '', + 'sort': '-relevance', + }, + {BLARG.mypreprint}, + ) + yield ( # osf-search "registrations" tab + { + 'cardSearchFilter[resourceType]': 'Registration,RegistrationComponent', + 'cardSearchFilter[accessService]': BLARG.anOsfOrSomething, + 'cardSearchText[*,creator.name,isContainedBy.creator.name]': '', + 'sort': '-relevance', + }, + set(), # TODO + ) + yield ( # osf-search "files" tab + { + 'cardSearchFilter[resourceType]': 'File', + 'cardSearchFilter[accessService]': BLARG.anOsfOrSomething, + 'cardSearchText[*,creator.name,isContainedBy.creator.name]': '', + 'sort': '-relevance', + }, + set(), # TODO + ) + + def _valuesearch_cases(self) -> Iterator[tuple[dict[str, str], set[str] | list[str]]]: + yield ( # simple baseline + {'valueSearchPropertyPath': 'resourceType'}, + {OSFMAP.Project, OSFMAP.Preprint}, + ) + yield ( # osf-search "all types" tab; "creator" facet + { + 'valueSearchPropertyPath': 'creator', + 'cardSearchFilter[resourceType]': 'Registration,RegistrationComponent,Project,ProjectComponent,Preprint,Agent,File', + 'cardSearchFilter[accessService]': BLARG.anOsfOrSomething, + 'cardSearchText[*,creator.name,isContainedBy.creator.name]': '', + 'sort': '-relevance', + }, + {BLARG.a_person, BLARG.nother_person, BLARG.third_person}, + ) + yield ( # osf-search "all types" tab; "creator" facet with valueSearchText + { + 'valueSearchPropertyPath': 'creator', + 'valueSearchText': 'βšžπŸ‘©β€πŸ‘©β€πŸ‘§βšŸ', + 'cardSearchFilter[resourceType]': 'Registration,RegistrationComponent,Project,ProjectComponent,Preprint,Agent,File', + 'cardSearchFilter[accessService]': BLARG.anOsfOrSomething, + 'cardSearchText[*,creator.name,isContainedBy.creator.name]': '', + 'sort': '-relevance', + }, + {BLARG.third_person}, + ) + yield ( # osf-search "preprints" tab; "creator" facet + { + 'valueSearchPropertyPath': 'creator', + 'cardSearchFilter[resourceType]': 'Preprint', + 'cardSearchFilter[accessService]': BLARG.anOsfOrSomething, + 'cardSearchText[*,creator.name,isContainedBy.creator.name]': '', + 'sort': '-relevance', + }, + {BLARG.nother_person, BLARG.third_person}, + ) + yield ( # osf-search "all types" tab; "dateCreated" facet + { + 'valueSearchPropertyPath': 'dateCreated', + 'cardSearchFilter[resourceType]': 'Registration,RegistrationComponent,Project,ProjectComponent,Preprint,Agent,File', + 'cardSearchFilter[accessService]': BLARG.anOsfOrSomething, + 'cardSearchText[*,creator.name,isContainedBy.creator.name]': '', + 'sort': '-relevance', + }, + {'2020', '2022'}, # year histogram + ) diff --git a/tests/share/search/end_to_end/test_osfsearch_on_trovesearch_denorm.py b/tests/share/search/end_to_end/test_osfsearch_on_trovesearch_denorm.py new file mode 100644 index 000000000..a29023158 --- /dev/null +++ b/tests/share/search/end_to_end/test_osfsearch_on_trovesearch_denorm.py @@ -0,0 +1,7 @@ +from share.search.index_strategy.trovesearch_denorm import TrovesearchDenormIndexStrategy +from . import _common + + +class TestOsfsearchOnTrovesearchDenorm(_common.End2EndSearchTestCase): + def get_index_strategy(self): # for RealElasticTestCase + return TrovesearchDenormIndexStrategy('test_osfsearch_on_trovesearch_denorm') diff --git a/tests/share/search/index_strategy/_common_trovesearch_tests.py b/tests/share/search/index_strategy/_common_trovesearch_tests.py index 6d6eab52b..e5d6c1fe0 100644 --- a/tests/share/search/index_strategy/_common_trovesearch_tests.py +++ b/tests/share/search/index_strategy/_common_trovesearch_tests.py @@ -15,6 +15,7 @@ create_indexcard, update_indexcard_content, create_supplement, + index_indexcards, ) from ._with_real_services import RealElasticTestCase @@ -30,7 +31,7 @@ def setUp(self): def test_for_smoke_without_daemon(self): _indexcard = self._create_indexcard( focus_iri=BLARG.hello, - rdf_tripledict={BLARG.hello: {RDFS.label: {rdf.literal('hello')}}}, + rdf_twopledict={RDFS.label: {rdf.literal('hello')}}, ) _messages_chunk = messages.MessagesChunk( messages.MessageType.UPDATE_INDEXCARD, @@ -44,7 +45,7 @@ def test_for_smoke_without_daemon(self): def test_for_smoke_with_daemon(self): _indexcard = self._create_indexcard( focus_iri=BLARG.hello, - rdf_tripledict={BLARG.hello: {RDFS.label: {rdf.literal('hello')}}}, + rdf_twopledict={RDFS.label: {rdf.literal('hello')}}, ) _messages_chunk = messages.MessagesChunk( messages.MessageType.UPDATE_INDEXCARD, @@ -78,11 +79,9 @@ def test_cardsearch_after_deletion(self): def test_cardsearch_after_updates(self): _cards = self._fill_test_data_for_querying() self._update_indexcard_content(_cards[BLARG.c], BLARG.c, { - BLARG.c: { - RDF.type: {BLARG.Thing}, - DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_c}, # subj_bc removed; subj_c added - DCTERMS.title: {rdf.literal('cccc')}, - }, + RDF.type: {BLARG.Thing}, + DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_c}, # subj_bc removed; subj_c added + DCTERMS.title: {rdf.literal('cccc')}, }) self._index_indexcards([_cards[BLARG.c]]) _cases = [ @@ -112,11 +111,9 @@ def test_cardsearch_pagination(self): _focus_iri = BLARG[f'i{_i}'] _expected_iris.add(_focus_iri) _cards.append(self._create_indexcard(_focus_iri, { - _focus_iri: { - RDF.type: {BLARG.Thing}, - DCTERMS.title: {rdf.literal(f'card #{_i}')}, - DCTERMS.created: {rdf.literal(_start_date + timedelta(weeks=_i, days=_i))}, - }, + RDF.type: {BLARG.Thing}, + DCTERMS.title: {rdf.literal(f'card #{_i}')}, + DCTERMS.created: {rdf.literal(_start_date + timedelta(weeks=_i, days=_i))}, })) self._index_indexcards(_cards) # gather all pages results: @@ -187,12 +184,10 @@ def test_valuesearch_after_deletion(self): def test_valuesearch_after_updates(self): _cards = self._fill_test_data_for_querying() self._update_indexcard_content(_cards[BLARG.c], BLARG.c, { - BLARG.c: { - RDF.type: {BLARG.Thing}, - DCTERMS.creator: {BLARG.someone_new}, # someone_else removed; someone_new added - DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_c, BLARG.subj_new}, # subj_bc removed; subj_new added - DCTERMS.title: {rdf.literal('cccc')}, - }, + RDF.type: {BLARG.Thing}, + DCTERMS.creator: {BLARG.someone_new}, # someone_else removed; someone_new added + DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_c, BLARG.subj_new}, # subj_bc removed; subj_new added + DCTERMS.title: {rdf.literal('cccc')}, }) self._index_indexcards([_cards[BLARG.c]]) _cases = [ @@ -239,16 +234,15 @@ def _assert_valuesearch_values(self, queryparams, expected_values): def _fill_test_data_for_querying(self): _card_a = self._create_indexcard(BLARG.a, { - BLARG.a: { - RDF.type: {BLARG.Thing}, - OWL.sameAs: {BLARG.a_same, BLARG.a_same2}, - DCTERMS.created: {rdf.literal(date(1999, 12, 31))}, - DCTERMS.creator: {BLARG.someone}, - DCTERMS.title: {rdf.literal('aaaa')}, - DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_a}, - DCTERMS.references: {BLARG.b, BLARG.c}, - DCTERMS.description: {rdf.literal('This place is not a place of honor... no highly esteemed deed is commemorated here... nothing valued is here.', language='en')}, - }, + RDF.type: {BLARG.Thing}, + OWL.sameAs: {BLARG.a_same, BLARG.a_same2}, + DCTERMS.created: {rdf.literal(date(1999, 12, 31))}, + DCTERMS.creator: {BLARG.someone}, + DCTERMS.title: {rdf.literal('aaaa')}, + DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_a}, + DCTERMS.references: {BLARG.b, BLARG.c}, + DCTERMS.description: {rdf.literal('This place is not a place of honor... no highly esteemed deed is commemorated here... nothing valued is here.', language='en')}, + }, rdf_tripledict={ BLARG.someone: { FOAF.name: {rdf.literal('some one')}, }, @@ -265,16 +259,15 @@ def _fill_test_data_for_querying(self): }, }) _card_b = self._create_indexcard(BLARG.b, { - BLARG.b: { - RDF.type: {BLARG.Thing}, - OWL.sameAs: {BLARG.b_same}, - DCTERMS.created: {rdf.literal(date(2012, 12, 31))}, - DCTERMS.creator: {BLARG.someone}, - DCTERMS.title: {rdf.literal('bbbb')}, - DCTERMS.subject: {BLARG.subj_b, BLARG.subj_bc}, - DCTERMS.references: {BLARG.c}, - DCTERMS.description: {rdf.literal('What is here was dangerous and repulsive to us. This message is a warning about danger. ', language='en')}, - }, + RDF.type: {BLARG.Thing}, + OWL.sameAs: {BLARG.b_same}, + DCTERMS.created: {rdf.literal(date(2012, 12, 31))}, + DCTERMS.creator: {BLARG.someone}, + DCTERMS.title: {rdf.literal('bbbb')}, + DCTERMS.subject: {BLARG.subj_b, BLARG.subj_bc}, + DCTERMS.references: {BLARG.c}, + DCTERMS.description: {rdf.literal('What is here was dangerous and repulsive to us. This message is a warning about danger. ', language='en')}, + }, rdf_tripledict={ BLARG.someone: { FOAF.name: {rdf.literal('some one')}, }, @@ -285,44 +278,37 @@ def _fill_test_data_for_querying(self): }, }) _card_c = self._create_indexcard(BLARG.c, { - BLARG.c: { - RDF.type: {BLARG.Thing}, - DCTERMS.created: {rdf.literal(date(2024, 12, 31))}, - DCTERMS.creator: {BLARG.someone_else}, - DCTERMS.title: {rdf.literal('cccc')}, - DCTERMS.subject: { - BLARG['subj_ac/'], # this one has an extra trailing slash - BLARG.subj_bc, - BLARG.subj_c, - }, - DCTERMS.description: {rdf.literal('The danger is unleashed only if you substantially disturb this place physically. This place is best shunned and left uninhabited.', language='en')}, + RDF.type: {BLARG.Thing}, + DCTERMS.created: {rdf.literal(date(2024, 12, 31))}, + DCTERMS.creator: {BLARG.someone_else}, + DCTERMS.title: {rdf.literal('cccc')}, + DCTERMS.subject: { + BLARG['subj_ac/'], # this one has an extra trailing slash + BLARG.subj_bc, + BLARG.subj_c, }, + DCTERMS.description: {rdf.literal('The danger is unleashed only if you substantially disturb this place physically. This place is best shunned and left uninhabited.', language='en')}, + }, rdf_tripledict={ BLARG.someone_else: { FOAF.name: {rdf.literal('some one else')}, }, }) create_supplement(_card_a, BLARG.a, { - BLARG.a: { - DCTERMS.replaces: {BLARG.a_past}, - DCAT.servesDataset: { - rdf.blanknode({DCAT.spatialResolutionInMeters: {rdf.literal(10)}}), - }, + DCTERMS.replaces: {BLARG.a_past}, + DCAT.servesDataset: { + rdf.blanknode({DCAT.spatialResolutionInMeters: {rdf.literal(10)}}), }, }) create_supplement(_card_b, BLARG.b, { - BLARG.b: { - DCTERMS.replaces: {BLARG.b_past}, - DCAT.servesDataset: { - rdf.blanknode({DCAT.spatialResolutionInMeters: {rdf.literal(7)}}), - }, + DCTERMS.replaces: {BLARG.b_past}, + DCAT.servesDataset: { + rdf.blanknode({DCAT.spatialResolutionInMeters: {rdf.literal(7)}}), }, }) create_supplement(_card_c, BLARG.c, { - BLARG.c: { - DCTERMS.replaces: {BLARG.c_past}, - DCAT.servesDataset: { - rdf.blanknode({DCAT.spatialResolutionInMeters: {rdf.literal(333)}}), - }, + DCTERMS.replaces: {BLARG.c_past}, + DCAT.servesDataset: { + rdf.blanknode({DCAT.spatialResolutionInMeters: {rdf.literal(333)}}), }, }) _cards = { @@ -608,8 +594,13 @@ def valuesearch_sameas_cases(self): {BLARG.subj_ac, BLARG.subj_a, BLARG.subj_c, BLARG.subj_bc}, ) - def _create_indexcard(self, focus_iri: str, rdf_tripledict: rdf.RdfTripleDictionary) -> trove_db.Indexcard: - _indexcard = create_indexcard(focus_iri, rdf_tripledict, (TROVE['derive/osfmap_json'],)) + def _create_indexcard( + self, + focus_iri: str, + rdf_twopledict: rdf.RdfTwopleDictionary | None = None, + rdf_tripledict: rdf.RdfTripleDictionary | None = None, + ) -> trove_db.Indexcard: + _indexcard = create_indexcard(focus_iri, rdf_twopledict, rdf_tripledict, (TROVE['derive/osfmap_json'],)) self._indexcard_focus_by_uuid[str(_indexcard.uuid)] = focus_iri return _indexcard @@ -617,21 +608,14 @@ def _update_indexcard_content( self, indexcard: trove_db.Indexcard, focus_iri: str, - rdf_tripledict: rdf.RdfTripleDictionary, + rdf_twopledict: rdf.RdfTwopleDictionary | None = None, + rdf_tripledict: rdf.RdfTripleDictionary | None = None, ) -> None: - update_indexcard_content(indexcard, focus_iri, rdf_tripledict) + update_indexcard_content(indexcard, focus_iri, rdf_twopledict, rdf_tripledict) self._indexcard_focus_by_uuid[str(indexcard.uuid)] = focus_iri def _index_indexcards(self, indexcards: Iterable[trove_db.Indexcard]): - _messages_chunk = messages.MessagesChunk( - messages.MessageType.UPDATE_INDEXCARD, - [_indexcard.id for _indexcard in indexcards], - ) - self.assertTrue(all( - _response.is_done - for _response in self.index_strategy.pls_handle_messages_chunk(_messages_chunk) - )) - self.index_strategy.pls_refresh() + index_indexcards(self.index_strategy, indexcards) def _delete_indexcards(self, indexcards: Iterable[trove_db.Indexcard]): for _indexcard in indexcards: diff --git a/tests/share/search/index_strategy/_with_real_services.py b/tests/share/search/index_strategy/_with_real_services.py index 8ad685026..a4219b312 100644 --- a/tests/share/search/index_strategy/_with_real_services.py +++ b/tests/share/search/index_strategy/_with_real_services.py @@ -1,3 +1,4 @@ +import abc import contextlib from unittest import mock @@ -8,17 +9,21 @@ from share.search.daemon import IndexerDaemonControl from share.search.index_messenger import IndexMessenger from share.search import index_strategy -from tests.share.search import patch_index_strategies +from tests.share.search import patch_index_strategy # base class for testing IndexStrategy subclasses with actual elasticsearch. # (using TransactionTestCase so there's NOT a transaction wrapping each test # and IndexerDaemon can use a separate db connection from a separate thread) -class RealElasticTestCase(TransactionTestCase): +class RealElasticTestCase(TransactionTestCase, abc.ABC): serialized_rollback = True # for TransactionTestCase; restore db after - # required for subclasses + @abc.abstractmethod def get_index_strategy(self) -> index_strategy.IndexStrategy: + '''return an IndexStrategy instance that will be tested + + override in subclasses to reuse these tests + ''' raise NotImplementedError(f'{self.__class__} must implement `get_index_strategy`') def setUp(self): @@ -26,7 +31,7 @@ def setUp(self): self.enterContext(mock.patch('share.models.core._setup_user_token_and_groups')) self.index_strategy = self.get_index_strategy() self.index_strategy.pls_teardown() # in case it already exists - self.enterContext(patch_index_strategies([self.index_strategy])) + self.enterContext(patch_index_strategy(self.index_strategy)) self.index_messenger = IndexMessenger( celery_app=celery_app, index_strategys=[self.index_strategy], diff --git a/tests/share/search/index_strategy/test_sharev2_elastic5.py b/tests/share/search/index_strategy/test_sharev2_elastic5.py index 016330c84..8d0d84e73 100644 --- a/tests/share/search/index_strategy/test_sharev2_elastic5.py +++ b/tests/share/search/index_strategy/test_sharev2_elastic5.py @@ -24,10 +24,8 @@ def setUp(self): self.__indexcard = create_indexcard( BLARG.hello, { - BLARG.hello: { - RDF.type: {SHAREv2.CreativeWork}, - DCTERMS.title: {rdf.literal('hello', language='en')}, - }, + RDF.type: {SHAREv2.CreativeWork}, + DCTERMS.title: {rdf.literal('hello', language='en')}, }, deriver_iris=[SHAREv2.sharev2_elastic], ) diff --git a/tests/share/search/index_strategy/test_sharev2_elastic8.py b/tests/share/search/index_strategy/test_sharev2_elastic8.py index fb3a1a5c9..c41667000 100644 --- a/tests/share/search/index_strategy/test_sharev2_elastic8.py +++ b/tests/share/search/index_strategy/test_sharev2_elastic8.py @@ -17,10 +17,8 @@ def setUp(self): self.__indexcard = create_indexcard( BLARG.hello, { - BLARG.hello: { - RDF.type: {SHAREv2.CreativeWork}, - DCTERMS.title: {rdf.literal('hello', language='en')}, - }, + RDF.type: {SHAREv2.CreativeWork}, + DCTERMS.title: {rdf.literal('hello', language='en')}, }, deriver_iris=[SHAREv2.sharev2_elastic], ) diff --git a/tests/trove/factories.py b/tests/trove/factories.py index 475cdc80f..1a7d4b31b 100644 --- a/tests/trove/factories.py +++ b/tests/trove/factories.py @@ -8,14 +8,26 @@ from trove import digestive_tract +__all__ = ( + 'create_indexcard', + 'create_supplement', + 'index_indexcards', + 'update_indexcard_content', +) + + def create_indexcard( focus_iri: str, - rdf_tripledict: rdf.RdfTripleDictionary, + rdf_twopledict: rdf.RdfTwopleDictionary | None = None, + rdf_tripledict: rdf.RdfTripleDictionary | None = None, deriver_iris: Collection[str] = (), ) -> trove_db.Indexcard: _suid = factories.SourceUniqueIdentifierFactory() _indexcard = trove_db.Indexcard.objects.create(source_record_suid=_suid) - update_indexcard_content(_indexcard, focus_iri, rdf_tripledict) + _indexcard.focus_identifier_set.add( + trove_db.ResourceIdentifier.objects.get_or_create_for_iri(focus_iri), + ) + update_indexcard_content(_indexcard, focus_iri, rdf_twopledict, rdf_tripledict) if deriver_iris: digestive_tract.derive(_indexcard, deriver_iris) return _indexcard @@ -24,15 +36,21 @@ def create_indexcard( def update_indexcard_content( indexcard: trove_db.Indexcard, focus_iri: str, - rdf_tripledict: rdf.RdfTripleDictionary, + rdf_twopledict: rdf.RdfTwopleDictionary | None = None, + rdf_tripledict: rdf.RdfTripleDictionary | None = None, ) -> None: - _raw = factories.RawDatumFactory(suid=indexcard.source_record_suid) + _card_content = _combined_tripledict(focus_iri, rdf_twopledict, rdf_tripledict) + _card_content_turtle = rdf.turtle_from_tripledict(_card_content) + _raw = factories.RawDatumFactory(suid=indexcard.source_record_suid, datum=_card_content_turtle) + indexcard.focus_identifier_set.add( + trove_db.ResourceIdentifier.objects.get_or_create_for_iri(focus_iri), + ) trove_db.LatestIndexcardRdf.objects.update_or_create( indexcard=indexcard, defaults={ 'from_raw_datum': _raw, 'focus_iri': focus_iri, - 'rdf_as_turtle': rdf.turtle_from_tripledict(rdf_tripledict), + 'rdf_as_turtle': _card_content_turtle, 'turtle_checksum_iri': 'foo', # not enforced }, ) @@ -41,15 +59,44 @@ def update_indexcard_content( def create_supplement( indexcard: trove_db.Indexcard, focus_iri: str, - rdf_tripledict: rdf.RdfTripleDictionary, + rdf_twopledict: rdf.RdfTwopleDictionary | None = None, + rdf_tripledict: rdf.RdfTripleDictionary | None = None, ) -> trove_db.SupplementaryIndexcardRdf: _supp_suid = factories.SourceUniqueIdentifierFactory() - _supp_raw = factories.RawDatumFactory(suid=_supp_suid) + _supp_content = _combined_tripledict(focus_iri, rdf_twopledict, rdf_tripledict) + _supp_content_turtle = rdf.turtle_from_tripledict(_supp_content) + _supp_raw = factories.RawDatumFactory(suid=_supp_suid, datum=_supp_content_turtle) return trove_db.SupplementaryIndexcardRdf.objects.create( from_raw_datum=_supp_raw, indexcard=indexcard, supplementary_suid=_supp_suid, focus_iri=focus_iri, - rdf_as_turtle=rdf.turtle_from_tripledict(rdf_tripledict), + rdf_as_turtle=_supp_content_turtle, turtle_checksum_iri='sup', # not enforced ) + + +def index_indexcards(index_strategy, indexcards): + from share.search import messages + _messages_chunk = messages.MessagesChunk( + messages.MessageType.UPDATE_INDEXCARD, + [_indexcard.id for _indexcard in indexcards], + ) + assert all( + _response.is_done + for _response in index_strategy.pls_handle_messages_chunk(_messages_chunk) + ) + index_strategy.pls_refresh() + + +def _combined_tripledict( + focus_iri: str, + rdf_twopledict: rdf.RdfTwopleDictionary | None = None, + rdf_tripledict: rdf.RdfTripleDictionary | None = None, +) -> rdf.RdfTripleDictionary: + _graph = rdf.RdfGraph() + if rdf_twopledict is not None: + _graph.add_twopledict(focus_iri, rdf_twopledict) + if rdf_tripledict is not None: + _graph.add_tripledict(rdf_tripledict) + return _graph.tripledict