diff --git a/adsrefpipe/app.py b/adsrefpipe/app.py index f5c7516..fd9b22b 100755 --- a/adsrefpipe/app.py +++ b/adsrefpipe/app.py @@ -471,8 +471,10 @@ def populate_resolved_reference_records_pre_resolved(self, references: List, his item_num=item_num, reference_str=ref.get('refstr', None) or ref.get('refplaintext', None), bibcode='0000', + scix_id = '0000', score=-1, - reference_raw=ref.get('refraw', None)) + reference_raw=ref.get('refraw', None), + external_identifier=_ensure_list(ref.get('external_identifier', None)) or []) resolved_records.append(resolved_record) # add the id and remove xml_reference that is now in database ref['id'] = 'H%dI%d' % (history_id, item_num) @@ -573,6 +575,7 @@ def populate_tables_post_resolved(self, resolved_reference: List, source_bibcode item_num=item_num, reference_str=ref.get('refstring', None), bibcode=ref.get('bibcode', None), + scix_id=ref.get('scix_id',None), score=ref.get('score', None), reference_raw=ref.get('refstring', None), external_identifier=_ensure_list(ref.get('external_identifier', None)) or []) diff --git a/adsrefpipe/models.py b/adsrefpipe/models.py index b2db105..0c6ecd9 100755 --- a/adsrefpipe/models.py +++ b/adsrefpipe/models.py @@ -214,8 +214,9 @@ class ResolvedReference(Base): score = Column(Numeric) reference_raw = Column(String) external_identifier = Column(ARRAY(String)) + scix_id = Column(String) - def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: str, score: float, reference_raw: str, external_identifier: list = None): + def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: str, score: float, reference_raw: str, external_identifier: list = None, scix_id: str = None): """ initializes a resolved reference object @@ -223,6 +224,7 @@ def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: :param item_num: order of the reference within the source :param reference_str: reference string :param bibcode: resolved bibcode + :param scix_id: resolved scix_id :param score: confidence score of the resolved reference :param reference_raw: raw reference string :param external_identifier: list of external identifiers associated with the reference, e.g. ["doi:...", "arxiv:...", "ascl:..."] @@ -234,6 +236,7 @@ def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: self.score = score self.reference_raw = reference_raw self.external_identifier = external_identifier or [] + self.scix_id = scix_id def toJSON(self) -> dict: """ @@ -248,7 +251,8 @@ def toJSON(self) -> dict: 'score': self.score, 'item_num': self.item_num, **({'reference_raw': self.reference_raw} if self.reference_raw else {}), - 'external_identifier': self.external_identifier + 'external_identifier': self.external_identifier, + **({'scix_id': self.scix_id} if self.scix_id else {}) } diff --git a/adsrefpipe/tests/unittests/test_app.py b/adsrefpipe/tests/unittests/test_app.py index 837ba4e..de57411 100644 --- a/adsrefpipe/tests/unittests/test_app.py +++ b/adsrefpipe/tests/unittests/test_app.py @@ -4,7 +4,7 @@ sys.path.insert(0, project_home) import unittest -from unittest.mock import patch, MagicMock, Mock +from unittest.mock import patch, MagicMock, Mock, call from datetime import datetime, timedelta from collections import namedtuple @@ -42,6 +42,17 @@ def _get_external_identifier(rec): return getattr(rec, "external_identifier", None) or [] +def _get_scix_id(rec): + """ + Works whether rec is a dict (bulk mappings) or an ORM object. + """ + if rec is None: + return None + if isinstance(rec, dict): + return rec.get("scix_id") + return getattr(rec, "scix_id", None) + + def _make_session_scope_cm(session): """ Return a context manager mock that behaves like app.session_scope() @@ -125,24 +136,25 @@ def add_stub_data(self): ('2020-04-03 18:08:32', '2020-05-11 11:14:28', '128', '109') ] + # Add scix_id values (4th element per tuple) to exercise new column. resolved_reference = [ [ ('J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. ', - '2011LRR....14....2U', 1.0, ['arxiv:1009.5514']), + '2011LRR....14....2U', 1.0, ['arxiv:1009.5514'], 'scix:ABCD-1234-001a'), ('C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.', - '2017RPPh...80l6902M', 1.0, ['arxiv:1709.02923']) + '2017RPPh...80l6902M', 1.0, ['arxiv:1709.02923'], 'scix:ABCD-1234-001b') ], [ ('Alsubai, K. A., Parley, N. R., Bramich, D. M., et al. 2011, MNRAS, 417, 709.', - '2011MNRAS.417..709A', 1.0, ['doi:10.0000/mnras.417.709']), + '2011MNRAS.417..709A', 1.0, ['doi:10.0000/mnras.417.709'], 'scix:ABCD-1234-002a'), ('Arcangeli, J., Desert, J.-M., Parmentier, V., et al. 2019, A&A, 625, A136 ', - '2019A&A...625A.136A', 1.0, ['doi:10.0000/aa.625.A136']) + '2019A&A...625A.136A', 1.0, ['doi:10.0000/aa.625.A136'], 'scix:ABCD-1234-002b') ], [ ('Abellan, F. J., Indebetouw, R., Marcaide, J. M., et al. 2017, ApJL, 842, L24', - '2017ApJ...842L..24A', 1.0, ['ascl:1701.001']), + '2017ApJ...842L..24A', 1.0, ['ascl:1701.001'], 'scix:ABCD-1234-003a'), ('Ackermann, M., Albert, A., Atwood, W. B., et al. 2016, A&A, 586, A71 ', - '2016A&A...586A..71A', 1.0, ['doi:10.0000/aa.586.A71']) + '2016A&A...586A..71A', 1.0, ['doi:10.0000/aa.586.A71'], 'scix:ABCD-1234-003b') ], ] @@ -217,11 +229,10 @@ def _fake_insert_history_record(s, rec): reference_str=service[0], bibcode=service[1], score=service[2], - reference_raw=service[0] + reference_raw=service[0], + external_identifier=service[3], + scix_id=service[4], ) - # Populate external_identifier if your model supports it; keep safe if not. - if hasattr(resolved_record, "external_identifier"): - resolved_record.external_identifier = service[3] resolved_records.append(resolved_record) compare_record = CompareClassic( @@ -572,7 +583,6 @@ def toJSON(self): FakeRefSrcRow("arXiv", "0003arXiv.........Z", os.path.join(self.arXiv_stubdata_dir, "00003.raw")), ] - q_refsrc = MagicMock(name="q_refsrc") q_refsrc.filter.return_value = q_refsrc q_refsrc.all.side_effect = [rows_valid, []] # first call returns records, second is empty @@ -706,6 +716,7 @@ def test_populate_tables_post_resolved_with_classic(self): 'bibcode': '2023A&A...657A...1X', 'score': 1.0, 'external_identifier': ['doi:10.1234/abc', 'arxiv:2301.00001'], + 'scix_id': 'scix:ABCD-1234-ref1', }, { 'id': 'H1I2', @@ -713,6 +724,7 @@ def test_populate_tables_post_resolved_with_classic(self): 'bibcode': '2023A&A...657A...2X', 'score': 0.8, 'external_identifier': ['ascl:2301.001', 'doi:10.9999/xyz'], + 'scix_id': 'scix:ABCD-1234-ref2', } ] @@ -736,12 +748,15 @@ def test_populate_tables_post_resolved_with_classic(self): mock_insert.assert_called_once() mock_logger.assert_called_with("Updated 2 resolved reference records successfully.") - # Check whether external_identifier is populated with correct data + # Check whether external_identifier + scix_id are populated with correct data _, resolved_records = mock_update.call_args[0] self.assertEqual(len(resolved_records), 2) self.assertEqual(_get_external_identifier(resolved_records[0]), ['doi:10.1234/abc', 'arxiv:2301.00001']) self.assertEqual(_get_external_identifier(resolved_records[1]), ['ascl:2301.001', 'doi:10.9999/xyz']) + self.assertEqual(_get_scix_id(resolved_records[0]), 'scix:ABCD-1234-ref1') + self.assertEqual(_get_scix_id(resolved_records[1]), 'scix:ABCD-1234-ref2') + @patch("adsrefpipe.app.ProcessedHistory") @patch("adsrefpipe.app.ResolvedReference") @patch("adsrefpipe.app.CompareClassic") @@ -872,8 +887,6 @@ def test_filter_reprocess_query(self, mock_datetime): mock_query.filter.assert_called() called_args, _ = mock_query.filter.call_args compiled_query = called_args[0].compile(dialect=postgresql.dialect()) - print(str(called_args[0])) - print(compiled_query.params) self.assertTrue(str(called_args[0]), 'resolved_reference.score <= :score_1') self.assertTrue(compiled_query.params.get('score_1'), 0.8) # Note: expected_since is computed but filter clause details are app-specific. @@ -1034,6 +1047,40 @@ def test_compare_classic_toJSON(self): } self.assertEqual(compare.toJSON(), expected_json) + def test_resolved_reference_toJSON_includes_scix_id(self): + """Test ResolvedReference.toJSON includes scix_id when present""" + rr = ResolvedReference( + history_id=123, + item_num=1, + reference_str="Some ref", + bibcode="2020A&A...000A...1X", + score=0.9, + reference_raw="Some ref raw", + external_identifier=["doi:10.1234/xyz"], + scix_id="scix:ABCD-1234-0004", + ) + got = rr.toJSON() + self.assertEqual(got["history_id"], 123) + self.assertEqual(got["item_num"], 1) + self.assertEqual(got["bibcode"], "2020A&A...000A...1X") + self.assertEqual(got["external_identifier"], ["doi:10.1234/xyz"]) + self.assertEqual(got["scix_id"], "scix:ABCD-1234-0004") + + def test_resolved_reference_toJSON_omits_scix_id_when_none(self): + """Test ResolvedReference.toJSON omits scix_id when not set""" + rr = ResolvedReference( + history_id=123, + item_num=1, + reference_str="Some ref", + bibcode="2020A&A...000A...1X", + score=0.9, + reference_raw="Some ref raw", + external_identifier=["doi:10.1234/xyz"], + scix_id=None, + ) + got = rr.toJSON() + self.assertTrue("scix_id" not in got) + class TestDatabaseNoStubdata(unittest.TestCase): """ @@ -1115,7 +1162,8 @@ def test_populate_tables(self): "refstring": "J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. ", "refraw": "J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. ", "id": "H1I1", - "external_identifier": ["arxiv:1009.5514", "doi:10.1234/abc"] + "external_identifier": ["arxiv:1009.5514", "doi:10.1234/abc"], + "scix_id": "scix:ABCD-1234-0005", }, { "score": "1.0", @@ -1123,7 +1171,8 @@ def test_populate_tables(self): "refstring": "C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.", "refraw": "C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.", "id": "H1I2", - "external_identifier": ["arxiv:1709.02923", "ascl:2301.001"] + "external_identifier": ["arxiv:1709.02923", "ascl:2301.001"], + "scix_id": "scix:ABCD-1234-0006", } ] @@ -1170,6 +1219,8 @@ def test_populate_tables(self): self.assertEqual(len(got), 2) self.assertEqual(got[0]["external_identifier"], ["arxiv:1009.5514", "doi:10.1234/abc"]) self.assertEqual(got[1]["external_identifier"], ["arxiv:1709.02923", "ascl:2301.001"]) + self.assertEqual(got[0]["scix_id"], "scix:ABCD-1234-0005") + self.assertEqual(got[1]["scix_id"], "scix:ABCD-1234-0006") def test_get_parser_error(self): """ test get_parser when it errors for unrecognized source filename """