Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion adsrefpipe/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,8 +471,10 @@ def populate_resolved_reference_records_pre_resolved(self, references: List, his
item_num=item_num,
reference_str=ref.get('refstr', None) or ref.get('refplaintext', None),
bibcode='0000',
scix_id = '0000',
score=-1,
reference_raw=ref.get('refraw', None))
reference_raw=ref.get('refraw', None),
external_identifier=_ensure_list(ref.get('external_identifier', None)) or [])
resolved_records.append(resolved_record)
# add the id and remove xml_reference that is now in database
ref['id'] = 'H%dI%d' % (history_id, item_num)
Expand Down Expand Up @@ -573,6 +575,7 @@ def populate_tables_post_resolved(self, resolved_reference: List, source_bibcode
item_num=item_num,
reference_str=ref.get('refstring', None),
bibcode=ref.get('bibcode', None),
scix_id=ref.get('scix_id',None),
score=ref.get('score', None),
reference_raw=ref.get('refstring', None),
external_identifier=_ensure_list(ref.get('external_identifier', None)) or [])
Expand Down
8 changes: 6 additions & 2 deletions adsrefpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,15 +214,17 @@ class ResolvedReference(Base):
score = Column(Numeric)
reference_raw = Column(String)
external_identifier = Column(ARRAY(String))
scix_id = Column(String)

def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: str, score: float, reference_raw: str, external_identifier: list = None):
def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: str, score: float, reference_raw: str, external_identifier: list = None, scix_id: str = None):
"""
initializes a resolved reference object

:param history_id: ID of the related processed history entry
:param item_num: order of the reference within the source
:param reference_str: reference string
:param bibcode: resolved bibcode
:param scix_id: resolved scix_id
:param score: confidence score of the resolved reference
:param reference_raw: raw reference string
:param external_identifier: list of external identifiers associated with the reference, e.g. ["doi:...", "arxiv:...", "ascl:..."]
Expand All @@ -234,6 +236,7 @@ def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode:
self.score = score
self.reference_raw = reference_raw
self.external_identifier = external_identifier or []
self.scix_id = scix_id

def toJSON(self) -> dict:
"""
Expand All @@ -248,7 +251,8 @@ def toJSON(self) -> dict:
'score': self.score,
'item_num': self.item_num,
**({'reference_raw': self.reference_raw} if self.reference_raw else {}),
'external_identifier': self.external_identifier
'external_identifier': self.external_identifier,
**({'scix_id': self.scix_id} if self.scix_id else {})
}


Expand Down
85 changes: 68 additions & 17 deletions adsrefpipe/tests/unittests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
sys.path.insert(0, project_home)

import unittest
from unittest.mock import patch, MagicMock, Mock
from unittest.mock import patch, MagicMock, Mock, call
from datetime import datetime, timedelta
from collections import namedtuple

Expand Down Expand Up @@ -42,6 +42,17 @@ def _get_external_identifier(rec):
return getattr(rec, "external_identifier", None) or []


def _get_scix_id(rec):
"""
Works whether rec is a dict (bulk mappings) or an ORM object.
"""
if rec is None:
return None
if isinstance(rec, dict):
return rec.get("scix_id")
return getattr(rec, "scix_id", None)


def _make_session_scope_cm(session):
"""
Return a context manager mock that behaves like app.session_scope()
Expand Down Expand Up @@ -125,24 +136,25 @@ def add_stub_data(self):
('2020-04-03 18:08:32', '2020-05-11 11:14:28', '128', '109')
]

# Add scix_id values (5th element of each tuple, i.e. index 4) to exercise the new column.
resolved_reference = [
[
('J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. ',
'2011LRR....14....2U', 1.0, ['arxiv:1009.5514']),
'2011LRR....14....2U', 1.0, ['arxiv:1009.5514'], 'scix:ABCD-1234-001a'),
('C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.',
'2017RPPh...80l6902M', 1.0, ['arxiv:1709.02923'])
'2017RPPh...80l6902M', 1.0, ['arxiv:1709.02923'], 'scix:ABCD-1234-001b')
],
[
('Alsubai, K. A., Parley, N. R., Bramich, D. M., et al. 2011, MNRAS, 417, 709.',
'2011MNRAS.417..709A', 1.0, ['doi:10.0000/mnras.417.709']),
'2011MNRAS.417..709A', 1.0, ['doi:10.0000/mnras.417.709'], 'scix:ABCD-1234-002a'),
('Arcangeli, J., Desert, J.-M., Parmentier, V., et al. 2019, A&A, 625, A136 ',
'2019A&A...625A.136A', 1.0, ['doi:10.0000/aa.625.A136'])
'2019A&A...625A.136A', 1.0, ['doi:10.0000/aa.625.A136'], 'scix:ABCD-1234-002b')
],
[
('Abellan, F. J., Indebetouw, R., Marcaide, J. M., et al. 2017, ApJL, 842, L24',
'2017ApJ...842L..24A', 1.0, ['ascl:1701.001']),
'2017ApJ...842L..24A', 1.0, ['ascl:1701.001'], 'scix:ABCD-1234-003a'),
('Ackermann, M., Albert, A., Atwood, W. B., et al. 2016, A&A, 586, A71 ',
'2016A&A...586A..71A', 1.0, ['doi:10.0000/aa.586.A71'])
'2016A&A...586A..71A', 1.0, ['doi:10.0000/aa.586.A71'], 'scix:ABCD-1234-003b')
],
]

Expand Down Expand Up @@ -217,11 +229,10 @@ def _fake_insert_history_record(s, rec):
reference_str=service[0],
bibcode=service[1],
score=service[2],
reference_raw=service[0]
reference_raw=service[0],
external_identifier=service[3],
scix_id=service[4],
)
# Populate external_identifier if your model supports it; keep safe if not.
if hasattr(resolved_record, "external_identifier"):
resolved_record.external_identifier = service[3]
resolved_records.append(resolved_record)

compare_record = CompareClassic(
Expand Down Expand Up @@ -572,7 +583,6 @@ def toJSON(self):
FakeRefSrcRow("arXiv", "0003arXiv.........Z", os.path.join(self.arXiv_stubdata_dir, "00003.raw")),
]


q_refsrc = MagicMock(name="q_refsrc")
q_refsrc.filter.return_value = q_refsrc
q_refsrc.all.side_effect = [rows_valid, []] # first call returns records, second is empty
Expand Down Expand Up @@ -706,13 +716,15 @@ def test_populate_tables_post_resolved_with_classic(self):
'bibcode': '2023A&A...657A...1X',
'score': 1.0,
'external_identifier': ['doi:10.1234/abc', 'arxiv:2301.00001'],
'scix_id': 'scix:ABCD-1234-ref1',
},
{
'id': 'H1I2',
'refstring': 'Reference 2',
'bibcode': '2023A&A...657A...2X',
'score': 0.8,
'external_identifier': ['ascl:2301.001', 'doi:10.9999/xyz'],
'scix_id': 'scix:ABCD-1234-ref2',
}
]

Expand All @@ -736,12 +748,15 @@ def test_populate_tables_post_resolved_with_classic(self):
mock_insert.assert_called_once()
mock_logger.assert_called_with("Updated 2 resolved reference records successfully.")

# Check whether external_identifier is populated with correct data
# Check whether external_identifier + scix_id are populated with correct data
_, resolved_records = mock_update.call_args[0]
self.assertEqual(len(resolved_records), 2)
self.assertEqual(_get_external_identifier(resolved_records[0]), ['doi:10.1234/abc', 'arxiv:2301.00001'])
self.assertEqual(_get_external_identifier(resolved_records[1]), ['ascl:2301.001', 'doi:10.9999/xyz'])

self.assertEqual(_get_scix_id(resolved_records[0]), 'scix:ABCD-1234-ref1')
self.assertEqual(_get_scix_id(resolved_records[1]), 'scix:ABCD-1234-ref2')

@patch("adsrefpipe.app.ProcessedHistory")
@patch("adsrefpipe.app.ResolvedReference")
@patch("adsrefpipe.app.CompareClassic")
Expand Down Expand Up @@ -872,8 +887,6 @@ def test_filter_reprocess_query(self, mock_datetime):
mock_query.filter.assert_called()
called_args, _ = mock_query.filter.call_args
compiled_query = called_args[0].compile(dialect=postgresql.dialect())
print(str(called_args[0]))
print(compiled_query.params)
self.assertTrue(str(called_args[0]), 'resolved_reference.score <= :score_1')
self.assertTrue(compiled_query.params.get('score_1'), 0.8)
# Note: expected_since is computed but filter clause details are app-specific.
Expand Down Expand Up @@ -1034,6 +1047,40 @@ def test_compare_classic_toJSON(self):
}
self.assertEqual(compare.toJSON(), expected_json)

def test_resolved_reference_toJSON_includes_scix_id(self):
    """Check that ResolvedReference.toJSON serializes scix_id when it is set"""
    init_kwargs = dict(
        history_id=123,
        item_num=1,
        reference_str="Some ref",
        bibcode="2020A&A...000A...1X",
        score=0.9,
        reference_raw="Some ref raw",
        external_identifier=["doi:10.1234/xyz"],
        scix_id="scix:ABCD-1234-0004",
    )
    serialized = ResolvedReference(**init_kwargs).toJSON()

    # fields of the serialized record that must match, checked in this order
    expected_fields = (
        ("history_id", 123),
        ("item_num", 1),
        ("bibcode", "2020A&A...000A...1X"),
        ("external_identifier", ["doi:10.1234/xyz"]),
        ("scix_id", "scix:ABCD-1234-0004"),
    )
    for field_name, expected_value in expected_fields:
        self.assertEqual(serialized[field_name], expected_value)

def test_resolved_reference_toJSON_omits_scix_id_when_none(self):
    """Test ResolvedReference.toJSON omits scix_id when not set"""
    rr = ResolvedReference(
        history_id=123,
        item_num=1,
        reference_str="Some ref",
        bibcode="2020A&A...000A...1X",
        score=0.9,
        reference_raw="Some ref raw",
        external_identifier=["doi:10.1234/xyz"],
        scix_id=None,
    )
    got = rr.toJSON()
    # assertNotIn reports the offending key and the container on failure,
    # whereas assertTrue(... not in ...) only reports "False is not true"
    self.assertNotIn("scix_id", got)


class TestDatabaseNoStubdata(unittest.TestCase):
"""
Expand Down Expand Up @@ -1115,15 +1162,17 @@ def test_populate_tables(self):
"refstring": "J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. ",
"refraw": "J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. ",
"id": "H1I1",
"external_identifier": ["arxiv:1009.5514", "doi:10.1234/abc"]
"external_identifier": ["arxiv:1009.5514", "doi:10.1234/abc"],
"scix_id": "scix:ABCD-1234-0005",
},
{
"score": "1.0",
"bibcode": "2017RPPh...80l6902M",
"refstring": "C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.",
"refraw": "C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.",
"id": "H1I2",
"external_identifier": ["arxiv:1709.02923", "ascl:2301.001"]
"external_identifier": ["arxiv:1709.02923", "ascl:2301.001"],
"scix_id": "scix:ABCD-1234-0006",
}
]

Expand Down Expand Up @@ -1170,6 +1219,8 @@ def test_populate_tables(self):
self.assertEqual(len(got), 2)
self.assertEqual(got[0]["external_identifier"], ["arxiv:1009.5514", "doi:10.1234/abc"])
self.assertEqual(got[1]["external_identifier"], ["arxiv:1709.02923", "ascl:2301.001"])
self.assertEqual(got[0]["scix_id"], "scix:ABCD-1234-0005")
self.assertEqual(got[1]["scix_id"], "scix:ABCD-1234-0006")

def test_get_parser_error(self):
""" test get_parser when it errors for unrecognized source filename """
Expand Down