From aa7d29cdad61bfd911d9559dec4518ad27ee89be Mon Sep 17 00:00:00 2001 From: kkaris Date: Mon, 9 Mar 2026 17:48:10 -0700 Subject: [PATCH 1/4] Handle if corner case when pubmed_data=None --- indra/literature/pubmed_client.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/indra/literature/pubmed_client.py b/indra/literature/pubmed_client.py index 32f6621aab..fd31183810 100644 --- a/indra/literature/pubmed_client.py +++ b/indra/literature/pubmed_client.py @@ -673,7 +673,7 @@ def _get_references(reference_list, only_pmid=True): return references -def _get_article_info(medline_citation, pubmed_data, detailed_authors=False): +def _get_article_info(medline_citation, pubmed_data=None, detailed_authors=False): article = medline_citation.find('Article') pmid = _find_elem_text(medline_citation, './PMID') pii = _find_elem_text(article, @@ -684,11 +684,12 @@ def _get_article_info(medline_citation, pubmed_data, detailed_authors=False): './ELocationID[@EIdType="doi"][@ValidYN="Y"]') # ...and if that doesn't work, look in the ArticleIdList - if doi is None: + if doi is None and pubmed_data is not None: doi = _find_elem_text(pubmed_data, './/ArticleId[@IdType="doi"]') # Try to get the PMCID - pmcid = _find_elem_text(pubmed_data, './/ArticleId[@IdType="pmc"]') + if pubmed_data is not None: + pmcid = _find_elem_text(pubmed_data, './/ArticleId[@IdType="pmc"]') # Title title = _get_title_from_article_element(article) @@ -777,13 +778,14 @@ def get_metadata_from_pubmed_article( if mesh_annotations: context_info = _get_annotations(medline_citation) result.update(context_info) - if references_included: + if references_included and pubmed_data is not None: references = _get_references(pubmed_data.find('ReferenceList'), only_pmid=(references_included == 'pmid')) result['references'] = references - publication_date = _get_pubmed_publication_date(pubmed_data) - result['publication_date'] = publication_date + if pubmed_data is not None: + publication_date = _get_pubmed_publication_date(pubmed_data) + result['publication_date'] = publication_date # Get the abstracts if requested if get_abstracts: From 9588392e783adeff378c736ebdeea5b48cd78210 Mon Sep 17 00:00:00 2001 From: kkaris Date: Mon, 9 Mar 2026 17:50:09 -0700 Subject: [PATCH 2/4] Docstring updates --- indra/literature/pubmed_client.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/indra/literature/pubmed_client.py b/indra/literature/pubmed_client.py index fd31183810..478216b80b 100644 --- a/indra/literature/pubmed_client.py +++ b/indra/literature/pubmed_client.py @@ -353,6 +353,15 @@ def get_full_xml_by_pmids( ------ RuntimeError If the edirect CLI utilities are not installed or not found on PATH. + + Notes + ----- + - This function requires the edirect command line utilities to be installed + and visible on your PATH. See https://www.ncbi.nlm.nih.gov/books/NBK179288/ + for instructions. + - Note that the output is sorted by PMID numerically e.g., + 10, 11, 20, 22, 1000 (and not lexicographically e.g., 10, 1000, 11, 20, 22) + without regard to the order in which the pmids are passed in. """ # Have to use lxml.etree because the XML returned by efetch is not properly # formatted for ET.XML @@ -371,11 +380,10 @@ def get_full_xml_by_pmids( "for instructions.") tree = lxml_etree.fromstring(xml_bytes, parser=parser) + if tree is None: + raise RuntimeError("Could not parse XML returned by efetch.") # Each article is in a tag, encapsulated in a # tag. - # Note that the tags are sorted by PMID numerically e.g., - # 10, 11, 20, 1000, and not lexicographically e.g., 10, 1000, 11, 20, - # regardless of the order in which the pmids are passed if fname is not None: pretty_save_xml(tree, fname) return tree @@ -764,7 +772,11 @@ def get_metadata_from_pubmed_article( Returns ------- - + : Dict + A dict containing the following fields: 'doi', 'title', 'authors', + 'journal_title', 'journal_abbrev', 'journal_nlm_id', 'issn_list', + 'page', 'volume', 'issue', 'issue_pub_date', 'mesh_annotations', + 'publication_date', 'abstract', 'publication_types' and 'references'. """ medline_citation = pubmed_article.find('./MedlineCitation') pubmed_data = pubmed_article.find('PubmedData') From d3a589aee7980ee132bae88dfcd53562b0822071 Mon Sep 17 00:00:00 2001 From: kkaris Date: Mon, 9 Mar 2026 17:50:54 -0700 Subject: [PATCH 3/4] Set gilda url to https --- indra/preassembler/grounding_mapper/gilda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indra/preassembler/grounding_mapper/gilda.py b/indra/preassembler/grounding_mapper/gilda.py index 98630f6ff9..b6ddf01fb1 100644 --- a/indra/preassembler/grounding_mapper/gilda.py +++ b/indra/preassembler/grounding_mapper/gilda.py @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) grounding_service_url = get_config('GILDA_URL', failure_ok=True) \ - if has_config('GILDA_URL') else 'http://grounding.indra.bio/' + if has_config('GILDA_URL') else 'https://grounding.indra.bio/' def get_grounding( From a88f34044887e4ebc12db5cceee396b92ec6b682 Mon Sep 17 00:00:00 2001 From: kkaris Date: Tue, 10 Mar 2026 08:27:25 -0700 Subject: [PATCH 4/4] Allow None in helper with a warning --- indra/literature/pubmed_client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/indra/literature/pubmed_client.py b/indra/literature/pubmed_client.py index 478216b80b..b319204527 100644 --- a/indra/literature/pubmed_client.py +++ b/indra/literature/pubmed_client.py @@ -434,6 +434,9 @@ def get_abstract(pubmed_id, prepend_title=True): # A function to get the text for the element, or None if not found def _find_elem_text(root, xpath_string): + if root is None: + logger.warning("Root is None when trying to find element with xpath: %s" % xpath_string) + return None elem = root.find(xpath_string) return None if elem is None else elem.text