Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 27 additions & 10 deletions indra/literature/pubmed_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,15 @@ def get_full_xml_by_pmids(
------
RuntimeError
If the edirect CLI utilities are not installed or not found on PATH.

Notes
-----
- This function requires the edirect command line utilities to be installed
and visible on your PATH. See https://www.ncbi.nlm.nih.gov/books/NBK179288/
for instructions.
- The output is sorted numerically by PMID (e.g., 10, 11, 20, 22, 1000),
not lexicographically (e.g., 10, 1000, 11, 20, 22), regardless of the
order in which the PMIDs are passed in.
"""
# Have to use lxml.etree because the XML returned by efetch is not properly
# formatted for ET.XML
Expand All @@ -371,11 +380,10 @@ def get_full_xml_by_pmids(
"for instructions.")

tree = lxml_etree.fromstring(xml_bytes, parser=parser)
if tree is None:
raise RuntimeError("Could not parse XML returned by efetch.")
# Each article is in a <PubmedArticle> tag, encapsulated in a
# <PubmedArticleSet> tag.
# Note that the <PubmedArticle> tags are sorted by PMID numerically e.g.,
# 10, 11, 20, 1000, and not lexicographically e.g., 10, 1000, 11, 20,
# regardless of the order in which the pmids are passed
if fname is not None:
pretty_save_xml(tree, fname)
return tree
Expand Down Expand Up @@ -426,6 +434,9 @@ def get_abstract(pubmed_id, prepend_title=True):

# A function to get the text for the element, or None if not found
def _find_elem_text(root, xpath_string):
if root is None:
logger.warning("Root is None when trying to find element with xpath: %s" % xpath_string)
return None
elem = root.find(xpath_string)
return None if elem is None else elem.text

Expand Down Expand Up @@ -673,7 +684,7 @@ def _get_references(reference_list, only_pmid=True):
return references


def _get_article_info(medline_citation, pubmed_data, detailed_authors=False):
def _get_article_info(medline_citation, pubmed_data=None, detailed_authors=False):
article = medline_citation.find('Article')
pmid = _find_elem_text(medline_citation, './PMID')
pii = _find_elem_text(article,
Expand All @@ -684,11 +695,12 @@ def _get_article_info(medline_citation, pubmed_data, detailed_authors=False):
'./ELocationID[@EIdType="doi"][@ValidYN="Y"]')

# ...and if that doesn't work, look in the ArticleIdList
if doi is None:
if doi is None and pubmed_data is not None:
doi = _find_elem_text(pubmed_data, './/ArticleId[@IdType="doi"]')

# Try to get the PMCID
pmcid = _find_elem_text(pubmed_data, './/ArticleId[@IdType="pmc"]')
if pubmed_data is not None:
pmcid = _find_elem_text(pubmed_data, './/ArticleId[@IdType="pmc"]')

# Title
title = _get_title_from_article_element(article)
Expand Down Expand Up @@ -763,7 +775,11 @@ def get_metadata_from_pubmed_article(

Returns
-------

dict
A dict containing the following fields: 'doi', 'title', 'authors',
'journal_title', 'journal_abbrev', 'journal_nlm_id', 'issn_list',
'page', 'volume', 'issue', 'issue_pub_date', 'mesh_annotations',
'publication_date', 'abstract', 'publication_types' and 'references'.
"""
medline_citation = pubmed_article.find('./MedlineCitation')
pubmed_data = pubmed_article.find('PubmedData')
Expand All @@ -777,13 +793,14 @@ def get_metadata_from_pubmed_article(
if mesh_annotations:
context_info = _get_annotations(medline_citation)
result.update(context_info)
if references_included:
if references_included and pubmed_data is not None:
references = _get_references(pubmed_data.find('ReferenceList'),
only_pmid=(references_included == 'pmid'))
result['references'] = references

publication_date = _get_pubmed_publication_date(pubmed_data)
result['publication_date'] = publication_date
if pubmed_data is not None:
publication_date = _get_pubmed_publication_date(pubmed_data)
result['publication_date'] = publication_date

# Get the abstracts if requested
if get_abstracts:
Expand Down
2 changes: 1 addition & 1 deletion indra/preassembler/grounding_mapper/gilda.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
logger = logging.getLogger(__name__)

grounding_service_url = get_config('GILDA_URL', failure_ok=True) \
if has_config('GILDA_URL') else 'http://grounding.indra.bio/'
if has_config('GILDA_URL') else 'https://grounding.indra.bio/'


def get_grounding(
Expand Down
Loading