diff --git a/README.md b/README.md index b5ba4f45a5..cb64f84b5e 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,7 @@ Biological pathway databases: | VirHostNet | [`indra.sources.virhostnet`](https://indra.readthedocs.io/en/latest/modules/sources/virhostnet/index.html) | http://virhostnet.prabi.fr/ | | CTD | [`indra.sources.ctd`](https://indra.readthedocs.io/en/latest/modules/sources/ctd/index.html) | http://ctdbase.org | | DrugBank | [`indra.sources.drugbank`](https://indra.readthedocs.io/en/latest/modules/sources/drugbank/index.html) | https://www.drugbank.ca/ | +| OmniPath | [`indra.sources.omnipath`](https://indra.readthedocs.io/en/latest/modules/sources/omnipath/index.html) | https://omnipathdb.org/ | Custom knowledge bases: diff --git a/doc/modules/sources/index.rst b/doc/modules/sources/index.rst index 1d7743595d..7799b3dc03 100644 --- a/doc/modules/sources/index.rst +++ b/doc/modules/sources/index.rst @@ -45,6 +45,7 @@ Standard Molecular Pathway Databases virhostnet/index ctd/index drugbank/index + omnipath/index Custom Knowledge Bases ---------------------- diff --git a/doc/modules/sources/omnipath/index.rst b/doc/modules/sources/omnipath/index.rst new file mode 100644 index 0000000000..4018d142b4 --- /dev/null +++ b/doc/modules/sources/omnipath/index.rst @@ -0,0 +1,17 @@ +OmniPath (:py:mod:`indra.sources.omnipath`) +=========================================== + +.. automodule:: indra.sources.omnipath + :members: + +OmniPath API (:py:mod:`indra.sources.omnipath.api`) +--------------------------------------------------- + +.. automodule:: indra.sources.omnipath.api + :members: + +OmniPath Processor (:py:mod:`indra.sources.omnipath.processor`) +--------------------------------------------------------------- + +.. 
automodule:: indra.sources.omnipath.processor + :members: diff --git a/indra/assemblers/html/assembler.py b/indra/assemblers/html/assembler.py index c57bb0d59d..88fb532b10 100644 --- a/indra/assemblers/html/assembler.py +++ b/indra/assemblers/html/assembler.py @@ -33,7 +33,7 @@ '#cab2d6', '#fb9a99', '#a6cee3', '#33a02c', '#b15928', '#e31a1c'], 'light': ['#bc80bd', '#fccde5', '#b3de69', '#80b1d3', '#fb8072', '#bebada', '#fdb462', '#d9d9d9', '#8dd3c7', '#ffed6f', '#ccebc5', '#e0e03d', - '#ffe8f4', '#acfcfc', '#dd99ff'] + '#ffe8f4', '#acfcfc', '#dd99ff', '#00d4a6'] } @@ -45,7 +45,7 @@ def color_gen(scheme): db_sources = ['phosphosite', 'cbn', 'pc11', 'biopax', 'bel_lc', 'signor', 'biogrid', 'lincs_drug', 'tas', 'hprd', 'trrust', - 'ctd', 'virhostnet', 'phosphoelm', 'drugbank'] + 'ctd', 'virhostnet', 'phosphoelm', 'drugbank', 'omnipath'] reader_sources = ['geneways', 'tees', 'isi', 'trips', 'rlimsp', 'medscan', 'sparser', 'eidos', 'reach'] diff --git a/indra/resources/default_belief_probs.json b/indra/resources/default_belief_probs.json index 42d7bd801b..f00f49beae 100644 --- a/indra/resources/default_belief_probs.json +++ b/indra/resources/default_belief_probs.json @@ -28,7 +28,8 @@ "hypothes.is": 0.01, "virhostnet": 0.01, "ctd": 0.01, - "drugbank": 0.01 + "drugbank": 0.01, + "omnipath": 0.01 }, "rand": { "eidos": 0.3, @@ -59,6 +60,7 @@ "hypothes.is": 0.1, "virhostnet": 0.1, "ctd": 0.1, - "drugbank": 0.1 + "drugbank": 0.1, + "omnipath": 0.1 } } diff --git a/indra/sources/omnipath/__init__.py b/indra/sources/omnipath/__init__.py new file mode 100644 index 0000000000..9bba139ba3 --- /dev/null +++ b/indra/sources/omnipath/__init__.py @@ -0,0 +1,18 @@ +""" +The OmniPath module accesses biomolecular interaction data from various +curated databases using the OmniPath API (see +https://saezlab.github.io/pypath/html/index.html#webservice) and processes +the returned data into statements using the OmniPathProcessor. 
def process_from_web():
    """Fetch OmniPath data from the web service and process it.

    The PTM (enzyme-substrate) data is requested first, followed by the
    ligand-receptor interactions. Both payloads are handed to a new
    OmniPathProcessor, which is then asked to process each of them.

    Returns
    -------
    OmniPathProcessor
        An OmniPathProcessor object which contains a list of extracted
        INDRA Statements in its statements attribute.
    """
    # Keyword arguments are evaluated left to right, so the PTM request
    # is still issued before the interactions request.
    processor = OmniPathProcessor(ptm_json=_get_modifications(),
                                  ligrec_json=_get_interactions())
    processor.process_ptm_mods()
    processor.process_ligrec_interactions()
    return processor
class OmniPathProcessor(object):
    """Class to process OmniPath JSON into INDRA Statements.

    Parameters
    ----------
    ptm_json : list or None
        Entries obtained from the OmniPath PTM endpoint. Each entry is
        accessed as a dict with the keys 'enzyme', 'substrate',
        'residue_type', 'residue_offset', 'modification' and 'references'.
    ligrec_json : list or None
        Entries obtained from the OmniPath interactions endpoint. Each
        entry is accessed as a dict with the keys 'source', 'target',
        'sources', 'references', 'consensus_stimulation' and
        'consensus_inhibition'.

    Attributes
    ----------
    statements : list
        The Statements accumulated by the ``process_*`` methods.
    """
    def __init__(self, ptm_json=None, ligrec_json=None):
        self.statements = []
        self.ptm_json = ptm_json
        self.ligrec_json = ligrec_json

    def process_ptm_mods(self):
        """Process ptm json if present"""
        if self.ptm_json:
            self.statements += self._stmts_from_op_mods(self.ptm_json)

    def process_ligrec_interactions(self):
        """Process ligand-receptor json if present"""
        if self.ligrec_json:
            self.statements += self._stmt_from_op_lr(self.ligrec_json)

    @staticmethod
    def _split_reference(reference):
        """Split a 'SOURCE:ID' reference string into (source_db, ref_id).

        Only the first colon separates the source from the identifier, so
        identifiers that themselves contain a colon are kept intact. A
        reference without any colon is returned as ``(None, reference)``.
        """
        source_db, sep, ref_id = reference.partition(':')
        if not sep:
            return None, reference
        return source_db, ref_id

    @staticmethod
    def _is_op_complex(op_id):
        """Return True if an OmniPath entity ID denotes a complex."""
        return 'complex' in op_id.lower()

    @staticmethod
    def _up_ids_from_op_id(op_id):
        """Return the list of UniProt IDs encoded in an OmniPath entity ID.

        Complex IDs have the form 'COMPLEX:ID1_ID2_...'; any other ID is
        returned as a single-element list.
        """
        if 'complex' in op_id.lower():
            return op_id.split(':')[1].split('_')
        return [op_id]

    def _stmts_from_op_mods(self, ptm_json):
        """Build Modification Statements from a list of Omnipath PTM entries

        Entries are skipped if they have no references, if their
        modification type has no corresponding Statement class, or if all
        of their references come from sources listed in ``ignore_srcs``.

        Parameters
        ----------
        ptm_json : list or None
            PTM entries as returned by the OmniPath PTM endpoint.

        Returns
        -------
        list
            The Modification Statements that were extracted.
        """
        ptm_stmts = []
        unhandled_mod_types = []
        annot_ignore = {'enzyme', 'substrate', 'residue_type',
                        'residue_offset', 'references', 'modification'}
        if ptm_json is None:
            return []
        for mod_entry in ptm_json:
            # Skip entries without references
            if not mod_entry['references']:
                continue
            # Check the modification type first so that no agents or
            # evidences are built for entries that cannot be represented
            mod_type = mod_entry['modification']
            modclass = modtype_to_modclass.get(mod_type)
            if modclass is None:
                unhandled_mod_types.append(mod_type)
                continue
            evidence = []
            for reference in mod_entry['references']:
                source_db, pmid = self._split_reference(reference)
                # Skip evidence from already known sources
                if source_db and source_db.lower() in ignore_srcs:
                    continue
                if 'pmc' in pmid.lower():
                    text_refs = {'PMCID': pmid.split('/')[-1]}
                    pmid = None
                else:
                    text_refs = None
                evidence.append(Evidence(
                    source_api='omnipath',
                    source_id=source_db,
                    pmid=pmid,
                    text_refs=text_refs,
                    # A fresh dict per Evidence so they never share state
                    annotations={k: v for k, v in mod_entry.items()
                                 if k not in annot_ignore}
                ))
            # All evidences filtered out
            if not evidence:
                continue
            enz = self._agent_from_up_id(mod_entry['enzyme'])
            sub = self._agent_from_up_id(mod_entry['substrate'])
            res = mod_entry['residue_type']
            pos = mod_entry['residue_offset']
            ptm_stmts.append(modclass(enz, sub, res, pos, evidence))
        if unhandled_mod_types:
            logger.warning('Skipped entries with unhandled modification '
                           'types: %s', dict(Counter(unhandled_mod_types)))
        return ptm_stmts

    def _stmt_from_op_lr(self, ligrec_json):
        """Make ligand-receptor Complexes from Omnipath API interactions db

        For each entry with usable evidence a Complex is created. If
        exactly one of 'consensus_stimulation' and 'consensus_inhibition'
        is set, an Activation or Inhibition is created as well.

        Parameters
        ----------
        ligrec_json : list or None
            Entries as returned by the OmniPath interactions endpoint.

        Returns
        -------
        list
            The Statements that were extracted.
        """
        ligrec_stmts = []
        ign_annot = {'source_sub_id', 'source', 'target', 'references'}
        no_refs = 0
        bad_pmid = 0
        no_consensus = 0
        if ligrec_json is None:
            return ligrec_stmts

        for lr_entry in ligrec_json:
            if not lr_entry['references']:
                no_refs += 1
                continue
            if len(lr_entry['sources']) == 1 and \
                    lr_entry['sources'][0].lower() in ignore_srcs:
                continue

            # Assemble evidence
            evidence = []
            for reference in lr_entry['references']:
                # Split on the first colon only, consistent with the PTM
                # processing; a plain split(':') fails to unpack when the
                # identifier itself contains a colon
                source_db, pmid = self._split_reference(reference)
                # Skip evidence from already known sources
                if source_db and source_db.lower() in ignore_srcs:
                    continue
                # Identifiers longer than 8 characters are treated as
                # invalid PMIDs
                if len(pmid) > 8:
                    bad_pmid += 1
                    continue
                annot = {k: v for k, v in lr_entry.items()
                         if k not in ign_annot}
                annot['source_sub_id'] = source_db
                evidence.append(Evidence(source_api='omnipath', pmid=pmid,
                                         annotations=annot))

            # All evidences were filtered out
            if not evidence:
                no_refs += 1
                continue

            # Get complexes
            ligrec_stmts.append(self._get_op_complex(lr_entry['source'],
                                                     lr_entry['target'],
                                                     evidence))

            # On consensus, make Activations or Inhibitions as well
            stimulation = bool(lr_entry['consensus_stimulation'])
            inhibition = bool(lr_entry['consensus_inhibition'])
            if stimulation != inhibition:
                ligrec_stmts.append(self._get_ligrec_regs(
                    lr_entry['source'], lr_entry['target'], evidence,
                    activation=stimulation))
            elif stimulation:
                # Both flags are set: the direction is ambiguous so only
                # the Complex is kept for this entry
                no_consensus += 1

        if no_refs:
            logger.warning('%d entries without references were skipped',
                           no_refs)
        if bad_pmid:
            logger.warning('%d references with bad pmids were skipped',
                           bad_pmid)
        if no_consensus:
            logger.warning('%d entries with conflicting regulation were '
                           'skipped', no_consensus)

        return ligrec_stmts

    @staticmethod
    def _agent_from_up_id(up_id):
        """Build an Agent object from a Uniprot ID. Adds db_refs for both
        Uniprot and HGNC where available."""
        db_refs = {'UP': up_id}
        ag = Agent(up_id, db_refs=db_refs)
        # Standardization is what replaces the placeholder name and adds
        # further db_refs where the ontology provides them
        standardize_agent_name(ag)
        return ag

    def _bc_agent_from_up_list(self, up_id_list):
        """Return the first agent with the remaining agents as bound
        conditions."""
        agents_list = [self._agent_from_up_id(up_id) for up_id in up_id_list]
        agent = agents_list[0]
        agent.bound_conditions = \
            [BoundCondition(a, True) for a in agents_list[1:]]
        return agent

    def _complex_agents_from_op_complex(self, up_id_str):
        """Return a list of agents from a string containing multiple UP ids
        """
        return [self._agent_from_up_id(up_id)
                for up_id in self._up_ids_from_op_id(up_id_str)]

    def _get_op_complex(self, source, target, evidence_list):
        """Return a Complex of all agents encoded in source and target."""
        ag_list = self._complex_agents_from_op_complex(source) + \
            self._complex_agents_from_op_complex(target)
        return Complex(members=ag_list,
                       evidence=evidence_list)

    def _regulation_agent(self, op_id):
        """Return the agent for one side of a regulation Statement.

        Complexes are represented by their first member with the other
        members attached as bound conditions.
        """
        if self._is_op_complex(op_id):
            return self._bc_agent_from_up_list(
                self._up_ids_from_op_id(op_id))
        return self._agent_from_up_id(op_id)

    def _get_ligrec_regs(self, source, target, evidence_list, activation=True):
        """Return an Activation or Inhibition of target by source.

        Parameters
        ----------
        source : str
            OmniPath ID of the regulator (a UniProt ID or a complex ID).
        target : str
            OmniPath ID of the regulated entity.
        evidence_list : list
            Evidences to attach to the Statement.
        activation : bool
            If True an Activation is created, otherwise an Inhibition.
        """
        subj = self._regulation_agent(source)
        obj = self._regulation_agent(target)

        Regulation = stmt_by_name('activation') if activation else \
            stmt_by_name('inhibition')

        regulation = Regulation(subj=subj, obj=obj, evidence=evidence_list)
        return regulation
def test_omnipath_web_api():
    """Check that the OmniPath web service answers on its queries URL."""
    response = requests.get(op_url + '/queries')
    assert response.status_code == 200
def test_ligrec_from_web():
    """Fetch CALM1 ligand-receptor interactions and process them."""
    field_names = ['curation_effort', 'entity_type', 'references',
                   'resources', 'sources', 'type']
    query = {'format': 'json', 'datasets': ['ligrecextra'],
             'fields': field_names,
             'sources': [CALM1_UPID]}
    response = requests.get(op_url + '/interactions', params=query)
    assert response.status_code == 200
    assert response.text
    assert 'error' not in response.text.lower()
    entries = response.json()
    assert entries[0]['source'] == CALM1_UPID

    # Run the downloaded entries through the processor
    processor = OmniPathProcessor(ligrec_json=entries)
    processor.process_ligrec_interactions()
    first_stmt = processor.statements[0]
    agent_names = [agent.name for agent in first_stmt.agent_list()]
    assert CALM1_AG.name in agent_names, first_stmt.agent_list()
    first_source_api = first_stmt.evidence[0].source_api
    assert 'omnipath' == first_source_api, first_source_api