diff --git a/code/ARAX/ARAXQuery/ARAX_expander.py b/code/ARAX/ARAXQuery/ARAX_expander.py
index f5deb327e..374ca8872 100644
--- a/code/ARAX/ARAXQuery/ARAX_expander.py
+++ b/code/ARAX/ARAXQuery/ARAX_expander.py
@@ -50,6 +50,9 @@ def trim_to_size(input_list, length):
else:
return input_list
+KPS_THAT_RETURN_PREFERRED_NODE_CURIES = {'infores:retriever'}
+KP_THAT_CAN_HANDLE_SINGLE_NODE_QUERIES = {'infores:rtx-kg2'}
+
class ARAXExpander:
def __init__(self):
@@ -1101,22 +1104,9 @@ async def expand_edge_async(
# Do some post-processing (deduplicate nodes, remove self-edges..)
# KG2c and retriever are already deduplicated and uses canonical predicates
- if kp_to_use != 'infores:rtx-kg2' and kp_to_use != 'infores:retriever':
- qg_org_kg = eu.check_for_canonical_predicates(qg_org_kg, kp_to_use, log)
- qg_org_kg,\
- dropped_edge_counts = self._deduplicate_nodes(qg_org_kg,
- kp_to_use,
- log)
- for qedge_key, count in dropped_edge_counts.items():
- if count > 0:
- # update query plan here
- done_str = log.query_plan['qedge_keys'][qedge_key][kp_to_use]['description']
- log.update_query_plan(qedge_key,
- kp_to_use,
- "Warning",
- done_str + "; "
- f"{count} edges dropped due "
- "to node reference failure")
+ if kp_to_use not in KPS_THAT_RETURN_PREFERRED_NODE_CURIES:
+ log.warning(f"{kp_to_use}: this KP may not return preferred CURIEs; please check, and if it does return only preferred CURIEs, add to the Expand whitelist")
+
if any(edges for edges in qg_org_kg.edges_by_qg_id.values()): # Make sure the KP actually returned something
qg_org_kg = self._remove_self_edges(qg_org_kg, kp_to_use, log)
@@ -1136,6 +1126,8 @@ def _expand_node(qnode_key: str,
# This function expands a single node using the specified knowledge provider (for now only KG2 is supported)
log.debug(f"Expanding node {qnode_key} using {kps_to_use}")
qnode = query_graph.nodes[qnode_key]
+ if qnode.ids:
+ qnode.ids = eu.get_canonical_curies_list(qnode.ids, log)
single_node_qg = QueryGraph(nodes={qnode_key: qnode}, edges={})
answer_kg = QGOrganizedKnowledgeGraph()
if log.status != 'OK':
@@ -1145,18 +1137,21 @@ def _expand_node(qnode_key: str,
return answer_kg
# Answer the query using the proper KP (only our own KP answers single-node queries for now)
- if kps_to_use == ["infores:rtx-kg2"]:
- kp_querier = TRAPIQuerier(response_object=log,
- kp_name=kps_to_use[0],
- user_specified_kp=user_specified_kp,
- kp_timeout=kp_timeout)
- answer_kg = kp_querier.answer_single_node_query(single_node_qg)
- log.info(f"Query for node {qnode_key} returned results ({eu.get_printable_counts_by_qg_id(answer_kg)})")
- return answer_kg
- else:
- log.error("Only infores:rtx-kg2 can answer single-node queries currently", error_code="InvalidKP")
+ kps_to_use_that_cannot_handle_single_node_queries = set(kps_to_use) - KP_THAT_CAN_HANDLE_SINGLE_NODE_QUERIES
+ if kps_to_use_that_cannot_handle_single_node_queries:
+ log.error("these KPs cannot answer single-node queries: "
+ f"{kps_to_use_that_cannot_handle_single_node_queries}",
+ error_code="InvalidKP")
return answer_kg
+ kp_querier = TRAPIQuerier(response_object=log,
+ kp_name=next(iter(KP_THAT_CAN_HANDLE_SINGLE_NODE_QUERIES)),
+ user_specified_kp=user_specified_kp,
+ kp_timeout=kp_timeout)
+ answer_kg = kp_querier.answer_single_node_query(single_node_qg)
+ log.info(f"Query for node {qnode_key} returned results ({eu.get_printable_counts_by_qg_id(answer_kg)})")
+ return answer_kg
+
def _get_query_graph_for_edge(self, qedge_key: str, full_qg: QueryGraph, overarching_kg: QGOrganizedKnowledgeGraph, log: ARAXResponse) -> QueryGraph:
# This function creates a query graph for the specified qedge, updating its qnodes' curies as needed
edge_qg = QueryGraph(nodes={}, edges={})
@@ -1205,79 +1200,6 @@ def _get_query_graph_for_edge(self, qedge_key: str, full_qg: QueryGraph, overarc
f"{qedge.predicates if qedge.predicates else ''}-({output_qnode_key}:{output_qnode.categories}{output_curie_summary})")
return edge_qg
- @staticmethod
- def _deduplicate_nodes(
- answer_kg: QGOrganizedKnowledgeGraph,
- kp_name: str,
- log: ARAXResponse
- ) -> tuple[QGOrganizedKnowledgeGraph, dict[str, int]]:
- log.debug(f"{kp_name}: Deduplicating nodes")
- deduplicated_kg = QGOrganizedKnowledgeGraph(nodes={qnode_key: {} for qnode_key in answer_kg.nodes_by_qg_id},
- edges={qedge_key: {} for qedge_key in answer_kg.edges_by_qg_id})
- deduplicated_kg.unbound_edges = answer_kg.unbound_edges
- curie_mappings = {}
-
- # First deduplicate the bound nodes
- for qnode_key, nodes in {**answer_kg.nodes_by_qg_id, UNBOUND_NODES_KEY: answer_kg.unbound_nodes}.items():
- # Load preferred curie info from NodeSynonymizer
- log.debug(f"{kp_name}: Getting preferred curies for {qnode_key} nodes returned in this step")
- canonicalized_nodes = eu.get_canonical_curies_dict(list(nodes), log) if nodes else {}
- if log.status != 'OK':
- return deduplicated_kg
-
- for node_key in nodes:
- # Figure out the preferred curie/name for this node
- node = nodes.get(node_key)
- canonicalized_node = canonicalized_nodes.get(node_key)
- if canonicalized_node:
- preferred_curie = canonicalized_node.get('preferred_curie', node_key)
- preferred_name = canonicalized_node.get('preferred_name', node.name)
- preferred_type = canonicalized_node.get('preferred_type')
- preferred_categories = eu.convert_to_list(preferred_type) if preferred_type else node.categories
- curie_mappings[node_key] = preferred_curie
- else:
- # Means the NodeSynonymizer didn't recognize this curie
- preferred_curie = node_key
- preferred_name = node.name
- preferred_categories = node.categories
- curie_mappings[node_key] = preferred_curie
-
- # Add this node into our deduplicated KG as necessary
- if qnode_key != UNBOUND_NODES_KEY:
- if preferred_curie not in deduplicated_kg.nodes_by_qg_id[qnode_key]:
- node_key = preferred_curie
- node.name = preferred_name
- node.categories = preferred_categories
- deduplicated_kg.add_node(node_key, node, qnode_key)
- else: # this is an unbound node
- if preferred_curie not in deduplicated_kg.unbound_nodes:
- node.name = preferred_name
- node.categories = preferred_categories
- deduplicated_kg.unbound_nodes[preferred_curie] = node
-
- # Then update the edges to reflect changes made to the nodes
- dropped_edge_count = {}
- for qedge_key, edges in answer_kg.edges_by_qg_id.items():
- dropped_edge_count[qedge_key] = 0
- for edge_key, edge in edges.items():
- drop_edge = False
- if edge.subject not in curie_mappings:
- log.warning(f"{kp_name}: edge subject not in curie mappings; qedge key: {qedge_key}; subject ID: {edge.subject}")
- drop_edge = True
- dropped_edge_count[qedge_key] += 1
- else:
- edge.subject = curie_mappings.get(edge.subject)
- if edge.object not in curie_mappings:
- log.warning(f"{kp_name}: edge object not in curie mappings; qedge key: {qedge_key}; object ID: {edge.object}")
- drop_edge = True
- dropped_edge_count[qedge_key] += 1
- else:
- edge.object = curie_mappings.get(edge.object)
- if not drop_edge:
- deduplicated_kg.add_edge(edge_key, edge, qedge_key)
- log.debug(f"{kp_name}: After deduplication, answer KG counts are: {eu.get_printable_counts_by_qg_id(deduplicated_kg)}")
- return deduplicated_kg, dropped_edge_count
-
@staticmethod
def _extract_query_subgraph(qedge_keys_to_expand: list[str], query_graph: QueryGraph, log: ARAXResponse) -> QueryGraph:
# This function extracts a sub-query graph containing the provided qedge IDs from a larger query graph
diff --git a/code/ARAX/ARAXQuery/ARAX_filter_kg.py b/code/ARAX/ARAXQuery/ARAX_filter_kg.py
index 78f70d396..adbd8643c 100644
--- a/code/ARAX/ARAXQuery/ARAX_filter_kg.py
+++ b/code/ARAX/ARAXQuery/ARAX_filter_kg.py
@@ -575,13 +575,24 @@ def __remove_edges_by_predicate(self, describe=False):
"""
message = self.message
parameters = self.parameters
+ kg = getattr(message, 'knowledge_graph', None)  # message may be falsy (see the guard below), so don't dereference it directly
# make a list of the allowable parameters (keys), and their possible values (values). Note that the action and corresponding name will always be in the allowable parameters
if message and parameters and hasattr(message, 'query_graph') and hasattr(message.query_graph, 'edges'):
allowable_parameters = {'action': {'remove_edges_by_predicate'},
'edge_predicate': set([x.predicate for x in self.message.knowledge_graph.edges.values()]),
'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
- 'qnode_keys': set([t for x in self.message.knowledge_graph.nodes.values() if x.qnode_keys is not None for t in x.qnode_keys]),
- 'qedge_keys': set([t for x in self.message.knowledge_graph.edges.values() if x.qedge_keys is not None for t in x.qedge_keys])
+ 'qnode_keys': {
+ qnode_key
+ for node in kg.nodes.values()
+ for qnode_key in (getattr(node, "qnode_keys", None) or [])
+ },
+ 'qedge_keys': {
+ qedge_key
+ for edge in kg.edges.values()
+ for qedge_key in (getattr(edge, "qedge_keys", None) or [])
+ }
+# 'qnode_keys': set([t for x in self.message.knowledge_graph.nodes.values() if x.qnode_keys is not None for t in x.qnode_keys]),
+# 'qedge_keys': set([t for x in self.message.knowledge_graph.edges.values() if x.qedge_keys is not None for t in x.qedge_keys])
}
else:
allowable_parameters = {'action': {'remove_edges_by_predicate'},
@@ -849,6 +860,7 @@ def __remove_edges_by_std_dev(self, describe=False):
:return:
"""
message = self.message
+ kg = getattr(message, 'knowledge_graph', None)  # message may be falsy (see the guard below), so don't dereference it directly
parameters = self.parameters
# make a list of the allowable parameters (keys), and their possible values (values). Note that the action and corresponding name will always be in the allowable parameters
if message and parameters and hasattr(message, 'knowledge_graph') and hasattr(message.knowledge_graph, 'edges'):
@@ -868,8 +880,16 @@ def __remove_edges_by_std_dev(self, describe=False):
'threshold': {float()},
'top': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
- 'qnode_keys':set([t for x in self.message.knowledge_graph.nodes.values() if x.qnode_keys is not None for t in x.qnode_keys]),
- 'qedge_keys': set([t for x in self.message.knowledge_graph.edges.values() if x.qedge_keys is not None for t in x.qedge_keys])
+ 'qnode_keys': {
+ qnode_key
+ for node in kg.nodes.values()
+ for qnode_key in (getattr(node, "qnode_keys", None) or [])
+ },
+ 'qedge_keys': {
+ qedge_key
+ for edge in kg.edges.values()
+ for qedge_key in (getattr(edge, "qedge_keys", None) or [])
+ }
}
else:
allowable_parameters = {'action': {'remove_edges_by_std_dev'},
diff --git a/code/ARAX/ARAXQuery/Expand/kp_info_cacher.py b/code/ARAX/ARAXQuery/Expand/kp_info_cacher.py
index cfc99a573..5426eee72 100644
--- a/code/ARAX/ARAXQuery/Expand/kp_info_cacher.py
+++ b/code/ARAX/ARAXQuery/Expand/kp_info_cacher.py
@@ -142,7 +142,7 @@ def load_kp_info_caches(self, log: ARAXResponse):
log.error(f"Unable to load KP info caches: {e}")
# The caches MUST be up to date at this point, so we just load them
- log.debug("Loading cached Smart API amd meta map info")
+ log.debug("Loading cached Smart API and meta map info")
with open(self.smart_api_and_meta_map_cache, "rb") as cache:
cache = pickle.load(cache)
smart_api_info = cache['smart_api_cache']
diff --git a/code/ARAX/ARAXQuery/Expand/kp_selector.py b/code/ARAX/ARAXQuery/Expand/kp_selector.py
index b94a8fe61..034961105 100644
--- a/code/ARAX/ARAXQuery/Expand/kp_selector.py
+++ b/code/ARAX/ARAXQuery/Expand/kp_selector.py
@@ -70,9 +70,15 @@ def get_kps_for_single_hop_qg(self, qg: QueryGraph) -> Optional[set[str]]:
obj_categories = set(self.bh.get_descendants(qg.nodes[qedge.object].categories))
predicates = set(self.bh.get_descendants(qedge_predicates))
+ kps_skip_metakg_checks_that_are_allowed = KPS_SKIP_METAKG_CHECKS - \
+ (self.kps_excluded_by_maturity | self.kps_excluded_by_version)
+
# use metamap to check kp for predicate triple
+ self.log.debug("number of allowed KPs that can skip metakg checks: "
+ f"{len(kps_skip_metakg_checks_that_are_allowed)}")
+
+ accepting_kps = kps_skip_metakg_checks_that_are_allowed
self.log.debug(f"selecting from {len(self.valid_kps)} kps")
- accepting_kps = set()
for kp in self.meta_map:
# kp should contain the infores CURIE of the knowledge provider
if self._triple_is_in_meta_map(kp,
@@ -151,64 +157,67 @@ def get_desirable_equivalent_curies(self, curies: list[str], categories: Optiona
supported_prefixes = self._get_supported_prefixes(eu.convert_to_list(categories), kp)
self.log.debug(f"{kp}: Prefixes {kp} supports for categories {categories} (and descendants) are: "
f"{supported_prefixes}")
- converted_curies: set[str] = set()
+ converted_curies: list[str] = []
+ converted_curies_seen: set[str] = set()
unsupported_curies: set[str] = set()
synonyms_dict = eu.get_curie_synonyms_dict(curies)
# Convert each input curie to a preferred, supported prefix
for input_curie, equivalent_curies in synonyms_dict.items():
input_curie_prefix = self._get_uppercase_prefix(input_curie)
- supported_equiv_curies_by_prefix = defaultdict(set)
+ supported_equiv_curies_by_prefix: defaultdict[str, list[str]] = defaultdict(list)
for curie in equivalent_curies:
prefix = self._get_uppercase_prefix(curie)
if prefix in supported_prefixes:
- supported_equiv_curies_by_prefix[prefix].add(curie)
+ if curie not in supported_equiv_curies_by_prefix[prefix]:
+ supported_equiv_curies_by_prefix[prefix].append(curie)
if supported_equiv_curies_by_prefix:
# Grab equivalent curies with the same prefix as the input curie, if available
if input_curie_prefix in supported_equiv_curies_by_prefix:
- curies_to_send = supported_equiv_curies_by_prefix[input_curie_prefix]
- # Otherwise pick any supported curie prefix present
+ chosen_prefix = input_curie_prefix
+ # Otherwise pick the first supported prefix encountered while scanning
+ # equivalent_curies. This mirrors old behavior and stays deterministic
+ # now that synonymizer output order is stable.
else:
- curies_to_send = next(curie_set for curie_set in supported_equiv_curies_by_prefix.values())
- converted_curies = converted_curies.union(curies_to_send)
+ chosen_prefix = next(iter(supported_equiv_curies_by_prefix))
+ for curie_to_send in supported_equiv_curies_by_prefix[chosen_prefix]:
+ if curie_to_send not in converted_curies_seen:
+ converted_curies.append(curie_to_send)
+ converted_curies_seen.add(curie_to_send)
else:
unsupported_curies.add(input_curie)
if unsupported_curies:
self.log.warning(f"{kp}: Could not find curies with prefixes {kp} prefers, for these curies: "
f"{unsupported_curies}; will not send these to KP")
- return list(converted_curies)
+ return converted_curies
def make_qg_use_supported_prefixes(self, qg: QueryGraph, kp_name: str, log: ARAXResponse) -> Optional[QueryGraph]:
for qnode_key, qnode in qg.nodes.items():
if qnode.ids:
- if kp_name == "infores:rtx-kg2":
- # Just convert them into canonical curies
- qnode.ids = eu.get_canonical_curies_list(qnode.ids, log)
- else:
- if qnode.categories:
- # Otherwise figure out which kind of curies KPs want
- categories = eu.convert_to_list(qnode.categories)
- supported_prefixes = self._get_supported_prefixes(categories, kp_name)
- used_prefixes = {self._get_uppercase_prefix(curie) for curie in qnode.ids}
- # Only convert curie(s) if any use an unsupported prefix
- if used_prefixes.issubset(supported_prefixes):
- self.log.debug(f"{kp_name}: All {qnode_key} curies use prefix(es) {kp_name} supports; no "
- f"conversion necessary")
- else:
- self.log.debug(f"{kp_name}: One or more {qnode_key} curies use a prefix {kp_name} doesn't "
- f"support; will convert these")
- converted_curies = self.get_desirable_equivalent_curies(qnode.ids, qnode.categories, kp_name)
- if converted_curies:
- log.debug(f"{kp_name}: Converted {qnode_key}'s {len(qnode.ids)} curies to a list of "
- f"{len(converted_curies)} curies tailored for {kp_name}")
- qnode.ids = converted_curies
- else:
- log.info(f"{kp_name} cannot answer the query because no equivalent curies were found "
- f"with prefixes it supports for qnode {qnode_key}. Original curies were: "
- f"{qnode.ids}")
- return None
+ if qnode.categories:
+ # Figure out which kind of curies this KP wants
+ categories = eu.convert_to_list(qnode.categories)
+ supported_prefixes = self._get_supported_prefixes(categories, kp_name)
+ used_prefixes = {self._get_uppercase_prefix(curie) for curie in qnode.ids}
+ # Only convert curie(s) if any use an unsupported prefix
+ if used_prefixes.issubset(supported_prefixes):
+ self.log.debug(f"{kp_name}: All {qnode_key} curies use prefix(es) {kp_name} supports; no "
+ f"conversion necessary")
else:
- # the query graph has no categories; just ask NodeNorm what the preferred CURIE is and use that
- qnode.ids = eu.get_canonical_curies_list(qnode.ids, log)
+ self.log.debug(f"{kp_name}: One or more {qnode_key} curies use a prefix {kp_name} doesn't "
+ f"support; will convert these")
+ converted_curies = self.get_desirable_equivalent_curies(qnode.ids, qnode.categories, kp_name)
+ if converted_curies:
+ log.debug(f"{kp_name}: Converted {qnode_key}'s {len(qnode.ids)} curies to a list of "
+ f"{len(converted_curies)} curies tailored for {kp_name}")
+ qnode.ids = converted_curies
+ else:
+ log.info(f"{kp_name} cannot answer the query because no equivalent curies were found "
+ f"with prefixes it supports for qnode {qnode_key}. Original curies were: "
+ f"{qnode.ids}")
+ return None
+ else:
+ # this qnode has no categories, so supported prefixes can't be looked up; just use the canonical (preferred) CURIEs
+ qnode.ids = eu.get_canonical_curies_list(qnode.ids, log)
return qg
@staticmethod
diff --git a/code/ARAX/ARAXQuery/Expand/trapi_querier.py b/code/ARAX/ARAXQuery/Expand/trapi_querier.py
index eeb9ed8fb..81a63de2a 100644
--- a/code/ARAX/ARAXQuery/Expand/trapi_querier.py
+++ b/code/ARAX/ARAXQuery/Expand/trapi_querier.py
@@ -280,17 +280,6 @@ def _get_kg_to_qg_mappings_from_results(
kg_id = edge_binding.id
qedge_key_mappings[kg_id].add(qedge_key)
- if not self.kp_infores_curie == "infores:rtx-kg2":
- # Convert parent curie mappings back to canonical form (we send KPs synonyms sometimes..)
- raw_parent_query_ids = {parent_curie for kg_id, query_ids in kg_id_to_parent_query_id_map.items()
- for parent_curie in query_ids}
- canonical_parent_query_ids = eu.get_canonical_curies_dict(list(raw_parent_query_ids), self.log)
- for kg_id in set(kg_id_to_parent_query_id_map):
- canonical_query_ids = {canonical_parent_query_ids[raw_parent_id]["preferred_curie"]
- if canonical_parent_query_ids.get(raw_parent_id) else raw_parent_id
- for raw_parent_id in kg_id_to_parent_query_id_map.get(kg_id, set())}
- kg_id_to_parent_query_id_map[kg_id] = canonical_query_ids
-
return {"nodes": qnode_key_mappings, "edges": qedge_key_mappings}, kg_id_to_parent_query_id_map
@@ -710,7 +699,24 @@ def _add_subclass_of_edges(self, answer_kg: QGOrganizedKnowledgeGraph) -> QGOrga
all_parent_query_ids = {parent_id for node_key in nodes_with_non_empty_parent_query_ids
for parent_id in answer_kg.nodes_by_qg_id[qnode_key][node_key].query_ids}
parents_missing_from_kg = all_parent_query_ids.difference(set(answer_kg.nodes_by_qg_id[qnode_key]))
- parent_node_info = eu.get_canonical_curies_dict(list(parents_missing_from_kg), self.log)
+
+ # Build a lookup of existing nodes for parents missing under this qnode_key.
+ # These nodes should already be in the answer KG — either as unbound nodes or
+ # bound under a different qnode_key — so we reuse them instead of calling
+ # NodeSynonymizer.
+ existing_parent_nodes = {}
+ for parent_curie in parents_missing_from_kg:
+ if parent_curie in answer_kg.unbound_nodes:
+ existing_parent_nodes[parent_curie] = answer_kg.unbound_nodes[parent_curie].deepcopy()
+ else:
+ for other_qnode_key, nodes_dict in answer_kg.nodes_by_qg_id.items():
+ if other_qnode_key != qnode_key and parent_curie in nodes_dict:
+ existing_parent_nodes[parent_curie] = nodes_dict[parent_curie].deepcopy()
+ break
+ if parent_curie not in existing_parent_nodes:
+ self.log.warning(f"{self.kp_infores_curie}: Parent node {parent_curie} not found "
+ f"anywhere in the answer KG; creating an empty Node")
+ existing_parent_nodes[parent_curie] = Node()
# Add subclass_of edges to the answer KG for any nodes that the KP provided query ID mappings for
for node_key in nodes_with_non_empty_parent_query_ids:
@@ -739,13 +745,7 @@ def _add_subclass_of_edges(self, answer_kg: QGOrganizedKnowledgeGraph) -> QGOrga
for edge in subclass_edges:
# Add the parent to the KG if it isn't in there already
if edge.object not in answer_kg.nodes_by_qg_id[qnode_key]:
- parent_info_dict = parent_node_info.get(edge.object)
- if parent_info_dict:
- parent_node = Node(name=parent_info_dict.get("preferred_name"),
- categories=[parent_info_dict.get("preferred_category")],
- attributes=[])
- else:
- parent_node = Node()
+ parent_node = existing_parent_nodes[edge.object]
parent_node.query_ids = [] # Does not need a mapping since it appears in the QG
answer_kg.add_node(edge.object, parent_node, qnode_key)
edge_key = self._get_arax_edge_key(edge)
diff --git a/code/ARAX/ARAXQuery/Filter_KG/remove_edges.py b/code/ARAX/ARAXQuery/Filter_KG/remove_edges.py
index de6fa5998..50257a789 100644
--- a/code/ARAX/ARAXQuery/Filter_KG/remove_edges.py
+++ b/code/ARAX/ARAXQuery/Filter_KG/remove_edges.py
@@ -18,9 +18,8 @@ def check_kg_nodes(self):
for key, node in self.message.query_graph.nodes.items():
qids[key] = 0
for key, node in self.message.knowledge_graph.nodes.items():
- if node.qnode_keys is not None:
- for qid in node.qnode_keys:
- qids[qid] += 1
+ for qid in (getattr(node, 'qnode_keys', None) or []):
+ qids[qid] += 1
for k, v in qids.items():
if v == 0:
self.response.error(f"Filter removed all of the nodes in the knowledge graph with the qnode id {k}", error_code="RemovedQueryNode")
@@ -32,20 +31,25 @@ def remove_edges_by_predicate(self):
:return: response
"""
self.response.debug("Removing Edges")
- self.response.info("Removing edges from the knowledge graph matching the specified predicate")
edge_params = self.edge_parameters
+ predicate_to_remove = edge_params['edge_predicate']
+ self.response.info("Removing edges from the knowledge graph matching the specified predicate: "
+ f"{predicate_to_remove}")
+ kg = self.message.knowledge_graph
+ qg = self.message.query_graph
try:
edges_to_remove = set()
node_keys_to_remove = {}
edge_qid_dict = {}
- for key, q_edge in self.message.query_graph.edges.items():
+ for key, q_edge in qg.edges.items():
edge_qid_dict[key] = {'subject':q_edge.subject, 'object':q_edge.object}
# iterate over the edges find the edges to remove
- for key, edge in self.message.knowledge_graph.edges.items():
- if edge_params['edge_predicate'] == edge.predicate:
+ for key, edge in kg.edges.items():
+ if edge.predicate == predicate_to_remove:
edges_to_remove.add(key)
+ self.response.debug(f"Removing edge: {key}")
if edge_params['remove_connected_nodes']:
- for qedge_key in edge.qedge_keys:
+ for qedge_key in (getattr(edge, 'qedge_keys', None) or []):  # qedge_keys may be missing or None
if edge.subject not in node_keys_to_remove:
node_keys_to_remove[edge.subject] = {edge_qid_dict[qedge_key]['subject']}
else:
@@ -60,7 +64,7 @@ def remove_edges_by_predicate(self):
nodes_to_remove = set()
skipped_qnode_keys = set()
# iterate over nodes find adjacent connected nodes
- for key, node in self.message.knowledge_graph.nodes.items():
+ for key, node in kg.nodes.items():
if key in node_keys_to_remove:
if 'qnode_keys' in edge_params:
if node.qnode_keys is not None:
@@ -88,27 +92,26 @@ def remove_edges_by_predicate(self):
del node_keys_to_remove[key]
# remove connected nodes
for key in nodes_to_remove:
- del self.message.knowledge_graph.nodes[key]
+ del kg.nodes[key]
# iterate over edges find edges connected to the nodes
- for key, edge in self.message.knowledge_graph.edges.items():
+ for key, edge in kg.edges.items():
if edge.subject in node_keys_to_remove or edge.object in node_keys_to_remove:
edges_to_remove.add(key)
self.check_kg_nodes()
# remove edges
- #self.message.knowledge_graph.edges = [val for idx,val in enumerate(self.message.knowledge_graph.edges) if idx not in edges_to_remove]
for key in edges_to_remove:
- if edge_params.get('qedge_keys',None) is not None:
- if hasattr(self.message.knowledge_graph.edges[key],'qedge_keys') and self.message.knowledge_graph.edges[key].qedge_keys is not None:
- qedge_key_diff = set(self.message.knowledge_graph.edges[key].qedge_keys) - set(edge_params['qedge_keys'])
+ if edge_params.get('qedge_keys', None) is not None:
+ if hasattr(kg.edges[key],'qedge_keys') and kg.edges[key].qedge_keys is not None:
+ qedge_key_diff = set(kg.edges[key].qedge_keys) - set(edge_params['qedge_keys'])
if len(qedge_key_diff) < 1:
- del self.message.knowledge_graph.edges[key]
+ del kg.edges[key]
else:
- self.message.knowledge_graph.edges[key].qedge_keys = list(qedge_key_diff)
+ kg.edges[key].qedge_keys = list(qedge_key_diff)
else:
self.response.warning(
f"The edge {key} does not have a qedge_keys property. Since a value was supplied for the qedge_keys parameter the edge was not removed.")
else:
- del self.message.knowledge_graph.edges[key]
+ del kg.edges[key]
except Exception:
tb = traceback.format_exc()
@@ -116,7 +119,7 @@ def remove_edges_by_predicate(self):
self.response.error(tb, error_code = error_type.__name__)
self.response.error("Something went wrong removing edges from the knowledge graph")
else:
- self.response.info("Edges successfully removed")
+ self.response.info(f"Edges successfully removed: {len(edges_to_remove)}; num left: {len(kg.edges)}")
return self.response
@@ -138,6 +141,8 @@ def remove_edges_by_property(self):
'provided_by'}
provided_by_flag = edge_params['edge_attribute'] in provided_by_attributes
+ message = self.message
+ kg = message.knowledge_graph
try:
edges_to_remove = set()
@@ -146,7 +151,7 @@ def remove_edges_by_property(self):
for key, q_edge in self.message.query_graph.edges.items():
edge_qid_dict[key] = {'subject':q_edge.subject, 'object':q_edge.object}
# iterate over the edges find the edges to remove
- for key, edge in self.message.knowledge_graph.edges.items():
+ for key, edge in kg.edges.items():
edge_dict = edge.to_dict()
# TRAPI1.0 hack to allow filtering by old properties that are now attributes
if hasattr(edge, 'attributes'):
@@ -202,7 +207,7 @@ def remove_edges_by_property(self):
nodes_to_remove = set()
skipped_qnode_keys = set()
# iterate over nodes find adjacent connected nodes
- for key, node in self.message.knowledge_graph.nodes.items():
+ for key, node in kg.nodes.items():
if key in node_keys_to_remove:
if 'qnode_keys' in edge_params:
if node.qnode_keys is not None:
@@ -213,10 +218,8 @@ def remove_edges_by_property(self):
else:
node.qnode_keys.remove(param_qnode_key)
else:
- # del node_keys_to_remove[key]
skipped_qnode_keys.add(key)
else:
- # del node_keys_to_remove[key]
skipped_qnode_keys.add(key)
else:
if len(node.qnode_keys) == 1:
@@ -229,29 +232,27 @@ def remove_edges_by_property(self):
for key in skipped_qnode_keys:
del node_keys_to_remove[key]
# remove connected nodes
- #self.message.knowledge_graph.nodes = [val for idx,val in enumerate(self.message.knowledge_graph.nodes) if idx not in nodes_to_remove]
for key in nodes_to_remove:
- del self.message.knowledge_graph.nodes[key]
+ del kg.nodes[key]
# iterate over edges find edges connected to the nodes
- for key, edge in self.message.knowledge_graph.edges.items():
+ for key, edge in kg.edges.items():
if edge.subject in node_keys_to_remove or edge.object in node_keys_to_remove:
edges_to_remove.add(key)
self.check_kg_nodes()
# remove edges
- #self.message.knowledge_graph.edges = [val for idx,val in enumerate(self.message.knowledge_graph.edges) if idx not in edges_to_remove]
for key in edges_to_remove:
if edge_params.get('qedge_keys',None) is not None:
- if hasattr(self.message.knowledge_graph.edges[key],'qedge_keys') and self.message.knowledge_graph.edges[key].qedge_keys is not None:
- qedge_key_diff = set(self.message.knowledge_graph.edges[key].qedge_keys) - set(edge_params['qedge_keys'])
+ if hasattr(kg.edges[key],'qedge_keys') and kg.edges[key].qedge_keys is not None:
+ qedge_key_diff = set(kg.edges[key].qedge_keys) - set(edge_params['qedge_keys'])
if len(qedge_key_diff) < 1:
- del self.message.knowledge_graph.edges[key]
+ del kg.edges[key]
else:
- self.message.knowledge_graph.edges[key].qedge_keys = list(qedge_key_diff)
+ kg.edges[key].qedge_keys = list(qedge_key_diff)
else:
self.response.warning(
f"The edge {key} does not have a qedge_keys property. Since a value was supplied for the qedge_keys parameter the edge was not removed.")
else:
- del self.message.knowledge_graph.edges[key]
+ del kg.edges[key]
except Exception:
tb = traceback.format_exc()
error_type, error, _ = sys.exc_info()
@@ -270,6 +271,8 @@ def remove_edges_by_attribute(self):
self.response.debug("Removing Edges")
self.response.info("Removing edges from the knowledge graph with the specified attribute values")
edge_params = self.edge_parameters
+ message = self.message
+ kg = message.knowledge_graph
try:
if edge_params['direction'] == 'above':
def compare(x, y):
@@ -284,7 +287,7 @@ def compare(x, y):
for key, q_edge in self.message.query_graph.edges.items():
edge_qid_dict[key] = {'subject':q_edge.subject, 'object':q_edge.object}
# iterate over the edges find the edges to remove
- for key, edge in self.message.knowledge_graph.edges.items(): # iterate over the edges
+ for key, edge in kg.edges.items(): # iterate over the edges
if hasattr(edge, 'attributes'): # check if they have attributes
if edge.attributes: # if there are any edge attributes
for attribute in edge.attributes: # for each attribute
@@ -307,7 +310,7 @@ def compare(x, y):
nodes_to_remove = set()
skipped_qnode_keys = set()
# iterate over nodes find adjacent connected nodes
- for key, node in self.message.knowledge_graph.nodes.items():
+ for key, node in kg.nodes.items():
if key in node_keys_to_remove:
if 'qnode_keys' in edge_params:
if node.qnode_keys is not None:
@@ -318,10 +321,8 @@ def compare(x, y):
else:
node.qnode_keys.remove(param_qnode_key)
else:
- # del node_keys_to_remove[key]
skipped_qnode_keys.add(key)
else:
- # del node_keys_to_remove[key]
skipped_qnode_keys.add(key)
else:
if len(node.qnode_keys) == 1:
@@ -334,34 +335,30 @@ def compare(x, y):
for key in skipped_qnode_keys:
del node_keys_to_remove[key]
# remove connected nodes
- #self.message.knowledge_graph.nodes = [val for idx, val in enumerate(self.message.knowledge_graph.nodes) if idx not in nodes_to_remove]
for key in nodes_to_remove:
- del self.message.knowledge_graph.nodes[key]
- #i = 0
+ del kg.nodes[key]
c = 0
# iterate over edges find edges connected to the nodes
- for key, edge in self.message.knowledge_graph.edges.items():
+ for key, edge in kg.edges.items():
if edge.subject in node_keys_to_remove or edge.object in node_keys_to_remove:
edges_to_remove.add(key)
else:
c += 1
- #i += 1
self.check_kg_nodes()
# remove edges
- #self.message.knowledge_graph.edges = [val for idx,val in enumerate(self.message.knowledge_graph.edges) if idx not in edges_to_remove]
for key in edges_to_remove:
if edge_params.get('qedge_keys',None) is not None:
- if hasattr(self.message.knowledge_graph.edges[key],'qedge_keys') and self.message.knowledge_graph.edges[key].qedge_keys is not None:
- qedge_key_diff = set(self.message.knowledge_graph.edges[key].qedge_keys) - set(edge_params['qedge_keys'])
+ if hasattr(kg.edges[key],'qedge_keys') and kg.edges[key].qedge_keys is not None:
+ qedge_key_diff = set(kg.edges[key].qedge_keys) - set(edge_params['qedge_keys'])
if len(qedge_key_diff) < 1:
- del self.message.knowledge_graph.edges[key]
+ del kg.edges[key]
else:
- self.message.knowledge_graph.edges[key].qedge_keys = list(qedge_key_diff)
+ kg.edges[key].qedge_keys = list(qedge_key_diff)
else:
self.response.warning(
f"The edge {key} does not have a qedge_keys property. Since a value was supplied for the qedge_keys parameter the edge was not removed.")
else:
- del self.message.knowledge_graph.edges[key]
+ del kg.edges[key]
except Exception:
tb = traceback.format_exc()
error_type, error, _ = sys.exc_info()
@@ -380,6 +377,8 @@ def remove_edges_by_stats(self):
self.response.debug("Removing Edges")
self.response.info("Removing edges from the knowledge graph with the specified attribute values")
edge_params = self.edge_parameters
+ message = self.message
+ kg = message.knowledge_graph
try:
edges_to_remove = set()
node_keys_to_remove = {}
@@ -388,7 +387,7 @@ def remove_edges_by_stats(self):
edge_qid_dict[key] = {'subject':q_edge.subject, 'object':q_edge.object}
values = []
# iterate over the edges find the edges to remove
- for key, edge in self.message.knowledge_graph.edges.items(): # iterate over the edges
+ for key, edge in kg.edges.items(): # iterate over the edges
if hasattr(edge, 'attributes'): # check if they have attributes
if edge.attributes: # if there are any edge attributes
for attribute in edge.attributes: # for each attribute
@@ -436,7 +435,7 @@ def remove_edges_by_stats(self):
for edge in values: # here edge = (edge index, value, subject id, object id)
edges_to_remove.add(edge[0]) # mark it to be removed
if edge_params['remove_connected_nodes']: # if you want to remove the connected nodes, mark those too
- for qedge_key in self.message.knowledge_graph.edges[edge[0]].qedge_keys:
+ for qedge_key in kg.edges[edge[0]].qedge_keys:
if edge[2] not in node_keys_to_remove: # edge[2] = edge subect
node_keys_to_remove[edge[2]] = {edge_qid_dict[qedge_key]['subject']}
else:
@@ -452,7 +451,7 @@ def remove_edges_by_stats(self):
nodes_to_remove = set()
skipped_qnode_keys = set()
# iterate over nodes find adjacent connected nodes
- for key, node in self.message.knowledge_graph.nodes.items():
+ for key, node in kg.nodes.items():
if key in node_keys_to_remove:
if 'qnode_keys' in edge_params:
if node.qnode_keys is not None:
@@ -479,32 +478,32 @@ def remove_edges_by_stats(self):
for key in skipped_qnode_keys:
del node_keys_to_remove[key]
# remove connected nodes
- #self.message.knowledge_graph.nodes = [val for idx, val in enumerate(self.message.knowledge_graph.nodes) if idx not in nodes_to_remove]
+ #kg.nodes = [val for idx, val in enumerate(kg.nodes) if idx not in nodes_to_remove]
for key in nodes_to_remove:
- del self.message.knowledge_graph.nodes[key]
+ del kg.nodes[key]
c = 0
# iterate over edges find edges connected to the nodes
- for key, edge in self.message.knowledge_graph.edges.items():
+ for key, edge in kg.edges.items():
if edge.subject in node_keys_to_remove or edge.object in node_keys_to_remove:
edges_to_remove.add(key)
else:
c += 1
self.check_kg_nodes()
# remove edges
- #self.message.knowledge_graph.edges = [val for idx,val in enumerate(self.message.knowledge_graph.edges) if idx not in edges_to_remove]
+ #kg.edges = [val for idx,val in enumerate(kg.edges) if idx not in edges_to_remove]
for key in edges_to_remove:
if edge_params.get('qedge_keys',None) is not None:
- if hasattr(self.message.knowledge_graph.edges[key],'qedge_keys') and self.message.knowledge_graph.edges[key].qedge_keys is not None:
- qedge_key_diff = set(self.message.knowledge_graph.edges[key].qedge_keys) - set(edge_params['qedge_keys'])
+ if hasattr(kg.edges[key],'qedge_keys') and kg.edges[key].qedge_keys is not None:
+ qedge_key_diff = set(kg.edges[key].qedge_keys) - set(edge_params['qedge_keys'])
if len(qedge_key_diff) < 1:
- del self.message.knowledge_graph.edges[key]
+ del kg.edges[key]
else:
- self.message.knowledge_graph.edges[key].qedge_keys = list(qedge_key_diff)
+ kg.edges[key].qedge_keys = list(qedge_key_diff)
else:
self.response.warning(
f"The edge {key} does not have a qedge_keys property. Since a value was supplied for the qedge_keys parameter the edge was not removed.")
else:
- del self.message.knowledge_graph.edges[key]
+ del kg.edges[key]
except Exception:
tb = traceback.format_exc()
error_type, error, _ = sys.exc_info()
diff --git a/code/ARAX/test/test_ARAX_expand.py b/code/ARAX/test/test_ARAX_expand.py
index 8348ef61e..2778c8d7e 100644
--- a/code/ARAX/test/test_ARAX_expand.py
+++ b/code/ARAX/test/test_ARAX_expand.py
@@ -158,7 +158,7 @@ def test_720_multiple_qg_ids_in_different_results():
"add_qedge(key=e00, subject=n00, object=n01)",
"add_qedge(key=e01, subject=n01, object=n02, predicates=biolink:physically_interacts_with)",
"add_qedge(key=e02, subject=n02, object=n03, predicates=biolink:physically_interacts_with)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -178,35 +178,6 @@ def test_bte_query():
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
-def test_single_node_query_with_synonyms():
- actions_list = [
- "add_qnode(key=n00, ids=CHEMBL.COMPOUND:CHEMBL1771)",
- "expand(node_key=n00, kp=infores:rtx-kg2)",
- "return(message=true, store=false)"
- ]
- nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
-
-
-def test_single_node_query_with_no_results():
- actions_list = [
- "add_qnode(key=n00, ids=FAKE:curie)",
- "expand(kp=infores:rtx-kg2)",
- "return(message=true, store=false)"
- ]
- nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list, kg_should_be_incomplete=True)
- assert not nodes_by_qg_id and not edges_by_qg_id
-
-
-def test_single_node_query_with_list():
- actions_list = [
- "add_qnode(key=n00, ids=[CHEMBL.COMPOUND:CHEMBL108, CHEMBL.COMPOUND:CHEMBL110])",
- "expand(kp=infores:rtx-kg2)",
- "return(message=true, store=false)"
- ]
- nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
- assert len(nodes_by_qg_id['n00']) == 2
-
-
@pytest.mark.slow
def test_branched_query():
actions_list = [
@@ -217,7 +188,7 @@ def test_branched_query():
"add_qedge(subject=n01, object=n00, key=e00)",
"add_qedge(subject=n02, object=n00, key=e01)",
"add_qedge(subject=n00, object=n03, key=e02)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -229,8 +200,8 @@ def test_query_that_expands_same_edge_twice():
"add_qnode(key=n00, ids=DOID:9065, categories=biolink:Disease)",
"add_qnode(key=n01, categories=biolink:ChemicalEntity)",
"add_qedge(key=e00, subject=n00, object=n01, predicates=biolink:treats_or_applied_or_studied_to_treat)",
- "expand(kp=infores:rtx-kg2)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -241,7 +212,7 @@ def test_771_continue_if_no_results_query():
"add_qnode(ids=UniProtKB:P14136, key=n00)",
"add_qnode(ids=NOTAREALCURIE, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list, kg_should_be_incomplete=True)
@@ -255,7 +226,7 @@ def test_774_continue_if_no_results_query():
"add_qnode(ids=CHEMBL.COMPOUND:CHEMBL112, key=n1)",
"add_qnode(ids=DOID:8295, key=n2)",
"add_qedge(subject=n1, object=n2, key=e1)",
- "expand(edge_key=e1, kp=infores:rtx-kg2)",
+ "expand(edge_key=e1, kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list, kg_should_be_incomplete=True)
@@ -264,10 +235,10 @@ def test_774_continue_if_no_results_query():
def test_curie_list_query():
actions_list = [
- "add_qnode(ids=[DOID:6419, DOID:3717, DOID:11406], key=n00)",
+ "add_qnode(ids=[MONDO:0008542, MONDO:0005027, MONDO:0005036], key=n00)",
"add_qnode(categories=biolink:PhenotypicFeature, key=n01)",
"add_qedge(subject=n00, object=n01, predicates=biolink:has_phenotype, key=e00)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -280,7 +251,7 @@ def test_query_with_curies_on_both_ends():
"add_qnode(ids=MONDO:0005393, key=n00)", # Gout
"add_qnode(ids=UMLS:C0018100, key=n01)", # Antigout agents
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -294,7 +265,7 @@ def test_query_with_intermediate_curie_node():
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:treats_or_applied_or_studied_to_treat)",
"add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:related_to)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -305,7 +276,7 @@ def test_847_dont_expand_curie_less_edge():
"add_qnode(key=n00, categories=biolink:Protein)",
"add_qnode(key=n01, categories=biolink:ChemicalEntity)",
"add_qedge(key=e00, subject=n00, object=n01)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list, should_throw_error=True,
@@ -318,7 +289,7 @@ def test_deduplication_and_self_edges():
"add_qnode(ids=UMLS:C0004572, key=n00)", # Babesia
"add_qnode(key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -339,7 +310,7 @@ def test_873_consider_both_gene_and_protein():
"add_qnode(ids=DOID:9452, key=n00)",
"add_qnode(categories=biolink:Protein, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)",
]
nodes_by_qg_id_protein, edges_by_qg_id_protein = _run_query_and_do_standard_testing(actions_list_protein)
@@ -347,7 +318,7 @@ def test_873_consider_both_gene_and_protein():
"add_qnode(ids=DOID:9452, key=n00)",
"add_qnode(categories=biolink:Gene, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)",
]
nodes_by_qg_id_gene, edges_by_qg_id_gene = _run_query_and_do_standard_testing(actions_list_gene)
@@ -479,7 +450,7 @@ def test_exclude_edge_parallel():
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n01, object=n00, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
"add_qedge(subject=n01, object=n00, predicates=biolink:causes, key=e01)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -492,7 +463,7 @@ def test_exclude_edge_parallel():
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n01, object=n00, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
"add_qedge(subject=n01, object=n00, predicates=biolink:causes, exclude=true, key=e01)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id_not, edges_by_qg_id_not = _run_query_and_do_standard_testing(actions_list)
@@ -515,7 +486,7 @@ def test_exclude_edge_perpendicular():
# 'Exclude' portion (just optional for now to get a baseline)
f"add_qnode(categories=biolink:Pathway, key=nx0, option_group_id=1, ids=[{exclude_curies}])",
"add_qedge(subject=n01, object=nx0, key=ex0, option_group_id=1, predicates=biolink:related_to)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -534,7 +505,7 @@ def test_exclude_edge_perpendicular():
# 'Exclude' portion
f"add_qnode(categories=biolink:Pathway, key=nx0, ids=[{exclude_curies}])",
"add_qedge(subject=n01, object=nx0, key=ex0, exclude=True, predicates=biolink:related_to)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id_not, edges_by_qg_id_not = _run_query_and_do_standard_testing(actions_list)
@@ -550,8 +521,8 @@ def test_exclude_edge_ordering():
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
"add_qedge(subject=n00, object=n01, predicates=biolink:predisposes_to_condition, exclude=true, key=e01)",
- "expand(kp=infores:rtx-kg2, edge_key=e00)",
- "expand(kp=infores:rtx-kg2, edge_key=e01)",
+ "expand(kp=infores:retriever, edge_key=e00)",
+ "expand(kp=infores:retriever, edge_key=e01)",
"return(message=true, store=false)"
]
nodes_by_qg_id_a, edges_by_qg_id_a = _run_query_and_do_standard_testing(actions_list)
@@ -560,7 +531,7 @@ def test_exclude_edge_ordering():
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
"add_qedge(subject=n00, object=n01, predicates=biolink:predisposes_to_condition, exclude=true, key=e01)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id_b, edges_by_qg_id_b = _run_query_and_do_standard_testing(actions_list)
@@ -569,7 +540,7 @@ def test_exclude_edge_ordering():
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, predicates=biolink:predisposes_to_condition, exclude=true, key=e01)",
"add_qedge(subject=n00, object=n01, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id_c, edges_by_qg_id_c = _run_query_and_do_standard_testing(actions_list)
@@ -585,7 +556,7 @@ def test_exclude_edge_no_results():
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
"add_qedge(subject=n00, object=n01, predicates=biolink:not_a_real_edge_type, exclude=true, key=e01)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
@@ -598,7 +569,7 @@ def test_option_group_query_one_hop():
"add_qnode(key=n01, categories=biolink:ChemicalEntity)",
"add_qedge(key=e00, subject=n01, object=n00, predicates=biolink:causes)",
"add_qedge(key=e01, subject=n00, object=n01, predicates=biolink:affects, option_group_id=1)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
@@ -614,7 +585,7 @@ def test_option_group_query_no_results():
"add_qedge(key=e00, subject=n00, object=n01, predicates=biolink:related_to)",
"add_qedge(key=e01, subject=n00, object=n02, option_group_id=1, predicates=biolink:overlaps)",
"add_qedge(key=e02, subject=n02, object=n01, option_group_id=1, predicates=biolink:affects)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
@@ -625,7 +596,7 @@ def test_category_and_predicate_format():
"add_qnode(ids=UniProtKB:P42857, key=n00)",
"add_qnode(categories=biolink:Protein, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:affects)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -644,7 +615,7 @@ def test_issue_1212():
"add_qnode(ids=FAKE:Curie, categories=biolink:ChemicalEntity, key=n00)",
"add_qnode(categories=biolink:Disease, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list, kg_should_be_incomplete=True)
@@ -656,7 +627,7 @@ def test_issue_1314():
"add_qnode(key=n0, ids=DRUGBANK:DB00394, categories=biolink:ChemicalEntity)",
"add_qnode(key=n1, categories=biolink:Disease)",
"add_qedge(key=e0, subject=n1, object=n0, predicates=biolink:subject_of_treatment_application_or_study_for_treatment_by)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -678,7 +649,7 @@ def test_issue_1236_a():
"add_qnode(ids=NCBIGene:1803, key=n00)",
"add_qnode(categories=biolink:Disease, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:gene_associated_with_condition)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id_kg2_only, edges_by_qg_id_kg2_only = _run_query_and_do_standard_testing(actions_list_kg2_only)
@@ -691,7 +662,7 @@ def test_issue_1236_b():
"add_qnode(ids=DOID:14330, categories=biolink:Disease, key=n00)",
"add_qnode(categories=biolink:Protein, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:condition_associated_with_gene)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -702,7 +673,7 @@ def test_kg2_predicate_hierarchy_reasoning():
"add_qnode(ids=CHEMBL.COMPOUND:CHEMBL112, categories=biolink:ChemicalEntity, key=n00)",
"add_qnode(categories=biolink:Protein, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:affects)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -716,7 +687,7 @@ def test_domain_range_exclusion():
"add_qnode(ids=UMLS:C1510438, key=n00)",
"add_qnode(categories=biolink:Disease, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:diagnoses)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -731,7 +702,7 @@ def test_issue_1373_pinned_curies():
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:related_to)",
"add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:related_to)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -815,7 +786,7 @@ def test_qualified_regulates_query():
{
"id": "knowledge_source",
"name": "knowledge source",
- "value": ["infores:rtx-kg2"],
+ "value": ["infores:retriever"],
"operator": "==",
"not": False
}
@@ -831,7 +802,7 @@ def test_1516_single_quotes_in_ids():
"add_qnode(key=n0,ids=UniProtKB:P00491)",
"add_qnode(key=n1)",
"add_qedge(key=e01,subject=n0,object=n1)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
@@ -839,14 +810,14 @@ def test_1516_single_quotes_in_ids():
def test_input_curie_remapping():
actions = [
- "add_qnode(key=n0, ids=KEGG.COMPOUND:C02700)",
+ "add_qnode(key=n0, ids=KEGG.COMPOUND:C00022)",
"add_qnode(key=n1, categories=biolink:Protein)",
"add_qedge(key=e01, subject=n0, object=n1)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
- assert "KEGG.COMPOUND:C02700" in nodes_by_qg_id["n0"]
+ assert "KEGG.COMPOUND:C00022" in nodes_by_qg_id["n0"]
def test_constraint_validation():
@@ -893,7 +864,7 @@ def test_edge_constraints():
{
"id": "knowledge_source",
"name": "knowledge source",
- "value": ["infores:rtx-kg2","infores:arax","infores:drugbank"],
+ "value": ["infores:retriever","infores:arax","infores:drugbank"],
"operator": "==",
"not": False
}
@@ -909,7 +880,7 @@ def test_canonical_predicates():
"add_qnode(key=n00, ids=CHEMBL.COMPOUND:CHEMBL945)",
"add_qnode(key=n01, categories=biolink:BiologicalEntity)",
"add_qedge(key=e00, subject=n00, object=n01, predicates=biolink:participates_in)", # Not canonical
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
@@ -938,7 +909,7 @@ def test_merging_node_attributes_1450():
"add_qnode(key=n1, categories=biolink:Disease)",
"add_qedge(key=e01, subject=n0, object=n1, predicates=biolink:treats_or_applied_or_studied_to_treat)",
"expand(kp=infores:biothings-explorer)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
@@ -947,7 +918,7 @@ def test_merging_node_attributes_1450():
"add_qnode(key=n0, ids=CHEMBL.COMPOUND:CHEMBL112)",
"add_qnode(key=n1, categories=biolink:Disease)",
"add_qedge(key=e01, subject=n0, object=n1, predicates=biolink:treats_or_applied_or_studied_to_treat)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"expand(kp=infores:biothings-explorer)",
"return(message=true, store=false)"
]
@@ -989,7 +960,7 @@ def test_almost_cycle_1565():
"add_qedge(subject=n1, object=n0, key=e0, predicates=biolink:related_to)",
"add_qedge(subject=n1, object=n2, key=e1, predicates=biolink:related_to)",
"add_qedge(subject=n0, object=n2, key=e2, predicates=biolink:related_to)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -1077,7 +1048,7 @@ def test_inverted_treats_handling():
"add_qnode(key=n0, ids=MONDO:0005077)",
"add_qnode(key=n1, categories=biolink:ChemicalEntity)",
"add_qedge(key=e0, subject=n0, object=n1, predicates=biolink:treats_or_applied_or_studied_to_treat)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
@@ -1436,7 +1407,7 @@ def test_kp_list():
"add_qnode(key=qg0, ids=CHEMBL.COMPOUND:CHEMBL112)",
"add_qnode(key=qg1, categories=biolink:Protein)",
"add_qedge(subject=qg1, object=qg0, key=qe0, predicates=biolink:physically_interacts_with)",
- "expand(edge_key=qe0, kp=[infores:rtx-kg2, infores:molepro])",
+ "expand(edge_key=qe0, kp=[infores:retriever])",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions, timeout=30)
@@ -1444,10 +1415,10 @@ def test_kp_list():
def test_missing_epc_attributes():
actions = [
- "add_qnode(name=Parkinson's disease, key=n0)",
+ "add_qnode(ids=MONDO:0005180, key=n0)",
"add_qnode(categories=biolink:Drug, key=n1)",
"add_qedge(subject=n1, object=n0, key=e0, predicates=biolink:predisposes_to_condition)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
@@ -1463,41 +1434,12 @@ def test_missing_epc_attributes():
assert publications
-def test_kg2_version():
- query = {
- "nodes": {
- "n00": {
- "ids": ["RTX:KG2c"]
- }
- },
- "edges": {}
- }
- nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query)
-
- # First grab KG2 version from the KG2c build node
- assert nodes_by_qg_id["n00"]
- assert len(nodes_by_qg_id["n00"]) == 1
- build_node = nodes_by_qg_id["n00"]["RTX:KG2c"]
- kg2c_build_node_version = build_node.name.replace("RTX-KG", "").strip("c")
- print(f"KG2 version from KG2c build node is: {kg2c_build_node_version}")
-
- # Then grab KG2 version from the OpenAPI spec
- code_dir = os.path.dirname(os.path.abspath(__file__)) + "/../../"
- kg2_openapi_yaml_path = f"{code_dir}/UI/OpenAPI/specifications/export/KG2/1.5.0/openapi.yaml"
- with open(kg2_openapi_yaml_path) as kg2_api_file:
- kg2_openapi_configuration = yaml.safe_load(kg2_api_file)
- kg2_openapi_version = kg2_openapi_configuration["info"]["version"]
- print(f"KG2 version from KG2 openapi.yaml file is: {kg2_openapi_version}")
-
- assert kg2c_build_node_version == kg2_openapi_version
-
-
def test_klat_attributes():
actions_list = [
- "add_qnode(key=n0, ids=DRUGBANK:DB00394)",
- "add_qnode(key=n1, categories=biolink:Disease)",
- "add_qedge(key=e0, subject=n1, object=n0, predicates=biolink:treats_or_applied_or_studied_to_treat)",
- "expand(kp=infores:rtx-kg2)",
+ "add_qnode(key=n0, ids=CHEBI:15367)",
+ "add_qnode(key=n1, ids=MONDO:0015564)",
+ "add_qedge(key=e0, subject=n0, object=n1, predicates=biolink:treats_or_applied_or_studied_to_treat)",
+ "expand(kp=infores:retriever)",
"return(message=true, store=false)"
]
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
@@ -1515,7 +1457,7 @@ def test_treats_patch_issue_2328_a():
"ids": ["MONDO:0015564"]
},
"chemical": {
- "categories": ["biolink:ChemicalEntity"]
+ "ids": ["CHEBI:175901"]
}
},
"edges": {
@@ -1537,9 +1479,10 @@ def test_treats_patch_issue_2328_a():
}
nodes_by_qg_id, edges_by_qg_id, message = _run_query_and_do_standard_testing(json_query=query, return_message=True)
assert edges_by_qg_id["t_edge"]
+
# Make sure the KG2 edges, which are higher-level treats edges, are in the KG (used as support edges)
creative_expand_treats_edges = [edge for edge_key, edge in message.knowledge_graph.edges.items()
- if edge_key.startswith("creative_expand")]
+ if edge_key.startswith("creative_DTD_")]
support_edge_keys = set()
for edge in creative_expand_treats_edges:
aux_graph_keys = get_support_graphs_attribute(edge).value
@@ -1548,9 +1491,8 @@ def test_treats_patch_issue_2328_a():
aux_graph = message.auxiliary_graphs[aux_graph_key]
support_edge_keys.update(set(aux_graph.edges))
support_edges = [message.knowledge_graph.edges[edge_key] for edge_key in support_edge_keys]
-
assert any(source.resource_id == "infores:rtx-kg2" for edge in support_edges for source in edge.sources)
- # assert not any(source.resource_id == "infores:semmeddb" for edge in support_edges for source in edge.sources)
+
def test_treats_patch_issue_2328_b():
# Verify that the edge editing doesn't happen outside of inferred mode
@@ -1560,33 +1502,24 @@ def test_treats_patch_issue_2328_b():
"ids": ["MONDO:0015564"]
},
"chemical": {
- "categories": ["biolink:ChemicalEntity"]
+ "ids": ["CHEBI:15367"]
}
},
"edges": {
"t_edge": {
"object": "disease",
"subject": "chemical",
- "predicates": ["biolink:treats_or_applied_or_studied_to_treat", "biolink:applied_to_treat"],
- "attribute_constraints": [
- {
- "id": "knowledge_source",
- "name": "knowledge source",
- "value": ["infores:rtx-kg2"],
- "operator": "=="
- }
- ]
+ "predicates": ["biolink:treats_or_applied_or_studied_to_treat"]
}
}
}
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query)
assert edges_by_qg_id["t_edge"]
kg2_edges_treats_or = [edge for edge in edges_by_qg_id["t_edge"].values()
- if any(source.resource_id == "infores:rtx-kg2" for source in edge.sources)]
+ if any(source.resource_id == "infores:retriever" for source in edge.sources)]
print(f"Answer includes {len(kg2_edges_treats_or)} edges from KG2")
assert kg2_edges_treats_or
assert any(edge for edge in kg2_edges_treats_or if edge.predicate == "biolink:treats_or_applied_or_studied_to_treat")
- assert any(edge for edge in kg2_edges_treats_or if edge.predicate == "biolink:applied_to_treat")
@pytest.mark.external
@@ -1637,12 +1570,6 @@ def test_creative_treats_predicate_alteration_2412():
def test_issue_2662():
- kpic = KPInfoCacher()
- saved_trapi_version = kpic.forced_kp_version
- kpic.forced_kp_version = "1.6.0"
- kpic.refresh_kp_info_caches()
- saved_arax_response_output = ARAXResponse.output
- ARAXResponse.output = 'STDERR'
query_graph_dict = {
"edges": {
"50efaa83": {
@@ -1673,9 +1600,6 @@ def test_issue_2662():
}
}
message = ARAXQuery().query_return_message(envelope_dict).message
- kpic.forced_kp_version = saved_trapi_version
- kpic.refresh_kp_info_caches()
- ARAXResponse.output = saved_arax_response_output
aux_graphs = message.auxiliary_graphs
assert aux_graphs is not None and len(aux_graphs) > 0
kg = message.knowledge_graph
@@ -1684,11 +1608,6 @@ def test_issue_2662():
def test_issue_2678():
- kpic = KPInfoCacher()
- saved_trapi_version = kpic.forced_kp_version
- kpic.forced_kp_version = "1.6.0"
- kpic.refresh_kp_info_caches()
- saved_arax_response_output = ARAXResponse.output
query_graph_dict = {
"edges": {
"50efaa83": {
@@ -1723,8 +1642,6 @@ def test_issue_2678():
message = response.message
messages_str = json.dumps(aq.response.messages)
disease_node = message.knowledge_graph.nodes['MONDO:0016098']
- kpic.forced_kp_version = saved_trapi_version
- kpic.refresh_kp_info_caches()
assert 'biolink:PhenotypicFeature' not in messages_str
diff --git a/code/ARAX/test/test_ARAX_filter_kg.py b/code/ARAX/test/test_ARAX_filter_kg.py
index 3775fb12c..d6078ae32 100644
--- a/code/ARAX/test/test_ARAX_filter_kg.py
+++ b/code/ARAX/test/test_ARAX_filter_kg.py
@@ -51,7 +51,7 @@ def test_warnings():
"add_qnode(name=DOID:8741, key=n00)",
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=asdfghjkl, direction=below, threshold=.2)",
"filter_kg(action=remove_edges_by_discrete_attribute, edge_attribute=asdfghjkl, value=qwertyuiop)",
"filter_kg(action=remove_edges_by_std_dev, edge_attribute=asdfghjkl, remove_connected_nodes=f, threshold=0.25, top=f, direction=above)",
@@ -71,15 +71,15 @@ def test_error():
"create_message",
"add_qnode(name=MONDO:0001475, key=n00)",
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
- "add_qedge(subject=n01, object=n00, key=e00, predicates=biolink:related_to_at_instance_level)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "add_qedge(subject=n01, object=n00, key=e00, predicates=biolink:related_to)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"filter_kg(action=remove_edges_by_predicate, edge_predicate=biolink:treats_or_applied_or_studied_to_treat, remove_connected_nodes=t, qedge_keys=[e00])",
"resultify(ignore_edge_direction=true)",
"return(message=true, store=false)"
]}}
[response, message] = _do_arax_query(query, False)
assert response.status == 'ERROR'
- assert response.error_code == "OrphanEdges"
+ assert response.error_code == "RemovedQueryNode"
def test_edge_key_removal():
query = {"operations": {"actions": [
@@ -89,7 +89,7 @@ def test_edge_key_removal():
"add_qnode(categories=biolink:Disease, key=n02)",
"add_qedge(subject=n01, object=n00, key=e00, predicates=biolink:treats)",
"add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:treats)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"filter_kg(action=remove_edges_by_predicate, edge_predicate=biolink:treats, remove_connected_nodes=f, qedge_keys=[e01])",
"return(message=true, store=false)"
]}}
@@ -109,7 +109,7 @@ def test_default_std_dev():
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00)",
"add_qedge(subject=n01, object=n02, key=e01)",
- "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "expand(edge_key=[e00,e01], kp=infores:retriever)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J2)",
"return(message=true, store=false)",
@@ -125,7 +125,7 @@ def test_default_std_dev():
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00)",
"add_qedge(subject=n01, object=n02, key=e01)",
- "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "expand(edge_key=[e00,e01], kp=infores:retriever)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J2)",
"filter_kg(action=remove_edges_by_std_dev, edge_attribute=jaccard_index, remove_connected_nodes=f)",
@@ -146,7 +146,7 @@ def test_std_dev():
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00)",
"add_qedge(subject=n01, object=n02, key=e01)",
- "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "expand(edge_key=[e00,e01], kp=infores:retriever)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J2)",
"resultify(ignore_edge_direction=true, debug=true)",
@@ -165,7 +165,7 @@ def test_std_dev():
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00)",
"add_qedge(subject=n01, object=n02, key=e01)",
- "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "expand(edge_key=[e00,e01], kp=infores:retriever)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J2)",
"filter_kg(action=remove_edges_by_std_dev, edge_attribute=jaccard_index, remove_connected_nodes=f, threshold=0.25, top=f, direction=above)",
@@ -187,7 +187,7 @@ def test_default_top_n():
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00)",
"add_qedge(subject=n01, object=n02, key=e01)",
- "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "expand(edge_key=[e00,e01], kp=infores:retriever)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J2)",
"return(message=true, store=false)",
@@ -205,7 +205,7 @@ def test_default_top_n():
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00)",
"add_qedge(subject=n01, object=n02, key=e01)",
- "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "expand(edge_key=[e00,e01], kp=infores:retriever)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J2)",
"filter_kg(action=remove_edges_by_top_n, edge_attribute=jaccard_index, remove_connected_nodes=f)",
@@ -225,7 +225,7 @@ def test_remove_property_known_attributes():
"add_qnode(ids=CHEBI:17754, categories=biolink:ChemicalEntity, key=n0)",
"add_qnode(categories=biolink:Gene, key=n1)",
"add_qedge(subject=n1, object=n0, key=e0,predicates=biolink:negatively_regulates_entity_to_entity)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"filter_kg(action=remove_edges_by_discrete_attribute,edge_attribute=provided_by,value=SEMMEDDB:,remove_connected_nodes=false)",
"resultify()",
"filter_results(action=limit_number_of_results, max_results=30)",
@@ -243,7 +243,7 @@ def test_remove_attribute_known_attributes():
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00)",
"add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:physically_interacts_with)",
- "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "expand(edge_key=[e00,e01], kp=infores:retriever)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
"filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=jaccard_index, direction=below, threshold=.2, remove_connected_nodes=t, qnode_keys=[n02])",
#"filter_kg(action=remove_edges_by_discrete_attribute,edge_attribute=provided_by, value=Pharos)",
@@ -262,7 +262,7 @@ def test_provided_by_filter():
"add_qnode(ids=CHEBI:17754, categories=biolink:ChemicalEntity, key=n0)",
"add_qnode(categories=biolink:Gene, key=n1)",
"add_qedge(subject=n1, object=n0, key=e0,predicates=biolink:entity_negatively_regulates_entity)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"filter_kg(action=remove_edges_by_discrete_attribute,edge_attribute=knowledge_source,value=infores:semmeddb,remove_connected_nodes=false)",
"resultify()",
#"filter_results(action=limit_number_of_results, max_results=30)",
@@ -277,7 +277,7 @@ def test_provided_by_filter():
"add_qnode(ids=CHEBI:17754, categories=biolink:ChemicalEntity, key=n0)",
"add_qnode(categories=biolink:Gene, key=n1)",
"add_qedge(subject=n1, object=n0, key=e0,predicates=biolink:entity_negatively_regulates_entity)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
#"filter_kg(action=remove_edges_by_discrete_attribute,edge_attribute=biolink:original_source,value=infores:semmeddb,remove_connected_nodes=false)",
"resultify()",
#"filter_results(action=limit_number_of_results, max_results=30)",
@@ -323,7 +323,7 @@ def test_tuple_bug():
"add_qnode(key=n00,ids=DRUGBANK:DB00150,categories=biolink:ChemicalEntity)",
"add_qnode(key=n01,categories=biolink:Protein)",
"add_qedge(key=e00,subject=n00,object=n01)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"overlay(action=fisher_exact_test,subject_qnode_key=n00,virtual_relation_label=F0,object_qnode_key=n01)",
"filter_kg(action=remove_edges_by_top_n,edge_attribute=fisher_exact_test_p-value,direction=below,n=10,remove_connected_nodes=true,qnode_keys=[n01])",
"resultify()",
diff --git a/code/ARAX/test/test_ARAX_filter_results.py b/code/ARAX/test/test_ARAX_filter_results.py
index 3a424f7e6..533b9eb44 100644
--- a/code/ARAX/test/test_ARAX_filter_results.py
+++ b/code/ARAX/test/test_ARAX_filter_results.py
@@ -51,7 +51,7 @@ def test_n_results():
"add_qnode(name=UMLS:C0040250, key=n00)",
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"overlay(action=add_node_pmids, max_num=15)",
"resultify(ignore_edge_direction=true)",
"filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=3)",
@@ -67,7 +67,7 @@ def test_no_results():
"add_qnode(name=DOID:4337, key=n00)",
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"overlay(action=add_node_pmids, max_num=15)",
"filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=20)",
"return(message=true, store=false)"
@@ -83,7 +83,7 @@ def test_prune():
"add_qnode(name=DOID:4337, key=n00)",
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"overlay(action=add_node_pmids, max_num=15)",
"resultify(ignore_edge_direction=true)",
"filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=20, prune_kg=f)",
@@ -95,7 +95,7 @@ def test_prune():
"add_qnode(name=DOID:4337, key=n00)",
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"overlay(action=add_node_pmids, max_num=15)",
"resultify(ignore_edge_direction=true)",
"filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=20)",
@@ -125,7 +125,7 @@ def test_warning():
"add_qnode(name=UMLS:C0040250, key=n00)",
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"overlay(action=add_node_pmids, max_num=15)",
"resultify(ignore_edge_direction=true)",
"filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=3)",
@@ -145,7 +145,7 @@ def test_sort_by_edge_attribute():
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00)",
"add_qedge(subject=n01, object=n02, key=e01)",
- "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "expand(edge_key=[e00,e01], kp=infores:retriever)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J2)",
"resultify(ignore_edge_direction=true)",
@@ -163,7 +163,7 @@ def test_sort_by_node_attribute():
"add_qnode(name=UMLS:C0040250, key=n00)",
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"overlay(action=add_node_pmids, max_num=15)",
"resultify(ignore_edge_direction=true)",
"filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=3, qnode_keys=[n01])",
@@ -180,7 +180,7 @@ def test_sort_by_score():
"add_qnode(name=UMLS:C0040250, key=n00)",
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"resultify(ignore_edge_direction=true)",
"filter_results(action=sort_by_score, direction=a, max_results=3)",
"return(message=true, store=false)"
diff --git a/code/ARAX/test/test_ARAX_json_queries.py b/code/ARAX/test/test_ARAX_json_queries.py
index fa0a72dae..ef8f3e425 100644
--- a/code/ARAX/test/test_ARAX_json_queries.py
+++ b/code/ARAX/test/test_ARAX_json_queries.py
@@ -138,7 +138,7 @@ def test_workflow1():
"id": "fill",
"parameters": {
"allowlist": [
- "infores:rtx-kg2"
+ "infores:retriever"
],
"qedge_keys": [
"e00"
@@ -159,7 +159,7 @@ def test_workflow1():
"subject": "n00",
"object": "n01",
"predicates": [
- "biolink:physically_interacts_with"
+ "biolink:interacts_with"
]
}
},
@@ -171,7 +171,7 @@ def test_workflow1():
},
"n01": {
"categories": [
- "biolink:Protein"
+ "biolink:Gene"
]
}
}
@@ -180,7 +180,7 @@ def test_workflow1():
}
nodes_by_qg_id, edges_by_qg_id, response = _run_query_and_do_standard_testing(json_query=query)
essences = [x.to_dict()['essence'].upper() for x in response.envelope.message.results]
- assert 'VANILLOID RECEPTOR' in essences
+ assert 'PTGS2' in essences
@pytest.mark.slow
def test_workflow2():
@@ -193,7 +193,7 @@ def test_workflow2():
"id": "fill",
"parameters": {
"allowlist": [
- "infores:rtx-kg2",
+ "infores:retriever",
"infores:biothings-explorer"
],
"qedge_keys": [
@@ -213,7 +213,7 @@ def test_workflow2():
"id": "fill",
"parameters": {
"allowlist": [
- "infores:rtx-kg2",
+ "infores:retriever",
"infores:biothings-explorer"
]
}
@@ -234,7 +234,7 @@ def test_workflow2():
"id": "fill",
"parameters": {
"allowlist": [
- "infores:rtx-kg2",
+ "infores:retriever",
"infores:biothings-explorer"
],
"qedge_keys": [
diff --git a/code/ARAX/test/test_ARAX_overlay.py b/code/ARAX/test/test_ARAX_overlay.py
index 51a642b8e..dac8289cb 100644
--- a/code/ARAX/test/test_ARAX_overlay.py
+++ b/code/ARAX/test/test_ARAX_overlay.py
@@ -106,11 +106,11 @@ def test_jaccard():
query = {"operations": {"actions": [
"create_message",
"add_qnode(name=DOID:1947, key=n00)",
- "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+ "add_qnode(categories=biolink:Gene, is_set=true, key=n01)",
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:physically_interacts_with)",
- "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:interacts_with)",
+ "expand(edge_key=[e00,e01], kp=infores:retriever)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
"resultify(ignore_edge_direction=true, debug=true)",
"return(message=true, store=false)",
@@ -144,7 +144,7 @@ def test_add_node_pmids():
"add_qnode(name=MONDO:0018077, key=n00)",
"add_qnode(categories=biolink:ChemicalEntity, is_set=true, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"overlay(action=add_node_pmids, max_num=15)",
"return(message=true, store=false)"
]}}
@@ -176,7 +176,7 @@ def test_compute_ngd_virtual():
"add_qnode(name=DOID:384, key=n00)",
"add_qnode(categories=biolink:ChemicalEntity, is_set=true, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"overlay(action=compute_ngd, subject_qnode_key=n00, object_qnode_key=n01, virtual_relation_label=N1)",
"resultify(ignore_edge_direction=true, debug=true)",
"return(message=true, store=false)",
@@ -213,7 +213,7 @@ def test_compute_ngd_attribute():
"add_qnode(name=DOID:384, key=n00)",
"add_qnode(categories=biolink:ChemicalEntity, is_set=true, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"overlay(action=compute_ngd)",
"resultify(ignore_edge_direction=true, debug=true)",
"return(message=true, store=false)",
@@ -241,14 +241,14 @@ def test_FET_ex1():
query = {"operations": {"actions": [
"create_message",
"add_qnode(ids=DOID:12889, key=n00, categories=biolink:Disease)",
- "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+ "add_qnode(categories=biolink:Gene, is_set=true, key=n01)",
"add_qedge(subject=n00, object=n01,key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"overlay(action=fisher_exact_test, subject_qnode_key=n00, object_qnode_key=n01, virtual_relation_label=FET1, rel_edge_key=e00)",
"filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=fisher_exact_test_p-value, direction=above, threshold=0.005, remove_connected_nodes=t, qnode_keys=[n01])",
"add_qnode(categories=biolink:ChemicalEntity, is_set=true, key=n02)",
- "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:physically_interacts_with)",
- "expand(edge_key=e01, kp=infores:rtx-kg2)",
+ "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:interacts_with)",
+ "expand(edge_key=e01, kp=infores:retriever)",
"overlay(action=fisher_exact_test, subject_qnode_key=n01, object_qnode_key=n02, virtual_relation_label=FET2, filter_type=cutoff, value=0.05)",
"resultify()",
"return(message=true, store=false)"
@@ -303,7 +303,7 @@ def test_FET_ex2():
"add_qnode(ids=DOID:12889, key=n00, categories=biolink:Disease)",
"add_qnode(categories=biolink:Protein, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"overlay(action=fisher_exact_test, subject_qnode_key=n00, virtual_relation_label=FET, object_qnode_key=n01, rel_edge_key=e00, top_n=20)",
"resultify()",
"return(message=true, store=false)"
@@ -354,7 +354,7 @@ def test_paired_concept_frequency_virtual():
"add_qnode(name=DOID:1588, key=n0)",
"add_qnode(categories=biolink:ChemicalEntity, key=n1)",
"add_qedge(subject=n0, object=n1, key=e0)",
- "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "expand(edge_key=e0, kp=infores:retriever)",
"overlay(action=overlay_clinical_info, paired_concept_frequency=true, subject_qnode_key=n0, object_qnode_key=n1, virtual_relation_label=CP1)",
"resultify()",
"return(message=true, store=false)",
@@ -372,7 +372,7 @@ def test_paired_concept_frequency_attribute():
"add_qnode(name=DOID:1588, key=n0)",
"add_qnode(categories=biolink:ChemicalEntity, key=n1)",
"add_qedge(subject=n0, object=n1, key=e0)",
- "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "expand(edge_key=e0, kp=infores:retriever)",
"overlay(action=overlay_clinical_info, COHD_method=paired_concept_frequency)",
"resultify()",
"return(message=true, store=false)",
@@ -390,7 +390,7 @@ def test_observed_expected_ratio_virtual():
"add_qnode(name=DOID:1588, key=n0)",
"add_qnode(categories=biolink:ChemicalEntity, key=n1)",
"add_qedge(subject=n0, object=n1, key=e0)",
- "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "expand(edge_key=e0, kp=infores:retriever)",
"overlay(action=overlay_clinical_info,observed_expected_ratio=true, subject_qnode_key=n0, object_qnode_key=n1, virtual_relation_label=CP1)",
"resultify()",
"return(message=true, store=false)",
@@ -408,7 +408,7 @@ def test_observed_expected_ratio_attribute():
"add_qnode(name=DOID:1588, key=n0)",
"add_qnode(categories=biolink:ChemicalEntity, key=n1)",
"add_qedge(subject=n0, object=n1, key=e0)",
- "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "expand(edge_key=e0, kp=infores:retriever)",
"overlay(action=overlay_clinical_info, COHD_method=observed_expected_ratio)",
"resultify()",
"return(message=true, store=false)",
@@ -426,7 +426,7 @@ def test_chi_square_virtual():
"add_qnode(name=DOID:1588, key=n0)",
"add_qnode(categories=biolink:ChemicalEntity, key=n1)",
"add_qedge(subject=n0, object=n1, key=e0)",
- "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "expand(edge_key=e0, kp=infores:retriever)",
"overlay(action=overlay_clinical_info, chi_square=true, subject_qnode_key=n0, object_qnode_key=n1, virtual_relation_label=CP1)",
"resultify()",
"return(message=true, store=false)",
@@ -444,7 +444,7 @@ def test_chi_square_attribute():
"add_qnode(name=DOID:1588, key=n0)",
"add_qnode(categories=biolink:ChemicalEntity, key=n1)",
"add_qedge(subject=n0, object=n1, key=e0)",
- "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "expand(edge_key=e0, kp=infores:retriever)",
"overlay(action=overlay_clinical_info, COHD_method=chi_square)",
"resultify()",
"return(message=true, store=false)",
@@ -464,7 +464,7 @@ def test_predict_drug_treats_disease_virtual():
"add_qnode(ids=DOID:0080909, key=n0, categories=biolink:Disease)",
"add_qnode(categories=biolink:ChemicalEntity, key=n1)",
"add_qedge(subject=n0, object=n1, key=e0)",
- "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "expand(edge_key=e0, kp=infores:retriever)",
"overlay(action=predict_drug_treats_disease, subject_qnode_key=n1, object_qnode_key=n0, virtual_relation_label=P1)",
"resultify()",
"return(message=true, store=false)",
@@ -484,7 +484,7 @@ def test_predict_drug_treats_disease_attribute():
"add_qnode(ids=DOID:0080909, key=n0)",
"add_qnode(categories=biolink:ChemicalEntity, key=n1)",
"add_qedge(subject=n0, object=n1, key=e0)",
- "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "expand(edge_key=e0, kp=infores:retriever)",
"overlay(action=predict_drug_treats_disease, threshold=0.7)",
"resultify()",
"return(message=true, store=false)",
@@ -504,7 +504,7 @@ def test_issue_832():
"add_qnode(ids=DOID:0080909, key=n0)",
"add_qnode(categories=biolink:ChemicalEntity, key=n1)",
"add_qedge(subject=n0, object=n1, key=e0)",
- "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "expand(edge_key=e0, kp=infores:retriever)",
"overlay(action=predict_drug_treats_disease, subject_qnode_key=n1, object_qnode_key=n0, virtual_relation_label=P1)",
"resultify()",
"return(message=true, store=false)",
@@ -523,7 +523,7 @@ def test_issue_832_non_drug():
"add_qnode(ids=UniProtKB:P62328, key=n0)",
"add_qnode(categories=biolink:ChemicalEntity, key=n1)",
"add_qedge(subject=n0, object=n1, key=e0)",
- "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "expand(edge_key=e0, kp=infores:retriever)",
"overlay(action=predict_drug_treats_disease, subject_qnode_key=n1, object_qnode_key=n0, virtual_relation_label=P1)",
"resultify()",
"return(message=true, store=false)",
@@ -543,7 +543,7 @@ def test_issue_840():
"add_qnode(name=DOID:1588, key=n0)",
"add_qnode(categories=biolink:ChemicalEntity, key=n1)",
"add_qedge(subject=n0, object=n1, key=e0)",
- "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "expand(edge_key=e0, kp=infores:retriever)",
"overlay(action=overlay_clinical_info, paired_concept_frequency=true, subject_qnode_key=n1, object_qnode_key=n0, virtual_relation_label=V1)",
"resultify()",
"return(message=true, store=false)",
@@ -559,7 +559,7 @@ def test_issue_840():
"add_qnode(name=DOID:1588, key=n0)",
"add_qnode(categories=biolink:ChemicalEntity, key=n1)",
"add_qedge(subject=n0, object=n1, key=e0)",
- "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "expand(edge_key=e0, kp=infores:retriever)",
"overlay(action=overlay_clinical_info, paired_concept_frequency=true)",
"resultify()",
"return(message=true, store=false)",
@@ -577,7 +577,7 @@ def test_issue_840_non_drug():
"add_qnode(name=UniProtKB:P62328, key=n0)",
"add_qnode(categories=biolink:ChemicalEntity, key=n1)",
"add_qedge(subject=n0, object=n1, key=e0)",
- "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "expand(edge_key=e0, kp=infores:retriever)",
"overlay(action=overlay_clinical_info, paired_concept_frequency=true, subject_qnode_key=n1, object_qnode_key=n0, virtual_relation_label=V1)",
"resultify()",
"return(message=true, store=false)",
@@ -595,7 +595,7 @@ def test_issue_840_non_drug():
"add_qnode(name=UniProtKB:P62328, key=n0)",
"add_qnode(categories=biolink:ChemicalEntity, key=n1)",
"add_qedge(subject=n0, object=n1, key=e0)",
- "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "expand(edge_key=e0, kp=infores:retriever)",
"overlay(action=overlay_clinical_info, paired_concept_frequency=true)",
"resultify()",
"return(message=true, store=false)",
@@ -635,7 +635,7 @@ def test_overlay_exposures_data_virtual():
query = {"operations": {"actions": [
"add_qnode(name=CHEMBL.COMPOUND:CHEMBL635, key=n0)",
"add_qnode(name=MESH:D052638, key=n1)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"overlay(action=overlay_exposures_data, virtual_relation_label=E1, subject_qnode_key=n0, object_qnode_key=n1)",
"resultify()",
"return(message=true, store=false)",
@@ -652,7 +652,7 @@ def test_overlay_exposures_data_attribute():
"add_qnode(name=MONDO:0012607, key=n0)",
"add_qnode(name=MONDO:0010940, key=n1)",
"add_qedge(subject=n0, object=n1, key=e0)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"overlay(action=overlay_exposures_data)",
"resultify()",
"return(message=true, store=false)",
@@ -669,7 +669,7 @@ def test_overlay_clinical_info_no_ids():
"create_message",
"add_qnode(name=acetaminophen, key=n0)",
"add_qnode(name=Sotos syndrome, key=n1)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"overlay(action=overlay_clinical_info,COHD_method=paired_concept_frequency,virtual_relation_label=C1,subject_qnode_key=n0,object_qnode_key=n1)",
"overlay(action=overlay_clinical_info,COHD_method=observed_expected_ratio,virtual_relation_label=C2,subject_qnode_key=n0,object_qnode_key=n1)",
"overlay(action=overlay_clinical_info,COHD_method=chi_square,virtual_relation_label=C3,subject_qnode_key=n0,object_qnode_key=n1)",
@@ -694,7 +694,7 @@ def test_missing_ngd_pmids():
"add_qedge(subject=n0, object=n1, key=e0)",
"add_qnode(categories=[biolink:ChemicalEntity,biolink:Drug], key=n2)",
"add_qedge(subject=n1, object=n2, key=e1)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"overlay(action=compute_ngd, virtual_relation_label=N1, subject_qnode_key=n0, object_qnode_key=n1)",
"overlay(action=compute_ngd, virtual_relation_label=N2, subject_qnode_key=n1, object_qnode_key=n2)",
"overlay(action=compute_ngd, virtual_relation_label=N3, subject_qnode_key=n0, object_qnode_key=n2)",
@@ -733,7 +733,7 @@ def test_jaccard_not_above_1():
"add_qedge(key=E0,subject=N0,object=N1,predicates=biolink:physically_interacts_with)",
"add_qnode(key=N2,categories=biolink:ChemicalEntity)",
"add_qedge(key=E2,subject=N1,object=N2)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"overlay(action=compute_ngd,default_value=inf,virtual_relation_label=V1,subject_qnode_key=N0,object_qnode_key=N1)",
"overlay(action=compute_ngd,default_value=inf,virtual_relation_label=V2,subject_qnode_key=N1,object_qnode_key=N2)",
"overlay(action=compute_ngd,default_value=inf,virtual_relation_label=V3,subject_qnode_key=N0,object_qnode_key=N2)",
diff --git a/code/ARAX/test/test_ARAX_resultify.py b/code/ARAX/test/test_ARAX_resultify.py
index 90800839b..8f9dcc420 100644
--- a/code/ARAX/test/test_ARAX_resultify.py
+++ b/code/ARAX/test/test_ARAX_resultify.py
@@ -699,7 +699,7 @@ def test09():
"add_qnode(name=DOID:731, key=n00, categories=biolink:Disease, is_set=false)",
"add_qnode(categories=biolink:PhenotypicFeature, is_set=false, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"resultify(ignore_edge_direction=true, debug=true)",
"filter_results(action=limit_number_of_results, max_results=100)",
"return(message=true, store=false)"
@@ -722,7 +722,7 @@ def test_example1():
"add_qnode(key=qg0, ids=CHEMBL.COMPOUND:CHEMBL112)",
"add_qnode(key=qg1, categories=biolink:Protein)",
"add_qedge(subject=qg1, object=qg0, key=qe0)",
- "expand(edge_key=qe0, kp=infores:rtx-kg2)",
+ "expand(edge_key=qe0, kp=infores:retriever)",
"resultify(ignore_edge_direction=true, debug=true)",
"return(message=true, store=false)"
]
@@ -846,7 +846,7 @@ def test_issue680():
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n01, object=n00, key=e00, predicates=biolink:causes)",
"add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:physically_interacts_with)",
- "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "expand(edge_key=[e00,e01], kp=infores:retriever)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
"filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=jaccard_index, direction=below, threshold=.2, remove_connected_nodes=t, qnode_keys=[n02])",
"resultify(ignore_edge_direction=true, debug=true)",
@@ -880,11 +880,14 @@ def test_issue686a():
# Tests that an error is thrown when an invalid parameter is passed to resultify
actions = [
'add_qnode(key=qg0, ids=CHEMBL.COMPOUND:CHEMBL112)',
- 'expand(kp=infores:rtx-kg2)',
+ 'add_qnode(key=qg1, ids=MONDO:0018958)',
+ 'add_qedge(key=e0, subject=qg0, object=qg1, predicates=biolink:treats)',
+ 'expand(kp=infores:retriever)',
'resultify(ignore_edge_direction=true, INVALID_PARAMETER_NAME=true)',
"return(message=true, store=false)"
]
response, message = _do_arax_query(actions)
+ assert response.status == 'ERROR'
assert 'INVALID_PARAMETER_NAME' in response.show()
@@ -892,7 +895,9 @@ def test_issue686b():
# Tests that resultify can be called with no parameters passed in
actions = [
'add_qnode(key=qg0, ids=CHEMBL.COMPOUND:CHEMBL112)',
- 'expand(kp=infores:rtx-kg2)',
+ 'add_qnode(key=qg1, ids=MONDO:0018958)',
+ 'add_qedge(key=e0, subject=qg0, object=qg1, predicates=biolink:treats)',
+ 'expand(kp=infores:retriever)',
'resultify()',
"return(message=true, store=false)"
]
@@ -904,7 +909,9 @@ def test_issue686c():
# Tests that setting ignore_edge_direction to an invalid value results in an error
actions = [
'add_qnode(key=qg0, ids=CHEMBL.COMPOUND:CHEMBL112)',
- 'expand(kp=infores:rtx-kg2)',
+ 'add_qnode(key=qg1, ids=MONDO:0018958)',
+ 'add_qedge(key=e0, subject=qg0, object=qg1, predicates=biolink:treats)',
+ 'expand(kp=infores:retriever)',
'resultify(ignore_edge_direction=foo)',
"return(message=true, store=false)"
]
@@ -915,14 +922,16 @@ def test_issue686c():
def test_issue687():
# Tests that ignore_edge_direction need not be specified
actions = [
- 'add_qnode(key=qg0, ids=CHEMBL.COMPOUND:CHEMBL112)',
- 'expand(kp=infores:rtx-kg2)',
+ 'add_qnode(key=n0, ids=CHEBI:15367)',
+ 'add_qnode(key=n1, ids=MONDO:0015564)',
+ 'add_qedge(key=e0, subject=n0, object=n1, predicates=biolink:treats_or_applied_or_studied_to_treat)',
+ 'expand(kp=infores:retriever)',
'resultify(debug=true)',
"return(message=true, store=false)"
]
response, message = _do_arax_query(actions)
assert response.status == 'OK'
- assert message.results and len(message.results) == len(message.knowledge_graph.nodes)
+ assert message.results
def test_issue727():
@@ -967,7 +976,7 @@ def test_issue731b():
"add_qnode(categories=biolink:Disease, key=n2)",
"add_qedge(subject=n0, object=n1, key=e0)",
"add_qedge(subject=n1, object=n2, key=e1)",
- "expand(edge_key=[e0,e1], kp=infores:rtx-kg2)",
+ "expand(edge_key=[e0,e1], kp=infores:retriever)",
"resultify(debug=true)",
"return(message=true, store=false)"
]
@@ -1057,7 +1066,7 @@ def test_issue720_1():
"add_qnode(categories=biolink:Disease, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00)",
"add_qedge(subject=n01, object=n02, key=e01)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"resultify(debug=true)",
"return(message=true, store=false)"
]
@@ -1081,7 +1090,7 @@ def test_issue720_2():
"add_qedge(key=e00, subject=n01, object=n00, predicates=biolink:causes)",
"add_qedge(key=e01, subject=n01, object=n02, predicates=biolink:interacts_with)",
"add_qedge(key=e02, subject=n02, object=n03, predicates=biolink:interacts_with)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"resultify(debug=true)",
"return(message=true, store=false)"
]
@@ -1119,18 +1128,6 @@ def test_issue833_extraneous_intermediate_nodes():
assert result_e00_edges.intersection(kg_edges_using_this_node)
-def test_single_node():
- actions = [
- "add_qnode(name=ibuprofen, key=n00)",
- "expand(node_key=n00, kp=infores:rtx-kg2)",
- "resultify(debug=true)",
- "return(message=true, store=false)"
- ]
- response, message = _do_arax_query(actions)
- assert response.status == 'OK'
- assert len(message.results) > 0
-
-
def test_parallel_edges_between_nodes():
qg_nodes = {"n00": "",
"n01": "is_set",
@@ -1196,7 +1193,7 @@ def test_issue1119_a():
"add_qnode(categories=biolink:Drug, key=n01)",
"add_qedge(subject=n01, object=n00, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
"add_qedge(subject=n01, object=n00, predicates=biolink:predisposes_to_condition, key=e01)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"resultify()",
"return(message=true, store=false)"
]
@@ -1213,7 +1210,7 @@ def test_issue1119_a():
"add_qnode(categories=biolink:Drug, key=n01)",
"add_qedge(subject=n01, object=n00, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
"add_qedge(subject=n01, object=n00, predicates=biolink:predisposes_to_condition, exclude=true, key=ex0)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"resultify()",
"return(message=true, store=false)"
]
@@ -1237,7 +1234,7 @@ def test_issue1119_b():
"add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:physically_interacts_with)",
"add_qnode(categories=biolink:Pathway, key=n03)",
"add_qedge(subject=n01, object=n03, key=e02, predicates=biolink:participates_in, exclude=true)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"resultify()",
"return(message=true, store=false)"
]
@@ -1257,7 +1254,7 @@ def test_issue1119_c():
"add_qnode(key=n01, categories=biolink:ChemicalEntity)",
"add_qedge(key=e00, subject=n01, object=n00, predicates=biolink:causes)",
"add_qedge(key=e01, subject=n01, object=n00, predicates=biolink:predisposes_to_condition, option_group_id=1)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"resultify(debug=true)",
"return(message=true, store=false)"
]
@@ -1274,7 +1271,7 @@ def test_issue1119_c():
"add_qnode(key=n00, ids=MONDO:0005015)",
"add_qnode(key=n01, categories=biolink:ChemicalEntity)",
"add_qedge(key=e00, subject=n01, object=n00, predicates=biolink:causes)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"resultify(debug=true)",
"return(message=true, store=false)"
]
@@ -1289,7 +1286,7 @@ def test_issue1119_c():
"add_qnode(key=n00, ids=MONDO:0005015)",
f"add_qnode(key=n01, ids=[{', '.join(n01_node_keys_original)}])",
"add_qedge(key=e01, subject=n01, object=n00, predicates=biolink:predisposes_to_condition)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"resultify(debug=true)",
"return(message=true, store=false)"
]
@@ -1307,7 +1304,7 @@ def test_issue1119_d():
"add_qedge(key=e00, subject=n01, object=n00, predicates=biolink:affects)",
"add_qedge(key=e01, subject=n01, object=n00, predicates=biolink:treats_or_applied_or_studied_to_treat, option_group_id=1)",
"add_qedge(key=e03, subject=n01, object=n00, exclude=True, predicates=biolink:predisposes_to_condition)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"resultify(debug=true)",
"return(message=true, store=false)"
]
@@ -1331,7 +1328,7 @@ def test_issue1146_a():
"add_qnode(key=n1, categories=biolink:Protein, is_set=true)",
"add_qedge(key=e0, subject=n2, object=n1, predicates=biolink:physically_interacts_with)",
"add_qedge(key=e1, subject=n1, object=n0, predicates=biolink:causes)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"overlay(action=compute_ngd, virtual_relation_label=N2, subject_qnode_key=n0, object_qnode_key=n2)",
"resultify(debug=true)",
"filter_results(action=limit_number_of_results, max_results=4)",
@@ -1358,7 +1355,7 @@ def test_disconnected_qg():
"add_qnode(name=acetaminophen, key=n01)",
"add_qnode(categories=biolink:Disease, key=n02)",
"add_qedge(key=e00, subject=n01, object=n02)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"resultify(debug=true)",
"return(message=true, store=false)"
]
@@ -1419,7 +1416,7 @@ def test_issue1446():
"add_qedge(key=e0,subject=n1,object=n0, predicates=biolink:affects)",
"add_qedge(key=e1,subject=n1,object=n0, predicates=biolink:associated_with, option_group_id=1)",
"add_qedge(key=e2,subject=n1,object=n0, predicates=biolink:related_to, option_group_id=2)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"overlay(action=compute_ngd, virtual_relation_label=N1, subject_qnode_key=n0, object_qnode_key=n1)",
"resultify()",
"filter_results(action=limit_number_of_results, max_results=100)",
@@ -1437,7 +1434,7 @@ def test_issue1848():
"add_qnode(key=n0, ids=MONDO:0019391)",
"add_qnode(key=n1, categories=biolink:Gene)",
"add_qedge(key=e0, subject=n1, object=n0, predicates=biolink:causes)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"add_qnode(key=n2, categories=biolink:Drug)",
"add_qedge(key=e1, subject=n1, object=n2)",
"resultify()",
@@ -1461,7 +1458,7 @@ def test_node_binding_query_id_one_hop_single_input_curie():
f"add_qnode(ids={INSULIN_CURIE}, key=n01)",
# f"add_qnode(categories=biolink:Drug, key=n01)",
"add_qedge(subject=n01, object=n00, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"resultify(debug=true)",
"return(message=true, store=false)"
]
@@ -1495,7 +1492,7 @@ def test_node_binding_query_id_one_hop_multiple_input_curies():
f"add_qnode(ids=[{','.join(parent_query_ids)}], key=n00)",
f"add_qnode(categories=biolink:Drug, key=n01)",
"add_qedge(subject=n01, object=n00, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"resultify(debug=true)",
"return(message=true, store=false)"
]
@@ -1532,7 +1529,7 @@ def test_node_binding_query_id_two_hop_double_pinned():
f"add_qnode(categories=biolink:Drug, key=n02)",
"add_qedge(subject=n01, object=n00, predicates=biolink:related_to, key=e00)",
"add_qedge(subject=n01, object=n02, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e01)",
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"resultify(debug=true)",
"return(message=true, store=false)"
]
@@ -1642,7 +1639,7 @@ def test_issue2166():
"add_qedge(key=e3, subject=n2, object=n3, option_group_id=option1)",
"add_qedge(key=e4, subject=n3, object=nMET, option_group_id=option1)",
# expand
- "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:retriever)",
"resultify(ignore_edge_direction=true)"
]
response, message = _do_arax_query(actions)
@@ -1678,7 +1675,7 @@ def test_legacy_subclass_of_handling():
ARAXMessenger().create_envelope(response)
response.envelope.message.query_graph = QueryGraph.from_dict(query_graph)
expander = ARAXExpander()
- expander.apply(response, {"kp": "infores:rtx-kg2"})
+ expander.apply(response, {"kp": "infores:retriever"})
resultifier = ARAXResultify()
resultifier.apply(response, {})
message = response.envelope.message
diff --git a/code/ARAX/test/test_ARAX_translate.py b/code/ARAX/test/test_ARAX_translate.py
index eb2063dd5..534c8bbd1 100644
--- a/code/ARAX/test/test_ARAX_translate.py
+++ b/code/ARAX/test/test_ARAX_translate.py
@@ -97,10 +97,7 @@ def test_lookup():
},
"n1": {
"ids": [
- "CHEBI:45783"
- ],
- "categories": [
- "biolink:SmallMolecule"
+ "CHEBI:46195"
]
}
},
@@ -128,7 +125,7 @@ def test_fill_success():
{
"id": "fill",
"parameters": {
- "allowlist": ["infores:rtx-kg2"],
+ "allowlist": ["infores:retriever"],
"qedge_keys": ["e01"]
}
}
@@ -143,7 +140,7 @@ def test_fill_success():
},
"n1": {
"ids": [
- "CHEBI:45783"
+ "CHEBI:46195"
],
"categories": [
"biolink:ChemicalSubstance"
@@ -173,7 +170,7 @@ def test_fill_error():
{
"id": "fill",
"parameters": {
- "allowlist": ["infores:rtx-kg2"],
+ "allowlist": ["infores:retriever"],
"qedge_keys": ["asdf"]
}
}
@@ -233,10 +230,7 @@ def test_score():
},
"n1": {
"ids": [
- "CHEBI:45783"
- ],
- "categories": [
- "biolink:SmallMolecule"
+ "CHEBI:46195"
]
}
},
@@ -264,7 +258,7 @@ def test_bind():
{
"id": "fill",
"parameters": {
- "allowlist": ["infores:rtx-kg2"]
+ "allowlist": ["infores:retriever"]
}
},
{
@@ -282,10 +276,7 @@ def test_bind():
},
"n1": {
"ids": [
- "CHEBI:45783"
- ],
- "categories": [
- "biolink:ChemicalSubstance"
+ "CHEBI:46195"
]
}
},
@@ -311,7 +302,7 @@ def test_complete_results():
{
"id": "fill",
"parameters": {
- "allowlist": ["infores:rtx-kg2"]
+ "allowlist": ["infores:retriever"]
}
},
{
@@ -329,7 +320,7 @@ def test_complete_results():
},
"n1": {
"ids": [
- "CHEBI:45783"
+ "CHEBI:46195"
],
"categories": [
"biolink:ChemicalSubstance"
@@ -358,7 +349,7 @@ def test_filter_results_top_n():
{
"id": "fill",
"parameters": {
- "allowlist": ["infores:rtx-kg2"]
+ "allowlist": ["infores:retriever"]
}
},
{
@@ -391,10 +382,7 @@ def test_filter_results_top_n():
},
"n1": {
"ids": [
- "CHEBI:45783"
- ],
- "categories": [
- "biolink:SmallMolecule"
+ "CHEBI:2948"
]
}
},
@@ -449,10 +437,7 @@ def test_overlay_after_lookup():
},
"n1": {
"ids": [
- "CHEBI:45783"
- ],
- "categories": [
- "biolink:SmallMolecule"
+ "CHEBI:2948"
]
}
},
diff --git a/code/ARAX/test/test_ARAX_workflows.py b/code/ARAX/test/test_ARAX_workflows.py
index 213823f1f..4d7923ea7 100644
--- a/code/ARAX/test/test_ARAX_workflows.py
+++ b/code/ARAX/test/test_ARAX_workflows.py
@@ -86,7 +86,7 @@ def test_option_group_id():
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, predicates=biolink:indicated_for, option_group_key=a, id=e00)",
"add_qedge(subject=n00, object=n01, predicates=biolink:contraindicated_for, option_group_key=1, id=e01)",
- "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "expand(edge_key=[e00,e01], kp=infores:retriever)",
]}}
[response, message] = _do_arax_query(query)
for key, edge in message.query_graph.edges.items():
@@ -102,7 +102,7 @@ def test_exclude():
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, predicates=biolink:treats, key=e00)",
"add_qedge(subject=n00, object=n01, predicates=biolink:contraindicated_for, exclude=true, key=e01)",
- "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "expand(edge_key=[e00,e01], kp=infores:retriever)",
]}}
[response, message] = _do_arax_query(query)
assert response.status == 'OK'
@@ -121,7 +121,7 @@ def test_example_2():
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00)",
"add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:physically_interacts_with)",
- "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "expand(edge_key=[e00,e01], kp=infores:retriever)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
"filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=jaccard_index, direction=below, threshold=.2, remove_connected_nodes=t, qnode_keys=[n02])",
"filter_kg(action=remove_edges_by_discrete_attribute,edge_attribute=provided_by, value=Pharos)",
@@ -146,7 +146,7 @@ def test_example_3():
"add_qnode(categories=biolink:Protein, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00)",
"add_qedge(subject=n01, object=n02, key=e01)",
- "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "expand(edge_key=[e00,e01], kp=infores:retriever)",
"overlay(action=overlay_clinical_info, observed_expected_ratio=true, virtual_relation_label=C1, subject_qnode_key=n00, object_qnode_key=n01)",
"filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=observed_expected_ratio, direction=below, threshold=1, remove_connected_nodes=t, qnode_keys=[n01])",
"filter_kg(action=remove_orphaned_nodes, node_category=biolink:Protein)",
@@ -165,22 +165,22 @@ def test_example_3():
def test_FET_example_1():
- # This a FET 3-top example: try to find the phenotypes of drugs connected to proteins connected to DOID:14330
+ # This is a FET 3-hop example: try to find the phenotypes of drugs connected to genes connected to DOID:12889
query = {"operations": {"actions": [
"add_qnode(ids=DOID:12889, key=n00, categories=biolink:Disease)",
- "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+ "add_qnode(categories=biolink:Gene, is_set=true, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever, prune_threshold=20)",
"overlay(action=fisher_exact_test, subject_qnode_key=n00, object_qnode_key=n01, virtual_relation_label=FET1, rel_edge_key=e00)",
"filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=fisher_exact_test_p-value, direction=above, threshold=0.005, remove_connected_nodes=t, qnode_keys=[n01])",
"add_qnode(categories=biolink:ChemicalEntity, is_set=true, key=n02)",
- "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:physically_interacts_with)",
- "expand(edge_key=e01, kp=infores:rtx-kg2)",
+ "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:interacts_with)",
+ "expand(edge_key=e01, kp=infores:retriever, prune_threshold=20)",
"overlay(action=fisher_exact_test, subject_qnode_key=n01, object_qnode_key=n02, virtual_relation_label=FET2, rel_edge_key=e01)",
"filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=fisher_exact_test_p-value, direction=above, threshold=0.005, remove_connected_nodes=t, qnode_keys=[n02])",
"add_qnode(categories=biolink:PhenotypicFeature, key=n03)",
"add_qedge(subject=n02, object=n03, key=e02)",
- "expand(edge_key=e02, kp=infores:rtx-kg2)",
+ "expand(edge_key=e02, kp=infores:retriever, prune_threshold=20)",
"resultify()",
"return(message=true, store=false)"
]}}
@@ -200,17 +200,17 @@ def test_FET_example_1():
def test_FET_example_2():
- # This a FET 2-top example: try to find the diseases that share the same protein with ibuprofen (CHEMBL.COMPOUND:CHEMBL521)
+ # This is a FET 2-hop example: try to find the diseases that share the same protein with the query chemical (CHEMBL.COMPOUND:CHEMBL1472)
query = {"operations": {"actions": [
"add_qnode(key=n00, ids=CHEMBL.COMPOUND:CHEMBL1472, categories=biolink:ChemicalEntity)",
"add_qnode(key=n01, categories=biolink:Protein)",
"add_qedge(key=e00, subject=n00, object=n01)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever, prune_threshold=20)",
"overlay(action=fisher_exact_test, subject_qnode_key=n00, object_qnode_key=n01, virtual_relation_label=FET1)",
"filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=fisher_exact_test_p-value, direction=above, threshold=0.01, remove_connected_nodes=t, qnode_keys=[n01])",
"add_qnode(categories=biolink:Disease, key=n02)",
"add_qedge(subject=n01, object=n02, key=e01)",
- "expand(edge_key=e01, kp=infores:rtx-kg2)",
+ "expand(edge_key=e01, kp=infores:retriever)",
"resultify()",
"filter_results(action=limit_number_of_results, max_results=50)",
"return(message=true, store=false)"
@@ -231,22 +231,22 @@ def test_FET_example_2():
def test_FET_example_3():
- # This a FET 3-top example: try to find the proteins connected to diseases that share the same phenotypes of age-related macular degeneration(MONDO:0005150)
+ # This is a FET 3-hop example: try to find the genes connected to diseases that share the same phenotypes as the query disease (MONDO:0005148)
query = {"operations": {"actions": [
- "add_qnode(ids=MONDO:0005150, key=n00, categories=biolink:Disease)",
+ "add_qnode(ids=MONDO:0005148, key=n00, categories=biolink:Disease)",
"add_qnode(categories=biolink:PhenotypicFeature, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever, prune_threshold=5)",
"overlay(action=fisher_exact_test, subject_qnode_key=n00, object_qnode_key=n01, virtual_relation_label=FET1, rel_edge_key=e00)",
"filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=fisher_exact_test_p-value, direction=above, threshold=0.001, remove_connected_nodes=t, qnode_keys=[n01])",
"add_qnode(categories=biolink:Disease, key=n02)",
"add_qedge(subject=n01,object=n02,key=e01)",
- "expand(edge_key=e01, kp=infores:rtx-kg2)",
+ "expand(edge_key=e01, kp=infores:retriever, prune_threshold=5)",
"overlay(action=fisher_exact_test, subject_qnode_key=n01, object_qnode_key=n02, virtual_relation_label=FET2, rel_edge_key=e01)",
"filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=fisher_exact_test_p-value, direction=above, threshold=0.001, remove_connected_nodes=t, qnode_keys=[n02])",
- "add_qnode(categories=biolink:Protein, key=n03)",
+ "add_qnode(categories=biolink:Gene, key=n03)",
"add_qedge(subject=n02, object=n03, key=e02)",
- "expand(edge_key=e02, kp=infores:rtx-kg2)",
+ "expand(edge_key=e02, kp=infores:retriever, prune_threshold=5)",
"resultify()",
"return(message=true, store=false)"
]}}
@@ -266,17 +266,17 @@ def test_FET_example_3():
def test_FET_example_4():
- # This a FET 2-top example collecting nodes and edges from KG2: try to find the diseases that share the same protein with Parkinson disease(DOID:14330)
+ # This is a FET 2-hop example collecting nodes and edges from Retriever: try to find the diseases that share the same protein with the query disease (DOID:10718)
query = {"operations": {"actions": [
"add_qnode(ids=DOID:10718, key=n00, categories=biolink:Disease)",
"add_qnode(categories=biolink:Protein, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "expand(edge_key=e00, kp=infores:retriever)",
"overlay(action=fisher_exact_test, subject_qnode_key=n00, virtual_relation_label=FET1, object_qnode_key=n01,rel_edge_id=e00)",
"filter_kg(action=remove_edges_by_continuous_attribute,edge_attribute=fisher_exact_test_p-value,direction=above,threshold=0.001,remove_connected_nodes=t,qnode_keys=[n01])",
"add_qnode(categories=biolink:Disease, key=n02)",
"add_qedge(subject=n01, object=n02, key=e01)",
- "expand(edge_key=e01, kp=infores:rtx-kg2)",
+ "expand(edge_key=e01, kp=infores:retriever)",
"resultify()",
"return(message=true, store=false)"
]}}
@@ -301,7 +301,7 @@ def test_FET_ranking_1():
"add_qnode(key=n00,ids=UniProtKB:P14136,categories=biolink:Protein)",
"add_qnode(categories=biolink:BiologicalProcess, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00,kp=infores:rtx-kg2)",
+ "expand(edge_key=e00,kp=infores:retriever)",
"overlay(action=fisher_exact_test, subject_qnode_key=n00, object_qnode_key=n01, virtual_relation_label=FET)",
"resultify()",
"return(message=true, store=false)",
@@ -320,7 +320,7 @@ def test_example_2_kg2():
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00)",
"add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:molecularly_interacts_with)",
- "expand(edge_key=[e00,e01], infores:rtx-kg2)",
+ "expand(edge_key=[e00,e01], kp=infores:retriever, prune_threshold=20)",
"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)", # seems to work just fine
"filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=jaccard_index, direction=below, threshold=.008, remove_connected_nodes=t, qnode_keys=[n02])",
"resultify(ignore_edge_direction=true)",
@@ -347,7 +347,7 @@ def test_clinical_overlay_example1():
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00)",
"add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:molecularly_interacts_with)",
- "expand(edge_key=[e00,e01], infores:rtx-kg2)",
+ "expand(edge_key=[e00,e01], infores:retriever)",
# overlay a bunch of clinical info
"overlay(action=overlay_clinical_info, paired_concept_frequency=true, subject_qnode_key=n00, object_qnode_key=n02, virtual_relation_label=C1)",
"overlay(action=overlay_clinical_info, observed_expected_ratio=true, subject_qnode_key=n00, object_qnode_key=n02, virtual_relation_label=C2)",
@@ -377,7 +377,7 @@ def test_clinical_overlay_example2():
"add_qnode(name=DOID:11830, key=n00)",
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, infores:rtx-kg2)",
+ "expand(edge_key=e00, infores:retriever)",
# overlay a bunch of clinical info
"overlay(action=overlay_clinical_info, paired_concept_frequency=true)",
"overlay(action=overlay_clinical_info, observed_expected_ratio=true)",
@@ -412,9 +412,9 @@ def test_two_hop_based_on_types_1():
"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
"add_qedge(subject=n00, object=n01, key=e00)",
"add_qedge(subject=n01, object=n02, key=e01)",
- "expand(edge_key=e00, infores:rtx-kg2)",
+ "expand(edge_key=e00, infores:retriever)",
#"expand(edge_key=e00, kp=infores:biothings-explorer)",
- "expand(edge_key=e01, infores:rtx-kg2)",
+ "expand(edge_key=e01, infores:retriever)",
"overlay(action=overlay_clinical_info, paired_concept_frequency=true, subject_qnode_key=n00, object_qnode_key=n02, virtual_relation_label=C1)",
"overlay(action=overlay_clinical_info, observed_expected_ratio=true, subject_qnode_key=n00, object_qnode_key=n02, virtual_relation_label=C2)",
"overlay(action=overlay_clinical_info, chi_square=true, subject_qnode_key=n00, object_qnode_key=n02, virtual_relation_label=C3)",
@@ -447,7 +447,7 @@ def test_one_hop_based_on_types_1():
f"add_qnode(ids={doid}, key=n00, categories=biolink:Disease)",
"add_qnode(categories=biolink:ChemicalEntity, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00, infores:rtx-kg2)",
+ "expand(edge_key=e00, infores:retriever)",
"expand(edge_key=e00, kp=infores:biothings-explorer)",
"overlay(action=overlay_clinical_info, observed_expected_ratio=true)",
# "overlay(action=predict_drug_treats_disease)",
@@ -472,7 +472,7 @@ def test_one_hop_kitchen_sink_BTE_1():
"add_qnode(curie=DOID:11830, key=n0, categories=biolink:Disease)",
"add_qnode(categories=biolink:ChemicalEntity, key=n1)",
"add_qedge(subject=n0, object=n1, key=e1)",
- #"expand(edge_key=e00, infores:rtx-kg2)",
+ #"expand(edge_key=e00, infores:retriever)",
"expand(edge_key=e1, kp=infores:biothings-explorer)",
"overlay(action=overlay_clinical_info, paired_concept_frequency=true)",
"overlay(action=overlay_clinical_info, observed_expected_ratio=true)",
@@ -501,7 +501,7 @@ def test_one_hop_kitchen_sink_BTE_2():
"add_qnode(curie=DOID:11830, key=n0, categories=biolink:Disease)",
"add_qnode(categories=biolink:Gene, key=n1)",
"add_qedge(subject=n0, object=n1, key=e1)",
- #"expand(edge_key=e00, infores:rtx-kg2)",
+ #"expand(edge_key=e00, infores:retriever)",
"expand(edge_key=e1, kp=infores:biothings-explorer)",
"overlay(action=overlay_clinical_info, paired_concept_frequency=true)",
"overlay(action=overlay_clinical_info, observed_expected_ratio=true)",
@@ -525,7 +525,7 @@ def test_FET_ranking_2():
"add_qnode(key=n00,ids=[UniProtKB:P14136,UniProtKB:P35579],is_set=true,categories=biolink:Protein)",
"add_qnode(categories=biolink:BiologicalProcess, key=n01)",
"add_qedge(subject=n00, object=n01, key=e00)",
- "expand(edge_key=e00,kp=infores:rtx-kg2)",
+ "expand(edge_key=e00,kp=infores:retriever)",
"overlay(action=fisher_exact_test, subject_qnode_key=n00, object_qnode_key=n01, virtual_relation_label=FET)",
"resultify()",
"return(message=true, store=false)"
@@ -711,7 +711,7 @@ def test_issue_1848():
},
{
"id": "fill",
- "parameters": { "allowlist": ["infores:rtx-kg2"],
+ "parameters": { "allowlist": ["infores:retriever"],
"qedge_keys": [
"e1",
"e2",
@@ -808,7 +808,7 @@ def test_issue_1848():
# "add_qnode(key=n02, categories=protein)",
# "add_qedge(key=e00, subject=n00, object=n01)",
# "add_qedge(key=e01, subject=n01, object=n02)",
-# "expand(edge_key=[e00,e01], infores:rtx-kg2)",
+# "expand(edge_key=[e00,e01], infores:retriever)",
# "overlay(action=overlay_clinical_info, observed_expected_ratio=true, virtual_relation_label=C1, subject_qnode_key=n00, object_qnode_key=n01)",
# "overlay(action=compute_ngd, virtual_relation_label=N1, subject_qnode_key=n01, object_qnode_key=n02)",
# "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=observed_expected_ratio, direction=below, threshold=2, remove_connected_nodes=t, qnode_keys=[n01])",
diff --git a/code/code-archive/old-arax-tests/conftest.py b/code/code-archive/old-arax-tests/conftest.py
new file mode 100644
index 000000000..edc3928ea
--- /dev/null
+++ b/code/code-archive/old-arax-tests/conftest.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+import os
+import sys
+import time
+
+import pytest
+pathlist = os.path.realpath(__file__).split(os.path.sep)
+sys.path.append(os.path.sep.join([*pathlist[:(pathlist.index("RTX") + 1)], "code", "ARAX", "ARAXQuery"]))
+from ARAX_database_manager import ARAXDatabaseManager
+sys.path.append(os.path.sep.join([*pathlist[:(pathlist.index("RTX") + 1)], "code", "ARAX", "ARAXQuery", "Expand"]))
+from kp_info_cacher import KPInfoCacher
+
+from Filter_KG.remove_nodes import RemoveNodes
+RemoveNodes.load_block_list_file()
+
+def pytest_addoption(parser):
+ parser.addoption(
+ "--runslow", action="store_true", default=False, help="include slow tests"
+ )
+ parser.addoption(
+ "--runonlyslow", action="store_true", default=False, help="run only slow tests"
+ )
+ parser.addoption(
+ "--runexternal", action="store_true", default=False, help="include tests that rely on external KPs"
+ )
+ parser.addoption(
+ "--runonlyexternal", action="store_true", default=False, help="run only external tests"
+ )
+ parser.addoption(
+ "--nodatabases", action="store_true", default=False, help="(deprecated, now the default) do not download databases"
+ )
+ parser.addoption(
+ "--withdatabases", action="store_true", default=False, help="download/update databases before running tests"
+ )
+
+def pytest_configure(config):
+ config.addinivalue_line("markers", "slow: mark test as slow to run")
+ config.addinivalue_line("markers", "external: mark test as relying on an external KP")
+
+
+def pytest_sessionstart(session):
+ """
+ Pytest runs these steps at the beginning of the testing session (prior to running any tests)
+ """
+
+ config = session.config
+ if config.getoption("--withdatabases"):
+ print("Running database manager to check for missing databases..")
+ db_manager = ARAXDatabaseManager(allow_downloads=True)
+ db_manager.update_databases()
+ else:
+ print("Skipping database check (pass --withdatabases to download/update databases)")
+
+ # Refresh KP info cache if it hasn't been updated in more than an hour
+ kp_info_cacher = KPInfoCacher()
+ cache_exists = os.path.exists(kp_info_cacher.smart_api_and_meta_map_cache)
+ if cache_exists:
+ cache_is_stale = time.time() - os.path.getmtime(kp_info_cacher.smart_api_and_meta_map_cache) > 3600
+ else:
+ cache_is_stale = True
+ if cache_exists and not cache_is_stale:
+ print(f"KP info cache is up to date.")
+ else:
+ print(f"Running KP info cacher to update stale/missing cache..")
+ kp_info_cacher.refresh_kp_info_caches()
+
+
+def pytest_collection_modifyitems(config, items):
+ # Thanks docs.pytest.org/en/latest/example/simple.html#control-skipping-of-tests-according-to-command-line-option
+ skip_slow = pytest.mark.skip(reason="need --runslow option to run")
+ skip_fast = pytest.mark.skip(reason="--runonlyslow option was used; this test is fast")
+ skip_external = pytest.mark.skip(reason="need --runexternal option to run")
+ skip_internal = pytest.mark.skip(reason="--runonlyexternal option was used; this test is internal")
+ for item in items:
+ if "slow" in item.keywords:
+ if not config.getoption("--runslow") and not config.getoption("--runonlyslow"):
+ item.add_marker(skip_slow)
+ elif config.getoption("--runonlyslow"):
+ item.add_marker(skip_fast)
+
+ if "external" in item.keywords:
+ if not config.getoption("--runexternal") and not config.getoption("--runonlyexternal"):
+ item.add_marker(skip_external)
+ elif config.getoption("--runonlyexternal"):
+ item.add_marker(skip_internal)
diff --git a/code/code-archive/old-arax-tests/test_ARAX_connect.py b/code/code-archive/old-arax-tests/test_ARAX_connect.py
new file mode 100644
index 000000000..7bf1c684f
--- /dev/null
+++ b/code/code-archive/old-arax-tests/test_ARAX_connect.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+
+# Intended to test ARAX connect
+
+import sys
+import os
+import pytest
+from collections import Counter
+import copy
+import json
+import ast
+from typing import List, Union
+
+import numpy as np
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../ARAXQuery")
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../ARAXQuery")
+from ARAX_query import ARAXQuery
+from ARAX_response import ARAXResponse
+
+PACKAGE_PARENT = '../../UI/OpenAPI/openapi_server'
+sys.path.append(os.path.normpath(os.path.join(os.getcwd(), PACKAGE_PARENT)))
+from openapi_server.models.message import Message
+
+
+def _do_arax_query(query: dict) -> List[Union[ARAXResponse, Message]]:
+ araxq = ARAXQuery()
+ response = araxq.query(query)
+ if response.status != 'OK':
+ print(response.show(level=response.DEBUG))
+ return [response, response.envelope.message]
+
+
+def _attribute_tester(message, attribute_name: str, attribute_type: str, num_different_values=2):
+ """
+ Tests attributes of a message
+ message: returned from _do_arax_query
+ attribute_name: the attribute name to test (eg. 'jaccard_index')
+ attribute_type: the attribute type (eg. 'EDAM-DATA:1234')
+ num_different_values: the number of distinct values you wish to see have been added as attributes
+ """
+ edges_of_interest = []
+ values = set()
+ for key, edge in message.knowledge_graph.edges.items():
+ assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+ if hasattr(edge, 'edge_attributes'):
+ for attr in edge.edge_attributes:
+ if attr.original_attribute_name == attribute_name:
+ edges_of_interest.append(edge)
+ assert attr.attribute_type_id == attribute_type
+ values.add(attr.value)
+ assert len(edges_of_interest) > 0
+ assert len(values) >= num_different_values
+
+
+def _virtual_tester(message: Message, edge_predicate: str, relation: str, attribute_name: str, attribute_type: str,
+ num_different_values=2):
+ """
+ Tests overlay functions that add virtual edges
+ message: returned from _do_arax_query
+ edge_predicate: the name of the virtual edge (eg. biolink:has_jaccard_index_with)
+ relation: the relation you picked for the virtual_edge_relation (eg. N1)
+ attribute_name: the attribute name to test (eg. 'jaccard_index')
+ attribute_type: the attribute type (eg. 'EDAM-DATA:1234')
+ num_different_values: the number of distinct values you wish to see have been added as attributes
+ """
+ edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])
+ assert edge_predicate in edge_predicates_in_kg
+ edges_of_interest = [x for x in message.knowledge_graph.edges.values() if x.relation == relation]
+ values = set()
+ assert len(edges_of_interest) > 0
+ for edge in edges_of_interest:
+ assert hasattr(edge, 'attributes')
+ assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+ assert edge.attributes
+ assert edge.attributes[0].original_attribute_name == attribute_name
+ values.add(edge.attributes[0].value)
+ assert edge.attributes[0].attribute_type_id == attribute_type
+ # make sure at least num_different_values distinct values were added
+ assert len(values) >= num_different_values
+
+
+# TODO add DSL tests for new schema
+# def test_connect_ulcerative_colitis_to_adalimumab():
+# query = {"operations": {"actions": [
+# "create_message",
+# "add_qnode(ids=MONDO:0005101, key=n_src)",
+# "add_qnode(ids=UNII:FYS6T7F842, key=n_dst)",
+# "add_qnode(categories=biolink:Disease, key=n_cns)",
+# "add_qpath(key=p0,subject=n_src,object=n_dst,predicates=biolink:related_to,intermediate_nodes=n_cns)",
+# "connect(action=connect_nodes, max_path_length=3)",
+# ]}}
+# [response, message] = _do_arax_query(query)
+# assert response.status == 'OK'
+# assert len(response.envelope.message.results) > 0
+# assert len(response.envelope.message.results[0].node_bindings) == 3
+# assert len(response.envelope.message.auxiliary_graphs) > 0
+# assert len(response.envelope.message.query_graph.nodes) == 3
+# assert len(response.envelope.message.query_graph.paths) == 1
+#
+#
+# def test_connect_resveratrol_glyoxalase():
+# query = {"operations": {"actions": [
+# "create_message",
+# "add_qnode(ids=PUBCHEM.COMPOUND:445154, key=n_src)",
+# "add_qnode(ids=NCBIGene:2739, key=n_dst)",
+# "add_qpath(key=p0,subject=n_src,object=n_dst,predicates=biolink:related_to)",
+# "connect(action=connect_nodes, max_path_length=4)",
+# ]}}
+# [response, message] = _do_arax_query(query)
+# assert response.status == 'OK'
+# assert len(response.envelope.message.results) > 0
+# assert len(response.envelope.message.results[0].node_bindings) == 2
+# assert len(response.envelope.message.auxiliary_graphs) > 0
+# assert len(response.envelope.message.query_graph.nodes) == 2
+# assert len(response.envelope.message.query_graph.paths) == 1
+#
+#
+# def test_connect_pde5i_alzheimer():
+# query = {"operations": {"actions": [
+# "create_message",
+# "add_qnode(ids=MONDO:0004975, key=n_src)",
+# "add_qnode(ids=UMLS:C1318700, key=n_dst)",
+# "add_qpath(key=p0,subject=n_src,object=n_dst,predicates=biolink:related_to)",
+# "connect(action=connect_nodes, max_path_length=4)",
+# ]}}
+# [response, message] = _do_arax_query(query)
+# assert response.status == 'OK'
+# assert len(response.envelope.message.results) > 0
+# assert len(response.envelope.message.results[0].node_bindings) == 2
+# assert len(response.envelope.message.auxiliary_graphs) > 0
+# assert len(response.envelope.message.query_graph.nodes) == 2
+# assert len(response.envelope.message.query_graph.paths) == 1
+#
+#
+# def test_glucose_diabetes():
+# query = {"operations": {"actions": [
+# "create_message",
+# "add_qnode(name=CHEBI:37626, key=n_src)",
+# "add_qnode(name=MONDO:0005015, key=n_dst)",
+# "add_qpath(key=p0,subject=n_src,object=n_dst,predicates=biolink:related_to)",
+# "connect(action=connect_nodes, max_path_length=3)"
+# ]}}
+# [response, message] = _do_arax_query(query)
+# assert response.status == 'OK'
+# assert len(response.envelope.message.results) > 0
+# assert len(response.envelope.message.results[0].node_bindings) == 2
+# assert len(response.envelope.message.auxiliary_graphs) > 0
+# assert len(response.envelope.message.query_graph.nodes) == 2
+# assert len(response.envelope.message.query_graph.paths) == 1
+
+
+def test_TRAPI_unconstrained_query():
+ query = {"operations": {"actions": [
+ "add_qnode(ids=CHEBI:31690, key=n0)",
+ "add_qnode(ids=MONDO:0004979, key=n1)",
+ "add_qpath(subject=n0, object=n1)",
+ "connect(action=connect_nodes, max_path_length=2)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ assert len(response.envelope.message.results) == 1
+ assert len(response.envelope.message.results[0].node_bindings) == 2
+ response.debug(f"analyses length: {len(response.envelope.message.results[0].analyses)}")
+ assert len(response.envelope.message.results[0].analyses) > 0
+ response.debug(f"auxiliary_graphs length: {len(response.envelope.message.auxiliary_graphs)}")
+ assert len(response.envelope.message.auxiliary_graphs) > 0
+ assert len(response.envelope.message.query_graph.nodes) == 2
+ assert len(response.envelope.message.query_graph.paths) == 1
+
+@pytest.mark.slow
+def test_TRAPI_constrained_query():
+ query = {
+ "message": {
+ "query_graph": {
+ "nodes": {
+ "n0": {
+ "ids": ["CHEBI:37626"]
+ },
+ "n1": {
+ "ids": ["MONDO:0005015"]
+ },
+ },
+ "paths": {
+ "p0": {
+ "subject": "n0",
+ "object": "n1",
+ "constraints": [
+ {
+ "intermediate_categories": ["biolink:Gene"]
+ }
+ ]
+ }
+ }
+ }
+ }
+ }
+
+ araxq = ARAXQuery()
+ araxq.query(query)
+ response = araxq.response
+ assert response.status == 'OK'
+ assert len(response.envelope.message.results) == 1
+ assert len(response.envelope.message.results[0].node_bindings) == 2
+ response.debug(f"analyses length: {len(response.envelope.message.results[0].analyses)}")
+ assert len(response.envelope.message.results[0].analyses) > 0
+ response.debug(f"auxiliary_graphs length: {len(response.envelope.message.auxiliary_graphs)}")
+ assert len(response.envelope.message.auxiliary_graphs) > 0
+ assert len(response.envelope.message.query_graph.nodes) == 2
+ assert len(response.envelope.message.query_graph.paths) == 1
+
+
+if __name__ == "__main__":
+ pytest.main(['-v'])
diff --git a/code/code-archive/old-arax-tests/test_ARAX_expand.py b/code/code-archive/old-arax-tests/test_ARAX_expand.py
new file mode 100644
index 000000000..8348ef61e
--- /dev/null
+++ b/code/code-archive/old-arax-tests/test_ARAX_expand.py
@@ -0,0 +1,1732 @@
+#!/bin/env python3
+"""
+Usage:
+ Run all expand tests: pytest -v test_ARAX_expand.py
+ Run a single test: pytest -v test_ARAX_expand.py -k test_branched_query
+"""
+import json
+import os
+import sys
+from typing import List, Dict, Optional
+import pytest
+import yaml
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../ARAXQuery/")
+from ARAX_query import ARAXQuery
+from ARAX_response import ARAXResponse
+from ARAX_expander import ARAXExpander
+from ARAX_messenger import ARAXMessenger
+from ARAX_resultify import ARAXResultify
+from kp_info_cacher import KPInfoCacher
+import Expand.expand_utilities as eu
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../../UI/OpenAPI/python-flask-server/")
+from openapi_server.models.edge import Edge
+from openapi_server.models.node import Node
+from openapi_server.models.attribute import Attribute
+from openapi_server.models.query_graph import QueryGraph
+
+
+def _run_query_and_do_standard_testing(actions: Optional[List[str]] = None,
+ json_query: Optional[dict] = None,
+ kg_should_be_incomplete=False,
+ debug=False,
+ should_throw_error=False,
+ error_code: Optional[str] = None,
+ timeout: Optional[int] = None,
+ return_message: bool = False) -> tuple:
+ # Run the query
+ araxq = ARAXQuery()
+ assert actions or json_query # Must provide some sort of query to run
+ query_object = {"operations": {"actions": actions}} if actions else {"message": {"query_graph": json_query}}
+ if timeout:
+ query_object["query_options"] = {"kp_timeout": timeout}
+ response = araxq.query(query_object)
+ message = araxq.message
+ if response.status != 'OK':
+ print(response.show(level=ARAXResponse.DEBUG))
+ assert response.status == 'OK' or should_throw_error
+ if should_throw_error and error_code:
+ assert response.error_code == error_code
+
+ # Convert output knowledge graph to a dictionary format for faster processing (organized by QG IDs)
+ dict_kg = eu.convert_standard_kg_to_qg_organized_kg(message.knowledge_graph)
+ nodes_by_qg_id = dict_kg.nodes_by_qg_id
+ edges_by_qg_id = dict_kg.edges_by_qg_id
+
+ # Optionally print more detail
+ if debug:
+ print_nodes(nodes_by_qg_id)
+ print_edges(edges_by_qg_id)
+ print_counts_by_qgid(nodes_by_qg_id, edges_by_qg_id)
+ print(response.show(level=ARAXResponse.DEBUG))
+
+ # Run standard testing (applies to every test case)
+ assert eu.qg_is_fulfilled(response.original_query_graph, dict_kg, enforce_required_only=True) or kg_should_be_incomplete or should_throw_error
+ check_for_orphans(nodes_by_qg_id, edges_by_qg_id)
+ check_property_format(nodes_by_qg_id, edges_by_qg_id)
+
+ return (nodes_by_qg_id, edges_by_qg_id, message) if return_message else (nodes_by_qg_id, edges_by_qg_id)
+
+
+def print_counts_by_qgid(nodes_by_qg_id: Dict[str, Dict[str, Node]], edges_by_qg_id: Dict[str, Dict[str, Edge]]):
+ print(f"KG counts:")
+ if nodes_by_qg_id or edges_by_qg_id:
+ for qnode_key, corresponding_nodes in sorted(nodes_by_qg_id.items()):
+ print(f" {qnode_key}: {len(corresponding_nodes)}")
+ for qedge_key, corresponding_edges in sorted(edges_by_qg_id.items()):
+ print(f" {qedge_key}: {len(corresponding_edges)}")
+ else:
+ print(" KG is empty")
+
+
+def print_nodes(nodes_by_qg_id: Dict[str, Dict[str, Node]]):
+ for qnode_key, nodes in sorted(nodes_by_qg_id.items()):
+ for node_key, node in sorted(nodes.items()):
+ print(f"{qnode_key}: {node.categories}, {node_key}, {node.name}, {node.qnode_keys}, "
+ f"{node.query_ids if hasattr(node, 'query_ids') else ''}")
+
+
+def print_edges(edges_by_qg_id: Dict[str, Dict[str, Edge]]):
+ for qedge_key, edges in sorted(edges_by_qg_id.items()):
+ for edge_key, edge in sorted(edges.items()):
+ print(f"{qedge_key}: {edge_key}, {edge.subject}--{edge.predicate}->{edge.object}, {edge.qedge_keys}")
+
+
+def check_for_orphans(nodes_by_qg_id: Dict[str, Dict[str, Node]], edges_by_qg_id: Dict[str, Dict[str, Edge]]):
+ node_keys = set()
+ node_keys_used_by_edges = set()
+ for qnode_key, nodes in nodes_by_qg_id.items():
+ for node_key, node in nodes.items():
+ node_keys.add(node_key)
+ for qedge_key, edges in edges_by_qg_id.items():
+ for edge_key, edge in edges.items():
+ node_keys_used_by_edges.add(edge.subject)
+ node_keys_used_by_edges.add(edge.object)
+ assert node_keys == node_keys_used_by_edges or len(node_keys_used_by_edges) == 0
+
+
+def check_property_format(nodes_by_qg_id: Dict[str, Dict[str, Node]], edges_by_qg_id: Dict[str, Dict[str, Edge]]):
+ for qnode_key, nodes in nodes_by_qg_id.items():
+ for node_key, node in nodes.items():
+ assert node_key and isinstance(node_key, str)
+ assert node.qnode_keys and isinstance(node.qnode_keys, list)
+ assert isinstance(node.name, str) or node.name is None
+ assert isinstance(node.categories, list) or node.categories is None
+ if node.attributes:
+ for attribute in node.attributes:
+ _check_attribute(attribute)
+ for qedge_key, edges in edges_by_qg_id.items():
+ for edge_key, edge in edges.items():
+ assert edge_key and isinstance(edge_key, str)
+ assert edge.qedge_keys and isinstance(edge.qedge_keys, list)
+ assert edge.subject and isinstance(edge.subject, str)
+ assert edge.object and isinstance(edge.object, str)
+ assert isinstance(edge.predicate, str) or edge.predicate is None
+ if edge.attributes:
+ for attribute in edge.attributes:
+ _check_attribute(attribute)
+
+
+def _check_attribute(attribute: Attribute):
+ assert attribute.attribute_type_id and isinstance(attribute.attribute_type_id, str)
+ assert attribute.value is not None and (isinstance(attribute.value, str) or isinstance(attribute.value, list) or
+ isinstance(attribute.value, int) or isinstance(attribute.value, float) or
+ isinstance(attribute.value, dict))
+ assert isinstance(attribute.value_type_id, str) or attribute.value_type_id is None
+ assert isinstance(attribute.value_url, str) or attribute.value_url is None
+ assert isinstance(attribute.attribute_source, str) or attribute.attribute_source is None
+ assert isinstance(attribute.original_attribute_name, str) or attribute.original_attribute_name is None
+ assert isinstance(attribute.description, str) or attribute.description is None
+
+
+def get_primary_knowledge_source(edge: Edge) -> str:
+ return next(source.resource_id for source in edge.sources if source.resource_role == "primary_knowledge_source")
+
+def get_support_graphs_attribute(edge: Edge) -> any:
+ sg_attrs = [attribute for attribute in edge.attributes if attribute.attribute_type_id == "biolink:support_graphs"]
+ assert len(sg_attrs) <= 1
+ return sg_attrs[0] if sg_attrs else None
+
+
+@pytest.mark.slow
+def test_720_multiple_qg_ids_in_different_results():
+ actions_list = [
+ "add_qnode(key=n00, ids=MONDO:0014324)",
+ "add_qnode(key=n01, categories=biolink:Protein)",
+ "add_qnode(key=n02, categories=biolink:ChemicalEntity)",
+ "add_qnode(key=n03, categories=biolink:Protein)",
+ "add_qedge(key=e00, subject=n00, object=n01)",
+ "add_qedge(key=e01, subject=n01, object=n02, predicates=biolink:physically_interacts_with)",
+ "add_qedge(key=e02, subject=n02, object=n03, predicates=biolink:physically_interacts_with)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+ assert set(nodes_by_qg_id['n01']).intersection(set(nodes_by_qg_id['n03']))
+ assert any(set(node.qnode_keys) == {'n01', 'n03'} for node in nodes_by_qg_id['n01'].values())
+
+
+@pytest.mark.external
+def test_bte_query():
+ actions_list = [
+ "add_qnode(ids=UniProtKB:P16471, categories=biolink:Protein, key=n00)",
+ "add_qnode(categories=biolink:Cell, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(kp=infores:biothings-explorer)",
+ "return(message=true, store=false)",
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+def test_single_node_query_with_synonyms():
+ actions_list = [
+ "add_qnode(key=n00, ids=CHEMBL.COMPOUND:CHEMBL1771)",
+ "expand(node_key=n00, kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+def test_single_node_query_with_no_results():
+ actions_list = [
+ "add_qnode(key=n00, ids=FAKE:curie)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list, kg_should_be_incomplete=True)
+ assert not nodes_by_qg_id and not edges_by_qg_id
+
+
+def test_single_node_query_with_list():
+ actions_list = [
+ "add_qnode(key=n00, ids=[CHEMBL.COMPOUND:CHEMBL108, CHEMBL.COMPOUND:CHEMBL110])",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+ assert len(nodes_by_qg_id['n00']) == 2
+
+
+@pytest.mark.slow
+def test_branched_query():
+ actions_list = [
+ "add_qnode(key=n00, ids=DOID:0060227)", # Adams-Oliver
+ "add_qnode(key=n01, categories=biolink:PhenotypicFeature, is_set=true)",
+ "add_qnode(key=n02, categories=biolink:Disease)",
+ "add_qnode(key=n03, categories=biolink:Protein, is_set=true)",
+ "add_qedge(subject=n01, object=n00, key=e00)",
+ "add_qedge(subject=n02, object=n00, key=e01)",
+ "add_qedge(subject=n00, object=n03, key=e02)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+@pytest.mark.slow
+def test_query_that_expands_same_edge_twice():
+ actions_list = [
+ "add_qnode(key=n00, ids=DOID:9065, categories=biolink:Disease)",
+ "add_qnode(key=n01, categories=biolink:ChemicalEntity)",
+ "add_qedge(key=e00, subject=n00, object=n01, predicates=biolink:treats_or_applied_or_studied_to_treat)",
+ "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+def test_771_continue_if_no_results_query():
+ actions_list = [
+ "add_qnode(ids=UniProtKB:P14136, key=n00)",
+ "add_qnode(ids=NOTAREALCURIE, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list, kg_should_be_incomplete=True)
+ assert 'n01' not in nodes_by_qg_id
+ assert 'e00' not in edges_by_qg_id
+
+
+@pytest.mark.slow
+def test_774_continue_if_no_results_query():
+ actions_list = [
+ "add_qnode(ids=CHEMBL.COMPOUND:CHEMBL112, key=n1)",
+ "add_qnode(ids=DOID:8295, key=n2)",
+ "add_qedge(subject=n1, object=n2, key=e1)",
+ "expand(edge_key=e1, kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list, kg_should_be_incomplete=True)
+ assert not nodes_by_qg_id and not edges_by_qg_id
+
+
+def test_curie_list_query():
+ actions_list = [
+ "add_qnode(ids=[DOID:6419, DOID:3717, DOID:11406], key=n00)",
+ "add_qnode(categories=biolink:PhenotypicFeature, key=n01)",
+ "add_qedge(subject=n00, object=n01, predicates=biolink:has_phenotype, key=e00)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+ assert len(nodes_by_qg_id["n00"]) >= 3
+
+
+@pytest.mark.slow
+def test_query_with_curies_on_both_ends():
+ actions_list = [
+ "add_qnode(ids=MONDO:0005393, key=n00)", # Gout
+ "add_qnode(ids=UMLS:C0018100, key=n01)", # Antigout agents
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+@pytest.mark.slow
+def test_query_with_intermediate_curie_node():
+ actions_list = [
+ "add_qnode(categories=biolink:Protein, key=n00, is_set=True)",
+ "add_qnode(ids=HP:0005110, key=n01)", # atrial fibrillation
+ "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+ "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:treats_or_applied_or_studied_to_treat)",
+ "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:related_to)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+def test_847_dont_expand_curie_less_edge():
+ actions_list = [
+ "add_qnode(key=n00, categories=biolink:Protein)",
+ "add_qnode(key=n01, categories=biolink:ChemicalEntity)",
+ "add_qedge(key=e00, subject=n00, object=n01)",
+ "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list, should_throw_error=True,
+ error_code="QueryGraphNoIds")
+
+
+@pytest.mark.slow
+def test_deduplication_and_self_edges():
+ actions_list = [
+ "add_qnode(ids=UMLS:C0004572, key=n00)", # Babesia
+ "add_qnode(key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+ # Check that deduplication worked appropriately
+ all_node_keys = {node_key for nodes in nodes_by_qg_id.values() for node_key in nodes}
+ babesia_curies = {"UMLS:C0004572", "CHV:0000001647", "LNC:LP19999-9", "MEDDRA:10003963", "MESH:D001403",
+ "NCIT:C122040", "NCI_CDISC:C122040", "SNOMEDCT:35029001"}
+ babesia_curies_in_answer = all_node_keys.intersection(babesia_curies)
+ assert len(babesia_curies_in_answer) <= 1
+ # Check that we don't have any self-edges
+ self_edges = [edge for edge in edges_by_qg_id['e00'].values() if edge.subject == edge.object]
+ assert not self_edges
+
+
+@pytest.mark.slow
+def test_873_consider_both_gene_and_protein():
+ actions_list_protein = [
+ "add_qnode(ids=DOID:9452, key=n00)",
+ "add_qnode(categories=biolink:Protein, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)",
+ ]
+ nodes_by_qg_id_protein, edges_by_qg_id_protein = _run_query_and_do_standard_testing(actions_list_protein)
+ actions_list_gene = [
+ "add_qnode(ids=DOID:9452, key=n00)",
+ "add_qnode(categories=biolink:Gene, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)",
+ ]
+ nodes_by_qg_id_gene, edges_by_qg_id_gene = _run_query_and_do_standard_testing(actions_list_gene)
+ assert set(nodes_by_qg_id_protein['n01']) == set(nodes_by_qg_id_gene['n01'])
+
+
+@pytest.mark.external
+def test_cohd_expand():
+ actions_list = [
+ "add_qnode(ids=MONDO:0005301, key=n00)",
+ "add_qnode(categories=biolink:SmallMolecule, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:correlated_with)",
+ "expand(edge_key=e00, kp=infores:cohd)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+@pytest.mark.skip(reason="retire DTD")
+def test_dtd_expand_1():
+ actions_list = [
+ "add_qnode(name=acetaminophen, key=n0)",
+ "add_qnode(name=Sotos syndrome, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand(edge_key=e0, kp=infores:arax-drug-treats-disease, DTD_threshold=0, DTD_slow_mode=True)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+ assert all([edges_by_qg_id[qedge_key][edge_key].predicate == "biolink:probability_treats" for qedge_key in edges_by_qg_id for edge_key in edges_by_qg_id[qedge_key]])
+ assert all([edges_by_qg_id[qedge_key][edge_key].attributes[0].original_attribute_name == "probability_treats" for qedge_key in edges_by_qg_id for edge_key in edges_by_qg_id[qedge_key]])
+ assert all([edges_by_qg_id[qedge_key][edge_key].attributes[0].attribute_type_id == "EDAM-DATA:0951" for qedge_key in edges_by_qg_id for edge_key in edges_by_qg_id[qedge_key]])
+ assert all([edges_by_qg_id[qedge_key][edge_key].attributes[0].value_url == "https://doi.org/10.1101/765305" for qedge_key in edges_by_qg_id for edge_key in edges_by_qg_id[qedge_key]])
+
+
+# @pytest.mark.slow
+@pytest.mark.skip(reason="retire DTD")
+def test_dtd_expand_2():
+ actions_list = [
+ "add_qnode(name=acetaminophen, key=n0)",
+ "add_qnode(categories=biolink:Disease, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand(edge_key=e0, kp=infores:arax-drug-treats-disease, DTD_threshold=0, DTD_slow_mode=True)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+ assert all([edges_by_qg_id[qedge_key][edge_key].predicate == "biolink:probability_treats" for qedge_key in edges_by_qg_id for edge_key in edges_by_qg_id[qedge_key]])
+ assert all([edges_by_qg_id[qedge_key][edge_key].attributes[0].original_attribute_name == "probability_treats" for qedge_key in edges_by_qg_id for edge_key in edges_by_qg_id[qedge_key]])
+ assert all([edges_by_qg_id[qedge_key][edge_key].attributes[0].attribute_type_id == "EDAM-DATA:0951" for qedge_key in edges_by_qg_id for edge_key in edges_by_qg_id[qedge_key]])
+ assert all([edges_by_qg_id[qedge_key][edge_key].attributes[0].value_url == "https://doi.org/10.1101/765305" for qedge_key in edges_by_qg_id for edge_key in edges_by_qg_id[qedge_key]])
+
+
+@pytest.mark.external
+def test_chp_expand_1():
+ actions_list = [
+ "add_qnode(ids=ENSEMBL:ENSG00000162419, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(edge_key=e00, kp=infores:connections-hypothesis)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+@pytest.mark.external
+def test_chp_expand_2():
+ actions_list = [
+ "add_qnode(ids=[ENSEMBL:ENSG00000124532,ENSEMBL:ENSG00000075975,ENSEMBL:ENSG00000104774], key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(edge_key=e00, kp=infores:connections-hypothesis)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+@pytest.mark.external
+def test_genetics_kp():
+ actions_list = [
+ "add_qnode(ids=NCBIGene:1803, categories=biolink:Gene, key=n00)",
+ "add_qnode(categories=biolink:Disease, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:gene_associated_with_condition)",
+ "expand(kp=infores:genetics-data-provider)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+@pytest.mark.external
+def test_molepro_query():
+ actions_list = [
+ "add_qnode(ids=HGNC:9379, categories=biolink:Gene, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(kp=infores:molepro)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+@pytest.mark.external
+def test_spoke_query():
+ actions_list = [
+ "add_qnode(ids=NCBIGene:7157, categories=biolink:Gene, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n01, object=n00, key=e00, predicates=biolink:molecularly_interacts_with)",
+ "expand(kp=infores:spoke)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+@pytest.mark.external
+def test_spoke_query_2():
+ actions_list = [
+ "add_qnode(ids=NCBIGene:7157, categories=biolink:Gene, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:molecularly_interacts_with)",
+ "expand(kp=infores:spoke)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+@pytest.mark.slow
+def test_exclude_edge_parallel():
+ # First run a query without any kryptonite edges to get a baseline
+ actions_list = [
+ "add_qnode(name=DOID:3312, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n01, object=n00, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
+ "add_qedge(subject=n01, object=n00, predicates=biolink:causes, key=e01)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+ contraindicated_pairs = {tuple(sorted([edge.subject, edge.object])) for edge in edges_by_qg_id["e01"].values()}
+ assert contraindicated_pairs
+
+ # Then exclude the contraindicated edge and make sure the appropriate nodes are blown away
+ actions_list = [
+ "add_qnode(name=DOID:3312, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n01, object=n00, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
+ "add_qedge(subject=n01, object=n00, predicates=biolink:causes, exclude=true, key=e01)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id_not, edges_by_qg_id_not = _run_query_and_do_standard_testing(actions_list)
+ # None of the contraindicated n01 nodes should appear in the answer this time
+ final_pairs = {tuple(sorted([edge.subject, edge.object])) for edge in edges_by_qg_id_not["e00"].values()}
+ assert not contraindicated_pairs.intersection(final_pairs)
+ assert "e01" not in edges_by_qg_id_not
+
+
+@pytest.mark.slow
+def test_exclude_edge_perpendicular():
+ exclude_curies = ", ".join(['GO:0006915'])
+ # First run a query without any kryptonite edges to get a baseline
+ actions_list = [
+ "add_qnode(ids=DOID:3312, key=n00)",
+ "add_qnode(categories=biolink:Protein, key=n01, is_set=true)",
+ f"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+ "add_qedge(subject=n01, object=n00, key=e00, predicates=biolink:causes)",
+ "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:affects)",
+ # 'Exclude' portion (just optional for now to get a baseline)
+ f"add_qnode(categories=biolink:Pathway, key=nx0, option_group_id=1, ids=[{exclude_curies}])",
+ "add_qedge(subject=n01, object=nx0, key=ex0, option_group_id=1, predicates=biolink:related_to)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+ nodes_used_by_kryptonite_edge = eu.get_node_keys_used_by_edges(edges_by_qg_id["ex0"])
+ n01_nodes_to_blow_away = set(nodes_by_qg_id["n01"]).intersection(nodes_used_by_kryptonite_edge)
+ assert n01_nodes_to_blow_away
+ assert len(n01_nodes_to_blow_away) < len(nodes_by_qg_id["n01"])
+
+ # Then use a kryptonite edge and make sure the appropriate nodes are blown away
+ actions_list = [
+ "add_qnode(ids=DOID:3312, key=n00)",
+ "add_qnode(categories=biolink:Protein, key=n01, is_set=true)",
+ f"add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+ "add_qedge(subject=n01, object=n00, key=e00, predicates=biolink:causes)",
+ "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:affects)",
+ # 'Exclude' portion
+ f"add_qnode(categories=biolink:Pathway, key=nx0, ids=[{exclude_curies}])",
+ "add_qedge(subject=n01, object=nx0, key=ex0, exclude=True, predicates=biolink:related_to)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id_not, edges_by_qg_id_not = _run_query_and_do_standard_testing(actions_list)
+ assert not n01_nodes_to_blow_away.intersection(set(nodes_by_qg_id_not["n01"]))
+ assert "ex0" not in edges_by_qg_id_not and "nx0" not in nodes_by_qg_id_not
+
+
+@pytest.mark.slow
+def test_exclude_edge_ordering():
+ # This test makes sure that kryptonite qedges are expanded AFTER their adjacent qedges
+ actions_list = [
+ "add_qnode(name=DOID:3312, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n00, object=n01, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
+ "add_qedge(subject=n00, object=n01, predicates=biolink:predisposes_to_condition, exclude=true, key=e01)",
+ "expand(kp=infores:rtx-kg2, edge_key=e00)",
+ "expand(kp=infores:rtx-kg2, edge_key=e01)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id_a, edges_by_qg_id_a = _run_query_and_do_standard_testing(actions_list)
+ actions_list = [
+ "add_qnode(name=DOID:3312, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n00, object=n01, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
+ "add_qedge(subject=n00, object=n01, predicates=biolink:predisposes_to_condition, exclude=true, key=e01)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id_b, edges_by_qg_id_b = _run_query_and_do_standard_testing(actions_list)
+ actions_list = [
+ "add_qnode(name=DOID:3312, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n00, object=n01, predicates=biolink:predisposes_to_condition, exclude=true, key=e01)",
+ "add_qedge(subject=n00, object=n01, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id_c, edges_by_qg_id_c = _run_query_and_do_standard_testing(actions_list)
+ # All of these queries should produce the same KG contents
+ assert set(nodes_by_qg_id_a["n01"]) == set(nodes_by_qg_id_b["n01"]) == set(nodes_by_qg_id_c["n01"])
+ assert set(edges_by_qg_id_a["e00"]) == set(edges_by_qg_id_b["e00"]) == set(edges_by_qg_id_c["e00"])
+
+
+def test_exclude_edge_no_results():
+ # Tests query with an exclude edge that doesn't have any matches in the KP (shouldn't error out)
+ actions = [
+ "add_qnode(name=DOID:3312, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n00, object=n01, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
+ "add_qedge(subject=n00, object=n01, predicates=biolink:not_a_real_edge_type, exclude=true, key=e01)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
+
+
+def test_option_group_query_one_hop():
+ # Tests a simple one-hop query with an optional edge
+ actions = [
+ "add_qnode(key=n00, ids=DOID:3312)",
+ "add_qnode(key=n01, categories=biolink:ChemicalEntity)",
+ "add_qedge(key=e00, subject=n01, object=n00, predicates=biolink:causes)",
+ "add_qedge(key=e01, subject=n00, object=n01, predicates=biolink:affects, option_group_id=1)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
+
+
+@pytest.mark.slow
+def test_option_group_query_no_results():
+ # Tests query with optional path that doesn't have any matches in the KP (shouldn't error out)
+ actions = [
+ "add_qnode(key=n00, ids=DOID:3312)",
+ "add_qnode(key=n01, ids=CHEBI:48607)",
+ "add_qnode(key=n02, categories=biolink:Protein, option_group_id=1, is_set=true)",
+ "add_qedge(key=e00, subject=n00, object=n01, predicates=biolink:related_to)",
+ "add_qedge(key=e01, subject=n00, object=n02, option_group_id=1, predicates=biolink:overlaps)",
+ "add_qedge(key=e02, subject=n02, object=n01, option_group_id=1, predicates=biolink:affects)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
+
+
+def test_category_and_predicate_format():
+ actions_list = [
+ "add_qnode(ids=UniProtKB:P42857, key=n00)",
+ "add_qnode(categories=biolink:Protein, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:affects)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+ for qnode_key, nodes in nodes_by_qg_id.items():
+ for node_key, node in nodes.items():
+ assert all(category.startswith("biolink:") for category in node.categories)
+ for qedge_key, edges in edges_by_qg_id.items():
+ for edge_key, edge in edges.items():
+ assert edge.predicate.startswith("biolink:")
+ assert "," not in edge.predicate
+
+
+def test_issue_1212():
+ # If a qnode curie isn't recognized by synonymizer, shouldn't end up with results when using KG2c
+ actions_list = [
+ "add_qnode(ids=FAKE:Curie, categories=biolink:ChemicalEntity, key=n00)",
+ "add_qnode(categories=biolink:Disease, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list, kg_should_be_incomplete=True)
+
+
+def test_issue_1314():
+ # KG2 should return answers for "treated_by" (even though it only contains "treats" edges)
+ actions_list = [
+ "add_qnode(key=n0, ids=DRUGBANK:DB00394, categories=biolink:ChemicalEntity)",
+ "add_qnode(key=n1, categories=biolink:Disease)",
+ "add_qedge(key=e0, subject=n1, object=n0, predicates=biolink:subject_of_treatment_application_or_study_for_treatment_by)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+@pytest.mark.external
+def test_issue_1236_a():
+ # Test that multiple KPs are used for expansion when no KP is specified in DSL
+ actions_list = [
+ "add_qnode(ids=NCBIGene:1803, key=n00)",
+ "add_qnode(categories=biolink:Disease, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:gene_associated_with_condition)",
+ "expand()",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+ actions_list_kg2_only = [
+ "add_qnode(ids=NCBIGene:1803, key=n00)",
+ "add_qnode(categories=biolink:Disease, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:gene_associated_with_condition)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id_kg2_only, edges_by_qg_id_kg2_only = _run_query_and_do_standard_testing(actions_list_kg2_only)
+
+ assert len(nodes_by_qg_id["n01"]) > len(nodes_by_qg_id_kg2_only["n01"])
+
+
+def test_issue_1236_b():
+ actions_list = [
+ "add_qnode(ids=DOID:14330, categories=biolink:Disease, key=n00)",
+ "add_qnode(categories=biolink:Protein, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:condition_associated_with_gene)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+def test_kg2_predicate_hierarchy_reasoning():
+ actions_list = [
+ "add_qnode(ids=CHEMBL.COMPOUND:CHEMBL112, categories=biolink:ChemicalEntity, key=n00)",
+ "add_qnode(categories=biolink:Protein, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:affects)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+ assert any(edge for edge in edges_by_qg_id["e00"].values() if edge.predicate == "biolink:affects")
+ assert not any(edge for edge in edges_by_qg_id["e00"].values() if edge.predicate == "biolink:related_to")
+
+
+@pytest.mark.skip(reason="Dev testing for domain range exclusion")
+def test_domain_range_exclusion():  # skipped by default; see the reason on the decorator
+    actions_list = [
+        "add_qnode(ids=UMLS:C1510438, key=n00)",
+        "add_qnode(categories=biolink:Disease, key=n01)",
+        "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:diagnoses)",
+        "expand(kp=infores:rtx-kg2)",
+        "return(message=true, store=false)"
+    ]
+    nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+    assert False  # NOTE(review): always fails once un-skipped - presumably so pytest shows the captured output; confirm
+
+
+@pytest.mark.slow
+def test_issue_1373_pinned_curies():
+ actions_list = [
+ "add_qnode(ids=CHEMBL.COMPOUND:CHEMBL2108129, key=n00)",
+ "add_qnode(categories=biolink:Protein, key=n01)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+ "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:related_to)",
+ "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:related_to)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+ assert "CHEMBL.COMPOUND:CHEMBL2108129" not in nodes_by_qg_id["n02"]
+
+
+@pytest.mark.external
+def test_multiomics_clinical_risk_kp():
+ actions_list = [
+ "add_qnode(ids=DOID:14330, categories=biolink:Disease, key=n00)",
+ "add_qnode(categories=biolink:PhenotypicFeature, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:related_to)",
+ "expand(kp=infores:biothings-multiomics-clinical-risk)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+@pytest.mark.external
+def test_multiomics_drug_response_kp():
+ actions_list = [
+ "add_qnode(ids=NCBIGene:7157, categories=biolink:Gene, key=n00)",
+ "add_qnode(categories=biolink:SmallMolecule, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:associated_with_sensitivity_to)",
+ "expand(kp=infores:biothings-multiomics-biggim-drug-response)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+@pytest.mark.external
+def test_multiomics_tumor_gene_mutation_kp():
+ actions_list = [
+ "add_qnode(ids=MONDO:0018177, key=n00)",
+ "add_qnode(categories=biolink:Gene, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(kp=infores:biothings-tcga-mut-freq)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+def test_many_kp_query():
+ actions_list = [
+ "add_qnode(ids=CHEMBL.COMPOUND:CHEMBL112, key=n00)",
+ "add_qnode(categories=biolink:Protein, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:interacts_with)",
+ "expand()",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list, timeout=10)
+
+
+def test_qualified_regulates_query():
+ query = {
+ "nodes": {
+ "n0": {
+ "ids": ["NCBIGene:7157"]
+ },
+ "n1": {
+ "categories": ["biolink:Gene"]
+ }
+ },
+ "edges": {
+ "e0": {
+ "subject": "n0",
+ "object": "n1",
+ "qualifier_constraints": [
+ {"qualifier_set": [
+ {"qualifier_type_id": "biolink:qualified_predicate",
+ "qualifier_value": "biolink:causes"},
+ # {"qualifier_type_id": "biolink:object_direction_qualifier",
+ # "qualifier_value": "decreased"}, # for RTX issue 2068
+ # # see also RTX-KG2 issue 339
+ # # Uncomment to test in KG2.8.5
+ {"qualifier_type_id": "biolink:object_aspect_qualifier",
+ "qualifier_value": "activity"}
+ ]}
+ ],
+ "attribute_constraints": [
+ {
+ "id": "knowledge_source",
+ "name": "knowledge source",
+ "value": ["infores:rtx-kg2"],
+ "operator": "==",
+ "not": False
+ }
+ ]
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query)
+
+
+def test_1516_single_quotes_in_ids():
+ actions = [
+ "add_qnode(key=n0,ids=UniProtKB:P00491)",
+ "add_qnode(key=n1)",
+ "add_qedge(key=e01,subject=n0,object=n1)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
+
+
+def test_input_curie_remapping():
+ actions = [
+ "add_qnode(key=n0, ids=KEGG.COMPOUND:C02700)",
+ "add_qnode(key=n1, categories=biolink:Protein)",
+ "add_qedge(key=e01, subject=n0, object=n1)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
+ assert "KEGG.COMPOUND:C02700" in nodes_by_qg_id["n0"]
+
+
+def test_constraint_validation():
+ query = {
+ "edges": {
+ "e00": {
+ "object": "n01",
+ "predicates": ["biolink:physically_interacts_with"],
+ "subject": "n00",
+ "attribute_constraints": [{"id": "test_edge_constraint_1", "name": "test name edge", "operator": "<", "value": 1.0},
+ {"id": "test_edge_constraint_2", "name": "test name edge", "operator": ">", "value": 0.5}]
+ }
+ },
+ "nodes": {
+ "n00": {
+ "categories": ["biolink:ChemicalEntity"],
+ "ids": ["CHEMBL.COMPOUND:CHEMBL112"]
+ },
+ "n01": {
+ "categories": ["biolink:Protein"],
+ "constraints": [{"id": "test_node_constraint", "name": "test name node", "operator": "<", "value": 1.0}]
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query, should_throw_error=True,
+ error_code="UnsupportedConstraint")
+
+
+def test_edge_constraints():
+ query = {
+ "nodes": {
+ "n00": {
+ "ids": ["CHEMBL.COMPOUND:CHEMBL112"]
+ },
+ "n01": {
+ "categories": ["biolink:ChemicalEntity"]
+ }
+ },
+ "edges": {
+ "e00": {
+ "object": "n00",
+ "subject": "n01",
+ "attribute_constraints": [
+ {
+ "id": "knowledge_source",
+ "name": "knowledge source",
+ "value": ["infores:rtx-kg2","infores:arax","infores:drugbank"],
+ "operator": "==",
+ "not": False
+ }
+ ]
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query)
+
+
+def test_canonical_predicates():
+ actions = [
+ "add_qnode(key=n00, ids=CHEMBL.COMPOUND:CHEMBL945)",
+ "add_qnode(key=n01, categories=biolink:BiologicalEntity)",
+ "add_qedge(key=e00, subject=n00, object=n01, predicates=biolink:participates_in)", # Not canonical
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
+ e00_predicates = {edge.predicate for edge in edges_by_qg_id["e00"].values()}
+ assert "biolink:has_participant" in e00_predicates and "biolink:participates_in" not in e00_predicates
+
+
+@pytest.mark.slow
+@pytest.mark.external
+def test_curie_prefix_conversion_1537():
+ actions = [
+ "add_qnode(key=n0, ids=NCBIGene:60412, categories=biolink:Gene)",
+ "add_qnode(key=n1, categories=biolink:ChemicalEntity)",
+ "add_qedge(key=e01, subject=n0, object=n1, predicates=biolink:related_to)",
+ "expand(kp=infores:connections-hypothesis)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
+
+
+@pytest.mark.slow
+@pytest.mark.external
+def test_merging_node_attributes_1450():
+ actions = [
+ "add_qnode(key=n0, ids=CHEMBL.COMPOUND:CHEMBL112)",
+ "add_qnode(key=n1, categories=biolink:Disease)",
+ "add_qedge(key=e01, subject=n0, object=n1, predicates=biolink:treats_or_applied_or_studied_to_treat)",
+ "expand(kp=infores:biothings-explorer)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
+ num_attributes_a = len(nodes_by_qg_id["n0"]["CHEMBL.COMPOUND:CHEMBL112"].attributes)
+ actions = [
+ "add_qnode(key=n0, ids=CHEMBL.COMPOUND:CHEMBL112)",
+ "add_qnode(key=n1, categories=biolink:Disease)",
+ "add_qedge(key=e01, subject=n0, object=n1, predicates=biolink:treats_or_applied_or_studied_to_treat)",
+ "expand(kp=infores:rtx-kg2)",
+ "expand(kp=infores:biothings-explorer)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
+ num_attributes_b = len(nodes_by_qg_id["n0"]["CHEMBL.COMPOUND:CHEMBL112"].attributes)
+ assert num_attributes_a == num_attributes_b
+
+
+@pytest.mark.external
+def test_icees_dili():
+ actions = [
+ "add_qnode(key=n0, ids=NCIT:C28421, categories=biolink:PhenotypicFeature)",
+ "add_qnode(key=n1, categories=biolink:NamedThing)",
+ "add_qedge(key=e01, subject=n0, object=n1, predicates=biolink:correlated_with)",
+ "expand(kp=infores:icees-dili)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
+
+
+@pytest.mark.external
+def test_icees_asthma():
+ actions = [
+ "add_qnode(key=n0, ids=NCIT:C28421, categories=biolink:PhenotypicFeature)",
+ "add_qnode(key=n1, categories=biolink:NamedThing)",
+ "add_qedge(key=e01, subject=n0, object=n1, predicates=biolink:correlated_with)",
+ "expand(kp=infores:icees-asthma)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
+
+
+@pytest.mark.slow
+def test_almost_cycle_1565():
+ actions_list = [
+ "add_qnode(ids=MONDO:0010161, key=n0)",
+ "add_qnode(categories=biolink:Gene, key=n1)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n2)",
+ "add_qedge(subject=n1, object=n0, key=e0, predicates=biolink:related_to)",
+ "add_qedge(subject=n1, object=n2, key=e1, predicates=biolink:related_to)",
+ "add_qedge(subject=n0, object=n2, key=e2, predicates=biolink:related_to)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+
+
+@pytest.mark.slow
+def test_fda_approved_query_simple():
+ query = {
+ "nodes": {
+ "n0": {
+ "ids": [
+ "MONDO:0000888"
+ ]
+ },
+ "n1": {
+ "categories": [
+ "biolink:ChemicalEntity"
+ ],
+ "constraints": [
+ {
+ "id": "biolink:highest_FDA_approval_status",
+ "name": "highest FDA approval status",
+ "operator": "==",
+ "value": "regular approval"
+ }
+ ]
+ }
+ },
+ "edges": {
+ "e0": {
+ "subject": "n1",
+ "object": "n0",
+ "predicates": [
+ "biolink:treats_or_applied_or_studied_to_treat"
+ ]
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query)
+
+
+@pytest.mark.slow
+def test_fda_approved_query_workflow_a9_egfr_advanced():
+    """Check that an FDA-approval constraint on n0 shrinks the set of n0 answers.
+
+    The same one-hop query is run twice: as-is, and then with a
+    `biolink:highest_FDA_approval_status == "regular approval"` constraint on n0.
+    The constrained run must return strictly fewer n0 nodes.
+    """
+    import copy  # function-scope import: a module-level `copy` import is not assumed
+    query_unconstrained = {
+        "nodes": {
+            "n0": {"categories": ["biolink:SmallMolecule"]},
+            "n1": {"ids": ["NCBIGene:1956"]}
+        },
+        "edges": {
+            "e0": {
+                "subject": "n0",
+                "object": "n1",
+                "predicates": ["biolink:related_to"]
+            }
+        }
+    }
+    nodes_by_qg_id_unconstrained, edges_by_qg_id_unconstrained = _run_query_and_do_standard_testing(json_query=query_unconstrained, timeout=30)
+    assert nodes_by_qg_id_unconstrained.get("n1")
+
+    # Deep-copy so that adding the constraint cannot mutate query_unconstrained
+    # (previously both names were bound to the very same dict).
+    query_constrained = copy.deepcopy(query_unconstrained)
+    fda_approved_constraint = {
+        "id": "biolink:highest_FDA_approval_status",
+        "name": "highest FDA approval status",
+        "operator": "==",
+        "value": "regular approval"
+    }
+    query_constrained["nodes"]["n0"]["constraints"] = [fda_approved_constraint]
+    nodes_by_qg_id_constrained, edges_by_qg_id_constrained = _run_query_and_do_standard_testing(json_query=query_constrained, timeout=30)
+
+    # The constraint must filter out at least one n0 node
+    assert len(nodes_by_qg_id_constrained["n0"]) < len(nodes_by_qg_id_unconstrained["n0"])
+
+
+def test_inverted_treats_handling():
+ actions = [
+ "add_qnode(key=n0, ids=MONDO:0005077)",
+ "add_qnode(key=n1, categories=biolink:ChemicalEntity)",
+ "add_qedge(key=e0, subject=n0, object=n1, predicates=biolink:treats_or_applied_or_studied_to_treat)",
+ "expand(kp=infores:rtx-kg2)",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
+
+
+def test_xdtd_expand():
+    """Inferred treats query: support graphs on ARAX-sourced edges must exist in the message."""
+    query = {
+        "nodes": {
+            "disease": {"ids": ["MONDO:0015564"]},
+            "chemical": {"categories": ["biolink:ChemicalEntity"]}
+        },
+        "edges": {
+            "t_edge": {
+                "object": "disease",
+                "subject": "chemical",
+                "predicates": ["biolink:treats_or_applied_or_studied_to_treat"],
+                "knowledge_type": "inferred"
+            }
+        }
+    }
+    nodes_by_qg_id, edges_by_qg_id, message = _run_query_and_do_standard_testing(json_query=query, return_message=True)
+    # The message as a whole must carry auxiliary graphs
+    assert message.auxiliary_graphs
+    for edge in edges_by_qg_id["t_edge"].values():
+        # An edge is treated as inferred when infores:arax is its primary knowledge source
+        inferred_edge = any(source.resource_role == "primary_knowledge_source"
+                            and source.resource_id == "infores:arax"
+                            for source in edge.sources)
+        # Perform the remaining checks only for inferred edges
+        if not inferred_edge:
+            continue
+        assert edge.attributes
+        support_graph_attributes = [attribute for attribute in edge.attributes if attribute.attribute_type_id == "biolink:support_graphs"]
+        # Some xdtd predictions don't have support_graphs, so skip them
+        if not support_graph_attributes:
+            continue
+        # Exactly one support-graphs attribute, whose first entry must be a known auxiliary graph
+        assert len(support_graph_attributes) == 1
+        support_graph_attribute = support_graph_attributes[0]
+        assert support_graph_attribute.value[0] in message.auxiliary_graphs
+
+
+def test_xdtd_different_categories():
+ query = {
+ "nodes": {
+ "disease": {
+ "ids": ["MONDO:0015564"]
+ },
+ "chemical": {
+ "categories": ["biolink:Drug"]
+ }
+ },
+ "edges": {
+ "t_edge": {
+ "object": "disease",
+ "subject": "chemical",
+ "predicates": ["biolink:treats_or_applied_or_studied_to_treat"],
+ "knowledge_type": "inferred"
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query)
+ query = {
+ "nodes": {
+ "disease": {
+ "ids": ["MONDO:0015564"],
+ "categories": ["biolink:Disease"]
+ },
+ "chemical": {
+ "categories": ["biolink:Drug"]
+ }
+ },
+ "edges": {
+ "t_edge": {
+ "object": "disease",
+ "subject": "chemical",
+ "predicates": ["biolink:treats_or_applied_or_studied_to_treat"],
+ "knowledge_type": "inferred"
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query)
+ query = {
+ "nodes": {
+ "disease": {
+ "ids": ["MONDO:0015564"],
+ "categories": ["biolink:DiseaseOrPhenotypicFeature"]
+ },
+ "chemical": {
+ "categories": ["biolink:ChemicalMixture"]
+ }
+ },
+ "edges": {
+ "t_edge": {
+ "object": "disease",
+ "subject": "chemical",
+ "predicates": ["biolink:treats_or_applied_or_studied_to_treat"],
+ "knowledge_type": "inferred"
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query)
+
+
+def test_xdtd_multiple_categories():
+ query = {
+ "nodes": {
+ "disease": {
+ "ids": ["MONDO:0015564"]
+ },
+ "chemical": {
+ "categories": ["biolink:Drug", "biolink:ChemicalMixture"]
+ }
+ },
+ "edges": {
+ "t_edge": {
+ "object": "disease",
+ "subject": "chemical",
+ "predicates": ["biolink:treats_or_applied_or_studied_to_treat"],
+ "knowledge_type": "inferred"
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query)
+
+
+def test_xdtd_different_predicates():
+ query = {
+ "nodes": {
+ "disease": {
+ "ids": ["MONDO:0015564"]
+ },
+ "chemical": {
+ "categories": ["biolink:Drug", "biolink:ChemicalMixture"]
+ }
+ },
+ "edges": {
+ "t_edge": {
+ "object": "disease",
+ "subject": "chemical",
+ "predicates": ["biolink:ameliorates_condition"],
+ "knowledge_type": "inferred"
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query)
+
+
+def test_xdtd_no_curies():
+ query = {
+ "nodes": {
+ "disease": {
+ },
+ "chemical": {
+ "categories": ["biolink:Drug", "biolink:ChemicalMixture"],
+ "ids": ["CHEMBL:CHEMBL1234"]
+ }
+ },
+ "edges": {
+ "t_edge": {
+ "object": "disease",
+ "subject": "chemical",
+ "predicates": ["biolink:ameliorates_condition"],
+ "knowledge_type": "inferred"
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query, should_throw_error=True)
+
+
+@pytest.mark.skip(reason="mixing inferred and lookup edges in one query is not yet implemented")
+def test_xdtd_with_other_edges():  # one inferred edge plus one ordinary edge in the same query graph
+    query = {
+        "nodes": {
+            "disease": {
+                "ids": ["UMLS:C4023597"]
+            },
+            "chemical": {
+                "categories": ["biolink:Drug", "biolink:ChemicalMixture"]
+            },
+            "gene": {
+                "categories": ["biolink:Gene", "biolink:Protein"]
+            }
+        },
+        "edges": {
+            "t_edge": {
+                "object": "disease",
+                "subject": "chemical",
+                "predicates": ["biolink:affects"],
+                "knowledge_type": "inferred"
+            },
+            "non_t_edge": {
+                "object": "gene",
+                "subject": "chemical"
+            }
+        }
+    }
+    # FIXME: this test is failing since the ability to mix inferred with lookup edges is not yet implemented
+    nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query, should_throw_error=True)
+
+
+def test_xdtd_curie_not_in_db():
+ query = {
+ "nodes": {
+ "disease": {
+ "ids": ["MONDO:0021783"] # this curie has probabilities but no paths in the XDTDdb
+ },
+ "chemical": {
+ "categories": ["biolink:Drug", "biolink:ChemicalMixture"]
+ }
+ },
+ "edges": {
+ "t_edge": {
+ "object": "disease",
+ "subject": "chemical",
+ "predicates": ["biolink:treats"],
+ "knowledge_type": "inferred"
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query, should_throw_error=False)
+
+
+@pytest.mark.slow
+def test_query_ids_mappings():  # checks how query_ids are filled in on pinned (n00) vs unpinned (n01) nodes
+    query_curies = ["CHEMBL.COMPOUND:CHEMBL112", "DOID:14330"]
+    actions_list = [
+        f"add_qnode(ids=[{','.join(query_curies)}], key=n00)",
+        "add_qnode(categories=biolink:Protein, key=n01)",
+        "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:related_to)",
+        "expand()",
+        "return(message=true, store=false)"
+    ]
+    nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list, timeout=10)
+    # Make sure we actually got some subclass child nodes from KPs
+    assert len(nodes_by_qg_id["n00"]) > 2
+    for node_key, node in nodes_by_qg_id["n00"].items():
+        # Make sure pinned nodes have query_ids filled out (the query curies themselves may have none)
+        assert node.query_ids or node_key in query_curies
+        # Every listed parent query ID must itself be an n00 node; `or []` tolerates query_ids being None
+        for parent_query_id in node.query_ids or []:
+            assert parent_query_id in nodes_by_qg_id["n00"]
+    # Make sure unpinned nodes do not have query_ids specified
+    for node in nodes_by_qg_id["n01"].values():
+        assert not node.query_ids
+
+
+@pytest.mark.external
+def test_no_query_ids_issue():
+ query = {
+ "nodes": {
+ "n1": {
+ "categories": [
+ "biolink:GrossAnatomicalStructure"
+ ],
+ "ids": [
+ "UBERON:0009912",
+ "UBERON:0002535",
+ "UBERON:0000019",
+ "UBERON:0002365",
+ "UBERON:0000017",
+ "UBERON:0000970",
+ "UBERON:0001831",
+ "UBERON:0016410",
+ "UBERON:0001737",
+ "UBERON:0000945"
+ ]
+ },
+ "n2": {
+ "categories": [
+ "biolink:Gene"
+ ]
+ }
+ },
+ "edges": {
+ "e1": {
+ "subject": "n1",
+ "object": "n2",
+ "predicates": [
+ "biolink:expresses"
+ ],
+ "attribute_constraints": [
+ {
+ "id": "knowledge_source",
+ "name": "knowledge source",
+ "value": ["infores:connections-hypothesis"],
+ "operator": "==",
+ "not": False
+ }
+ ]
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query, timeout=45)
+
+
+@pytest.mark.slow
+def test_subclass_answers_for_non_pinned_qnodes():
+ query = {
+ "nodes": {
+ "n0": {
+ "categories": [
+ "biolink:Disease"
+ ],
+ "ids": [
+ "MONDO:0009061"
+ ]
+ },
+ "n1": {
+ "categories": [
+ "biolink:GrossAnatomicalStructure"
+ ]
+ },
+ "n2": {
+ "categories": [
+ "biolink:Gene"
+ ]
+ },
+ "n3": {
+ "categories": [
+ "biolink:Drug",
+ "biolink:SmallMolecule"
+ ]
+ }
+ },
+ "edges": {
+ "e0": {
+ "subject": "n0",
+ "object": "n1",
+ "predicates": [
+ "biolink:located_in"
+ ]
+ },
+ "e1": {
+ "subject": "n1",
+ "object": "n2",
+ "predicates": [
+ "biolink:expresses"
+ ]
+ },
+ "e2": {
+ "subject": "n3",
+ "object": "n2",
+ "predicates": [
+ "biolink:affects"
+ ]
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query, timeout=75)
+
+
+def test_kp_list():
+ actions = [
+ "add_qnode(key=qg0, ids=CHEMBL.COMPOUND:CHEMBL112)",
+ "add_qnode(key=qg1, categories=biolink:Protein)",
+ "add_qedge(subject=qg1, object=qg0, key=qe0, predicates=biolink:physically_interacts_with)",
+ "expand(edge_key=qe0, kp=[infores:rtx-kg2, infores:molepro])",
+ "return(message=true, store=false)"
+ ]
+ nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions, timeout=30)
+
+
+def test_missing_epc_attributes():  # every edge needs a primary source; SemMedDB edges also need publications
+    actions = [
+        "add_qnode(name=Parkinson's disease, key=n0)",
+        "add_qnode(categories=biolink:Drug, key=n1)",
+        "add_qedge(subject=n1, object=n0, key=e0, predicates=biolink:predisposes_to_condition)",
+        "expand(kp=infores:rtx-kg2)",
+        "return(message=true, store=false)"
+    ]
+    nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions)
+    for edges in edges_by_qg_id.values():  # the qedge keys are not needed here
+        for edge in edges.values():  # nor are the edge keys
+            primary_knowledge_sources = {source.resource_id for source in edge.sources
+                                         if source.resource_role == "primary_knowledge_source"}
+            assert primary_knowledge_sources
+            if "infores:semmeddb" in primary_knowledge_sources:
+                assert edge.attributes
+                publications = [attribute.value for attribute in edge.attributes
+                                if attribute.attribute_type_id == "biolink:publications"]
+                assert publications
+
+
+def test_kg2_version():  # the KG2c build node and the KG2 openapi.yaml must report the same version
+    query = {
+        "nodes": {
+            "n00": {
+                "ids": ["RTX:KG2c"]
+            }
+        },
+        "edges": {}
+    }
+    nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query)
+
+    # First grab the KG2 version from the name of the KG2c build node (the only node expected back)
+    assert nodes_by_qg_id["n00"]
+    assert len(nodes_by_qg_id["n00"]) == 1
+    build_node = nodes_by_qg_id["n00"]["RTX:KG2c"]
+    kg2c_build_node_version = build_node.name.replace("RTX-KG", "").strip("c")  # drops "RTX-KG" and any leading/trailing "c"
+    print(f"KG2 version from KG2c build node is: {kg2c_build_node_version}")
+
+    # Then grab the KG2 version from the OpenAPI spec, located relative to this test file
+    code_dir = os.path.dirname(os.path.abspath(__file__)) + "/../../"
+    kg2_openapi_yaml_path = f"{code_dir}/UI/OpenAPI/specifications/export/KG2/1.5.0/openapi.yaml"
+    with open(kg2_openapi_yaml_path) as kg2_api_file:
+        kg2_openapi_configuration = yaml.safe_load(kg2_api_file)
+        kg2_openapi_version = kg2_openapi_configuration["info"]["version"]
+    print(f"KG2 version from KG2 openapi.yaml file is: {kg2_openapi_version}")
+
+    assert kg2c_build_node_version == kg2_openapi_version
+
+
+def test_klat_attributes():  # each e0 edge needs string-valued knowledge_level and agent_type attributes
+    actions_list = [
+        "add_qnode(key=n0, ids=DRUGBANK:DB00394)",
+        "add_qnode(key=n1, categories=biolink:Disease)",
+        "add_qedge(key=e0, subject=n1, object=n0, predicates=biolink:treats_or_applied_or_studied_to_treat)",
+        "expand(kp=infores:rtx-kg2)",
+        "return(message=true, store=false)"
+    ]
+    nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list)
+    for edge in edges_by_qg_id["e0"].values():  # edge keys are not needed for these checks
+        assert any(attribute.attribute_type_id == "biolink:knowledge_level" for attribute in edge.attributes)
+        assert any(attribute.attribute_type_id == "biolink:agent_type" for attribute in edge.attributes)
+        assert all(isinstance(attribute.value, str) for attribute in edge.attributes
+                   if attribute.attribute_type_id in {"biolink:knowledge_level", "biolink:agent_type"})
+
+
+def test_treats_patch_issue_2328_a():
+ query = {
+ "nodes": {
+ "disease": {
+ "ids": ["MONDO:0015564"]
+ },
+ "chemical": {
+ "categories": ["biolink:ChemicalEntity"]
+ }
+ },
+ "edges": {
+ "t_edge": {
+ "object": "disease",
+ "subject": "chemical",
+ "predicates": ["biolink:treats"],
+ "knowledge_type": "inferred",
+ "attribute_constraints": [
+ {
+ "id": "knowledge_source",
+ "name": "knowledge source",
+ "value": ["infores:rtx-kg2"],
+ "operator": "=="
+ }
+ ]
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id, message = _run_query_and_do_standard_testing(json_query=query, return_message=True)
+ assert edges_by_qg_id["t_edge"]
+ # Make sure the KG2 edges, which are higher-level treats edges, are in the KG (used as support edges)
+ creative_expand_treats_edges = [edge for edge_key, edge in message.knowledge_graph.edges.items()
+ if edge_key.startswith("creative_expand")]
+ support_edge_keys = set()
+ for edge in creative_expand_treats_edges:
+ aux_graph_keys = get_support_graphs_attribute(edge).value
+ assert aux_graph_keys
+ for aux_graph_key in aux_graph_keys:
+ aux_graph = message.auxiliary_graphs[aux_graph_key]
+ support_edge_keys.update(set(aux_graph.edges))
+ support_edges = [message.knowledge_graph.edges[edge_key] for edge_key in support_edge_keys]
+
+ assert any(source.resource_id == "infores:rtx-kg2" for edge in support_edges for source in edge.sources)
+ # assert not any(source.resource_id == "infores:semmeddb" for edge in support_edges for source in edge.sources)
+
+def test_treats_patch_issue_2328_b():
+    """Verify that the edge editing doesn't happen outside of inferred mode."""
+    query = {
+        "nodes": {
+            "disease": {
+                "ids": ["MONDO:0015564"]
+            },
+            "chemical": {
+                "categories": ["biolink:ChemicalEntity"]
+            }
+        },
+        "edges": {
+            "t_edge": {
+                "object": "disease",
+                "subject": "chemical",
+                "predicates": ["biolink:treats_or_applied_or_studied_to_treat", "biolink:applied_to_treat"],
+                "attribute_constraints": [
+                    {
+                        "id": "knowledge_source",
+                        "name": "knowledge source",
+                        "value": ["infores:rtx-kg2"],
+                        "operator": "=="
+                    }
+                ]
+            }
+        }
+    }
+    nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query)
+    assert edges_by_qg_id["t_edge"]
+    kg2_edges_treats_or = [edge for edge in edges_by_qg_id["t_edge"].values()
+                           if any(source.resource_id == "infores:rtx-kg2" for source in edge.sources)]
+    print(f"Answer includes {len(kg2_edges_treats_or)} edges from KG2")
+    assert kg2_edges_treats_or
+    assert any(edge.predicate == "biolink:treats_or_applied_or_studied_to_treat" for edge in kg2_edges_treats_or)
+    assert any(edge.predicate == "biolink:applied_to_treat" for edge in kg2_edges_treats_or)  # both requested predicates must survive
+
+
+@pytest.mark.external
+def test_creative_treats_predicate_alteration_2412():  # inferred 'treats' query: ARAX edges plus resolvable support graphs
+    query = {
+        "nodes": {
+            "n00": {
+                "ids": ["MONDO:0018958"]
+            },
+            "n01": {
+                "categories": ["biolink:SmallMolecule"]
+            }
+        },
+        "edges": {
+            "e00": {
+                "subject": "n01",
+                "object": "n00",
+                "predicates": ["biolink:treats"],
+                "knowledge_type": "inferred"
+            }
+        }
+    }
+    nodes_by_qg_id, edges_by_qg_id, message = _run_query_and_do_standard_testing(json_query=query, return_message=True)
+
+    # Make sure we appear to have creative expand treats edges
+    assert edges_by_qg_id and edges_by_qg_id.get("e00")
+    assert any(edge_key.startswith("creative_expand") for edge_key in edges_by_qg_id["e00"])
+    primary_sources_e00 = {get_primary_knowledge_source(edge) for edge in edges_by_qg_id["e00"].values()}
+    print(f"primary_knowledge_sources are: {primary_sources_e00}")
+    assert "infores:arax" in primary_sources_e00
+
+    # Make sure 'support' edges, like from ROBOKOP, are present in the KG
+    primary_sources_all = {get_primary_knowledge_source(edge) for edges_dict in edges_by_qg_id.values()
+                           for edge in edges_dict.values()}
+    assert "infores:automat-robokop" in primary_sources_all
+
+    # Make sure that creative expand treats edges have support graphs that actually exist
+    for edge in edges_by_qg_id["e00"].values():
+        if get_primary_knowledge_source(edge) == "infores:arax":
+            support_graph_attr = get_support_graphs_attribute(edge)
+            assert support_graph_attr
+            aux_graph_keys = eu.convert_to_set(support_graph_attr.value)
+            assert aux_graph_keys.issubset(message.auxiliary_graphs)
+            for aux_graph_key in aux_graph_keys:
+                aux_graph = message.auxiliary_graphs[aux_graph_key]
+                assert set(aux_graph.edges).issubset(message.knowledge_graph.edges)
+
+
+
+def test_issue_2662():
+    """Run a lookup 'treats' query with the KP info caches forced to version 1.6.0.
+
+    The forced KP version, the refreshed KP info caches and ARAXResponse.output are
+    put back in a `finally` block, so a failing query cannot leak them into later tests.
+    """
+    query_graph_dict = {
+        "edges": {
+            "50efaa83": {
+                "knowledge_type": "lookup",
+                "object": "on",
+                "predicates": ["biolink:treats"],
+                "subject": "sn"
+            }
+        },
+        "nodes": {
+            "on": {"ids": ["MONDO:0005015"]},
+            "sn": {"ids": ["CHEBI:5931"]}
+        }
+    }
+    envelope_dict = {
+        "message": {
+            "query_graph": query_graph_dict
+        }
+    }
+    # Remember the current settings before overriding them
+    kpic = KPInfoCacher()
+    saved_trapi_version = kpic.forced_kp_version
+    saved_arax_response_output = ARAXResponse.output
+    try:
+        kpic.forced_kp_version = "1.6.0"
+        kpic.refresh_kp_info_caches()
+        ARAXResponse.output = 'STDERR'
+        message = ARAXQuery().query_return_message(envelope_dict).message
+    finally:
+        # Undo the overrides whether or not the query raised
+        kpic.forced_kp_version = saved_trapi_version
+        kpic.refresh_kp_info_caches()
+        ARAXResponse.output = saved_arax_response_output
+    # The answer must include auxiliary graphs and more than three nodes and edges
+    aux_graphs = message.auxiliary_graphs
+    assert aux_graphs is not None and len(aux_graphs) > 0
+    kg = message.knowledge_graph
+    assert len(kg.nodes) > 3
+    assert len(kg.edges) > 3
+
+
+def test_issue_2678():
+    """Run a lookup query with the KP info caches forced to version 1.6.0.
+
+    The disease node must be in the answer knowledge graph and the response messages
+    must not mention biolink:PhenotypicFeature. The forced version is undone in `finally`.
+    """
+    query_graph_dict = {
+        "edges": {
+            "50efaa83": {
+                "knowledge_type": "lookup",
+                "object": "on",
+                "predicates": ["biolink:treats_or_applied_or_studied_to_treat"],
+                "subject": "sn"
+            }
+        },
+        "nodes": {
+            "on": {"ids": ["MONDO:0016098"]},
+            "sn": {"ids": ["CHEBI:229659"]}
+        }
+    }
+    envelope_dict = {
+        "message": {
+            "query_graph": query_graph_dict
+        }
+    }
+    # Remember the current forced KP version before overriding it
+    kpic = KPInfoCacher()
+    saved_trapi_version = kpic.forced_kp_version
+    # ARAXResponse.output is never changed in this test, so it is neither saved nor restored
+    try:
+        kpic.forced_kp_version = "1.6.0"
+        kpic.refresh_kp_info_caches()
+        aq = ARAXQuery()
+        response = aq.query_return_message(envelope_dict)
+        message = response.message
+        messages_str = json.dumps(aq.response.messages)
+    finally:
+        # Undo the override whether or not the query raised
+        kpic.forced_kp_version = saved_trapi_version
+        kpic.refresh_kp_info_caches()
+    # Previously an unused dict lookup (KeyError when absent); now an explicit membership check
+    assert 'MONDO:0016098' in message.knowledge_graph.nodes
+    assert 'biolink:PhenotypicFeature' not in messages_str
+
+
+if __name__ == "__main__":
+ pytest.main(['-v', 'test_ARAX_expand.py'])
diff --git a/code/code-archive/old-arax-tests/test_ARAX_filter_kg.py b/code/code-archive/old-arax-tests/test_ARAX_filter_kg.py
new file mode 100644
index 000000000..3775fb12c
--- /dev/null
+++ b/code/code-archive/old-arax-tests/test_ARAX_filter_kg.py
@@ -0,0 +1,337 @@
+#!/usr/bin/env python3
+
+# Usage:
+# run all: pytest -v test_ARAX_filter_kg.py
+# run just certain tests: pytest -v test_ARAX_filter_kg.py -k test_default_std_dev
+
+import sys
+import os
+import pytest
+from collections import Counter
+import copy
+import json
+import ast
+from typing import List, Union
+import numpy as np
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../ARAXQuery")
+from ARAX_filter_kg import ARAXFilterKG
+from ARAX_query import ARAXQuery
+from ARAX_response import ARAXResponse
+
+PACKAGE_PARENT = '../../UI/OpenAPI/python-flask-server'
+sys.path.append(os.path.normpath(os.path.join(os.getcwd(), PACKAGE_PARENT)))
+from openapi_server.models.edge import Edge
+from openapi_server.models.node import Node
+from openapi_server.models.q_edge import QEdge
+from openapi_server.models.q_node import QNode
+from openapi_server.models.query_graph import QueryGraph
+from openapi_server.models.knowledge_graph import KnowledgeGraph
+from openapi_server.models.node_binding import NodeBinding
+from openapi_server.models.edge_binding import EdgeBinding
+from openapi_server.models.result import Result
+from openapi_server.models.message import Message
+
+
+def _do_arax_query(query: dict, print_response: bool=True) -> List[Union[ARAXResponse, Message]]:
+    araxq = ARAXQuery()
+    response = araxq.query(query)
+    if response.status != 'OK' and print_response:
+        print(response.show(level=response.DEBUG))
+    # Hand back both the response and the message carried in its envelope
+    return [response, response.envelope.message]
+
+def test_command_definitions():
+ fkg = ARAXFilterKG()
+ assert fkg.allowable_actions == set(fkg.command_definitions.keys())
+
+def test_warnings():
+    query = {"operations": {"actions": [
+            "create_message",
+            "add_qnode(name=DOID:8741, key=n00)",
+            "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+            "add_qedge(subject=n00, object=n01, key=e00)",
+            "expand(edge_key=e00, kp=infores:rtx-kg2)",
+            "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=asdfghjkl, direction=below, threshold=.2)",
+            "filter_kg(action=remove_edges_by_discrete_attribute, edge_attribute=asdfghjkl, value=qwertyuiop)",
+            "filter_kg(action=remove_edges_by_std_dev, edge_attribute=asdfghjkl, remove_connected_nodes=f, threshold=0.25, top=f, direction=above)",
+            "filter_kg(action=remove_edges_by_top_n, edge_attribute=asdfghjkl, remove_connected_nodes=f, n=50, top=f, direction=above)",
+            "filter_kg(action=remove_edges_by_percentile, edge_attribute=asdfghjkl, remove_connected_nodes=f, threshold=25, top=f, direction=above)",
+            "overlay(action=compute_ngd, virtual_relation_label=N2, subject_qnode_key=n00, object_qnode_key=n01)",
+            "resultify(ignore_edge_direction=true)",
+            "filter_results(action=limit_number_of_results, max_results=20)",
+            "return(message=true, store=false)"
+        ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'  # every filter_kg call above names edge_attribute=asdfghjkl; the query must still succeed
+    assert len(message.results) == 20  # matches max_results=20 in the filter_results action
+
+def test_error():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=MONDO:0001475, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n01, object=n00, key=e00, predicates=biolink:related_to_at_instance_level)",
+ "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "filter_kg(action=remove_edges_by_predicate, edge_predicate=biolink:treats_or_applied_or_studied_to_treat, remove_connected_nodes=t, qedge_keys=[e00])",
+ "resultify(ignore_edge_direction=true)",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query, False)
+ assert response.status == 'ERROR'
+ assert response.error_code == "OrphanEdges"
+
+def test_edge_key_removal():
+    """After removing biolink:treats edges for qedge e01 only, no KG edge may still carry the qedge key e01."""
+    query = {"operations": {"actions": [
+            "create_message",
+            "add_qnode(name=DOID:11086, key=n00)",
+            "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+            "add_qnode(categories=biolink:Disease, key=n02)",
+            "add_qedge(subject=n01, object=n00, key=e00, predicates=biolink:treats)",
+            "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:treats)",
+            "expand(kp=infores:rtx-kg2)",
+            "filter_kg(action=remove_edges_by_predicate, edge_predicate=biolink:treats, remove_connected_nodes=f, qedge_keys=[e01])",
+            "return(message=true, store=false)"
+        ]}}
+    [response, message] = _do_arax_query(query, False)
+    assert response.status == 'OK'
+    # Gather the qedge keys of all surviving edges in one pass (no per-edge set copies)
+    edge_key_set = {qedge_key for edge in message.knowledge_graph.edges.values() for qedge_key in edge.qedge_keys}
+    assert 'e01' not in edge_key_set
+
+@pytest.mark.slow
+def test_default_std_dev():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:0060680, key=n00)",
+ "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "add_qedge(subject=n01, object=n02, key=e01)",
+ "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J2)",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ all_vals = [float(y.value) for x in message.knowledge_graph.edges.values() if x.attributes is not None for y in x.attributes if y.original_attribute_name == 'jaccard_index']
+ comp_val = np.mean(all_vals) + np.std(all_vals)
+ comp_len = len([x for x in all_vals if x > comp_val])
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:0060680, key=n00)",
+ "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "add_qedge(subject=n01, object=n02, key=e01)",
+ "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J2)",
+ "filter_kg(action=remove_edges_by_std_dev, edge_attribute=jaccard_index, remove_connected_nodes=f)",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ vals = [float(y.value) for x in message.knowledge_graph.edges.values() if x.attributes is not None for y in x.attributes if y.original_attribute_name == 'jaccard_index']
+ assert len(vals) == comp_len
+ assert np.min(vals) > comp_val
+
+@pytest.mark.slow
+def test_std_dev():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:0060680, key=n00)",
+ "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "add_qedge(subject=n01, object=n02, key=e01)",
+ "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J2)",
+ "resultify(ignore_edge_direction=true, debug=true)",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ all_vals = [float(y.value) for x in message.knowledge_graph.edges.values() if x.attributes is not None for y in x.attributes if y.original_attribute_name == 'jaccard_index']
+ assert len(all_vals) > 0
+ comp_val = np.mean(all_vals) - 0.25*np.std(all_vals)
+ comp_len = len([x for x in all_vals if x < comp_val])
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:0060680, key=n00)",
+ "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "add_qedge(subject=n01, object=n02, key=e01)",
+ "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J2)",
+ "filter_kg(action=remove_edges_by_std_dev, edge_attribute=jaccard_index, remove_connected_nodes=f, threshold=0.25, top=f, direction=above)",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ vals = [float(y.value) for x in message.knowledge_graph.edges.values() if x.attributes is not None for y in x.attributes if y.original_attribute_name == 'jaccard_index']
+ assert len(vals) == comp_len
+ assert len([x for x in vals if x == 1]) == 0
+ assert np.max(vals) < comp_val
+
+@pytest.mark.slow
+def test_default_top_n():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:0060680, key=n00)",
+ "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "add_qedge(subject=n01, object=n02, key=e01)",
+ "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J2)",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ all_vals = [float(y.value) for x in message.knowledge_graph.edges.values() if x.attributes is not None for y in x.attributes if y.original_attribute_name == 'jaccard_index']
+ all_vals.sort()
+ all_vals.reverse()
+ sorted_vals = all_vals[:50]
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:0060680, key=n00)",
+ "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "add_qedge(subject=n01, object=n02, key=e01)",
+ "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J2)",
+ "filter_kg(action=remove_edges_by_top_n, edge_attribute=jaccard_index, remove_connected_nodes=f)",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ vals = [float(y.value) for x in message.knowledge_graph.edges.values() if x.attributes is not None for y in x.attributes if y.original_attribute_name == 'jaccard_index']
+ assert len(vals) == 50
+ vals.sort()
+ vals.reverse()
+ assert vals == sorted_vals
+
+def test_remove_property_known_attributes():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(ids=CHEBI:17754, categories=biolink:ChemicalEntity, key=n0)",
+ "add_qnode(categories=biolink:Gene, key=n1)",
+ "add_qedge(subject=n1, object=n0, key=e0,predicates=biolink:negatively_regulates_entity_to_entity)",
+ "expand(kp=infores:rtx-kg2)",
+ "filter_kg(action=remove_edges_by_discrete_attribute,edge_attribute=provided_by,value=SEMMEDDB:,remove_connected_nodes=false)",
+ "resultify()",
+ "filter_results(action=limit_number_of_results, max_results=30)",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+
+@pytest.mark.slow
+def test_remove_attribute_known_attributes():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:14330, key=n00)",
+ "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:physically_interacts_with)",
+ "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
+ "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=jaccard_index, direction=below, threshold=.2, remove_connected_nodes=t, qnode_keys=[n02])",
+ #"filter_kg(action=remove_edges_by_discrete_attribute,edge_attribute=provided_by, value=Pharos)",
+ "overlay(action=predict_drug_treats_disease, subject_qnode_key=n02, object_qnode_key=n00, virtual_relation_label=P1)",
+ "resultify(ignore_edge_direction=true)",
+ "filter_results(action=sort_by_edge_attribute, edge_attribute=jaccard_index, direction=descending, max_results=15)",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+
+@pytest.mark.slow
+def test_provided_by_filter():
+    query = {"operations": {"actions": [
+            "create_message",
+            "add_qnode(ids=CHEBI:17754, categories=biolink:ChemicalEntity, key=n0)",
+            "add_qnode(categories=biolink:Gene, key=n1)",
+            "add_qedge(subject=n1, object=n0, key=e0,predicates=biolink:entity_negatively_regulates_entity)",
+            "expand(kp=infores:rtx-kg2)",
+            "filter_kg(action=remove_edges_by_discrete_attribute,edge_attribute=knowledge_source,value=infores:semmeddb,remove_connected_nodes=false)",
+            "resultify()",
+            #"filter_results(action=limit_number_of_results, max_results=30)",
+            "return(message=true, store=false)",
+        ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    count1 = len(message.results)
+    assert count1 == 0  # with knowledge_source=infores:semmeddb edges removed, no results are expected
+    query = {"operations": {"actions": [
+            "create_message",
+            "add_qnode(ids=CHEBI:17754, categories=biolink:ChemicalEntity, key=n0)",
+            "add_qnode(categories=biolink:Gene, key=n1)",
+            "add_qedge(subject=n1, object=n0, key=e0,predicates=biolink:entity_negatively_regulates_entity)",
+            "expand(kp=infores:rtx-kg2)",
+            #"filter_kg(action=remove_edges_by_discrete_attribute,edge_attribute=biolink:original_source,value=infores:semmeddb,remove_connected_nodes=false)",
+            "resultify()",
+            #"filter_results(action=limit_number_of_results, max_results=30)",
+            "return(message=true, store=false)",
+        ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    count2 = len(message.results)
+    assert count2 > count1  # the same query without the filter_kg step must produce results
+
+@pytest.mark.external
+@pytest.mark.slow
+def test_stats_error_int_threshold():
+ query = {"operations": {"actions": [
+ "create_message",
+ # Multiple sclerosis -> chemical substance with "related_to" from Clinical Risk KP
+ "add_qnode(ids=MONDO:0005301, key=n0)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0, predicates=biolink:related_to)",
+ "expand(kp=infores:biothings-multiomics-clinical-risk,edge_key=e0)",
+ "overlay(action=compute_ngd, virtual_relation_label=N1, subject_qnode_key=n0, object_qnode_key=n1)",
+ "resultify()",
+ "filter_results(action=limit_number_of_results, max_results=10)",
+ # Then look for proteins that are shared with these chemical substances and MS
+ "add_qnode(categories=biolink:Protein, key=n2, is_set=True)",
+ "add_qedge(subject=n0, object=n2, key=e1)",
+ "add_qedge(subject=n1, object=n2, key=e2)",
+ "expand(edge_key=[e1,e2])",
+ # Rank drugs by Jaccard Index
+ "overlay(action=compute_jaccard,start_node_key=n0,intermediate_node_key=n2,end_node_key=n1,virtual_relation_label=J1)",
+ "filter_kg(action=remove_edges_by_top_n,edge_attribute=jaccard_index, n=10,remove_connected_nodes=true,qnode_keys=[n2])",
+ "overlay(action=compute_ngd, virtual_relation_label=N2, subject_qnode_key=n1, object_qnode_key=n2)",
+ "overlay(action=compute_ngd, virtual_relation_label=N3, subject_qnode_key=n0, object_qnode_key=n2)",
+ "resultify()",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+
+def test_tuple_bug():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(key=n00,ids=DRUGBANK:DB00150,categories=biolink:ChemicalEntity)",
+ "add_qnode(key=n01,categories=biolink:Protein)",
+ "add_qedge(key=e00,subject=n00,object=n01)",
+ "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "overlay(action=fisher_exact_test,subject_qnode_key=n00,virtual_relation_label=F0,object_qnode_key=n01)",
+ "filter_kg(action=remove_edges_by_top_n,edge_attribute=fisher_exact_test_p-value,direction=below,n=10,remove_connected_nodes=true,qnode_keys=[n01])",
+ "resultify()",
+ "filter_results(action=limit_number_of_results, max_results=100)",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+
+if __name__ == "__main__":
+ pytest.main(['-v'])
diff --git a/code/code-archive/old-arax-tests/test_ARAX_filter_results.py b/code/code-archive/old-arax-tests/test_ARAX_filter_results.py
new file mode 100644
index 000000000..3a424f7e6
--- /dev/null
+++ b/code/code-archive/old-arax-tests/test_ARAX_filter_results.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+
+# Usage:
+# run all: pytest -v test_ARAX_filter_results.py
+# run just certain tests: pytest -v test_ARAX_filter_results.py -k test_sort
+
+import sys
+import os
+import pytest
+from collections import Counter
+import copy
+import json
+import ast
+from typing import List, Union
+
+sys.path.append(os.getcwd()+"/../ARAXQuery")
+#sys.path.append(os.getcwd()+"/../ARAXQuery")
+from ARAX_filter_results import ARAXFilterResults
+from ARAX_query import ARAXQuery
+from ARAX_response import ARAXResponse
+
+PACKAGE_PARENT = '../../UI/OpenAPI/python-flask-server'
+sys.path.append(os.path.normpath(os.path.join(os.getcwd(), PACKAGE_PARENT)))
+from openapi_server.models.edge import Edge
+from openapi_server.models.node import Node
+from openapi_server.models.q_edge import QEdge
+from openapi_server.models.q_node import QNode
+from openapi_server.models.query_graph import QueryGraph
+from openapi_server.models.knowledge_graph import KnowledgeGraph
+from openapi_server.models.node_binding import NodeBinding
+from openapi_server.models.edge_binding import EdgeBinding
+from openapi_server.models.result import Result
+from openapi_server.models.message import Message
+
+
+def _do_arax_query(query: dict) -> List[Union[ARAXResponse, Message]]:
+    araxq = ARAXQuery()
+    response = araxq.query(query)
+    if response.status != 'OK':
+        print(response.show(level=response.DEBUG))
+    # Hand back both the response and the message carried in its envelope
+    return [response, response.envelope.message]
+
+def test_command_definitions():
+ fr = ARAXFilterResults()
+ assert fr.allowable_actions == set(fr.command_definitions.keys())
+
+def test_n_results():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=UMLS:C0040250, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "overlay(action=add_node_pmids, max_num=15)",
+ "resultify(ignore_edge_direction=true)",
+ "filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=3)",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ assert message.n_results == len(message.results) == 3
+
+def test_no_results():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:4337, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "overlay(action=add_node_pmids, max_num=15)",
+ "filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=20)",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert 'WARNING: [] filter_results called with no results.' in response.show(level=ARAXResponse.WARNING)
+ assert response.status == 'OK'
+
+@pytest.mark.slow
+def test_prune():
+    """Compare a prune_kg=f run with a run that omits prune_kg: the latter's KG must hold only result-bound items."""
+    query = {"operations": {"actions": [
+            "create_message",
+            "add_qnode(name=DOID:4337, key=n00)",
+            "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+            "add_qedge(subject=n00, object=n01, key=e00)",
+            "expand(edge_key=e00, kp=infores:rtx-kg2)",
+            "overlay(action=add_node_pmids, max_num=15)",
+            "resultify(ignore_edge_direction=true)",
+            "filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=20, prune_kg=f)",
+            "return(message=true, store=false)"
+        ]}}
+    [no_prune_response, no_prune_message] = _do_arax_query(query)
+    assert no_prune_response.status == 'OK'  # the baseline KG is compared against below, so it must be valid too
+    query = {"operations": {"actions": [
+            "create_message",
+            "add_qnode(name=DOID:4337, key=n00)",
+            "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+            "add_qedge(subject=n00, object=n01, key=e00)",
+            "expand(edge_key=e00, kp=infores:rtx-kg2)",
+            "overlay(action=add_node_pmids, max_num=15)",
+            "resultify(ignore_edge_direction=true)",
+            "filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=20)",
+            "return(message=true, store=false)"
+        ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    # Collect every node id and edge id that is bound by at least one result
+    result_nodes = {binding.id for result in message.results
+                    for bindings in result.node_bindings.values()
+                    for binding in bindings}
+    result_edges = {binding.id for result in message.results
+                    for bindings in result.edge_bindings.values()
+                    for binding in bindings}
+    # Every KG node/edge key must be referenced by some result ...
+    assert set(message.knowledge_graph.nodes) <= result_nodes
+    assert set(message.knowledge_graph.edges) <= result_edges
+    # ... and this KG must be strictly smaller than the one from the prune_kg=f run
+    assert len(message.knowledge_graph.nodes) < len(no_prune_message.knowledge_graph.nodes)
+    assert len(message.knowledge_graph.edges) < len(no_prune_message.knowledge_graph.edges)
+
+def test_warning():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=UMLS:C0040250, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "overlay(action=add_node_pmids, max_num=15)",
+ "resultify(ignore_edge_direction=true)",
+ "filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=3)",
+ "filter_results(action=sort_by_node_attribute, node_attribute=asdfghjkl, direction=a, max_results=3)",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ assert len(message.results) == 3
+
+@pytest.mark.slow
+def test_sort_by_edge_attribute():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:0060680, key=n00)",
+ "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "add_qedge(subject=n01, object=n02, key=e01)",
+ "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J2)",
+ "resultify(ignore_edge_direction=true)",
+ "filter_results(action=sort_by_edge_attribute, edge_attribute=jaccard_index, direction=d, max_results=20, qedge_keys=[J2])",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ #return response, message
+ assert response.status == 'OK'
+ assert len(message.results) == 20
+
+def test_sort_by_node_attribute():
+    query = {"operations": {"actions": [
+            "create_message",
+            "add_qnode(name=UMLS:C0040250, key=n00)",
+            "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+            "add_qedge(subject=n00, object=n01, key=e00)",
+            "expand(edge_key=e00, kp=infores:rtx-kg2)",
+            "overlay(action=add_node_pmids, max_num=15)",
+            "resultify(ignore_edge_direction=true)",
+            "filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=3, qnode_keys=[n01])",
+            "return(message=true, store=false)"
+        ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    assert len(message.results) == 3
+    # TODO: also check that the results are in ascending order and carry the correct values
+
+def test_sort_by_score():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=UMLS:C0040250, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "resultify(ignore_edge_direction=true)",
+ "filter_results(action=sort_by_score, direction=a, max_results=3)",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ assert len(message.results) == 3
+ result_scores = [x.analyses[0].score for x in message.results]
+ assert result_scores == sorted(result_scores)
+ assert max(result_scores) <= 1
+
+@pytest.mark.external
+def test_issue1506():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(ids=MONDO:0005301, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+ "add_qedge(subject=n01, object=n00, key=e00, predicates=biolink:related_to)",
+ "expand(kp=infores:biothings-multiomics-clinical-risk, edge_key=e00)",
+ "overlay(action=compute_ngd, virtual_relation_label=N1, subject_qnode_key=n01, object_qnode_key=n00)",
+ "resultify()",
+ "filter_results(action=sort_by_edge_attribute, edge_attribute=feature_coefficient, direction=descending, max_results=30, prune_kg=true)",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ assert len(message.results) == 30
+
+
+if __name__ == "__main__":
+ pytest.main(['-v'])
diff --git a/code/code-archive/old-arax-tests/test_ARAX_infer.py b/code/code-archive/old-arax-tests/test_ARAX_infer.py
new file mode 100644
index 000000000..1bfdc065c
--- /dev/null
+++ b/code/code-archive/old-arax-tests/test_ARAX_infer.py
@@ -0,0 +1,531 @@
+#!/usr/bin/env python3
+
+# Intended to test ARAX infer
+
+import sys
+import os
+import pytest
+from collections import Counter
+import copy
+import json
+import ast
+from typing import List, Union
+
+import numpy as np
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../../ARAXQuery")
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../ARAXQuery")
+from ARAX_query import ARAXQuery
+from ARAX_response import ARAXResponse
+
+PACKAGE_PARENT = '../../UI/OpenAPI/openapi_server'
+sys.path.append(os.path.normpath(os.path.join(os.getcwd(), PACKAGE_PARENT)))
+from openapi_server.models.message import Message
+
+
+def _do_arax_query(query: dict) -> List[Union[ARAXResponse, Message]]:
+ araxq = ARAXQuery()
+ response = araxq.query(query)
+ if response.status != 'OK':
+ print(response.show(level=response.DEBUG))
+ return [response, response.envelope.message]
+
+
+def _attribute_tester(message, attribute_name: str, attribute_type: str, num_different_values=2):
+    """
+    Tests attributes of a message
+    message: returned from _do_arax_query
+    attribute_name: the attribute name to test (eg. 'jaccard_index')
+    attribute_type: the attribute type (eg. 'EDAM-DATA:1234')
+    num_different_values: the number of distinct values you wish to see have been added as attributes
+    """
+    edges_of_interest = []
+    values = set()
+    for key, edge in message.knowledge_graph.edges.items():
+        assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+        if hasattr(edge, 'edge_attributes'):  # NOTE(review): _virtual_tester reads edge.attributes instead; confirm which name Edge exposes
+            for attr in edge.edge_attributes:
+                if attr.original_attribute_name == attribute_name:
+                    edges_of_interest.append(edge)
+                    assert attr.attribute_type_id == attribute_type
+                    values.add(attr.value)
+    assert len(edges_of_interest) > 0  # fails when no edge carried the requested attribute
+    assert len(values) >= num_different_values
+
+
+def _virtual_tester(message: Message, edge_predicate: str, relation: str, attribute_name: str, attribute_type: str, num_different_values=2):
+    """
+    Tests overlay functions that add virtual edges
+    message: returned from _do_arax_query
+    edge_predicate: the name of the virtual edge (eg. biolink:has_jaccard_index_with)
+    relation: the relation you picked for the virtual_edge_relation (eg. N1)
+    attribute_name: the attribute name to test (eg. 'jaccard_index')
+    attribute_type: the attribute type (eg. 'EDAM-DATA:1234')
+    num_different_values: the number of distinct values you wish to see have been added as attributes
+    """
+    edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])
+    assert edge_predicate in edge_predicates_in_kg
+    edges_of_interest = [x for x in message.knowledge_graph.edges.values() if x.relation == relation]
+    values = set()
+    assert len(edges_of_interest) > 0
+    for edge in edges_of_interest:
+        assert hasattr(edge, 'attributes')  # existence and non-emptiness are checked before the list is iterated
+        assert edge.attributes
+        assert 'primary_knowledge_source' in [attribute.attribute_type_id for attribute in edge.attributes]
+        assert edge.attributes[0].original_attribute_name == attribute_name
+        values.add(edge.attributes[0].value)
+        assert edge.attributes[0].attribute_type_id == attribute_type
+    # make sure at least num_different_values distinct values were added
+    assert len(values) >= num_different_values
+
+
+def test_xdtd_infer_castleman_disease_1():
+ query = {"operations": {"actions": [
+ "create_message",
+ "infer(action=drug_treatment_graph_expansion,disease_curie=MONDO:0015564)",
+ "return(message=true, store=true)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ # return response, message
+ assert response.status == 'OK'
+ assert len(message.query_graph.edges) == 1
+ assert len(message.results) > 0
+
+def test_xdtd_infer_castleman_disease_2():
+ query = {"operations": {"actions": [
+ "create_message",
+ "infer(action=drug_treatment_graph_expansion,disease_curie=MONDO:0015564,n_drugs=2,n_paths=15)",
+ "return(message=true, store=true)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ # return response, message
+ assert response.status == 'OK'
+ assert message.auxiliary_graphs
+ assert len(message.results) > 0
+
+def test_xdtd_infer_ibuprofen_1():
+ query = {"operations": {"actions": [
+ "create_message",
+ "infer(action=drug_treatment_graph_expansion,drug_curie=CHEBI:5855)",
+ "return(message=true, store=true)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ # return response, message
+ assert response.status == 'OK'
+ assert len(message.query_graph.edges) == 1
+ assert len(message.results) > 0
+
+def test_xdtd_infer_ibuprofen_2():
+ query = {"operations": {"actions": [
+ "create_message",
+ "infer(action=drug_treatment_graph_expansion,drug_curie=CHEBI:5855,n_diseases=2,n_paths=15)",
+ "return(message=true, store=true)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ # return response, message
+ assert response.status == 'OK'
+ assert message.auxiliary_graphs
+ assert len(message.results) > 0
+
+
+def test_xdtd_issue2160():
+ query = {
+ "message": {"query_graph":
+ {
+ "edges": {
+ "t_edge": {
+ "attribute_constraints": [],
+ "knowledge_type": "inferred",
+ "object": "on",
+ "predicates": [
+ "biolink:treats"
+ ],
+ "qualifier_constraints": [],
+ "subject": "sn"
+ }
+ },
+ "nodes": {
+ "on": {
+ "categories": [
+ "biolink:Disease"
+ ],
+ "constraints": [],
+ "ids": [
+ "MONDO:0019600"
+ ],
+ },
+ "sn": {
+ "categories": [
+ "biolink:SmallMolecule"
+ ],
+ "constraints": [],
+ "ids": [
+ "PUBCHEM.COMPOUND:23931"
+ ],
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query)
+ # return response, message
+ assert response.status == 'OK'
+
+def test_xdtd_with_qg():
+ query = {
+ "message": {"query_graph": {
+ "nodes": {
+ "disease": {
+ "ids": ["MONDO:0003912"]
+ },
+ "chemical": {
+ "categories": ["biolink:ChemicalEntity"]
+ }
+ },
+ "edges": {
+ "t_edge": {
+ "object": "disease",
+ "subject": "chemical",
+ "predicates": ["biolink:treats"],
+ "knowledge_type": "inferred"
+ }
+ }
+ }
+ },
+ "operations": {"actions": [
+ "infer(action=drug_treatment_graph_expansion, disease_curie=test_xdtd_with_qg, qedge_id=t_edge)",
+ "return(message=true, store=true)"
+ ]}
+ }
+ [response, message] = _do_arax_query(query)
+ # return response, message
+ assert response.status == 'ERROR'
+ #assert len(message.query_graph.edges) > 1
+ #assert len(message.results) > 0
+
+
+def test_xdtd_with_qg2():
+ query = {
+ "message": {"query_graph": {
+ "nodes": {
+ "disease": {
+ "ids": ["MONDO:0015564"]
+ },
+ "chemical": {
+ "categories": ["biolink:ChemicalEntity"]
+ }
+ },
+ "edges": {
+ "t_edge": {
+ "object": "disease",
+ "subject": "chemical",
+ "predicates": ["biolink:treats"],
+ "knowledge_type": "inferred"
+ }
+ }
+ }
+ },
+ "operations": {"actions": [
+ "infer(action=drug_treatment_graph_expansion, disease_curie=MONDO:0015564, qedge_id=t_edge)",
+ "return(message=true, store=true)"
+ ]}
+ }
+ [response, message] = _do_arax_query(query)
+ # return response, message
+ assert response.status == 'OK'
+ assert len(message.query_graph.edges) == 1
+ assert len(message.results) > 0
+
+
+def test_xdtd_with_qg3():
+ query = {
+ "message": {"query_graph": {
+ "nodes": {
+ "disease": {
+ "ids": ["MONDO:0015564"]
+ },
+ "chemical": {
+ "categories": ["biolink:ChemicalEntity"]
+ }
+ },
+ "edges": {
+ "t_edge": {
+ "object": "disease",
+ "subject": "chemical",
+ "predicates": ["biolink:treats"],
+ "knowledge_type": "inferred"
+ }
+ }
+ }
+ },
+ "operations": {"actions": [
+ "infer(action=drug_treatment_graph_expansion, disease_curie=MONDO:0015564, qedge_id=t_edge, n_drugs=10, n_paths=10)",
+ "return(message=true, store=true)"
+ ]}
+ }
+ [response, message] = _do_arax_query(query)
+ # return response, message
+ assert response.status == 'OK'
+ assert message.auxiliary_graphs
+ assert len(message.results) > 0
+
+
+@pytest.mark.slow
+def test_xdtd_with_qg4():
+ query = {
+ "message": {"query_graph": {
+ "nodes": {
+ "disease": {
+ "categories": ["biolink:Disease"]
+ },
+ "chemical": {
+ "ids": ["UNII:4F4X42SYQ6"]
+ }
+ },
+ "edges": {
+ "t_edge": {
+ "object": "disease",
+ "subject": "chemical",
+ "predicates": ["biolink:treats"],
+ "knowledge_type": "inferred"
+ }
+ }
+ }
+ },
+ "operations": {"actions": [
+ "infer(action=drug_treatment_graph_expansion, drug_curie=UNII:4F4X42SYQ6, qedge_id=t_edge)",
+ "return(message=true, store=true)"
+ ]}
+ }
+ [response, message] = _do_arax_query(query)
+ # return response, message
+ assert response.status == 'OK'
+ assert message.auxiliary_graphs
+ assert len(message.results) > 0
+
+
+def test_xdtd_with_only_qg():
+ query = {
+ "message": {"query_graph": {
+ "nodes": {
+ "disease": {
+ "ids": ["MONDO:0015564"]
+ },
+ "chemical": {
+ "categories": ["biolink:ChemicalEntity"]
+ }
+ },
+ "edges": {
+ "t_edge": {
+ "object": "disease",
+ "subject": "chemical",
+ "predicates": ["biolink:treats"],
+ "knowledge_type": "inferred"
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query)
+ # return response, message
+ assert response.status == 'OK'
+ assert len(message.query_graph.edges) == 1
+ assert len(message.results) > 0
+
+@pytest.mark.slow
+def test_xcrg_infer_bomeol():
+ query = {"operations": {"actions": [
+ "create_message",
+ "infer(action=chemical_gene_regulation_graph_expansion, subject_curie=CHEMBL.COMPOUND:CHEMBL1097205, regulation_type=increase, threshold=0.6, path_len=2, n_result_curies=1, n_paths=1)",
+ "return(message=true, store=true)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ # return response, message
+ assert response.status == 'OK'
+ assert len(message.query_graph.edges) >= 1
+ assert len(message.results) > 0
+ creative_mode_edges = [x for x in list(message.knowledge_graph.edges.keys()) if 'creative_CRG_prediction' in x]
+ if len(creative_mode_edges) != 0:
+ edge_key = creative_mode_edges[0]
+ edge_result = message.knowledge_graph.edges[edge_key]
+ assert edge_result.predicate in ['biolink:regulates', 'biolink:affects']
+
+@pytest.mark.slow
+def test_xcrg_with_qg1():
+ query = {
+ "message": {"query_graph": {
+ "nodes": {
+ "gene": {
+ "ids": ["UniProtKB:P48736"]
+ },
+ "chemical": {
+ "categories": ['biolink:ChemicalEntity', 'biolink:ChemicalMixture','biolink:SmallMolecule']
+ }
+ },
+ "edges": {
+ "r_edge": {
+ "object": "gene",
+ "subject": "chemical",
+ "predicates": ['biolink:regulates', 'biolink:affects'],
+ "knowledge_type": "inferred",
+ "qualifier_constraints": [
+ {
+ "qualifier_set": [
+ {
+ "qualifier_type_id": "biolink:object_direction_qualifier",
+ "qualifier_value": "increased"
+ }
+ ]
+ }
+ ]
+ }
+ }
+ }
+ },
+ "operations": {"actions": [
+ "infer(action=chemical_gene_regulation_graph_expansion,object_qnode_id=gene,qedge_id=r_edge,n_result_curies=1, n_paths=1)",
+ "return(message=true, store=true)"
+ ]}
+ }
+ [response, message] = _do_arax_query(query)
+ # return response, message
+ assert response.status == 'OK'
+ assert len(message.query_graph.edges) >= 1
+ assert len(message.results) > 0
+ creative_mode_edges = [x for x in list(message.knowledge_graph.edges.keys()) if 'creative_CRG_prediction' in x]
+ if len(creative_mode_edges) != 0:
+ edge_key = creative_mode_edges[0]
+ edge_result = message.knowledge_graph.edges[edge_key]
+ assert edge_result.predicate in ['biolink:regulates', 'biolink:affects']
+
+
+@pytest.mark.slow
+def test_xcrg_with_qg2():
+ query = {
+ "message": {"query_graph": {
+ "nodes": {
+ "chemical": {
+ "ids": ["CHEMBL.COMPOUND:CHEMBL1097205"]
+ },
+ "gene": {
+ "categories": ["biolink:Gene","biolink:Protein"]
+ },
+
+ },
+ "edges": {
+ "r_edge": {
+ "object": "gene",
+ "subject": "chemical",
+ "predicates": ['biolink:affects'],
+ "knowledge_type": "inferred",
+ "qualifier_constraints": [
+ {
+ "qualifier_set": [
+ {
+ "qualifier_type_id": "biolink:object_direction_qualifier",
+ "qualifier_value": "decreased"
+ }
+ ]
+ }
+ ]
+ }
+ }
+ }
+ },
+ "operations": {"actions": [
+ "infer(action=chemical_gene_regulation_graph_expansion,subject_qnode_id=chemical,qedge_id=r_edge,n_result_curies=1, n_paths=1)",
+ "return(message=true, store=true)"
+ ]}
+ }
+ [response, message] = _do_arax_query(query)
+ # return response, message
+ assert response.status == 'OK'
+ assert len(message.query_graph.edges) >= 1
+ assert len(message.results) > 0
+ creative_mode_edges = [x for x in list(message.knowledge_graph.edges.keys()) if 'creative_CRG_prediction' in x]
+ if len(creative_mode_edges) != 0:
+ edge_key = creative_mode_edges[0]
+ edge_result = message.knowledge_graph.edges[edge_key]
+ assert edge_result.predicate in ['biolink:regulates', 'biolink:affects']
+
+@pytest.mark.slow
+def test_xcrg_with_only_qg():
+ query = {
+ "message": {"query_graph": {
+ "edges": {
+ "t_edge": {
+ "knowledge_type": "inferred",
+ "object": "ON",
+ "predicates": [
+ "biolink:affects"
+ ],
+ "qualifier_constraints": [
+ {
+ "qualifier_set": [
+ {
+ "qualifier_type_id": "biolink:object_aspect_qualifier",
+ "qualifier_value": "activity_or_abundance"
+ },
+ {
+ "qualifier_type_id": "biolink:object_direction_qualifier",
+ "qualifier_value": "decreased"
+ }
+ ]
+ }
+ ],
+ "subject": "SN"
+ }
+ },
+ "nodes": {
+ "ON": {
+ "categories": [
+ "biolink:Gene",
+ "biolink:Protein"
+ ],
+ "ids": [
+ "NCBIGene:3043"
+ ]
+ },
+ "SN": {
+ "categories": [
+ "biolink:ChemicalEntity"
+ ]
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query)
+ # return response, message
+ assert response.status == 'OK'
+ assert len(message.query_graph.edges) >= 1
+ assert len(message.results) > 0
+ creative_mode_edges = [x for x in list(message.knowledge_graph.edges.keys()) if 'creative_CRG_prediction' in x]
+ if len(creative_mode_edges) != 0:
+ edge_key = creative_mode_edges[0]
+ edge_result = message.knowledge_graph.edges[edge_key]
+ assert edge_result.predicate in ['biolink:regulates', 'biolink:affects']
+
+@pytest.mark.slow
+def test_xcrg_infer_dsl():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=acetaminophen, key=n0)",
+ "add_qnode(categories=biolink:Gene, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "infer(action=chemical_gene_regulation_graph_expansion, subject_qnode_id=n0, qedge_id=e0, regulation_type=increase, n_result_curies=1, n_paths=1)",
+ "overlay(action=compute_ngd, virtual_relation_label=N1, subject_qnode_key=n0, object_qnode_key=n1)",
+ "resultify()",
+ "filter_results(action=limit_number_of_results, max_results=30)",
+ "return(message=true, store=true)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ # return response, message
+ assert response.status == 'OK'
+ assert len(message.query_graph.edges) >= 1
+ assert len(message.results) > 0
+ creative_mode_edges = [x for x in list(message.knowledge_graph.edges.keys()) if 'creative_CRG_prediction' in x]
+ if len(creative_mode_edges) != 0:
+ edge_key = creative_mode_edges[0]
+ edge_result = message.knowledge_graph.edges[edge_key]
+ assert edge_result.predicate in ['biolink:regulates', 'biolink:affects']
diff --git a/code/code-archive/old-arax-tests/test_ARAX_json_queries.py b/code/code-archive/old-arax-tests/test_ARAX_json_queries.py
new file mode 100644
index 000000000..fa0a72dae
--- /dev/null
+++ b/code/code-archive/old-arax-tests/test_ARAX_json_queries.py
@@ -0,0 +1,318 @@
+#!/usr/bin/env python3
+# Tests that run ARAX JSON queries, exercising things like the query graph interpreter
+import sys
+import os
+import pytest
+from typing import List, Dict, Tuple, Optional
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../ARAXQuery/")
+from ARAX_query import ARAXQuery
+from ARAX_response import ARAXResponse
+import Expand.expand_utilities as eu
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../../UI/OpenAPI/python-flask-server/")
+from openapi_server.models.edge import Edge
+from openapi_server.models.node import Node
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+from test_ARAX_expand import check_property_format, check_for_orphans, print_nodes, print_edges, print_counts_by_qgid
+
+
+def _run_query_and_do_standard_testing(actions: Optional[List[str]] = None, json_query: Optional[dict] = None,
+ kg_should_be_incomplete=False, debug=False, should_throw_error=False,
+ error_code: Optional[str] = None, timeout: Optional[int] = None) -> Tuple[Dict[str, Dict[str, Node]], Dict[str, Dict[str, Edge]], ARAXResponse]:
+ # Run the query
+ araxq = ARAXQuery()
+ assert actions or json_query # Must provide some sort of query to run
+ # Stick the actions in if they are provided
+ if actions:
+ query_object = {"operations": {"actions": actions}}
+ # Otherwise, if there is no "message" key, treat json_query as a bare query_graph and wrap it
+ elif "message" not in json_query:
+ query_object = {"message": {"query_graph": json_query}}
+ else:
+ query_object = json_query
+ if timeout:
+ query_object["query_options"] = {"kp_timeout": timeout}
+ response = araxq.query(query_object)
+ message = araxq.message
+ if response.status != 'OK':
+ print(response.show(level=ARAXResponse.DEBUG))
+ assert response.status == 'OK' or should_throw_error
+ if should_throw_error and error_code:
+ assert response.error_code == error_code
+
+ # Convert output knowledge graph to a dictionary format for faster processing (organized by QG IDs)
+ dict_kg = eu.convert_standard_kg_to_qg_organized_kg(message.knowledge_graph)
+ nodes_by_qg_id = dict_kg.nodes_by_qg_id
+ edges_by_qg_id = dict_kg.edges_by_qg_id
+
+ # Optionally print more detail
+ if debug:
+ print_nodes(nodes_by_qg_id)
+ print_edges(edges_by_qg_id)
+ print_counts_by_qgid(nodes_by_qg_id, edges_by_qg_id)
+ print(response.show(level=ARAXResponse.DEBUG))
+
+ # Run standard testing (applies to every test case)
+ assert eu.qg_is_fulfilled(message.query_graph, dict_kg, enforce_required_only=True) or kg_should_be_incomplete or should_throw_error
+ check_for_orphans(nodes_by_qg_id, edges_by_qg_id)
+ check_property_format(nodes_by_qg_id, edges_by_qg_id)
+
+ return nodes_by_qg_id, edges_by_qg_id, response
+
+
+def test_query_by_query_graph_2():
+ query = { "message": { "query_graph": { "edges": {
+ "qg2": { "subject": "qg1", "object": "qg0", "predicates": ["biolink:physically_interacts_with"] }
+ },
+ "nodes": {
+ "qg0": { "name": "acetaminophen", "ids": ["CHEMBL.COMPOUND:CHEMBL112"], "categories": ["biolink:ChemicalEntity"] },
+ "qg1": { "name": None, "ids": None, "categories": ["biolink:Protein"] }
+ } } } }
+ #araxq = ARAXQuery()
+ #araxq.query(query)
+ #response = araxq.response
+ #print(response.show())
+ nodes_by_qg_id, edges_by_qg_id, response = _run_query_and_do_standard_testing(json_query=query)
+ #assert response.status == 'OK'
+ #message = response.envelope.message
+ #assert len(message.results) >= 20
+ #assert response.envelope.schema_version == '1.2.0'
+
+def test_ngd_added():
+ """
+ Intended to test that the NGD edge is added by the QGI; those assertions are currently commented out below
+ """
+ query = {
+ "edges": {
+ "e00": {
+ "subject": "n00",
+ "object": "n01",
+ "predicates": ["biolink:physically_interacts_with"]
+ }
+ },
+ "nodes": {
+ "n00": {
+ "ids": ["CHEMBL.COMPOUND:CHEMBL112"]
+ },
+ "n01": {
+ "categories": ["biolink:Protein"]
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id, response = _run_query_and_do_standard_testing(json_query=query)
+ qg = response.envelope.message.query_graph
+ # assert 'N1' in qg.edges
+ # assert 'biolink:occurs_together_in_literature_with' in qg.edges['N1'].predicates
+
+
+@pytest.mark.slow
+def test_drug_disease_query():
+ query = {
+ "edges": {
+ "e00": {
+ "subject": "n00",
+ "object": "n01"
+ }
+ },
+ "nodes": {
+ "n00": {
+ "ids": ["MONDO:0021783"],
+ "categories": ["biolink:Disease"]
+ },
+ "n01": {
+ "categories": ["biolink:ChemicalEntity"]
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id, response = _run_query_and_do_standard_testing(json_query=query)
+ qg = response.envelope.message.query_graph
+
+
+def test_workflow1():
+ """
+ Test a fill (with one KP), bind, score workflow
+ """
+ query = {
+ "workflow": [
+ {
+ "id": "fill",
+ "parameters": {
+ "allowlist": [
+ "infores:rtx-kg2"
+ ],
+ "qedge_keys": [
+ "e00"
+ ]
+ }
+ },
+ {
+ "id": "bind"
+ },
+ {
+ "id": "score"
+ }
+ ],
+ "message": {
+ "query_graph": {
+ "edges": {
+ "e00": {
+ "subject": "n00",
+ "object": "n01",
+ "predicates": [
+ "biolink:physically_interacts_with"
+ ]
+ }
+ },
+ "nodes": {
+ "n00": {
+ "ids": [
+ "CHEBI:46195"
+ ]
+ },
+ "n01": {
+ "categories": [
+ "biolink:Protein"
+ ]
+ }
+ }
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id, response = _run_query_and_do_standard_testing(json_query=query)
+ essences = [x.to_dict()['essence'].upper() for x in response.envelope.message.results]
+ assert 'VANILLOID RECEPTOR' in essences
+
+@pytest.mark.slow
+def test_workflow2():
+ """
+ Test fill steps given allowlist and qedge_keys together, qedge_keys alone, and allowlist alone
+ """
+ query = {
+ "workflow": [
+ {
+ "id": "fill",
+ "parameters": {
+ "allowlist": [
+ "infores:rtx-kg2",
+ "infores:biothings-explorer"
+ ],
+ "qedge_keys": [
+ "e0"
+ ]
+ }
+ },
+ {
+ "id": "fill",
+ "parameters": {
+ "qedge_keys": [
+ "e0"
+ ]
+ }
+ },
+ {
+ "id": "fill",
+ "parameters": {
+ "allowlist": [
+ "infores:rtx-kg2",
+ "infores:biothings-explorer"
+ ]
+ }
+ },
+ {
+ "id": "fill",
+ "parameters": {
+ "allowlist": [
+ "infores:connections-hypothesis"
+ ],
+ "qedge_keys": [
+ "e1",
+ "e2"
+ ]
+ }
+ },
+ {
+ "id": "fill",
+ "parameters": {
+ "allowlist": [
+ "infores:rtx-kg2",
+ "infores:biothings-explorer"
+ ],
+ "qedge_keys": [
+ "e2"
+ ]
+ }
+ },
+ {
+ "id": "bind"
+ },
+ {
+ "id": "complete_results"
+ },
+ {
+ "id": "score"
+ }
+ ],
+ "message": {
+ "query_graph": {
+ "nodes": {
+ "n0": {
+ "categories": [
+ "biolink:Disease"
+ ],
+ "ids": [
+ "MONDO:0009061"
+ ]
+ },
+ "n1": {
+ "categories": [
+ "biolink:GrossAnatomicalStructure"
+ ]
+ },
+ "n2": {
+ "categories": [
+ "biolink:Gene"
+ ]
+ },
+ "n3": {
+ "categories": [
+ "biolink:Drug",
+ "biolink:SmallMolecule"
+ ]
+ }
+ },
+ "edges": {
+ "e0": {
+ "subject": "n0",
+ "object": "n1",
+ "predicates": [
+ "biolink:related_to"
+ ]
+ },
+ "e1": {
+ "subject": "n1",
+ "object": "n2",
+ "predicates": [
+ "biolink:expresses"
+ ]
+ },
+ "e2": {
+ "subject": "n3",
+ "object": "n2",
+ "predicates": [
+ "biolink:affects"
+ ]
+ }
+ }
+ }
+ }
+ }
+ nodes_by_qg_id, edges_by_qg_id, response = _run_query_and_do_standard_testing(json_query=query)
+ assert response.status == 'OK'
+ essences = [x.to_dict()['essence'] for x in response.envelope.message.results]
+
+if __name__ == "__main__":
+ pytest.main(['-v', 'test_ARAX_json_queries.py'])
+
+
+
+
diff --git a/code/code-archive/old-arax-tests/test_ARAX_messenger.py b/code/code-archive/old-arax-tests/test_ARAX_messenger.py
new file mode 100644
index 000000000..5a6138938
--- /dev/null
+++ b/code/code-archive/old-arax-tests/test_ARAX_messenger.py
@@ -0,0 +1,242 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import pytest
+
+import copy
+import json
+import ast
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../ARAXQuery")
+from ARAX_messenger import ARAXMessenger
+from ARAX_response import ARAXResponse
+
+
+def test_create_message_basic():
+ response = ARAXResponse()
+ messenger = ARAXMessenger()
+ messenger.create_envelope(response)
+ assert response.status == 'OK'
+ message = response.envelope.message
+ assert response.envelope.type == 'translator_reasoner_response'
+ assert response.envelope.schema_version == '1.6.0'
+
+
+def test_create_message_node_edge_types():
+ response = ARAXResponse()
+ messenger = ARAXMessenger()
+ messenger.create_envelope(response)
+ assert response.status == 'OK'
+ message = response.envelope.message
+ assert isinstance(message.knowledge_graph.nodes, dict)
+ assert isinstance(message.knowledge_graph.edges, dict)
+ assert isinstance(message.query_graph.nodes, dict)
+ assert isinstance(message.query_graph.edges, dict)
+
+
+def test_add_qnode_basic():
+ response = ARAXResponse()
+ messenger = ARAXMessenger()
+ messenger.create_envelope(response)
+ assert response.status == 'OK'
+ message = response.envelope.message
+ messenger.add_qnode(response,{})
+ assert response.status == 'OK'
+ assert isinstance(message.query_graph.nodes, dict)
+ assert len(message.query_graph.nodes) == 1
+ assert message.query_graph.nodes['n00'].ids == None
+
+
+def test_add_qnode_curie_scalar():
+ response = ARAXResponse()
+ messenger = ARAXMessenger()
+ messenger.create_envelope(response)
+ assert response.status == 'OK'
+ message = response.envelope.message
+ messenger.add_qnode(response,{ 'ids': ['UniProtKB:P14136'] })
+ assert response.status == 'OK'
+ assert isinstance(message.query_graph.nodes, dict)
+ assert len(message.query_graph.nodes) == 1
+ assert len(message.query_graph.nodes['n00'].ids) == 1
+
+
+def test_add_qnode_curie_list():
+ response = ARAXResponse()
+ messenger = ARAXMessenger()
+ messenger.create_envelope(response)
+ assert response.status == 'OK'
+ message = response.envelope.message
+ messenger.add_qnode(response,{ 'ids': ['UniProtKB:P14136','UniProtKB:P35579'] })
+ assert response.status == 'OK'
+ assert isinstance(message.query_graph.nodes, dict)
+ assert len(message.query_graph.nodes) == 1
+ assert len(message.query_graph.nodes['n00'].ids) == 2
+
+
+def test_add_qnode_name():
+ response = ARAXResponse()
+ messenger = ARAXMessenger()
+ messenger.create_envelope(response)
+ assert response.status == 'OK'
+ message = response.envelope.message
+ messenger.add_qnode(response,{ 'name': 'acetaminophen' })
+ assert response.status == 'OK'
+ assert isinstance(message.query_graph.nodes, dict)
+ assert len(message.query_graph.nodes) == 1
+ assert message.query_graph.nodes['n00'].ids[0] == 'CHEBI:46195'
+
+
+def test_add_qnode_type():
+ response = ARAXResponse()
+ messenger = ARAXMessenger()
+ messenger.create_envelope(response)
+ assert response.status == 'OK'
+ message = response.envelope.message
+ messenger.add_qnode(response,{ 'categories': ['biolink:Protein'] })
+ assert response.status == 'OK'
+ assert isinstance(message.query_graph.nodes, dict)
+ assert len(message.query_graph.nodes) == 1
+ assert message.query_graph.nodes['n00'].categories[0] == 'biolink:Protein'
+
+
+def test_add_qnode_group_id_is_set_false():
+ response = ARAXResponse()
+ messenger = ARAXMessenger()
+ messenger.create_envelope(response)
+ assert response.status == 'OK'
+ message = response.envelope.message
+ messenger.add_qnode(response,{ 'categories': ['biolink:Protein'], 'is_set' : 'false', 'option_group_id' : '0' })
+ assert response.status == 'ERROR'
+ assert isinstance(message.query_graph.nodes, dict)
+ assert len(message.query_graph.nodes) == 0
+ assert response.error_code == 'InputMismatch'
+
+
+def test_add_qnode_bad_name():
+ response = ARAXResponse()
+ messenger = ARAXMessenger()
+ messenger.create_envelope(response)
+ assert response.status == 'OK'
+ message = response.envelope.message
+ messenger.add_qnode(response,{ 'name': 'Big Bird' })
+ assert response.status == 'ERROR'
+ assert isinstance(message.query_graph.nodes, dict)
+ assert len(message.query_graph.nodes) == 0
+ assert response.error_code == 'UnresolvableNodeName'
+
+
+def test_add_qnode_duplicate_key():
+ response = ARAXResponse()
+ messenger = ARAXMessenger()
+ messenger.create_envelope(response)
+ assert response.status == 'OK'
+ message = response.envelope.message
+ messenger.add_qnode(response, { 'key': 'n00', 'ids': [ 'CHEMBL.COMPOUND:CHEMBL112' ] } )
+ assert response.status == 'OK'
+ messenger.add_qnode(response, { 'key': 'n00', 'ids': [ 'CHEBI:46195' ] } )
+ print(json.dumps(ast.literal_eval(repr(message.query_graph.nodes)), sort_keys=True, indent=2))
+ assert response.status == 'ERROR'
+ assert isinstance(message.query_graph.nodes, dict)
+ assert len(message.query_graph.nodes) == 1
+ assert response.error_code == 'QNodeDuplicateKey'
+
+
+def test_add_qedge_duplicate_key():
+ response = ARAXResponse()
+ messenger = ARAXMessenger()
+ messenger.create_envelope(response)
+ assert response.status == 'OK'
+ message = response.envelope.message
+ messenger.add_qnode(response, { 'key': 'n00', 'ids': [ 'CHEMBL.COMPOUND:CHEMBL112' ] } )
+ messenger.add_qnode(response, { 'key': 'n01', 'categories': [ 'biolink:Protein' ] } )
+ messenger.add_qedge(response, { 'key': 'e00', 'subject': 'n00', 'object': 'n01' } )
+ assert response.status == 'OK'
+ messenger.add_qedge(response, { 'key': 'e00', 'subject': 'n00', 'object': 'n01', 'predicates': [ 'biolink:treats' ] } )
+ print(json.dumps(ast.literal_eval(repr(message.query_graph.edges)), sort_keys=True, indent=2))
+ assert response.status == 'ERROR'
+ assert isinstance(message.query_graph.nodes, dict)
+ assert len(message.query_graph.edges) == 1
+ assert response.error_code == 'QEdgeDuplicateKey'
+
+
+def test_add_qnode_bad_parameters():
+ response = ARAXResponse()
+ messenger = ARAXMessenger()
+ messenger.create_envelope(response)
+ assert response.status == 'OK'
+ bad_parameters_list = [
+ { 'parameters': [ 'ids', 'PICKLES:123' ], 'error_code': 'ParametersNotDict' },
+ { 'parameters': { 'pickles': 'on the side' }, 'error_code': 'UnknownParameter' },
+ { 'parameters': { 'ids': 'n2', 'category': 'biolink:Disease' }, 'error_code': 'UnknownParameter' },
+ ]
+ template_response = copy.deepcopy(response)
+ for bad_parameters in bad_parameters_list:
+ response = copy.deepcopy(template_response)
+ message = response.envelope.message
+ print(bad_parameters)
+ messenger.add_qnode(response, bad_parameters['parameters'])
+ assert response.status == 'ERROR'
+ assert len(message.query_graph.nodes) == 0
+ assert response.error_code == bad_parameters['error_code']
+
+
+def test_add_qedge_multitest():
+ # Set up a message with two nodes
+ response = ARAXResponse()
+ messenger = ARAXMessenger()
+ messenger.create_envelope(response)
+ assert response.status == 'OK'
+ message = response.envelope.message
+ messenger.add_qnode(response,{ 'name': 'acetaminophen' })
+ assert response.status == 'OK'
+ messenger.add_qnode(response,{ 'categories': ['biolink:Protein'] })
+ assert response.status == 'OK'
+
+ # Set up a list of parameters to feed to add_qedge() and what the result should be
+ parameters_list = [
+ { 'status': 'ERROR', 'parameters': [ 'subject', 'n00' ], 'error_code': 'ParametersNotDict' },
+ { 'status': 'OK', 'parameters': { 'subject': 'n00', 'object': 'n01' }, 'error_code': 'OK' },
+ { 'status': 'OK', 'parameters': { 'subject': 'n00', 'object': 'n01', 'key': 'e99' }, 'error_code': 'OK' },
+ { 'status': 'OK', 'parameters': { 'subject': 'n00', 'object': 'n01', 'key': 'e99', 'predicates': ['biolink:physically_interacts_with'] }, 'error_code': 'OK' },
+ { 'status': 'ERROR', 'parameters': { 'subject': 'n00' }, 'error_code': 'MissingTargetKey' },
+ { 'status': 'ERROR', 'parameters': { 'object': 'n00' }, 'error_code': 'MissingSourceKey' },
+ { 'status': 'ERROR', 'parameters': { 'subject': 'n99', 'object': 'n01' }, 'error_code': 'UnknownSourceKey' },
+ { 'status': 'ERROR', 'parameters': { 'subject': 'n00', 'object': 'n99' }, 'error_code': 'UnknownTargetKey' },
+ { 'status': 'ERROR', 'parameters': { 'pickles': 'on the side' }, 'error_code': 'UnknownParameter' },
+ ]
+
+ # Loop over all the parameter sets and try to run each one
+ template_response = copy.deepcopy(response)
+ for parameters in parameters_list:
+ response = copy.deepcopy(template_response)
+ message = response.envelope.message
+ print(parameters)
+ messenger.add_qedge(response, parameters['parameters'])
+ assert response.status == parameters['status']
+ if parameters['status'] == 'OK':
+ assert len(message.query_graph.edges) == 1
+ continue
+ assert len(message.query_graph.edges) == 0
+ assert response.error_code == parameters['error_code']
+
+def test_add_qpath():
+ response = ARAXResponse()
+ messenger = ARAXMessenger()
+ messenger.create_envelope(response)
+ assert response.status == 'OK'
+ message = response.envelope.message
+ messenger.add_qnode(response, { 'key': 'n00', 'ids': [ 'CHEMBL.COMPOUND:CHEMBL112' ] } )
+ messenger.add_qnode(response, { 'key': 'n01', 'ids': [ 'MONDO:0007739' ] } )
+ messenger.add_qpath(response, { 'subject': 'n00', 'object': 'n01' } )
+ assert response.status == 'OK'
+ messenger.add_qnode(response, { 'key': 'n02', 'ids': [ 'MONDO:0007740' ] } )
+ messenger.add_qpath(response, { 'subject': 'n00', 'object': 'n02' } )
+ assert response.status == 'OK'
+ assert len(message.query_graph.paths) == 2
+ messenger.add_qpath(response, { 'key': 'p01', 'subject': 'n00', 'object': 'n02' } )
+ assert response.status == 'ERROR'
+ assert response.error_code == 'QPathDuplicateKey'
+ #print(json.dumps(ast.literal_eval(repr(message.query_graph)), sort_keys=True, indent=2))
+
+if __name__ == "__main__": pytest.main(['-v'])
diff --git a/code/code-archive/old-arax-tests/test_ARAX_overlay.py b/code/code-archive/old-arax-tests/test_ARAX_overlay.py
new file mode 100644
index 000000000..51a642b8e
--- /dev/null
+++ b/code/code-archive/old-arax-tests/test_ARAX_overlay.py
@@ -0,0 +1,803 @@
+#!/usr/bin/env python3
+
+# Usage:
+# run all: pytest -v test_ARAX_overlay.py
+# run just certain tests: pytest -v test_ARAX_overlay.py -k test_jaccard
+
+import sys
+import os
+import pytest
+from collections import Counter
+import copy
+import json
+import ast
+from typing import List, Union
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../ARAXQuery")
+from ARAX_query import ARAXQuery
+from ARAX_response import ARAXResponse
+
+PACKAGE_PARENT = '../../UI/OpenAPI/python-flask-server'
+sys.path.append(os.path.normpath(os.path.join(os.getcwd(), PACKAGE_PARENT)))
+from openapi_server.models.edge import Edge
+from openapi_server.models.node import Node
+from openapi_server.models.q_edge import QEdge
+from openapi_server.models.q_node import QNode
+from openapi_server.models.query_graph import QueryGraph
+from openapi_server.models.knowledge_graph import KnowledgeGraph
+from openapi_server.models.node_binding import NodeBinding
+from openapi_server.models.edge_binding import EdgeBinding
+from openapi_server.models.result import Result
+from openapi_server.models.message import Message
+
+
+def _do_arax_query(query: dict) -> List[Union[ARAXResponse, Message]]:
+ araxq = ARAXQuery()
+ response = araxq.query(query)
+ if response.status != 'OK':
+ print(response.show(level=response.DEBUG))
+ #return [response, araxq.message]
+ return [response, response.envelope.message]
+
+
+def _attribute_tester(message, attribute_name: str, attribute_type: str, num_different_values=2, num_edges_of_interest=1):
+ """
+ Tests attributes of a message
+ message: returned from _do_arax_query
+ attribute_name: the attribute name to test (eg. 'jaccard_index')
+ attribute_type: the attribute type (eg. 'EDAM-DATA:1234')
+ num_different_values: the number of distinct values you wish to see have been added as attributes
+ num_edges_of_interest: the minimum number of edges in the KG you wish to see have the attribute of interest
+ """
+ edges_of_interest = []
+ values = set()
+ for edge in message.knowledge_graph.edges.values():
+ assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+ if hasattr(edge, 'attributes') and edge.attributes:
+ for attr in edge.attributes:
+ if attr.original_attribute_name == attribute_name:
+ edges_of_interest.append(edge)
+ assert attr.attribute_type_id == attribute_type
+ values.add(attr.value)
+ assert len(edges_of_interest) >= num_edges_of_interest
+ if edges_of_interest:
+ assert len(values) >= num_different_values
+
+
+def _virtual_tester(message: Message, edge_predicate: str, relation: str, attribute_name: str, attribute_type: str,
+                    num_different_values=2, num_edges_of_interest=1):
+    """
+    Tests overlay functions that add virtual edges
+    message: returned from _do_arax_query
+    edge_predicate: the name of the virtual edge (eg. biolink:has_jaccard_index_with)
+    relation: the relation you picked for the virtual_edge_relation (eg. N1)
+    attribute_name: the attribute name to test (eg. 'jaccard_index')
+    attribute_type: the attribute type (eg. 'EDAM-DATA:1234')
+    num_different_values: the number of distinct values you wish to see have been added as attributes
+    num_edges_of_interest: the minimum number of virtual edges you wish to see have been added to the KG
+    """
+    edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])
+    assert edge_predicate in edge_predicates_in_kg
+    edges_of_interest = []
+    for edge in message.knowledge_graph.edges.values():
+        assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+        add_edge = False
+        for attribute in edge.attributes or []:  # attributes may be None; treat that as "no attributes"
+            if attribute.original_attribute_name == "virtual_relation_label":
+                if attribute.value == relation:
+                    add_edge = True
+        if add_edge:
+            edges_of_interest.append(edge)
+    # only edges labelled with the requested virtual relation are inspected below
+    assert len(edges_of_interest) >= num_edges_of_interest
+    if edges_of_interest:
+        values = set()
+        for edge in edges_of_interest:
+            assert hasattr(edge, 'attributes')
+            assert edge.attributes
+            assert edge.attributes[0].original_attribute_name == attribute_name
+            values.add(edge.attributes[0].value)
+            assert edge.attributes[0].attribute_type_id == attribute_type
+        # make sure enough distinct values were added
+        assert len(values) >= num_different_values
+
+
+def test_jaccard():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:1947, key=n00)",
+ "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:physically_interacts_with)",
+ "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+ "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
+ "resultify(ignore_edge_direction=true, debug=true)",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])
+ assert 'biolink:has_jaccard_index_with' in edge_predicates_in_kg
+ jaccard_edges = []
+ for edge in message.knowledge_graph.edges.values():
+ assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+ add_edge = False
+ for attribute in edge.attributes:
+ if attribute.original_attribute_name == "virtual_relation_label":
+ if attribute.value == "J1":
+ add_edge = True
+ if add_edge:
+ jaccard_edges.append(edge)
+ assert len(jaccard_edges) > 0
+ for edge in jaccard_edges:
+ assert hasattr(edge, 'attributes')
+ assert edge.attributes
+ assert edge.attributes[0].original_attribute_name == 'jaccard_index'
+ assert edge.attributes[0].value >= 0
+
+
+def test_add_node_pmids():
+    """Check that overlay(action=add_node_pmids) attaches a list-valued 'pubmed_ids' attribute to KG nodes."""
+    query = {"operations": {"actions": [
+        "create_message",
+        "add_qnode(name=MONDO:0018077, key=n00)",
+        "add_qnode(categories=biolink:ChemicalEntity, is_set=true, key=n01)",
+        "add_qedge(subject=n00, object=n01, key=e00)",
+        "expand(edge_key=e00, kp=infores:rtx-kg2)",
+        "overlay(action=add_node_pmids, max_num=15)",
+        "return(message=true, store=false)"
+    ]}}
+    [response, message] = _do_arax_query(query)
+    print(response.show())
+    assert response.status == 'OK'
+    # keep only nodes that really carry attributes (a bare hasattr() check also lets attributes=None through)
+    nodes_with_attributes = [x for x in message.knowledge_graph.nodes.values() if getattr(x, 'attributes', None)]
+    assert len(nodes_with_attributes) > 0
+    # check if pmids were added
+    nodes_with_pmids = []
+    for node in nodes_with_attributes:
+        for attr in node.attributes:
+            if attr.original_attribute_name == 'pubmed_ids':
+                nodes_with_pmids.append(node)
+    assert len(nodes_with_pmids) > 0
+    # check types
+    for node in nodes_with_pmids:
+        for attr in node.attributes:
+            if attr.original_attribute_name == "pubmed_ids":
+                assert attr.attribute_type_id == 'EDAM-DATA:0971'
+                assert isinstance(attr.value, list)
+
+
+def test_compute_ngd_virtual():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:384, key=n00)",
+ "add_qnode(categories=biolink:ChemicalEntity, is_set=true, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "overlay(action=compute_ngd, subject_qnode_key=n00, object_qnode_key=n01, virtual_relation_label=N1)",
+ "resultify(ignore_edge_direction=true, debug=true)",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])
+ assert 'biolink:occurs_together_in_literature_with' in edge_predicates_in_kg
+ ngd_edges = []
+ for edge in message.knowledge_graph.edges.values():
+ add_edge = False
+ assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+ for attribute in edge.attributes:
+ if attribute.original_attribute_name == "virtual_relation_label":
+ if attribute.value == "N1":
+ add_edge = True
+ if add_edge:
+ ngd_edges.append(edge)
+ assert len(ngd_edges) > 0
+ for edge in ngd_edges:
+ assert hasattr(edge, 'attributes')
+ assert edge.attributes
+ attribute_names = {attribute.original_attribute_name: attribute.value for attribute in edge.attributes}
+ if 'publications' in attribute_names:
+ assert len(attribute_names["publications"]) <= 30
+ assert edge.attributes[0].original_attribute_name == 'normalized_google_distance'
+ assert float(edge.attributes[0].value) >= 0
+
+
+def test_compute_ngd_attribute():
+    """Check that overlay(action=compute_ngd) adds a 'normalized_google_distance' attribute to KG edges."""
+    query = {"operations": {"actions": [
+        "create_message",
+        "add_qnode(name=DOID:384, key=n00)",
+        "add_qnode(categories=biolink:ChemicalEntity, is_set=true, key=n01)",
+        "add_qedge(subject=n00, object=n01, key=e00)",
+        "expand(edge_key=e00, kp=infores:rtx-kg2)",
+        "overlay(action=compute_ngd)",
+        "resultify(ignore_edge_direction=true, debug=true)",
+        "return(message=true, store=false)",
+    ]}}
+    [response, message] = _do_arax_query(query)
+    print(response.show())
+    assert response.status == 'OK'
+    ngd_edges = []
+    for edge in message.knowledge_graph.edges.values():
+        assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+        for attr in edge.attributes or []:  # attributes may be None; hasattr() alone does not rule that out
+            if attr.original_attribute_name == 'normalized_google_distance':
+                ngd_edges.append(edge)
+                assert float(attr.value) >= 0
+                assert attr.attribute_type_id == 'EDAM-DATA:2526'
+    assert len(ngd_edges) > 0
+    for edge in ngd_edges:
+        attribute_names = {attribute.original_attribute_name: attribute.value for attribute in edge.attributes}
+        if "publications" in attribute_names:
+            assert len(attribute_names["publications"]) <= 30
+
+
+def test_FET_ex1():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(ids=DOID:12889, key=n00, categories=biolink:Disease)",
+ "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+ "add_qedge(subject=n00, object=n01,key=e00)",
+ "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "overlay(action=fisher_exact_test, subject_qnode_key=n00, object_qnode_key=n01, virtual_relation_label=FET1, rel_edge_key=e00)",
+ "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=fisher_exact_test_p-value, direction=above, threshold=0.005, remove_connected_nodes=t, qnode_keys=[n01])",
+ "add_qnode(categories=biolink:ChemicalEntity, is_set=true, key=n02)",
+ "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:physically_interacts_with)",
+ "expand(edge_key=e01, kp=infores:rtx-kg2)",
+ "overlay(action=fisher_exact_test, subject_qnode_key=n01, object_qnode_key=n02, virtual_relation_label=FET2, filter_type=cutoff, value=0.05)",
+ "resultify()",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])
+ assert 'biolink:has_fisher_exact_test_p_value_with' in edge_predicates_in_kg
+ FET_edges = []
+ FET_edge_labels = set()
+ for edge in message.knowledge_graph.edges.values():
+ relation_name = None
+ add_edge = False
+ for attribute in edge.attributes:
+ if attribute.original_attribute_name == "virtual_relation_label":
+ if attribute.value is not None and attribute.value.find("FET") != -1:
+ add_edge = True
+ FET_edge_labels.add(attribute.value)
+ relation_name = attribute.value
+ if add_edge:
+ FET_edges.append((edge, relation_name))
+ assert len(FET_edges) > 0
+ assert len(FET_edge_labels) == 2
+ for edge_tuple in FET_edges:
+ edge, relation_name = edge_tuple
+ assert hasattr(edge, 'attributes')
+ assert edge.attributes
+ assert edge.attributes[0].original_attribute_name == 'fisher_exact_test_p-value'
+ assert edge.attributes[0].attribute_type_id == 'EDAM-DATA:1669'
+ assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+ if relation_name == 'FET1':
+ assert 0 <= float(edge.attributes[0].value) < 0.005
+ else:
+ assert 0 <= float(edge.attributes[0].value) < 0.05
+ # FET_query_edges = {key:edge for key, edge in message.query_graph.edges.items() if key.find("FET") != -1}
+ # assert len(FET_query_edges) == 2
+ query_node_keys = [key for key, node in message.query_graph.nodes.items()]
+ assert len(query_node_keys) == 2
+ # for key, query_edge in FET_query_edges.items():
+ # assert hasattr(query_edge, 'predicates')
+ # assert 'biolink:has_fisher_exact_test_p_value_with' in query_edge.predicates
+ # assert key == query_edge.relation
+ # assert query_edge.subject in query_node_keys
+ # assert query_edge.object in query_node_keys
+
+
+@pytest.mark.slow
+def test_FET_ex2():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(ids=DOID:12889, key=n00, categories=biolink:Disease)",
+ "add_qnode(categories=biolink:Protein, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(edge_key=e00, kp=infores:rtx-kg2)",
+ "overlay(action=fisher_exact_test, subject_qnode_key=n00, virtual_relation_label=FET, object_qnode_key=n01, rel_edge_key=e00, top_n=20)",
+ "resultify()",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])
+ assert 'biolink:has_fisher_exact_test_p_value_with' in edge_predicates_in_kg
+ FET_edges = []
+ FET_edge_labels = set()
+ for edge in message.knowledge_graph.edges.values():
+ assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+ relation_name = None
+ add_edge = False
+ for attribute in edge.attributes:
+ if attribute.original_attribute_name == "virtual_relation_label":
+ if attribute.value is not None and attribute.value.find("FET") != -1:
+ add_edge = True
+ FET_edge_labels.add(attribute.value)
+ relation_name = attribute.value
+ if add_edge:
+ FET_edges.append((edge, relation_name))
+ assert len(FET_edges) >= 2
+ assert len(FET_edge_labels) == 1
+ for edge_tuple in FET_edges:
+ edge, relation_name = edge_tuple
+ assert hasattr(edge, 'attributes')
+ assert edge.attributes
+ assert edge.attributes[0].original_attribute_name == 'fisher_exact_test_p-value'
+ assert edge.attributes[0].attribute_type_id == 'EDAM-DATA:1669'
+ # FET_query_edges = {key:edge for key, edge in message.query_graph.edges.items() if key.find("FET") != -1}
+ # assert len(FET_query_edges) == 1
+ query_node_keys = [key for key, node in message.query_graph.nodes.items()]
+ assert len(query_node_keys) == 2
+ # for key, query_edge in FET_query_edges.items():
+ # assert hasattr(query_edge, 'predicates')
+ # assert 'biolink:has_fisher_exact_test_p_value_with' in query_edge.predicates
+ # assert key == query_edge.relation
+ # assert query_edge.subject in query_node_keys
+ # assert query_edge.object in query_node_keys
+
+
+@pytest.mark.slow
+def test_paired_concept_frequency_virtual():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:1588, key=n0)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "overlay(action=overlay_clinical_info, paired_concept_frequency=true, subject_qnode_key=n0, object_qnode_key=n1, virtual_relation_label=CP1)",
+ "resultify()",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ _virtual_tester(message, 'biolink:associated_with', 'CP1', 'paired_concept_frequency', 'EDAM-DATA:0951', 2)
+
+
+@pytest.mark.slow
+def test_paired_concept_frequency_attribute():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:1588, key=n0)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "overlay(action=overlay_clinical_info, COHD_method=paired_concept_frequency)",
+ "resultify()",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ _attribute_tester(message, 'paired_concept_frequency', 'EDAM-DATA:0951', 2)
+
+
+@pytest.mark.slow
+def test_observed_expected_ratio_virtual():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:1588, key=n0)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "overlay(action=overlay_clinical_info,observed_expected_ratio=true, subject_qnode_key=n0, object_qnode_key=n1, virtual_relation_label=CP1)",
+ "resultify()",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ _virtual_tester(message, 'biolink:associated_with', 'CP1', 'observed_expected_ratio', 'EDAM-DATA:0951', 2)
+
+
+@pytest.mark.slow
+def test_observed_expected_ratio_attribute():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:1588, key=n0)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "overlay(action=overlay_clinical_info, COHD_method=observed_expected_ratio)",
+ "resultify()",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ _attribute_tester(message, 'observed_expected_ratio', 'EDAM-DATA:0951', 2)
+
+
+@pytest.mark.slow
+def test_chi_square_virtual():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:1588, key=n0)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "overlay(action=overlay_clinical_info, chi_square=true, subject_qnode_key=n0, object_qnode_key=n1, virtual_relation_label=CP1)",
+ "resultify()",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ _virtual_tester(message, 'biolink:associated_with', 'CP1', 'chi_square', 'EDAM-DATA:0951', 2)
+
+
+@pytest.mark.slow
+def test_chi_square_attribute():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:1588, key=n0)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "overlay(action=overlay_clinical_info, COHD_method=chi_square)",
+ "resultify()",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ _attribute_tester(message, 'chi_square', 'EDAM-DATA:0951', 2)
+
+
+# CM: I changed 'MONDO:0004992' to 'DOID:0080909' for a more specific disease
+# @pytest.mark.slow
+@pytest.mark.skip(reason="retire DTD")
+def test_predict_drug_treats_disease_virtual():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(ids=DOID:0080909, key=n0, categories=biolink:Disease)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "overlay(action=predict_drug_treats_disease, subject_qnode_key=n1, object_qnode_key=n0, virtual_relation_label=P1)",
+ "resultify()",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ _virtual_tester(message, 'biolink:probably_treats', 'P1', 'probability_treats', 'EDAM-DATA:0951', 2)
+
+
+# CM: I changed 'MONDO:0004992' to 'DOID:0080909' for a more specific disease
+# @pytest.mark.slow
+@pytest.mark.skip(reason="retire DTD")
+def test_predict_drug_treats_disease_attribute():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(ids=DOID:0080909, key=n0)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "overlay(action=predict_drug_treats_disease, threshold=0.7)",
+ "resultify()",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ _attribute_tester(message, 'probability_treats', 'EDAM-DATA:0951', 2)
+
+
+# CM: I changed 'MONDO:0004992' to 'DOID:0080909' for a more specific disease
+# @pytest.mark.slow
+@pytest.mark.skip(reason="retire DTD")
+def test_issue_832():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(ids=DOID:0080909, key=n0)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "overlay(action=predict_drug_treats_disease, subject_qnode_key=n1, object_qnode_key=n0, virtual_relation_label=P1)",
+ "resultify()",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ _virtual_tester(message, 'biolink:probably_treats', 'P1', 'probability_treats', 'EDAM-DATA:0951', 2)
+
+
+# @pytest.mark.slow
+@pytest.mark.skip(reason="retire DTD")
+def test_issue_832_non_drug():
+    """With a protein (not a disease) as n0, predict_drug_treats_disease must not add 'biolink:probably_treats' edges."""
+    query = {"operations": {"actions": [
+        "create_message",
+        "add_qnode(ids=UniProtKB:P62328, key=n0)",
+        "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+        "add_qedge(subject=n0, object=n1, key=e0)",
+        "expand(edge_key=e0, kp=infores:rtx-kg2)",
+        "overlay(action=predict_drug_treats_disease, subject_qnode_key=n1, object_qnode_key=n0, virtual_relation_label=P1)",
+        "resultify()",
+        "return(message=true, store=false)",
+    ]}}
+    [response, message] = _do_arax_query(query)
+    print(response.show())
+    assert response.status == 'OK'
+    edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])
+    assert 'biolink:probably_treats' not in edge_predicates_in_kg
+
+
+@pytest.mark.slow
+def test_issue_840():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:1588, key=n0)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "overlay(action=overlay_clinical_info, paired_concept_frequency=true, subject_qnode_key=n1, object_qnode_key=n0, virtual_relation_label=V1)",
+ "resultify()",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ _virtual_tester(message, 'biolink:associated_with', 'V1', 'paired_concept_frequency', 'EDAM-DATA:0951', 2)
+
+ # And for the non-virtual test
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=DOID:1588, key=n0)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand(edge_key=e0, kp=infores:rtx-kg2)",
+ "overlay(action=overlay_clinical_info, paired_concept_frequency=true)",
+ "resultify()",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ _attribute_tester(message, 'paired_concept_frequency', 'EDAM-DATA:0951', 2)
+
+
+@pytest.mark.slow
+def test_issue_840_non_drug():
+    """With a protein as n0, overlay_clinical_info must not add any 'paired_concept_frequency' attribute (virtual or not)."""
+    query = {"operations": {"actions": [
+        "create_message",
+        "add_qnode(name=UniProtKB:P62328, key=n0)",
+        "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+        "add_qedge(subject=n0, object=n1, key=e0)",
+        "expand(edge_key=e0, kp=infores:rtx-kg2)",
+        "overlay(action=overlay_clinical_info, paired_concept_frequency=true, subject_qnode_key=n1, object_qnode_key=n0, virtual_relation_label=V1)",
+        "resultify()",
+        "return(message=true, store=false)",
+    ]}}
+    [response, message] = _do_arax_query(query)
+    print(response.show())
+    assert response.status == 'OK'
+    # Make sure that no paired_concept_frequency values were added (check attribute names, not predicates)
+    attribute_names = {attr.original_attribute_name for edge in message.knowledge_graph.edges.values() for attr in (edge.attributes or [])}
+    assert 'paired_concept_frequency' not in attribute_names
+
+    # Now for the non-virtual test
+    query = {"operations": {"actions": [
+        "create_message",
+        "add_qnode(name=UniProtKB:P62328, key=n0)",
+        "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+        "add_qedge(subject=n0, object=n1, key=e0)",
+        "expand(edge_key=e0, kp=infores:rtx-kg2)",
+        "overlay(action=overlay_clinical_info, paired_concept_frequency=true)",
+        "resultify()",
+        "return(message=true, store=false)",
+    ]}}
+    [response, message] = _do_arax_query(query)
+    print(response.show())
+    assert response.status == 'OK'
+    # Make sure that no paired_concept_frequency attributes were added to existing edges
+    attribute_names = {attr.original_attribute_name for edge in message.knowledge_graph.edges.values() for attr in (edge.attributes or [])}
+    assert 'paired_concept_frequency' not in attribute_names
+
+
+# @pytest.mark.external
+# @pytest.mark.slow
+@pytest.mark.skip(reason="retire DTD")
+def test_issue_892():
+ query = {"operations": {"actions": [
+ "add_qnode(ids=DOID:11830, categories=biolink:Disease, key=n00)",
+ "add_qnode(categories=biolink:Gene, ids=[UniProtKB:P39060, UniProtKB:O43829, UniProtKB:P20849], is_set=true, key=n01)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "add_qedge(subject=n01, object=n02, key=e01)",
+ "expand(kp=infores:biothings-explorer)",
+ "overlay(action=predict_drug_treats_disease, subject_qnode_key=n02, object_qnode_key=n00, virtual_relation_label=P1, threshold=0.5)",
+ "resultify(ignore_edge_direction=true)",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ _virtual_tester(message, 'biolink:probably_treats', 'P1', 'probability_treats', 'EDAM-DATA:0951', 10)
+
+
+@pytest.mark.external
+def test_overlay_exposures_data_virtual():
+ query = {"operations": {"actions": [
+ "add_qnode(name=CHEMBL.COMPOUND:CHEMBL635, key=n0)",
+ "add_qnode(name=MESH:D052638, key=n1)",
+ "expand(kp=infores:rtx-kg2)",
+ "overlay(action=overlay_exposures_data, virtual_relation_label=E1, subject_qnode_key=n0, object_qnode_key=n1)",
+ "resultify()",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ print(response.show())
+ _virtual_tester(message, 'biolink:has_icees_p-value_with', 'E1', 'icees_p-value', 'EDAM-DATA:1669', 1)
+
+
+@pytest.mark.external
+def test_overlay_exposures_data_attribute():
+ query = {"operations": {"actions": [
+ "add_qnode(name=MONDO:0012607, key=n0)",
+ "add_qnode(name=MONDO:0010940, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand(kp=infores:rtx-kg2)",
+ "overlay(action=overlay_exposures_data)",
+ "resultify()",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ print(response.show())
+ _attribute_tester(message, 'icees_p-value', 'EDAM-DATA:1669', 1)
+
+
+@pytest.mark.slow
+def test_overlay_clinical_info_no_ids():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(name=acetaminophen, key=n0)",
+ "add_qnode(name=Sotos syndrome, key=n1)",
+ "expand(kp=infores:rtx-kg2)",
+ "overlay(action=overlay_clinical_info,COHD_method=paired_concept_frequency,virtual_relation_label=C1,subject_qnode_key=n0,object_qnode_key=n1)",
+ "overlay(action=overlay_clinical_info,COHD_method=observed_expected_ratio,virtual_relation_label=C2,subject_qnode_key=n0,object_qnode_key=n1)",
+ "overlay(action=overlay_clinical_info,COHD_method=chi_square,virtual_relation_label=C3,subject_qnode_key=n0,object_qnode_key=n1)",
+ "resultify()",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ _virtual_tester(message, 'biolink:associated_with', 'C1', 'paired_concept_frequency', 'EDAM-DATA:0951', 1)
+ _attribute_tester(message, 'paired_concept_frequency', 'EDAM-DATA:0951', 1)
+ _virtual_tester(message, 'biolink:associated_with', 'C2', 'observed_expected_ratio', 'EDAM-DATA:0951', 1)
+ _attribute_tester(message, 'observed_expected_ratio', 'EDAM-DATA:0951', 1)
+ _virtual_tester(message, 'biolink:associated_with', 'C3', 'chi_square', 'EDAM-DATA:0951', 1)
+ _attribute_tester(message, 'chi_square', 'EDAM-DATA:0951', 1)
+
+@pytest.mark.slow
+def test_missing_ngd_pmids():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(ids=UniProtKB:P52788, key=n0)",
+ "add_qnode(categories=[biolink:Protein,biolink:Gene], key=n1, is_set=true)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "add_qnode(categories=[biolink:ChemicalEntity,biolink:Drug], key=n2)",
+ "add_qedge(subject=n1, object=n2, key=e1)",
+ "expand(kp=infores:rtx-kg2)",
+ "overlay(action=compute_ngd, virtual_relation_label=N1, subject_qnode_key=n0, object_qnode_key=n1)",
+ "overlay(action=compute_ngd, virtual_relation_label=N2, subject_qnode_key=n1, object_qnode_key=n2)",
+ "overlay(action=compute_ngd, virtual_relation_label=N3, subject_qnode_key=n0, object_qnode_key=n2)",
+ "overlay(action=compute_jaccard, start_node_key=n0, intermediate_node_key=n1, end_node_key=n2, virtual_relation_label=J1)",
+ "resultify()",
+ "filter_results(action=limit_number_of_results, max_results=30)",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ ngd_publications = {}
+ for edge_key, edge in message.knowledge_graph.edges.items():
+ assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+ if edge.attributes is not None:
+ for attribute in edge.attributes:
+ if attribute.original_attribute_name == 'normalized_google_distance':
+ if edge_key not in ngd_publications:
+ ngd_publications[edge_key] = {}
+ ngd_publications[edge_key]['ngd'] = attribute.value
+ elif attribute.original_attribute_name == 'publications':
+ if edge_key not in ngd_publications:
+ ngd_publications[edge_key] = {}
+ ngd_publications[edge_key]['pubs'] = attribute.value
+
+ for edge_dict in ngd_publications.values():
+ if 'ngd' in edge_dict and 'pubs' in edge_dict:
+ if edge_dict['pubs'] == []:
+ assert edge_dict['ngd'] == 'inf'
+
+@pytest.mark.slow
+def test_jaccard_not_above_1():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(key=N0,ids=chembl.compound:CHEMBL787)",
+ "add_qnode(key=N1,categories=biolink:Protein)",
+ "add_qedge(key=E0,subject=N0,object=N1,predicates=biolink:physically_interacts_with)",
+ "add_qnode(key=N2,categories=biolink:ChemicalEntity)",
+ "add_qedge(key=E2,subject=N1,object=N2)",
+ "expand(kp=infores:rtx-kg2)",
+ "overlay(action=compute_ngd,default_value=inf,virtual_relation_label=V1,subject_qnode_key=N0,object_qnode_key=N1)",
+ "overlay(action=compute_ngd,default_value=inf,virtual_relation_label=V2,subject_qnode_key=N1,object_qnode_key=N2)",
+ "overlay(action=compute_ngd,default_value=inf,virtual_relation_label=V3,subject_qnode_key=N0,object_qnode_key=N2)",
+ "overlay(action=compute_jaccard,start_node_key=N0,intermediate_node_key=N1,end_node_key=N2,virtual_relation_label=VJ)",
+ "resultify()",
+ #"filter_results(action=limit_number_of_results,max_results=100,prune_kg=true)",
+ "return(message=true, store=false)",
+ ]}}
+ [response, message] = _do_arax_query(query)
+ print(response.show())
+ assert response.status == 'OK'
+ edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])
+ assert 'biolink:has_jaccard_index_with' in edge_predicates_in_kg
+ jaccard_edges = []
+ for edge in message.knowledge_graph.edges.values():
+ assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+ add_edge = False
+ for attribute in edge.attributes:
+ if attribute.original_attribute_name == "virtual_relation_label":
+ if attribute.value == "VJ":
+ add_edge = True
+ if add_edge:
+ jaccard_edges.append(edge)
+ assert len(jaccard_edges) > 0
+ for edge in jaccard_edges:
+ assert hasattr(edge, 'attributes')
+ assert edge.attributes
+ assert edge.attributes[0].original_attribute_name == 'jaccard_index'
+ assert edge.attributes[0].value >= 0
+ assert edge.attributes[0].value <= 1
+
+@pytest.mark.slow
+def test_ngd_sqlite_syntax_error():
+ query = {"message":{"query_graph":{
+ "edges": {
+ "e00": {
+ "object": "n01",
+ "subject": "n00"
+ }
+ },
+ "nodes": {
+ "n00": {
+ "ids": [
+ "CHEBI:16680"
+ ],
+ "is_set": False
+ },
+ "n01": {
+ "categories": [
+ "biolink:Pathway",
+ "biolink:BiologicalProcess",
+ "biolink:MolecularActivity",
+ "biolink:CellularComponent",
+ "biolink:InformationContentEntity",
+ "biolink:NamedThing"
+ ],
+ "is_set": False
+ }
+ }
+ }
+ }}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+
+
+if __name__ == "__main__":
+ pytest.main(['-v'])
diff --git a/code/code-archive/old-arax-tests/test_ARAX_query.py b/code/code-archive/old-arax-tests/test_ARAX_query.py
new file mode 100644
index 000000000..7f9acaf6c
--- /dev/null
+++ b/code/code-archive/old-arax-tests/test_ARAX_query.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import pytest
+
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../ARAXQuery")
+from ARAX_query import ARAXQuery
+
+
+def test_query_by_query_graph_2():
+    """Proteins physically interacting with acetaminophen: expect OK, >= 10 results, schema 1.6.0."""
+    qnodes = {
+        "qg0": {"name": "acetaminophen", "ids": ["CHEMBL.COMPOUND:CHEMBL112"], "categories": ["biolink:ChemicalEntity"]},
+        "qg1": {"name": None, "ids": None, "categories": ["biolink:Protein"]},
+    }
+    qedges = {"qg2": {"subject": "qg1", "object": "qg0", "predicates": ["biolink:physically_interacts_with"]}}
+    query = {"message": {"query_graph": {"edges": qedges, "nodes": qnodes}}}
+    araxq = ARAXQuery()
+    araxq.query(query)
+    response = araxq.response
+    print(response.show())
+
+    assert response.status == 'OK'
+    envelope = response.envelope
+    assert len(envelope.message.results) >= 10
+    assert envelope.schema_version == '1.6.0'
+
+
+if __name__ == "__main__": pytest.main(['-v'])  # executed as a script: hand control to pytest in verbose mode
diff --git a/code/code-archive/old-arax-tests/test_ARAX_ranker.py b/code/code-archive/old-arax-tests/test_ARAX_ranker.py
new file mode 100644
index 000000000..7b87116c3
--- /dev/null
+++ b/code/code-archive/old-arax-tests/test_ARAX_ranker.py
@@ -0,0 +1,962 @@
+#!/usr/bin/env python3
+
+# Usage:
+# run all: pytest -v test_ARAX_ranker.py
+# run just certain tests: pytest -v test_ARAX_ranker.py -k test_ARAXRanker
+
+import sys
+import os
+import numpy as np
+import scipy.stats
+import pytest
+import requests_cache
+import pickle
+import copy
+import ast
+from typing import List, Union
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../ARAXQuery")
+from ARAX_response import ARAXResponse
+from ARAX_messenger import ARAXMessenger
+from ARAX_query import ARAXQuery
+from query_graph_info import QueryGraphInfo
+from actions_parser import ActionsParser
+from result_transformer import ResultTransformer
+from ARAX_ranker import ARAXRanker
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../NodeSynonymizer")
+from node_synonymizer import NodeSynonymizer
+synonymizer = NodeSynonymizer()
+
+PACKAGE_PARENT = '../../UI/OpenAPI/python-flask-server'
+sys.path.append(os.path.normpath(os.path.join(os.getcwd(), PACKAGE_PARENT)))
+from openapi_server.models.edge import Edge
+from openapi_server.models.node import Node
+from openapi_server.models.q_edge import QEdge
+from openapi_server.models.q_node import QNode
+from openapi_server.models.query_graph import QueryGraph
+from openapi_server.models.knowledge_graph import KnowledgeGraph
+from openapi_server.models.node_binding import NodeBinding
+from openapi_server.models.edge_binding import EdgeBinding
+from openapi_server.models.result import Result
+from openapi_server.models.message import Message
+
+def _extract_ARAX_online_results(response_id: str, api_link: str = 'https://arax.ncats.io/api/arax/v1.4/response/') -> List[Union[ARAXResponse, Message]]:
+    """Fetch the message for an online ARAX response id and wrap it in a new ARAXResponse.
+
+    Returns a two-item list: the response, then the message placed in its envelope.
+    """
+    arax_response = ARAXResponse()
+    envelope_builder = ARAXMessenger()
+    envelope_builder.create_envelope(arax_response)  # start from an empty envelope
+    arax_response.envelope.submitter = '?'
+
+    # The message URL is the API base link with the response id appended.
+    fetched = envelope_builder.fetch_message(f'{api_link}{response_id}')
+    arax_response.envelope.message = fetched
+    return [arax_response, arax_response.envelope.message]
+
+def _do_arax_query(query: dict) -> List[Union[ARAXResponse, Message]]:
+    """Run ``query`` through a new ARAXQuery and return ``[response, message]``.
+
+    If the status is not 'OK', ``response.show()`` at DEBUG level is printed first.
+    """
+    arax_response = ARAXQuery().query(query)
+    if arax_response.status != 'OK':
+        print(arax_response.show(level=arax_response.DEBUG))
+    return [arax_response, arax_response.envelope.message]
+
+def _do_arax_rank(response: ARAXResponse) -> Message:
+    """Rank the results in ``response`` via ARAXRanker.aggregate_scores_dmk and return its message."""
+    ARAXRanker().aggregate_scores_dmk(response)
+    ranking_failed = response.status != 'OK'
+    if ranking_failed:
+        # Print the response at DEBUG level so the failure can be inspected.
+        print(response.show(level=response.DEBUG))
+    return response.envelope.message
+
+def _ranker_tester(query: dict = None, response_id: str = None) -> Message:
+    """Rank a fetched online response if ``response_id`` is given, else run and rank ``query``."""
+    # response_id takes precedence over query when both are supplied.
+    if response_id is None:
+        response, _ = _do_arax_query(query)
+    else:
+        response, _ = _extract_ARAX_online_results(response_id)
+    ranked_message = _do_arax_rank(response)
+    return ranked_message
+
+@pytest.mark.slow
+def test_ARAXRanker_test1_asset12():
+    """Check 'rituximab treats Castleman Disease': the expected answer must be in the first 30% of results."""
+    expected_answer = 'rituximab'
+    preferred_curie = synonymizer.get_canonical_curies(names=expected_answer)[expected_answer]
+    # Match results on the synonymizer's preferred name when it has one, else on the raw name.
+    if preferred_curie is not None:
+        expected_answer = preferred_curie['preferred_name']
+
+    query = { "message": { "query_graph": {
+        "edges": {
+            "e01": {
+                "attribute_constraints": [],
+                "knowledge_type": "inferred",
+                "object": "ON",
+                "predicates": [
+                    "biolink:treats"
+                ],
+                "qualifier_constraints": [],
+                "subject": "SN"
+            }
+        },
+        "nodes": {
+            "ON": {
+                "categories": [
+                    "biolink:Disease"
+                ],
+                "constraints": [],
+                "ids": [
+                    "MONDO:0015564"
+                ],
+                "set_interpretation": "BATCH"
+            },
+            "SN": {
+                "categories": [
+                    "biolink:ChemicalEntity"
+                ],
+                "constraints": [],
+                "set_interpretation": "BATCH"
+            }
+        }
+    } } }
+    araxq = ARAXQuery()
+    araxq.query(query)
+    response = araxq.response
+    assert response.status == 'OK'
+    message = response.envelope.message
+
+    # returned_message = _ranker_tester(response_id='248097')
+    # 1-based position of the expected answer in the results; -1 means it was not returned.
+    rank_right_answer = -1
+    for rank, result in enumerate(message.results, start=1):
+        if result.essence.lower() == expected_answer.lower():
+            rank_right_answer = rank
+            break
+    total_results = len(message.results)
+
+    assert rank_right_answer != -1
+    assert rank_right_answer < 0.3 * total_results
+
+@pytest.mark.slow
+def test_ARAXRanker_test5_asset70():
+    """Check 'Miglustat treats Niemann-Pick type C': the expected answer must be in the first 30% of results."""
+    expected_answer = 'Miglustat'
+    preferred_curie = synonymizer.get_canonical_curies(names=expected_answer)[expected_answer]
+    # Match results on the synonymizer's preferred name when it has one, else on the raw name.
+    if preferred_curie is not None:
+        expected_answer = preferred_curie['preferred_name']
+
+    query = { "message": { "query_graph": {
+        "edges": {
+            "e01": {
+                "attribute_constraints": [],
+                "knowledge_type": "inferred",
+                "object": "ON",
+                "predicates": [
+                    "biolink:treats"
+                ],
+                "qualifier_constraints": [],
+                "subject": "SN"
+            }
+        },
+        "nodes": {
+            "ON": {
+                "categories": [
+                    "biolink:Disease"
+                ],
+                "constraints": [],
+                "ids": [
+                    "MONDO:0018982"
+                ],
+                "set_interpretation": "BATCH"
+            },
+            "SN": {
+                "categories": [
+                    "biolink:ChemicalEntity"
+                ],
+                "constraints": [],
+                "set_interpretation": "BATCH"
+            }
+        }
+    } } }
+    araxq = ARAXQuery()
+    araxq.query(query)
+    response = araxq.response
+    assert response.status == 'OK'
+    message = response.envelope.message
+
+    # returned_message = _ranker_tester(response_id='248115')
+    # 1-based position of the expected answer in the results; -1 means it was not returned.
+    rank_right_answer = -1
+    for rank, result in enumerate(message.results, start=1):
+        if result.essence.lower() == expected_answer.lower():
+            rank_right_answer = rank
+            break
+    total_results = len(message.results)
+
+    assert rank_right_answer != -1
+    assert rank_right_answer < 0.3 * total_results
+
+@pytest.mark.slow
+def test_ARAXRanker_test6_asset72():
+    """Check 'Lomitapide treats Homozygous Familial Hypercholesterolemia': expected answer in the first 30% of results."""
+    expected_answer = 'Lomitapide'
+    preferred_curie = synonymizer.get_canonical_curies(names=expected_answer)[expected_answer]
+    # Match results on the synonymizer's preferred name when it has one, else on the raw name.
+    if preferred_curie is not None:
+        expected_answer = preferred_curie['preferred_name']
+
+    query = { "message": { "query_graph": {
+        "edges": {
+            "e01": {
+                "attribute_constraints": [],
+                "knowledge_type": "inferred",
+                "object": "ON",
+                "predicates": [
+                    "biolink:treats"
+                ],
+                "qualifier_constraints": [],
+                "subject": "SN"
+            }
+        },
+        "nodes": {
+            "ON": {
+                "categories": [
+                    "biolink:Disease"
+                ],
+                "constraints": [],
+                "ids": [
+                    "MONDO:0018328"
+                ],
+                "set_interpretation": "BATCH"
+            },
+            "SN": {
+                "categories": [
+                    "biolink:ChemicalEntity"
+                ],
+                "constraints": [],
+                "set_interpretation": "BATCH"
+            }
+        }
+    } } }
+    araxq = ARAXQuery()
+    araxq.query(query)
+    response = araxq.response
+    assert response.status == 'OK'
+    message = response.envelope.message
+
+    # returned_message = _ranker_tester(response_id='248120')
+    # 1-based position of the expected answer in the results; -1 means it was not returned.
+    rank_right_answer = -1
+    for rank, result in enumerate(message.results, start=1):
+        if result.essence.lower() == expected_answer.lower():
+            rank_right_answer = rank
+            break
+    total_results = len(message.results)
+
+    assert rank_right_answer != -1
+    assert rank_right_answer < 0.3 * total_results
+
+@pytest.mark.slow
+def test_ARAXRanker_test9_asset614():
+    """Check 'famotidine treats Gastroesophageal Reflux Disease': expected answer in the first 30% of results."""
+    expected_answer = 'famotidine'
+    preferred_curie = synonymizer.get_canonical_curies(names=expected_answer)[expected_answer]
+    # Match results on the synonymizer's preferred name when it has one, else on the raw name.
+    if preferred_curie is not None:
+        expected_answer = preferred_curie['preferred_name']
+
+    query = { "message": { "query_graph": {
+        "edges": {
+            "e01": {
+                "attribute_constraints": [],
+                "knowledge_type": "inferred",
+                "object": "ON",
+                "predicates": [
+                    "biolink:treats"
+                ],
+                "qualifier_constraints": [],
+                "subject": "SN"
+            }
+        },
+        "nodes": {
+            "ON": {
+                "categories": [
+                    "biolink:Disease"
+                ],
+                "constraints": [],
+                "ids": [
+                    "MONDO:0007186"
+                ],
+                "set_interpretation": "BATCH"
+            },
+            "SN": {
+                "categories": [
+                    "biolink:ChemicalEntity"
+                ],
+                "constraints": [],
+                "set_interpretation": "BATCH"
+            }
+        }
+    } } }
+    araxq = ARAXQuery()
+    araxq.query(query)
+    response = araxq.response
+    assert response.status == 'OK'
+    message = response.envelope.message
+
+    # returned_message = _ranker_tester(response_id='248142')
+    # 1-based position of the expected answer in the results; -1 means it was not returned.
+    rank_right_answer = -1
+    for rank, result in enumerate(message.results, start=1):
+        if result.essence.lower() == expected_answer.lower():
+            rank_right_answer = rank
+            break
+    total_results = len(message.results)
+
+    assert rank_right_answer != -1
+    assert rank_right_answer < 0.3 * total_results
+
+@pytest.mark.slow
+def test_ARAXRanker_test9_asset619():
+    """Check 'lansoprazole treats Gastroesophageal Reflux Disease': expected answer in the first 30% of results."""
+    expected_answer = 'lansoprazole'
+    preferred_curie = synonymizer.get_canonical_curies(names=expected_answer)[expected_answer]
+    # Match results on the synonymizer's preferred name when it has one, else on the raw name.
+    if preferred_curie is not None:
+        expected_answer = preferred_curie['preferred_name']
+
+    query = { "message": { "query_graph": {
+        "edges": {
+            "e01": {
+                "attribute_constraints": [],
+                "knowledge_type": "inferred",
+                "object": "ON",
+                "predicates": [
+                    "biolink:treats"
+                ],
+                "qualifier_constraints": [],
+                "subject": "SN"
+            }
+        },
+        "nodes": {
+            "ON": {
+                "categories": [
+                    "biolink:Disease"
+                ],
+                "constraints": [],
+                "ids": [
+                    "MONDO:0007186"
+                ],
+                "set_interpretation": "BATCH"
+            },
+            "SN": {
+                "categories": [
+                    "biolink:ChemicalEntity"
+                ],
+                "constraints": [],
+                "set_interpretation": "BATCH"
+            }
+        }
+    } } }
+    araxq = ARAXQuery()
+    araxq.query(query)
+    response = araxq.response
+    assert response.status == 'OK'
+    message = response.envelope.message
+
+    # returned_message = _ranker_tester(response_id='248142')
+    # 1-based position of the expected answer in the results; -1 means it was not returned.
+    rank_right_answer = -1
+    for rank, result in enumerate(message.results, start=1):
+        if result.essence.lower() == expected_answer.lower():
+            rank_right_answer = rank
+            break
+    total_results = len(message.results)
+
+    assert rank_right_answer != -1
+    assert rank_right_answer < 0.3 * total_results
+
+@pytest.mark.slow
+def test_ARAXRanker_test9_asset623():
+    """Check 'rabeprazole treats Gastroesophageal Reflux Disease': expected answer in the first 30% of results."""
+    expected_answer = 'rabeprazole'
+    preferred_curie = synonymizer.get_canonical_curies(names=expected_answer)[expected_answer]
+    # Match results on the synonymizer's preferred name when it has one, else on the raw name.
+    if preferred_curie is not None:
+        expected_answer = preferred_curie['preferred_name']
+
+    query = { "message": { "query_graph": {
+        "edges": {
+            "e01": {
+                "attribute_constraints": [],
+                "knowledge_type": "inferred",
+                "object": "ON",
+                "predicates": [
+                    "biolink:treats"
+                ],
+                "qualifier_constraints": [],
+                "subject": "SN"
+            }
+        },
+        "nodes": {
+            "ON": {
+                "categories": [
+                    "biolink:Disease"
+                ],
+                "constraints": [],
+                "ids": [
+                    "MONDO:0007186"
+                ],
+                "set_interpretation": "BATCH"
+            },
+            "SN": {
+                "categories": [
+                    "biolink:ChemicalEntity"
+                ],
+                "constraints": [],
+                "set_interpretation": "BATCH"
+            }
+        }
+    } } }
+    araxq = ARAXQuery()
+    araxq.query(query)
+    response = araxq.response
+    assert response.status == 'OK'
+    message = response.envelope.message
+
+    # returned_message = _ranker_tester(response_id='248142')
+    # 1-based position of the expected answer in the results; -1 means it was not returned.
+    rank_right_answer = -1
+    for rank, result in enumerate(message.results, start=1):
+        if result.essence.lower() == expected_answer.lower():
+            rank_right_answer = rank
+            break
+    total_results = len(message.results)
+
+    assert rank_right_answer != -1
+    assert rank_right_answer < 0.3 * total_results
+
+@pytest.mark.slow
+def test_ARAXRanker_test13_asset311():
+    """Check 'Benazepril decreases activity or abundance of ACE': expected answer in the first 30% of results."""
+    expected_answer = 'Benazepril'
+    preferred_curie = synonymizer.get_canonical_curies(names=expected_answer)[expected_answer]
+    # Match results on the synonymizer's preferred name when it has one, else on the raw name.
+    if preferred_curie is not None:
+        expected_answer = preferred_curie['preferred_name']
+
+    query = { "message": { "query_graph": {
+        "edges": {
+            "t_edge": {
+                "attribute_constraints": [],
+                "knowledge_type": "inferred",
+                "object": "ON",
+                "predicates": [
+                    "biolink:affects"
+                ],
+                "qualifier_constraints": [
+                    {
+                        "qualifier_set": [
+                            {
+                                "qualifier_type_id": "biolink:object_aspect_qualifier",
+                                "qualifier_value": "activity_or_abundance"
+                            },
+                            {
+                                "qualifier_type_id": "biolink:object_direction_qualifier",
+                                "qualifier_value": "decreased"
+                            }
+                        ]
+                    }
+                ],
+                "subject": "SN"
+            }
+        },
+        "nodes": {
+            "ON": {
+                "categories": [
+                    "biolink:Gene"
+                ],
+                "constraints": [],
+                "ids": [
+                    "NCBIGene:1636"
+                ],
+                "set_interpretation": "BATCH"
+            },
+            "SN": {
+                "categories": [
+                    "biolink:ChemicalEntity"
+                ],
+                "constraints": [],
+                "set_interpretation": "BATCH"
+            }
+        }
+    } } }
+    araxq = ARAXQuery()
+    araxq.query(query)
+    response = araxq.response
+    assert response.status == 'OK'
+    message = response.envelope.message
+
+    # returned_message = _ranker_tester(response_id='248160')
+    # 1-based position of the expected answer in the results; -1 means it was not returned.
+    rank_right_answer = -1
+    for rank, result in enumerate(message.results, start=1):
+        if result.essence.lower() == expected_answer.lower():
+            rank_right_answer = rank
+            break
+    total_results = len(message.results)
+
+    assert rank_right_answer != -1
+    assert rank_right_answer < 0.3 * total_results
+
+@pytest.mark.slow
+def test_ARAXRanker_test13_asset355():
+    """Check 'Fosinopril decreases activity or abundance of ACE': expected answer in the first 30% of results."""
+    expected_answer = 'Fosinopril'
+    preferred_curie = synonymizer.get_canonical_curies(names=expected_answer)[expected_answer]
+    # Match results on the synonymizer's preferred name when it has one, else on the raw name.
+    if preferred_curie is not None:
+        expected_answer = preferred_curie['preferred_name']
+
+    query = { "message": { "query_graph": {
+        "edges": {
+            "t_edge": {
+                "attribute_constraints": [],
+                "knowledge_type": "inferred",
+                "object": "ON",
+                "predicates": [
+                    "biolink:affects"
+                ],
+                "qualifier_constraints": [
+                    {
+                        "qualifier_set": [
+                            {
+                                "qualifier_type_id": "biolink:object_aspect_qualifier",
+                                "qualifier_value": "activity_or_abundance"
+                            },
+                            {
+                                "qualifier_type_id": "biolink:object_direction_qualifier",
+                                "qualifier_value": "decreased"
+                            }
+                        ]
+                    }
+                ],
+                "subject": "SN"
+            }
+        },
+        "nodes": {
+            "ON": {
+                "categories": [
+                    "biolink:Gene"
+                ],
+                "constraints": [],
+                "ids": [
+                    "NCBIGene:1636"
+                ],
+                "set_interpretation": "BATCH"
+            },
+            "SN": {
+                "categories": [
+                    "biolink:ChemicalEntity"
+                ],
+                "constraints": [],
+                "set_interpretation": "BATCH"
+            }
+        }
+    } } }
+    araxq = ARAXQuery()
+    araxq.query(query)
+    response = araxq.response
+    assert response.status == 'OK'
+    message = response.envelope.message
+
+    # returned_message = _ranker_tester(response_id='248160')
+    # 1-based position of the expected answer in the results; -1 means it was not returned.
+    rank_right_answer = -1
+    for rank, result in enumerate(message.results, start=1):
+        if result.essence.lower() == expected_answer.lower():
+            rank_right_answer = rank
+            break
+    total_results = len(message.results)
+
+    assert rank_right_answer != -1
+    assert rank_right_answer < 0.3 * total_results
+
+@pytest.mark.slow
+def test_ARAXRanker_test13_asset360():
+    """Check 'Trandolapril decreases activity or abundance of ACE': expected answer in the first 30% of results."""
+    expected_answer = 'Trandolapril'
+    preferred_curie = synonymizer.get_canonical_curies(names=expected_answer)[expected_answer]
+    # Match results on the synonymizer's preferred name when it has one, else on the raw name.
+    if preferred_curie is not None:
+        expected_answer = preferred_curie['preferred_name']
+
+    query = { "message": { "query_graph": {
+        "edges": {
+            "t_edge": {
+                "attribute_constraints": [],
+                "knowledge_type": "inferred",
+                "object": "ON",
+                "predicates": [
+                    "biolink:affects"
+                ],
+                "qualifier_constraints": [
+                    {
+                        "qualifier_set": [
+                            {
+                                "qualifier_type_id": "biolink:object_aspect_qualifier",
+                                "qualifier_value": "activity_or_abundance"
+                            },
+                            {
+                                "qualifier_type_id": "biolink:object_direction_qualifier",
+                                "qualifier_value": "decreased"
+                            }
+                        ]
+                    }
+                ],
+                "subject": "SN"
+            }
+        },
+        "nodes": {
+            "ON": {
+                "categories": [
+                    "biolink:Gene"
+                ],
+                "constraints": [],
+                "ids": [
+                    "NCBIGene:1636"
+                ],
+                "set_interpretation": "BATCH"
+            },
+            "SN": {
+                "categories": [
+                    "biolink:ChemicalEntity"
+                ],
+                "constraints": [],
+                "set_interpretation": "BATCH"
+            }
+        }
+    } } }
+    araxq = ARAXQuery()
+    araxq.query(query)
+    response = araxq.response
+    assert response.status == 'OK'
+    message = response.envelope.message
+
+    # returned_message = _ranker_tester(response_id='248160')
+    # 1-based position of the expected answer in the results; -1 means it was not returned.
+    rank_right_answer = -1
+    for rank, result in enumerate(message.results, start=1):
+        if result.essence.lower() == expected_answer.lower():
+            rank_right_answer = rank
+            break
+    total_results = len(message.results)
+
+    assert rank_right_answer != -1
+    assert rank_right_answer < 0.3 * total_results
+
+@pytest.mark.slow
+def test_ARAXRanker_test13_asset361():
+    """Check 'Moexipril decreases activity or abundance of ACE': expected answer in the first 30% of results."""
+    expected_answer = 'Moexipril'
+    preferred_curie = synonymizer.get_canonical_curies(names=expected_answer)[expected_answer]
+    # Match results on the synonymizer's preferred name when it has one, else on the raw name.
+    if preferred_curie is not None:
+        expected_answer = preferred_curie['preferred_name']
+
+    query = { "message": { "query_graph": {
+        "edges": {
+            "t_edge": {
+                "attribute_constraints": [],
+                "knowledge_type": "inferred",
+                "object": "ON",
+                "predicates": [
+                    "biolink:affects"
+                ],
+                "qualifier_constraints": [
+                    {
+                        "qualifier_set": [
+                            {
+                                "qualifier_type_id": "biolink:object_aspect_qualifier",
+                                "qualifier_value": "activity_or_abundance"
+                            },
+                            {
+                                "qualifier_type_id": "biolink:object_direction_qualifier",
+                                "qualifier_value": "decreased"
+                            }
+                        ]
+                    }
+                ],
+                "subject": "SN"
+            }
+        },
+        "nodes": {
+            "ON": {
+                "categories": [
+                    "biolink:Gene"
+                ],
+                "constraints": [],
+                "ids": [
+                    "NCBIGene:1636"
+                ],
+                "set_interpretation": "BATCH"
+            },
+            "SN": {
+                "categories": [
+                    "biolink:ChemicalEntity"
+                ],
+                "constraints": [],
+                "set_interpretation": "BATCH"
+            }
+        }
+    } } }
+    araxq = ARAXQuery()
+    araxq.query(query)
+    response = araxq.response
+    assert response.status == 'OK'
+    message = response.envelope.message
+
+    # returned_message = _ranker_tester(response_id='248160')
+    # 1-based position of the expected answer in the results; -1 means it was not returned.
+    rank_right_answer = -1
+    for rank, result in enumerate(message.results, start=1):
+        if result.essence.lower() == expected_answer.lower():
+            rank_right_answer = rank
+            break
+    total_results = len(message.results)
+
+    assert rank_right_answer != -1
+    assert rank_right_answer < 0.3 * total_results
+
+@pytest.mark.slow
+def test_ARAXRanker_test21_asset338():
+    """Check 'canagliflozin decreases activity or abundance of SLC5A2 (human)': expected answer in the first 30% of results."""
+    expected_answer = 'canagliflozin'
+    preferred_curie = synonymizer.get_canonical_curies(names=expected_answer)[expected_answer]
+    # Match results on the synonymizer's preferred name when it has one, else on the raw name.
+    if preferred_curie is not None:
+        expected_answer = preferred_curie['preferred_name']
+
+    query = { "message": { "query_graph": {
+        "edges": {
+            "t_edge": {
+                "attribute_constraints": [],
+                "knowledge_type": "inferred",
+                "object": "ON",
+                "predicates": [
+                    "biolink:affects"
+                ],
+                "qualifier_constraints": [
+                    {
+                        "qualifier_set": [
+                            {
+                                "qualifier_type_id": "biolink:object_aspect_qualifier",
+                                "qualifier_value": "activity_or_abundance"
+                            },
+                            {
+                                "qualifier_type_id": "biolink:object_direction_qualifier",
+                                "qualifier_value": "decreased"
+                            }
+                        ]
+                    }
+                ],
+                "subject": "SN"
+            }
+        },
+        "nodes": {
+            "ON": {
+                "categories": [
+                    "biolink:Gene"
+                ],
+                "constraints": [],
+                "ids": [
+                    "NCBIGene:6524"
+                ],
+                "set_interpretation": "BATCH"
+            },
+            "SN": {
+                "categories": [
+                    "biolink:ChemicalEntity"
+                ],
+                "constraints": [],
+                "set_interpretation": "BATCH"
+            }
+        }
+    } } }
+    araxq = ARAXQuery()
+    araxq.query(query)
+    response = araxq.response
+    assert response.status == 'OK'
+    message = response.envelope.message
+
+    # returned_message = _ranker_tester(response_id='248191')
+    # 1-based position of the expected answer in the results; -1 means it was not returned.
+    rank_right_answer = -1
+    for rank, result in enumerate(message.results, start=1):
+        if result.essence.lower() == expected_answer.lower():
+            rank_right_answer = rank
+            break
+    total_results = len(message.results)
+
+    assert rank_right_answer != -1
+    assert rank_right_answer < 0.3 * total_results
+
+@pytest.mark.slow
+def test_ARAXRanker_test23_asset381():
+    """Check 'atenolol decreases activity or abundance of ADRB2': expected answer in the first 30% of results."""
+    expected_answer = 'atenolol'
+    preferred_curie = synonymizer.get_canonical_curies(names=expected_answer)[expected_answer]
+    # Match results on the synonymizer's preferred name when it has one, else on the raw name.
+    if preferred_curie is not None:
+        expected_answer = preferred_curie['preferred_name']
+
+    query = { "message": { "query_graph": {
+        "edges": {
+            "t_edge": {
+                "attribute_constraints": [],
+                "knowledge_type": "inferred",
+                "object": "ON",
+                "predicates": [
+                    "biolink:affects"
+                ],
+                "qualifier_constraints": [
+                    {
+                        "qualifier_set": [
+                            {
+                                "qualifier_type_id": "biolink:object_aspect_qualifier",
+                                "qualifier_value": "activity_or_abundance"
+                            },
+                            {
+                                "qualifier_type_id": "biolink:object_direction_qualifier",
+                                "qualifier_value": "decreased"
+                            }
+                        ]
+                    }
+                ],
+                "subject": "SN"
+            }
+        },
+        "nodes": {
+            "ON": {
+                "categories": [
+                    "biolink:Gene"
+                ],
+                "constraints": [],
+                "ids": [
+                    "NCBIGene:154"
+                ],
+                "set_interpretation": "BATCH"
+            },
+            "SN": {
+                "categories": [
+                    "biolink:ChemicalEntity"
+                ],
+                "constraints": [],
+                "set_interpretation": "BATCH"
+            }
+        }
+    } } }
+    araxq = ARAXQuery()
+    araxq.query(query)
+    response = araxq.response
+    assert response.status == 'OK'
+    message = response.envelope.message
+
+    # returned_message = _ranker_tester(response_id='248199')
+    # 1-based position of the expected answer in the results; -1 means it was not returned.
+    rank_right_answer = -1
+    for rank, result in enumerate(message.results, start=1):
+        if result.essence.lower() == expected_answer.lower():
+            rank_right_answer = rank
+            break
+    total_results = len(message.results)
+
+    assert rank_right_answer != -1
+    assert rank_right_answer < 0.3 * total_results
+
+
+@pytest.mark.slow
+def test_ARAXRanker_test23_asset378():
+    """Check 'propranolol decreases activity or abundance of ADRB2': expected answer in the first 30% of results."""
+    expected_answer = 'propranolol'
+    preferred_curie = synonymizer.get_canonical_curies(names=expected_answer)[expected_answer]
+    # Match results on the synonymizer's preferred name when it has one, else on the raw name.
+    if preferred_curie is not None:
+        expected_answer = preferred_curie['preferred_name']
+
+    query = { "message": { "query_graph": {
+        "edges": {
+            "t_edge": {
+                "attribute_constraints": [],
+                "knowledge_type": "inferred",
+                "object": "ON",
+                "predicates": [
+                    "biolink:affects"
+                ],
+                "qualifier_constraints": [
+                    {
+                        "qualifier_set": [
+                            {
+                                "qualifier_type_id": "biolink:object_aspect_qualifier",
+                                "qualifier_value": "activity_or_abundance"
+                            },
+                            {
+                                "qualifier_type_id": "biolink:object_direction_qualifier",
+                                "qualifier_value": "decreased"
+                            }
+                        ]
+                    }
+                ],
+                "subject": "SN"
+            }
+        },
+        "nodes": {
+            "ON": {
+                "categories": [
+                    "biolink:Gene"
+                ],
+                "constraints": [],
+                "ids": [
+                    "NCBIGene:154"
+                ],
+                "set_interpretation": "BATCH"
+            },
+            "SN": {
+                "categories": [
+                    "biolink:ChemicalEntity"
+                ],
+                "constraints": [],
+                "set_interpretation": "BATCH"
+            }
+        }
+    } } }
+    araxq = ARAXQuery()
+    araxq.query(query)
+    response = araxq.response
+    assert response.status == 'OK'
+    message = response.envelope.message
+
+    # returned_message = _ranker_tester(response_id='248199')
+    # 1-based position of the expected answer in the results; -1 means it was not returned.
+    rank_right_answer = -1
+    for rank, result in enumerate(message.results, start=1):
+        if result.essence.lower() == expected_answer.lower():
+            rank_right_answer = rank
+            break
+    total_results = len(message.results)
+
+    assert rank_right_answer != -1
+    assert rank_right_answer < 0.3 * total_results
+
+
+if __name__ == "__main__":
+    pytest.main(['-v'])  # executed as a script: hand control to pytest in verbose mode
diff --git a/code/code-archive/old-arax-tests/test_ARAX_resultify.py b/code/code-archive/old-arax-tests/test_ARAX_resultify.py
new file mode 100644
index 000000000..90800839b
--- /dev/null
+++ b/code/code-archive/old-arax-tests/test_ARAX_resultify.py
@@ -0,0 +1,1689 @@
+#!/usr/bin/env python3
+# Usage:  python3 test_ARAX_resultify.py
+#         python3 test_ARAX_resultify.py test_issue692
+
+import os
+import sys
+import pytest
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../ARAXQuery")
+from ARAX_response import ARAXResponse
+from ARAX_messenger import ARAXMessenger
+from ARAX_expander import ARAXExpander
+from typing import List, Dict, Tuple, Set, Iterable
+import ARAX_resultify
+from ARAX_resultify import ARAXResultify
+from ARAX_query import ARAXQuery
+
+# is there a better way to import openapi_server? Following SO posting 16981921
+PACKAGE_PARENT = '../../UI/OpenAPI/python-flask-server'
+sys.path.append(os.path.normpath(os.path.join(os.getcwd(), PACKAGE_PARENT)))
+from openapi_server.models.edge import Edge
+from openapi_server.models.node import Node
+from openapi_server.models.q_edge import QEdge
+from openapi_server.models.q_node import QNode
+from openapi_server.models.query_graph import QueryGraph
+from openapi_server.models.knowledge_graph import KnowledgeGraph
+from openapi_server.models.result import Result
+from openapi_server.models.message import Message
+from openapi_server.models.retrieval_source import RetrievalSource
+
+DIABETES_CURIE = "MONDO:0005015"
+TYPE_1_DIABETES_CURIE = "MONDO:0005147"
+INSULIN_CURIE = "CHEBI:5931"
+HEART_DISEASE_CURIE = "MONDO:0005267"
+
+
+def _slim_kg(kg: KnowledgeGraph) -> KnowledgeGraph:
+    """Return a new KnowledgeGraph whose nodes and edges carry only their core fields and QG keys."""
+    nodes = {}
+    for key, full_node in kg.nodes.items():
+        nodes[key] = Node(categories=full_node.categories, name=full_node.name, qnode_keys=full_node.qnode_keys)
+    edges = {}
+    for key, full_edge in kg.edges.items():
+        edges[key] = Edge(subject=full_edge.subject, object=full_edge.object, predicate=full_edge.predicate, qedge_keys=full_edge.qedge_keys)
+    return KnowledgeGraph(nodes=nodes, edges=edges)
+
+
+def _create_nodes(kg_node_info: Iterable[Dict[str, any]]) -> Dict[str, Node]:
+    """Build KG nodes, keyed by each record's ``node_key``, from plain dict descriptions."""
+    created = {}
+    for info in kg_node_info:
+        kg_node_obj = Node(categories=info.get("categories"), name=info.get("name"))
+        kg_node_obj.qnode_keys = info["qnode_keys"]  # attached after construction, not passed to Node()
+        created[info["node_key"]] = kg_node_obj
+    return created
+
+
+def _create_edges(kg_edge_info: Iterable[Dict[str, any]]) -> Dict[str, Edge]:
+    """Build KG edges, keyed by each record's ``edge_key``, from plain dict descriptions."""
+    created = {}
+    for info in kg_edge_info:
+        # The predicate falls back to related_to; every edge gets its own ARAX aggregator source
+        kg_edge_obj = Edge(subject=info["subject"], object=info["object"],
+                           predicate=info.get("predicate", "biolink:related_to"),
+                           sources=[RetrievalSource(resource_id="infores:arax", resource_role="aggregator_knowledge_source")])
+        kg_edge_obj.qedge_keys = info["qedge_keys"]
+        created[info["edge_key"]] = kg_edge_obj
+    return created
+
+
+def _create_qnodes(qg_node_info: Iterable[Dict[str, any]]) -> Dict[str, QNode]:
+    # One QNode per record, keyed by its "node_key"; only categories and is_set are carried over
+    return dict((info["node_key"], QNode(categories=info['categories'], is_set=info['is_set'])) for info in qg_node_info)
+
+
+def _create_qedges(qg_edge_info: Iterable[Dict[str, any]]) -> Dict[str, QEdge]:
+    # One QEdge per record, keyed by its "edge_key"; only subject and object are carried over
+    return dict((info["edge_key"], QEdge(subject=info['subject'], object=info['object'])) for info in qg_edge_info)
+
+
+def _print_results_for_debug(message: Message):  # debug aid: print each result's bindings, then draw the QG
+    print()
+    qg = message.query_graph
+    kg = message.knowledge_graph
+    for result in message.results:
+        print(result.essence)
+        for qnode_key, node_bindings_list in result.node_bindings.items():
+            qnode = qg.nodes[qnode_key]
+            print(f" qnode {qnode_key}{f' (option group {qnode.option_group_id})' if qnode.option_group_id else ''}:")
+            for node_binding in node_bindings_list:
+                print(f" {node_binding.id} {kg.nodes[node_binding.id].name}")
+        for qedge_key, edge_bindings_list in result.analyses[0].edge_bindings.items():  # first analysis only
+            qedge = qg.edges[qedge_key]
+            print(f" qedge {qedge_key}{f' (option group {qedge.option_group_id})' if qedge.option_group_id else ''}:")
+            for edge_binding in edge_bindings_list:
+                print(f" {edge_binding.id}")
+    # Display the query graph: one graphviz node per qnode and one labelled edge per qedge
+    import graphviz  # local import, so graphviz is only required when this helper actually runs
+    dot = graphviz.Digraph(comment='QG')
+    for qnode_key, qnode in qg.nodes.items():
+        node_id_line = f"{qnode_key}{f' (group {qnode.option_group_id})' if qnode.option_group_id else ''}"
+        if qnode.ids:  # prefer showing the pinned ids, then the categories, else nothing
+            node_details_line = ", ".join(qnode.ids)
+        elif qnode.categories:
+            node_details_line = ", ".join(qnode.categories)
+        else:
+            node_details_line = ""
+        dot.node(qnode_key, f"{node_id_line}\n{node_details_line}")
+    for qedge_key, qedge in qg.edges.items():
+        dot.edge(qedge.subject,
+                 qedge.object,
+                 label=f"{qedge_key}{f' (NOT)' if qedge.exclude else ''}{f' (group {qedge.option_group_id})' if qedge.option_group_id else ''}\n{', '.join(qedge.predicates) if qedge.predicates else ''}")
+    dot.render("qg.gv", view=True)  # view=True asks graphviz to open the rendered file
+
+
+def _get_result_node_keys_by_qg_key(result: Result) -> Dict[str, Set[str]]:
+    return {qnode_key: {binding.id for binding in bindings} for qnode_key, bindings in result.node_bindings.items()}  # qnode key -> bound node ids
+
+
+def _get_result_edge_keys_by_qg_key(result: Result) -> Dict[str, Set[str]]:
+    return {qedge_key: {binding.id for binding in bindings} for qedge_key, bindings in result.analyses[0].edge_bindings.items()}  # first analysis only
+
+
+def _do_arax_query(actions_list: List[str], debug=False, enforce_connected=True) -> Tuple[ARAXResponse, Message]:
+    """Run the given actions through ARAXQuery and return ``(response, message)``.
+
+    The response log is printed when ``debug`` is set (along with the results) or the status is not 'OK'.
+    With ``enforce_connected``, asserts that each result's bound nodes are exactly the endpoints of its edges.
+    """
+    araxq = ARAXQuery()
+    response = araxq.query({"operations": {"actions": actions_list}})
+    message = araxq.message
+    if debug:
+        _print_results_for_debug(message)
+    if debug or response.status != 'OK':
+        print(response.show(level=response.DEBUG))
+
+    if enforce_connected:
+        for result in message.results:
+            bound_edge_keys = {binding.id
+                               for bindings in result.analyses[0].edge_bindings.values()
+                               for binding in bindings}
+            if not bound_edge_keys:
+                continue  # single-node query: there are no edges to check against
+            # Collect both endpoints of every bound edge
+            bound_edges = [message.knowledge_graph.edges[edge_key] for edge_key in bound_edge_keys]
+            endpoint_node_keys = {edge.subject for edge in bound_edges} | {edge.object for edge in bound_edges}
+            # Every bound node must touch a bound edge (catches subclass nodes that only point to their
+            # parent, since no subclass qedge survives into the results) and every endpoint must be bound
+            bound_node_keys = {binding.id
+                               for bindings in result.node_bindings.values()
+                               for binding in bindings}
+            assert bound_node_keys == endpoint_node_keys
+
+    return response, message
+
+
+def _run_resultify_directly(query_graph: QueryGraph,
+                            knowledge_graph: KnowledgeGraph,
+                            ignore_edge_direction=True,
+                            debug=False) -> Tuple[ARAXResponse, Message]:  # runs resultify on a hand-built QG/KG
+    response = ARAXResponse()
+    messenger = ARAXMessenger()
+    messenger.create_envelope(response)  # the message is attached to response.envelope further down
+    from actions_parser import ActionsParser  # local import, only needed by this helper
+    actions_parser = ActionsParser()
+    actions_list = [f"resultify(ignore_edge_direction={ignore_edge_direction})"]
+    result = actions_parser.parse(actions_list)
+    response.merge(result)
+    actions = result.data['actions']
+    assert result.status == 'OK'  # NOTE(review): checked only after result.data has already been read
+    resultifier = ARAXResultify()
+    message_original = Message(query_graph=query_graph,
+                               knowledge_graph=knowledge_graph,
+                               results=[])
+    message = ARAXMessenger().from_dict(message_original.to_dict())  # round-trip through a dict
+    # qnode_keys/qedge_keys are lost when grabbing message from_dict() - so we add them back
+    for node_key, node in message_original.knowledge_graph.nodes.items():
+        message.knowledge_graph.nodes[node_key].qnode_keys = node.qnode_keys
+    for edge_key, edge in message_original.knowledge_graph.edges.items():
+        message.knowledge_graph.edges[edge_key].qedge_keys = edge.qedge_keys
+    response.envelope.message = message
+    parameters = actions[0]['parameters']
+    parameters['debug'] = 'true'  # resultify always runs in debug mode here, whatever this helper's debug flag
+    resultifier.apply(response, parameters)
+    if response.status != 'OK':  # on failure show the log; the debug flag only adds the results dump
+        if debug:
+            _print_results_for_debug(message)
+        print(response.show(level=response.DEBUG))
+    return response, message
+
+
+def _convert_shorthand_to_qg(shorthand_qnodes: Dict[str, str], shorthand_qedges: Dict[str, str]) -> QueryGraph:
+    """Expand shorthand dicts into a QueryGraph.
+
+    ``shorthand_qnodes`` maps qnode key -> flag string (any non-empty string means ``is_set=True``);
+    ``shorthand_qedges`` maps qedge key -> ``"<subject qnode key>--<object qnode key>"``.
+    """
+    qnodes = {key: QNode(is_set=bool(flag)) for key, flag in shorthand_qnodes.items()}
+    qedges = {key: QEdge(subject=ends.split("--")[0], object=ends.split("--")[1]) for key, ends in shorthand_qedges.items()}
+    return QueryGraph(nodes=qnodes, edges=qedges)
+
+
+def _convert_shorthand_to_kg(shorthand_nodes: Dict[str, List[str]],
+                             shorthand_edges: Dict[str, List[str]]) -> KnowledgeGraph:
+    """Expand shorthand node/edge dicts (keyed by qnode/qedge key) into a KnowledgeGraph."""
+    kg_nodes = {}
+    for qnode_key, node_keys in shorthand_nodes.items():
+        for node_key in node_keys:
+            kg_node = kg_nodes.get(node_key, Node())
+            if not hasattr(kg_node, "qnode_keys"):
+                kg_node.qnode_keys = []
+            kg_node.qnode_keys.append(qnode_key)
+            kg_nodes[node_key] = kg_node
+    kg_edges = {}
+    for qedge_key, edge_strings in shorthand_edges.items():
+        for edge_string in edge_strings:
+            # NOTE(review): looked up by the bare "<subject>--<object>" string but stored under a prefixed key
+            default_edge = Edge(subject=edge_string.split("--")[0], object=edge_string.split("--")[1],
+                                predicate="biolink:related_to",
+                                sources=[RetrievalSource(resource_id="infores:arax",
+                                                         resource_role="aggregator_knowledge_source")])
+            kg_edge = kg_edges.get(edge_string, default_edge)
+            if not hasattr(kg_edge, "qedge_keys"):
+                kg_edge.qedge_keys = []
+            kg_edge.qedge_keys.append(qedge_key)
+            kg_edges[f"{qedge_key}:{edge_string}"] = kg_edge
+    return KnowledgeGraph(nodes=kg_nodes, edges=kg_edges)
+
+
+def _get_kg_edge_keys_using_node(node_key: str, kg: KnowledgeGraph) -> Set[str]:
+    return {key for key, kg_edge in kg.edges.items() if node_key in (kg_edge.subject, kg_edge.object)}  # edges touching node_key at either end
+
+
+def test01():  # two proteins -- one disease -- three phenotypes, calling the results builder directly
+    kg_node_info = ({'node_key': 'UniProtKB:12345',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},
+                    {'node_key': 'UniProtKB:23456',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},
+                    {'node_key': 'DOID:12345',
+                     'categories': 'disease',
+                     'qnode_keys': ['DOID:12345']},  # the disease qnode's key is the CURIE itself
+                    {'node_key': 'HP:56789',
+                     'categories': 'phenotypic_feature',
+                     'qnode_keys': ['n02']},
+                    {'node_key': 'HP:67890',
+                     'categories': 'phenotypic_feature',
+                     'qnode_keys': ['n02']},
+                    {'node_key': 'HP:34567',
+                     'categories': 'phenotypic_feature',
+                     'qnode_keys': ['n02']})
+
+    kg_edge_info = ({'edge_key': 'ke01',
+                     'subject': 'UniProtKB:12345',
+                     'object': 'DOID:12345',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke02',
+                     'subject': 'UniProtKB:23456',
+                     'object': 'DOID:12345',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke03',
+                     'subject': 'DOID:12345',
+                     'object': 'HP:56789',
+                     'qedge_keys': ['qe02']},
+                    {'edge_key': 'ke04',
+                     'subject': 'DOID:12345',
+                     'object': 'HP:67890',
+                     'qedge_keys': ['qe02']},
+                    {'edge_key': 'ke05',
+                     'subject': 'DOID:12345',
+                     'object': 'HP:34567',
+                     'qedge_keys': ['qe02']})
+
+    kg_nodes = _create_nodes(kg_node_info)
+    kg_edges = _create_edges(kg_edge_info)
+
+    knowledge_graph = KnowledgeGraph(kg_nodes, kg_edges)
+
+    qg_node_info = ({'node_key': 'n01',
+                     'categories': 'protein',
+                     'is_set': False},
+                    {'node_key': 'DOID:12345',
+                     'categories': 'disease',
+                     'is_set': False},
+                    {'node_key': 'n02',
+                     'categories': 'phenotypic_feature',
+                     'is_set': True})  # only the phenotype qnode is a set
+
+    qg_edge_info = ({'edge_key': 'qe01',
+                     'subject': 'n01',
+                     'object': 'DOID:12345'},
+                    {'edge_key': 'qe02',
+                     'subject': 'DOID:12345',
+                     'object': 'n02'})
+
+    qg_nodes = _create_qnodes(qg_node_info)
+    qg_edges = _create_qedges(qg_edge_info)
+    query_graph = QueryGraph(qg_nodes, qg_edges)
+
+    results_list = ARAX_resultify._get_results_for_kg_by_qg(knowledge_graph,
+                                                            query_graph)
+
+    assert len(results_list) == 2  # presumably one per n01 protein, with the is_set n02 phenotypes grouped
+
+
+def test02():  # same graph as test01, except that n01's is_set is None instead of False
+    kg_node_info = ({'node_key': 'UniProtKB:12345',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},
+                    {'node_key': 'UniProtKB:23456',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},
+                    {'node_key': 'DOID:12345',
+                     'categories': 'disease',
+                     'qnode_keys': ['DOID:12345']},
+                    {'node_key': 'HP:56789',
+                     'categories': 'phenotypic_feature',
+                     'qnode_keys': ['n02']},
+                    {'node_key': 'HP:67890',
+                     'categories': 'phenotypic_feature',
+                     'qnode_keys': ['n02']},
+                    {'node_key': 'HP:34567',
+                     'categories': 'phenotypic_feature',
+                     'qnode_keys': ['n02']})
+
+    kg_edge_info = ({'edge_key': 'ke01',
+                     'subject': 'UniProtKB:12345',
+                     'object': 'DOID:12345',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke02',
+                     'subject': 'UniProtKB:23456',
+                     'object': 'DOID:12345',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke03',
+                     'subject': 'DOID:12345',
+                     'object': 'HP:56789',
+                     'qedge_keys': ['qe02']},
+                    {'edge_key': 'ke04',
+                     'subject': 'DOID:12345',
+                     'object': 'HP:67890',
+                     'qedge_keys': ['qe02']},
+                    {'edge_key': 'ke05',
+                     'subject': 'DOID:12345',
+                     'object': 'HP:34567',
+                     'qedge_keys': ['qe02']})
+
+    kg_nodes = _create_nodes(kg_node_info)
+    kg_edges = _create_edges(kg_edge_info)
+
+    knowledge_graph = KnowledgeGraph(kg_nodes, kg_edges)
+
+    qg_node_info = ({'node_key': 'n01',
+                     'categories': 'protein',
+                     'is_set': None},  # unset is_set; the expected result count matches the is_set=False case
+                    {'node_key': 'DOID:12345',
+                     'categories': 'disease',
+                     'is_set': False},
+                    {'node_key': 'n02',
+                     'categories': 'phenotypic_feature',
+                     'is_set': True})
+
+    qg_edge_info = ({'edge_key': 'qe01',
+                     'subject': 'n01',
+                     'object': 'DOID:12345'},
+                    {'edge_key': 'qe02',
+                     'subject': 'DOID:12345',
+                     'object': 'n02'})
+
+    qg_nodes = _create_qnodes(qg_node_info)
+    qg_edges = _create_qedges(qg_edge_info)
+    query_graph = QueryGraph(qg_nodes, qg_edges)
+
+    results_list = ARAX_resultify._get_results_for_kg_by_qg(knowledge_graph,
+                                                            query_graph)
+    assert len(results_list) == 2  # presumably one result per n01 protein
+
+
+def test03():  # like test02, but one KG edge is reversed and edge direction is explicitly ignored
+    kg_node_info = ({'node_key': 'UniProtKB:12345',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},
+                    {'node_key': 'UniProtKB:23456',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},
+                    {'node_key': 'DOID:12345',
+                     'categories': 'disease',
+                     'qnode_keys': ['DOID:12345']},
+                    {'node_key': 'HP:56789',
+                     'categories': 'phenotypic_feature',
+                     'qnode_keys': ['n02']},
+                    {'node_key': 'HP:67890',
+                     'categories': 'phenotypic_feature',
+                     'qnode_keys': ['n02']},
+                    {'node_key': 'HP:34567',
+                     'categories': 'phenotypic_feature',
+                     'qnode_keys': ['n02']})
+
+    kg_edge_info = ({'edge_key': 'ke01',
+                     'subject': 'DOID:12345',
+                     'object': 'UniProtKB:12345',
+                     'qedge_keys': ['qe01']},  # runs disease -> protein, the reverse of qe01 (n01 -> DOID:12345)
+                    {'edge_key': 'ke02',
+                     'subject': 'UniProtKB:23456',
+                     'object': 'DOID:12345',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke03',
+                     'subject': 'DOID:12345',
+                     'object': 'HP:56789',
+                     'qedge_keys': ['qe02']},
+                    {'edge_key': 'ke04',
+                     'subject': 'DOID:12345',
+                     'object': 'HP:67890',
+                     'qedge_keys': ['qe02']},
+                    {'edge_key': 'ke05',
+                     'subject': 'DOID:12345',
+                     'object': 'HP:34567',
+                     'qedge_keys': ['qe02']})
+
+    kg_nodes = _create_nodes(kg_node_info)
+    kg_edges = _create_edges(kg_edge_info)
+
+    knowledge_graph = KnowledgeGraph(kg_nodes, kg_edges)
+
+    qg_node_info = ({'node_key': 'n01',
+                     'categories': 'protein',
+                     'is_set': None},
+                    {'node_key': 'DOID:12345',
+                     'categories': 'disease',
+                     'is_set': False},
+                    {'node_key': 'n02',
+                     'categories': 'phenotypic_feature',
+                     'is_set': True})
+
+    qg_edge_info = ({'edge_key': 'qe01',
+                     'subject': 'n01',
+                     'object': 'DOID:12345'},
+                    {'edge_key': 'qe02',
+                     'subject': 'DOID:12345',
+                     'object': 'n02'})
+
+    qg_nodes = _create_qnodes(qg_node_info)
+    qg_edges = _create_qedges(qg_edge_info)
+    query_graph = QueryGraph(qg_nodes, qg_edges)
+
+    results_list = ARAX_resultify._get_results_for_kg_by_qg(knowledge_graph,
+                                                            query_graph,
+                                                            ignore_edge_direction=True)
+    assert len(results_list) == 2  # the reversed ke01 must still count, so both proteins give a result
+
+
+def test04():  # chemicals and a disease both point at a set of proteins; results builder called directly
+    kg_node_info = ({'node_key': 'UniProtKB:12345',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},
+                    {'node_key': 'UniProtKB:23456',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},
+                    {'node_key': 'DOID:12345',
+                     'categories': 'disease',
+                     'qnode_keys': ['DOID:12345']},
+                    {'node_key': 'UniProtKB:56789',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},  # bound to n01 but not used by any edge below
+                    {'node_key': 'ChEMBL.COMPOUND:12345',
+                     'categories': 'chemical_substance',
+                     'qnode_keys': ['n02']},
+                    {'node_key': 'ChEMBL.COMPOUND:23456',
+                     'categories': 'chemical_substance',
+                     'qnode_keys': ['n02']})
+
+    kg_edge_info = ({'edge_key': 'ke01',
+                     'subject': 'ChEMBL.COMPOUND:12345',
+                     'object': 'UniProtKB:12345',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke02',
+                     'subject': 'ChEMBL.COMPOUND:12345',
+                     'object': 'UniProtKB:23456',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke03',
+                     'subject': 'ChEMBL.COMPOUND:23456',
+                     'object': 'UniProtKB:12345',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke04',
+                     'subject': 'ChEMBL.COMPOUND:23456',
+                     'object': 'UniProtKB:23456',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke05',
+                     'subject': 'DOID:12345',
+                     'object': 'UniProtKB:12345',
+                     'qedge_keys': ['qe02']},
+                    {'edge_key': 'ke06',
+                     'subject': 'DOID:12345',
+                     'object': 'UniProtKB:23456',
+                     'qedge_keys': ['qe02']})
+
+    kg_nodes = _create_nodes(kg_node_info)
+    kg_edges = _create_edges(kg_edge_info)
+
+    knowledge_graph = KnowledgeGraph(kg_nodes, kg_edges)
+
+    qg_node_info = ({'node_key': 'n01',
+                     'categories': 'protein',
+                     'is_set': True},  # the shared protein qnode is the set here
+                    {'node_key': 'DOID:12345',
+                     'categories': 'disease',
+                     'is_set': False},
+                    {'node_key': 'n02',
+                     'categories': 'chemical_substance',
+                     'is_set': False})
+
+    qg_edge_info = ({'edge_key': 'qe01',
+                     'subject': 'n02',
+                     'object': 'n01'},
+                    {'edge_key': 'qe02',
+                     'subject': 'DOID:12345',
+                     'object': 'n01'})
+
+    qg_nodes = _create_qnodes(qg_node_info)
+    qg_edges = _create_qedges(qg_edge_info)
+    query_graph = QueryGraph(qg_nodes, qg_edges)
+
+    results_list = ARAX_resultify._get_results_for_kg_by_qg(knowledge_graph,
+                                                            query_graph,
+                                                            ignore_edge_direction=True)
+    assert len(results_list) == 2  # presumably one result per n02 chemical
+
+
+def test05():  # same fixture as test04, but driven through _run_resultify_directly
+    kg_node_info = ({'node_key': 'UniProtKB:12345',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},
+                    {'node_key': 'UniProtKB:23456',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},
+                    {'node_key': 'DOID:12345',
+                     'categories': 'disease',
+                     'qnode_keys': ['DOID:12345']},
+                    {'node_key': 'UniProtKB:56789',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},  # bound to n01 but not used by any edge below
+                    {'node_key': 'ChEMBL.COMPOUND:12345',
+                     'categories': 'chemical_substance',
+                     'qnode_keys': ['n02']},
+                    {'node_key': 'ChEMBL.COMPOUND:23456',
+                     'categories': 'chemical_substance',
+                     'qnode_keys': ['n02']})
+
+    kg_edge_info = ({'edge_key': 'ke01',
+                     'subject': 'ChEMBL.COMPOUND:12345',
+                     'object': 'UniProtKB:12345',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke02',
+                     'subject': 'ChEMBL.COMPOUND:12345',
+                     'object': 'UniProtKB:23456',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke03',
+                     'subject': 'ChEMBL.COMPOUND:23456',
+                     'object': 'UniProtKB:12345',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke04',
+                     'subject': 'ChEMBL.COMPOUND:23456',
+                     'object': 'UniProtKB:23456',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke05',
+                     'subject': 'DOID:12345',
+                     'object': 'UniProtKB:12345',
+                     'qedge_keys': ['qe02']},
+                    {'edge_key': 'ke06',
+                     'subject': 'DOID:12345',
+                     'object': 'UniProtKB:23456',
+                     'qedge_keys': ['qe02']})
+
+    kg_nodes = _create_nodes(kg_node_info)
+    kg_edges = _create_edges(kg_edge_info)
+    knowledge_graph = KnowledgeGraph(kg_nodes, kg_edges)
+
+    qg_node_info = ({'node_key': 'n01',
+                     'categories': 'protein',
+                     'is_set': True},
+                    {'node_key': 'DOID:12345',
+                     'categories': 'disease',
+                     'is_set': False},
+                    {'node_key': 'n02',
+                     'categories': 'chemical_substance',
+                     'is_set': False})
+
+    qg_edge_info = ({'edge_key': 'qe01',
+                     'subject': 'n02',
+                     'object': 'n01'},
+                    {'edge_key': 'qe02',
+                     'subject': 'DOID:12345',
+                     'object': 'n01'})
+
+    qg_nodes = _create_qnodes(qg_node_info)
+    qg_edges = _create_qedges(qg_edge_info)
+    query_graph = QueryGraph(qg_nodes, qg_edges)
+
+    response, message = _run_resultify_directly(query_graph, knowledge_graph, ignore_edge_direction=True)
+    assert response.status == 'OK'
+    assert len(message.results) == 2  # presumably one result per n02 chemical
+
+
+def test07():  # NOTE(review): same fixture and checks as test05, only the assertion order differs
+    kg_node_info = ({'node_key': 'UniProtKB:12345',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},
+                    {'node_key': 'UniProtKB:23456',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},
+                    {'node_key': 'DOID:12345',
+                     'categories': 'disease',
+                     'qnode_keys': ['DOID:12345']},
+                    {'node_key': 'UniProtKB:56789',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},  # bound to n01 but not used by any edge below
+                    {'node_key': 'ChEMBL.COMPOUND:12345',
+                     'categories': 'chemical_substance',
+                     'qnode_keys': ['n02']},
+                    {'node_key': 'ChEMBL.COMPOUND:23456',
+                     'categories': 'chemical_substance',
+                     'qnode_keys': ['n02']})
+
+    kg_edge_info = ({'edge_key': 'ke01',
+                     'subject': 'ChEMBL.COMPOUND:12345',
+                     'object': 'UniProtKB:12345',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke02',
+                     'subject': 'ChEMBL.COMPOUND:12345',
+                     'object': 'UniProtKB:23456',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke03',
+                     'subject': 'ChEMBL.COMPOUND:23456',
+                     'object': 'UniProtKB:12345',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke04',
+                     'subject': 'ChEMBL.COMPOUND:23456',
+                     'object': 'UniProtKB:23456',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke05',
+                     'subject': 'DOID:12345',
+                     'object': 'UniProtKB:12345',
+                     'qedge_keys': ['qe02']},
+                    {'edge_key': 'ke06',
+                     'subject': 'DOID:12345',
+                     'object': 'UniProtKB:23456',
+                     'qedge_keys': ['qe02']})
+
+    kg_nodes = _create_nodes(kg_node_info)
+    kg_edges = _create_edges(kg_edge_info)
+
+    knowledge_graph = KnowledgeGraph(kg_nodes, kg_edges)
+
+    qg_node_info = ({'node_key': 'n01',
+                     'categories': 'protein',
+                     'is_set': True},
+                    {'node_key': 'DOID:12345',
+                     'categories': 'disease',
+                     'is_set': False},
+                    {'node_key': 'n02',
+                     'categories': 'chemical_substance',
+                     'is_set': False})
+
+    qg_edge_info = ({'edge_key': 'qe01',
+                     'subject': 'n02',
+                     'object': 'n01'},
+                    {'edge_key': 'qe02',
+                     'subject': 'DOID:12345',
+                     'object': 'n01'})
+
+    qg_nodes = _create_qnodes(qg_node_info)
+    qg_edges = _create_qedges(qg_edge_info)
+    query_graph = QueryGraph(qg_nodes, qg_edges)
+
+    response, message = _run_resultify_directly(query_graph, knowledge_graph, ignore_edge_direction=True)
+    assert len(message.results) == 2  # presumably one result per n02 chemical
+    assert response.status == 'OK'
+
+
+def test08():
+    """One disease linked to four phenotypes (no qnode is a set) should give one result per n01 node."""
+    query_graph = _convert_shorthand_to_qg({"n00": "", "n01": ""},
+                                           {"e00": "n00--n01"})
+    phenotype_keys = ["HP:01", "HP:02", "HP:03", "HP:04"]
+    knowledge_graph = _convert_shorthand_to_kg({"n00": ["DOID:731"], "n01": phenotype_keys},
+                                               {"e00": [f"DOID:731--{key}" for key in phenotype_keys]})
+    response, message = _run_resultify_directly(query_graph, knowledge_graph)
+    assert response.status == 'OK'
+    # Count the KG nodes bound to n01 after resultify instead of hard-coding the expected number
+    n01_node_keys = [node_key for node_key, node in message.knowledge_graph.nodes.items() if "n01" in node.qnode_keys]
+    assert message.results
+    assert len(message.results) == len(n01_node_keys)
+
+
+@pytest.mark.slow
+def test09():
+    """A KG2 disease-to-phenotype expansion limited by filter_results should yield exactly 100 results."""
+    response, message = _do_arax_query([
+        "add_qnode(name=DOID:731, key=n00, categories=biolink:Disease, is_set=false)",
+        "add_qnode(categories=biolink:PhenotypicFeature, is_set=false, key=n01)",
+        "add_qedge(subject=n00, object=n01, key=e00)",
+        "expand(edge_key=e00, kp=infores:rtx-kg2)",
+        "resultify(ignore_edge_direction=true, debug=true)",
+        "filter_results(action=limit_number_of_results, max_results=100)",
+        "return(message=true, store=false)"
+    ])
+    assert response.status == 'OK'
+    assert len(message.results) == 100
+
+
+def test10():
+    """describe_me() should expose a description and the ignore_edge_direction parameter."""
+    first_entry = ARAXResultify().describe_me()[0]
+    assert "description" in first_entry
+    assert "ignore_edge_direction" in first_entry["parameters"]
+
+
+@pytest.mark.slow
+def test_example1():
+    """Expanding CHEMBL112 to proteins via KG2 should give one result per qg1 node; the first has an essence."""
+    response, message = _do_arax_query([
+        "add_qnode(key=qg0, ids=CHEMBL.COMPOUND:CHEMBL112)",
+        "add_qnode(key=qg1, categories=biolink:Protein)",
+        "add_qedge(subject=qg1, object=qg0, key=qe0)",
+        "expand(edge_key=qe0, kp=infores:rtx-kg2)",
+        "resultify(ignore_edge_direction=true, debug=true)",
+        "return(message=true, store=false)"
+    ])
+    assert response.status == 'OK'
+    qg1_node_keys = [key for key, node in message.knowledge_graph.nodes.items() if "qg1" in node.qnode_keys]
+    assert message.results and len(message.results) == len(qg1_node_keys)
+    assert message.results[0].essence is not None
+
+
+def test_bfs():
+    """Check BFS hop counts over the QG n01 -- DOID:12345 -- n02 from two different start nodes."""
+    qg_node_info = ({'node_key': 'n01',
+                     'categories': 'protein',
+                     'is_set': None},
+                    {'node_key': 'DOID:12345',
+                     'categories': 'disease',
+                     'is_set': False},
+                    {'node_key': 'n02',
+                     'categories': 'phenotypic_feature',
+                     'is_set': True})
+
+    qg_edge_info = ({'edge_key': 'qe01',
+                     'subject': 'n01',
+                     'object': 'DOID:12345'},
+                    {'edge_key': 'qe02',
+                     'subject': 'DOID:12345',
+                     'object': 'n02'})
+
+    qg = QueryGraph(_create_qnodes(qg_node_info), _create_qedges(qg_edge_info))
+    # Take the 'both' adjacency map of the undirected, loop-free view of the QG
+    adj_map = ARAX_resultify._make_adj_maps(qg, directed=False, droploops=True)['both']
+
+    # Starting at an end of the chain versus starting in its middle
+    assert ARAX_resultify._bfs_dists(adj_map, 'n01') == {'n01': 0, 'DOID:12345': 1, 'n02': 2}
+    assert ARAX_resultify._bfs_dists(adj_map, 'DOID:12345') == {'n01': 1, 'DOID:12345': 0, 'n02': 1}
+
+
+def test_bfs_in_essence_code():  # four-qnode chain n00 - n01 - n02 - n03; checks an essence gets picked
+    kg_node_info = ({'node_key': 'DOID:12345',
+                     'categories': 'disease',
+                     'qnode_keys': ['n00']},
+                    {'node_key': 'UniProtKB:12345',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},
+                    {'node_key': 'UniProtKB:23456',
+                     'categories': 'protein',
+                     'qnode_keys': ['n01']},
+                    {'node_key': 'FOO:12345',
+                     'categories': 'gene',
+                     'qnode_keys': ['n02']},
+                    {'node_key': 'HP:56789',
+                     'categories': 'phenotypic_feature',
+                     'qnode_keys': ['n03']})
+
+    kg_edge_info = ({'edge_key': 'ke01',
+                     'object': 'UniProtKB:12345',
+                     'subject': 'DOID:12345',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke02',
+                     'object': 'UniProtKB:23456',
+                     'subject': 'DOID:12345',
+                     'qedge_keys': ['qe01']},
+                    {'edge_key': 'ke03',
+                     'subject': 'UniProtKB:12345',
+                     'object': 'FOO:12345',
+                     'qedge_keys': ['qe02']},
+                    {'edge_key': 'ke04',
+                     'subject': 'UniProtKB:23456',
+                     'object': 'FOO:12345',
+                     'qedge_keys': ['qe02']},
+                    {'edge_key': 'ke05',
+                     'subject': 'FOO:12345',
+                     'object': 'HP:56789',
+                     'qedge_keys': ['qe03']})
+
+    kg_nodes = _create_nodes(kg_node_info)
+    kg_edges = _create_edges(kg_edge_info)
+
+    knowledge_graph = KnowledgeGraph(kg_nodes, kg_edges)
+
+    qg_node_info = ({'node_key': 'n00', # DOID:12345
+                     'categories': 'disease',
+                     'is_set': False},
+                    {'node_key': 'n01',
+                     'categories': 'protein',
+                     'is_set': False},
+                    {'node_key': 'n02',
+                     'categories': 'gene',
+                     'is_set': False},
+                    {'node_key': 'n03', # HP:56789
+                     'categories': 'phenotypic_feature',
+                     'is_set': False})
+
+    qg_edge_info = ({'edge_key': 'qe01',
+                     'subject': 'n00',
+                     'object': 'n01'},
+                    {'edge_key': 'qe02',
+                     'subject': 'n01',
+                     'object': 'n02'},
+                    {'edge_key': 'qe03',
+                     'subject': 'n02',
+                     'object': 'n03'})
+
+    qg_nodes = _create_qnodes(qg_node_info)
+    qg_edges = _create_qedges(qg_edge_info)
+    query_graph = QueryGraph(qg_nodes, qg_edges)
+
+    results_list = ARAX_resultify._get_results_for_kg_by_qg(knowledge_graph,
+                                                            query_graph)
+    assert len(results_list) == 2  # presumably one per n01 protein, the only qnode with two KG nodes
+    assert results_list[0].essence is not None
+
+
+@pytest.mark.skip
+def test_issue680():
+    # NOTE: Currently failing, seemingly due to an issue with filter_kg... skipping for now
+    actions = [
+        "add_qnode(ids=DOID:14330, key=n00, categories=biolink:Disease)",
+        "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+        "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+        "add_qedge(subject=n01, object=n00, key=e00, predicates=biolink:causes)",
+        "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:physically_interacts_with)",
+        "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+        "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
+        "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=jaccard_index, direction=below, threshold=.2, remove_connected_nodes=t, qnode_keys=[n02])",
+        "resultify(ignore_edge_direction=true, debug=true)",
+        "return(message=true, store=false)",
+    ]
+    response, message = _do_arax_query(actions)
+    assert response.status == 'OK'
+    assert message.results[0].essence is not None
+    kg = message.knowledge_graph
+    for result in message.results:
+        result_nodes_by_qg_id = _get_result_node_keys_by_qg_key(result)  # qnode key -> bound node keys
+        result_edges_by_qg_id = _get_result_edge_keys_by_qg_key(result)  # qedge key -> bound edge keys
+        # Make sure all intermediate nodes are connected to at least one (real, not virtual) edge on BOTH sides
+        for n01_node_key in result_nodes_by_qg_id['n01']:
+            assert any(edge_key for edge_key in result_edges_by_qg_id['e00'] if
+                       kg.edges[edge_key].subject == n01_node_key or kg.edges[edge_key].object == n01_node_key)
+            assert any(edge_key for edge_key in result_edges_by_qg_id['e01'] if
+                       kg.edges[edge_key].subject == n01_node_key or kg.edges[edge_key].object == n01_node_key)
+        # Make sure all edges' nodes actually exist in this result (includes virtual and real edges)
+        for qedge_key, edge_keys in result_edges_by_qg_id.items():
+            qedge = message.query_graph.edges[qedge_key]
+            for edge_key in edge_keys:
+                edge = kg.edges[edge_key]  # the edge may run in either direction relative to its qedge
+                assert (edge.subject in result_nodes_by_qg_id[qedge.subject] and edge.object in
+                        result_nodes_by_qg_id[qedge.object]) or \
+                       (edge.object in result_nodes_by_qg_id[qedge.subject] and edge.subject in
+                        result_nodes_by_qg_id[qedge.object])
+
+
+def test_issue686a():
+    """An invalid parameter name passed to resultify should show up in the response log."""
+    actions = [
+        'add_qnode(key=qg0, ids=CHEMBL.COMPOUND:CHEMBL112)',
+        'expand(kp=infores:rtx-kg2)',
+        'resultify(ignore_edge_direction=true, INVALID_PARAMETER_NAME=true)',
+        "return(message=true, store=false)"
+    ]
+    response, _ = _do_arax_query(actions)
+    assert 'INVALID_PARAMETER_NAME' in response.show()
+
+
+def test_issue686b():
+    """Resultify should succeed when it is called with no parameters at all."""
+    actions = [
+        'add_qnode(key=qg0, ids=CHEMBL.COMPOUND:CHEMBL112)',
+        'expand(kp=infores:rtx-kg2)',
+        'resultify()',
+        "return(message=true, store=false)"
+    ]
+    response, _ = _do_arax_query(actions)
+    assert response.status == 'OK'
+
+
+def test_issue686c():
+    """An invalid ignore_edge_direction value should fail the query and be named in the response log."""
+    actions = [
+        'add_qnode(key=qg0, ids=CHEMBL.COMPOUND:CHEMBL112)',
+        'expand(kp=infores:rtx-kg2)',
+        'resultify(ignore_edge_direction=foo)',
+        "return(message=true, store=false)"
+    ]
+    response, _ = _do_arax_query(actions)
+    assert response.status != 'OK' and 'foo' in response.show()
+
+
+def test_issue687():
+    """Resultify should work without ignore_edge_direction, giving one result per KG node here."""
+    actions = [
+        'add_qnode(key=qg0, ids=CHEMBL.COMPOUND:CHEMBL112)',
+        'expand(kp=infores:rtx-kg2)',
+        'resultify(debug=true)',
+        "return(message=true, store=false)"
+    ]
+    response, message = _do_arax_query(actions)
+    assert response.status == 'OK'
+    assert message.results and len(message.results) == len(message.knowledge_graph.nodes)
+
+
+def test_issue727():
+    """Resultify should match KG edges that run in the opposite direction to their qedge."""
+    query_graph = _convert_shorthand_to_qg({"n00": "", "n01": ""},
+                                           {"e00": "n00--n01"})
+    # The KG edges run PR -> DOID, i.e. n01 -> n00, the reverse of qedge e00
+    kg_nodes_by_qnode = {"n00": ["DOID:111"],
+                         "n01": ["PR:01", "PR:02"]}
+    kg_edges_by_qedge = {"e00": ["PR:01--DOID:111", "PR:02--DOID:111"]}
+    knowledge_graph = _convert_shorthand_to_kg(kg_nodes_by_qnode, kg_edges_by_qedge)
+    response, message = _run_resultify_directly(query_graph, knowledge_graph)
+    assert response.status == 'OK'
+    # Both n01 nodes must still be matched despite the reversed edges
+    assert len(message.results) == 2
+
+
+def test_issue731():
+    """No results should be returned when part of the QG (here n0 and e0) has nothing in the KG."""
+    shorthand_qnodes = {"n0": "", "n1": "is_set", "n2": ""}
+    shorthand_qedges = {"e0": "n0--n1", "e1": "n1--n2"}
+    query_graph = _convert_shorthand_to_qg(shorthand_qnodes, shorthand_qedges)
+    # n0 and e0 are deliberately left empty; only the n1--n2 half of the query is fulfilled
+    kg_nodes_by_qnode = {"n0": [],
+                         "n1": ["UniProtKB:123", "UniProtKB:124"],
+                         "n2": ["DOID:122"]}
+    kg_edges_by_qedge = {"e0": [],
+                         "e1": ["UniProtKB:123--DOID:122", "UniProtKB:124--DOID:122"]}
+    knowledge_graph = _convert_shorthand_to_kg(kg_nodes_by_qnode, kg_edges_by_qedge)
+    response, message = _run_resultify_directly(query_graph, knowledge_graph)
+
+    # An unfulfilled QG is not an error: the status stays OK, there are simply no results
+    assert response.status == 'OK'
+    assert len(message.results) == 0
+
+
+@pytest.mark.slow
+def test_issue731b():  # every result of this two-hop query must carry bindings for e1
+    dsl_commands = [
+        "add_qnode(name=MONDO:0005737, key=n0, categories=biolink:Disease)",
+        "add_qnode(categories=biolink:Protein, key=n1)",
+        "add_qnode(categories=biolink:Disease, key=n2)",
+        "add_qedge(subject=n0, object=n1, key=e0)",
+        "add_qedge(subject=n1, object=n2, key=e1)",
+        "expand(edge_key=[e0,e1], kp=infores:rtx-kg2)",
+        "resultify(debug=true)",
+        "return(message=true, store=false)",
+    ]
+    response, message = _do_arax_query(dsl_commands)
+    assert response.status == 'OK'
+    for result in message.results:
+        e1_bindings = result.analyses[0].edge_bindings.get('e1')
+        assert e1_bindings
+
+
+def test_issue731c():  # no result may bind only one of the two required qedges
+    query_graph = QueryGraph(
+        nodes={'n0': QNode(ids='MONDO:0005737',
+                           categories='biolink:Disease'),
+               'n1': QNode(categories='biolink:Protein'),
+               'n2': QNode(categories='biolink:Disease')},
+        edges={'e0': QEdge(subject='n0', object='n1'),
+               'e1': QEdge(subject='n1', object='n2')},
+    )
+    # KG under test: one node each for n0/n1 and two candidates for n2,
+    # of which only DOID:12297 is connected by an e1 edge (DOID:11077 has no edges)
+    node_specs = (
+        {'node_key': 'MONDO:0005737',
+         'categories': 'disease', 'qnode_keys': ['n0']},
+        {'node_key': 'UniProtKB:Q14943',
+         'categories': 'protein', 'qnode_keys': ['n1']},
+        {'node_key': 'DOID:12297',
+         'categories': 'disease', 'qnode_keys': ['n2']},
+        {'node_key': 'DOID:11077',
+         'categories': 'disease', 'qnode_keys': ['n2']},
+    )
+    edge_specs = (
+        {'edge_key': 'UniProtKB:Q14943--MONDO:0005737',
+         'object': 'MONDO:0005737', 'subject': 'UniProtKB:Q14943',
+         'qedge_keys': ['e0']},
+        {'edge_key': 'DOID:12297--UniProtKB:Q14943',
+         'object': 'UniProtKB:Q14943', 'subject': 'DOID:12297',
+         'qedge_keys': ['e1']},
+    )
+
+    kg = KnowledgeGraph(nodes=_create_nodes(node_specs),
+                        edges=_create_edges(edge_specs))
+
+    results = ARAX_resultify._get_results_for_kg_by_qg(kg, query_graph)
+    # A result with exactly one entry in edge_bindings would be a half-fulfilled QG
+    single_edge_results = [result for result in results if len(result.analyses[0].edge_bindings) == 1]
+    assert len(single_edge_results) == 0
+
+
+def test_issue740():
+    # Self-edges (same node as subject and object) must be handled properly
+    qg = _convert_shorthand_to_qg(
+        {"n00": "", "n01": ""},
+        {"e00": "n00--n01"},
+    )
+    kg = _convert_shorthand_to_kg(
+        {"n00": ["UMLS:C0004572"], "n01": ["HP:01", "HP:02", "UMLS:C0004572"]},  # UMLS:C0004572 is Babesia
+        {"e00": ["UMLS:C0004572--HP:01", "UMLS:C0004572--HP:02", "UMLS:C0004572--UMLS:C0004572"]},
+    )
+    response, message = _run_resultify_directly(qg, kg)
+    assert response.status == 'OK'
+    assert len(message.results) == 3
+
+
+def test_issue692():
+    # An empty KG paired with an empty QG should produce no results
+    empty_kg = KnowledgeGraph(nodes={}, edges={})
+    empty_qg = QueryGraph(nodes={}, edges={})
+    results_list = ARAX_resultify._get_results_for_kg_by_qg(empty_kg,
+                                                            empty_qg)
+    assert len(results_list) == 0
+
+
+def test_issue692b():  # empty QG/KG: the first logged message must explain the lack of results
+    empty_qg = QueryGraph(nodes={}, edges={})
+    empty_kg = KnowledgeGraph(nodes={}, edges={})
+    response, _message = _run_resultify_directly(empty_qg, empty_kg)
+    assert 'no results returned; empty knowledge graph' in response.messages_list()[0]['message']
+
+
+@pytest.mark.slow
+def test_issue720_1():
+    # Make sure pinned node can't be returned for unpinned node in same result
+    actions = [
+        "add_qnode(ids=DOID:14330, key=n00)",
+        "add_qnode(categories=biolink:Protein, ids=[UniProtKB:Q02878, UniProtKB:Q9BXM7], is_set=true, key=n01)",
+        "add_qnode(categories=biolink:Disease, key=n02)",
+        "add_qedge(subject=n00, object=n01, key=e00)",
+        "add_qedge(subject=n01, object=n02, key=e01)",
+        "expand(kp=infores:rtx-kg2)",
+        "resultify(debug=true)",
+        "return(message=true, store=false)"
+    ]
+    response, message = _do_arax_query(actions)
+    assert response.status == 'OK'  # checked first so a failed query is reported as such, not as an error below
+    n02_nodes_in_kg = [node for node in message.knowledge_graph.nodes.values() if "n02" in node.qnode_keys]
+    assert message.results and len(message.results) >= len(n02_nodes_in_kg)  # at least one result per n02 node
+    for result in message.results:
+        n02s = {node_binding.id for node_binding in result.node_bindings["n02"]}
+        assert "DOID:14330" not in n02s  # the pinned n00 curie must never fill the unpinned n02 slot
+
+
+@pytest.mark.slow
+def test_issue720_2():
+    # The same KG node may fulfil different unpinned qnodes (n01 and n03 here) within one result
+    dsl_commands = [
+        "add_qnode(key=n00, ids=DOID:14330)",  # parkinson's
+        "add_qnode(key=n01, categories=biolink:Protein)",
+        "add_qnode(key=n02, categories=biolink:ChemicalEntity, ids=CHEMBL.COMPOUND:CHEMBL1489)",
+        "add_qnode(key=n03, categories=biolink:Protein)",
+        "add_qedge(key=e00, subject=n01, object=n00, predicates=biolink:causes)",
+        "add_qedge(key=e01, subject=n01, object=n02, predicates=biolink:interacts_with)",
+        "add_qedge(key=e02, subject=n02, object=n03, predicates=biolink:interacts_with)",
+        "expand(kp=infores:rtx-kg2)",
+        "resultify(debug=true)",
+        "return(message=true, store=false)",
+    ]
+    response, _message = _do_arax_query(dsl_commands)
+    assert response.status == 'OK'
+
+
+def test_issue833_extraneous_intermediate_nodes():
+    # Results must not contain intermediate nodes that lack a bound edge on either side
+    qg = _convert_shorthand_to_qg(
+        {"n00": "",
+         "n01": "is_set",
+         "n02": "is_set",
+         "n03": ""},
+        {"e00": "n00--n01", "e01": "n01--n02",
+         "e02": "n02--n03"},
+    )
+    kg = _convert_shorthand_to_kg(
+        {"n00": ["DOID:1056"],
+         "n01": ["UniProtKB:111", "UniProtKB:222"],
+         "n02": ["MONDO:111", "MONDO:222"],  # MONDO:222 is a dead end (no e02 edge)
+         "n03": ["CHEBI:111"]},
+        {"e00": ["DOID:1056--UniProtKB:111", "DOID:1056--UniProtKB:222"],
+         "e01": ["UniProtKB:111--MONDO:111", "UniProtKB:222--MONDO:222"], "e02": ["MONDO:111--CHEBI:111"]},
+    )
+    response, message = _run_resultify_directly(qg, kg)
+    assert response.status == 'OK'
+    for result in message.results:
+        bound_n01_keys = {binding.id for binding in result.node_bindings["n01"]}
+        bound_e01_keys = {binding.id for binding in result.analyses[0].edge_bindings["e01"]}
+        bound_e00_keys = {binding.id for binding in result.analyses[0].edge_bindings["e00"]}
+        for n01_key in bound_n01_keys:
+            touching_edge_keys = _get_kg_edge_keys_using_node(n01_key, message.knowledge_graph)
+            assert bound_e01_keys.intersection(touching_edge_keys)
+            assert bound_e00_keys.intersection(touching_edge_keys)
+
+
+def test_single_node():  # a query graph made of one qnode and no qedges must still give results
+    dsl_commands = [
+        "add_qnode(name=ibuprofen, key=n00)",
+        "expand(node_key=n00, kp=infores:rtx-kg2)",
+        "resultify(debug=true)",
+        "return(message=true, store=false)",
+    ]
+    arax_response, arax_message = _do_arax_query(dsl_commands)
+    assert arax_response.status == 'OK'
+    assert len(arax_message.results) > 0
+
+
+def test_parallel_edges_between_nodes():
+    # Two qedges (e01 and parallel01) connect the same pair of qnodes, n01 and n02
+    query_graph = _convert_shorthand_to_qg(
+        {"n00": "", "n01": "is_set", "n02": ""},
+        {"e00": "n00--n01",
+         "e01": "n01--n02",
+         "parallel01": "n01--n02"},
+    )
+    kg_before_resultify = _convert_shorthand_to_kg(
+        {"n00": ["DOID:11830"], "n01": ["UniProtKB:P39060", "UniProtKB:P20849"],
+         "n02": ["CHEBI:85164", "CHEBI:29057"]},
+        {"e00": ["DOID:11830--UniProtKB:P39060", "DOID:11830--UniProtKB:P20849"],
+         "e01": ["UniProtKB:P39060--CHEBI:85164", "UniProtKB:P20849--CHEBI:29057"],
+         "parallel01": ["UniProtKB:P39060--CHEBI:85164", "UniProtKB:P20849--CHEBI:29057", "UniProtKB:P39060--CHEBI:29057"]},
+    )
+    response, message = _run_resultify_directly(query_graph, kg_before_resultify)
+    kg = message.knowledge_graph
+    assert response.status == 'OK'
+    n02_node_keys = {key for key, node in kg.nodes.items() if "n02" in node.qnode_keys}
+    assert message.results and len(message.results) == len(n02_node_keys)
+    # Within each result, every n01 node must be an endpoint of an e01 edge AND of a parallel01 edge
+    for result in message.results:
+        node_keys_by_qg_key = _get_result_node_keys_by_qg_key(result)
+        edge_keys_by_qg_key = _get_result_edge_keys_by_qg_key(result)
+        e01_endpoints = {endpoint for edge_key in edge_keys_by_qg_key['e01']
+                         for endpoint in (kg.edges[edge_key].subject, kg.edges[edge_key].object)}
+        parallel01_endpoints = {endpoint for edge_key in edge_keys_by_qg_key['parallel01']
+                                for endpoint in (kg.edges[edge_key].subject, kg.edges[edge_key].object)}
+        for n01_key in node_keys_by_qg_key['n01']:
+            assert n01_key in e01_endpoints
+            assert n01_key in parallel01_endpoints
+
+
+def test_issue912_clean_up_kg():
+    # The KG returned by resultify must contain only nodes that the results actually use
+    query_graph = _convert_shorthand_to_qg(
+        {"n00": "",
+         "n01": "is_set",
+         "n02": ""},
+        {"e00": "n00--n01", "e01": "n01--n02"},
+    )
+    knowledge_graph = _convert_shorthand_to_kg(
+        {"n00": ["DOID:11", "DOID:NotConnected"],
+         "n01": ["PR:110", "PR:111", "PR:DeadEnd"],
+         "n02": ["CHEBI:11", "CHEBI:NotConnected"]},
+        {"e00": ["DOID:11--PR:110", "DOID:11--PR:111", "DOID:11--PR:DeadEnd"], "e01": ["PR:110--CHEBI:11", "PR:111--CHEBI:11"]},
+    )
+    response, message = _run_resultify_directly(query_graph, knowledge_graph)
+    assert response.status == 'OK'
+    assert len(message.results) == 1
+    pruned_kg = message.knowledge_graph
+    surviving_node_keys = set(pruned_kg.nodes)
+    assert surviving_node_keys == {"DOID:11", "PR:110", "PR:111", "CHEBI:11"}
+    orphan_edges = {key for key, edge in pruned_kg.edges.items() if not {edge.subject, edge.object} <= surviving_node_keys}
+    assert not orphan_edges
+
+
+@pytest.mark.slow
+def test_issue1119_a():
+    # First find chemical substances that are both indicated for and contraindicated for the same disease
+    both_edges_commands = [
+        "add_qnode(name=HP:0020110, key=n00, is_set=True)",
+        "add_qnode(categories=biolink:Drug, key=n01)",
+        "add_qedge(subject=n01, object=n00, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
+        "add_qedge(subject=n01, object=n00, predicates=biolink:predisposes_to_condition, key=e01)",
+        "expand(kp=infores:rtx-kg2)",
+        "resultify()",
+        "return(message=true, store=false)",
+    ]
+    response, message = _do_arax_query(both_edges_commands)
+    assert response.status == 'OK'
+    assert message.results
+    # NOTE: Compare *edges* instead of n01 drugs because 'exclude=True' doesn't chain subclass relationships
+    contraindicated_pairs = {tuple(sorted((edge.subject, edge.object)))
+                             for edge in message.knowledge_graph.edges.values() if "e01" in edge.qedge_keys}
+
+    # Then verify those pairs disappear once the predisposes edge is made kryptonite (exclude=true)
+    kryptonite_commands = [
+        "add_qnode(name=HP:0020110, key=n00, is_set=True)",
+        "add_qnode(categories=biolink:Drug, key=n01)",
+        "add_qedge(subject=n01, object=n00, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
+        "add_qedge(subject=n01, object=n00, predicates=biolink:predisposes_to_condition, exclude=true, key=ex0)",
+        "expand(kp=infores:rtx-kg2)",
+        "resultify()",
+        "return(message=true, store=false)",
+    ]
+    kryptonite_response, kryptonite_message = _do_arax_query(kryptonite_commands)
+    assert kryptonite_response.status == 'OK'
+    assert kryptonite_message.results
+    treats_pairs = {tuple(sorted((edge.subject, edge.object)))
+                    for edge in kryptonite_message.knowledge_graph.edges.values() if "e00" in edge.qedge_keys}
+
+    assert contraindicated_pairs.isdisjoint(treats_pairs)
+
+
+@pytest.mark.slow
+def test_issue1119_b():
+    # Kryptonite qedge (e02) hanging off the middle node of a two-hop path
+    dsl_commands = [
+        "add_qnode(ids=DOID:3312, key=n00)",
+        "add_qnode(categories=biolink:Protein, key=n01)",
+        "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+        "add_qedge(subject=n00, object=n01, key=e00, predicates=biolink:related_to)",
+        "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:physically_interacts_with)",
+        "add_qnode(categories=biolink:Pathway, key=n03)",
+        "add_qedge(subject=n01, object=n03, key=e02, predicates=biolink:participates_in, exclude=true)",
+        "expand(kp=infores:rtx-kg2)",
+        "resultify()",
+        "return(message=true, store=false)",
+    ]
+    response, message = _do_arax_query(dsl_commands)
+    assert response.status == 'OK'
+    assert message.results
+    # Neither the kryptonite edge nor its leaf qnode may be bound in any result
+    assert all(not result.node_bindings.get("n03") for result in message.results)
+    assert all(not result.analyses[0].edge_bindings.get("e02") for result in message.results)
+
+
+@pytest.mark.slow
+def test_issue1119_c():
+    # One-hop query with a required edge (e00) plus a single-edge option group (e01)
+    option_group_commands = [
+        "add_qnode(key=n00, ids=MONDO:0005015)",
+        "add_qnode(key=n01, categories=biolink:ChemicalEntity)",
+        "add_qedge(key=e00, subject=n01, object=n00, predicates=biolink:causes)",
+        "add_qedge(key=e01, subject=n01, object=n00, predicates=biolink:predisposes_to_condition, option_group_id=1)",
+        "expand(kp=infores:rtx-kg2)",
+        "resultify(debug=true)",
+        "return(message=true, store=false)",
+    ]
+    response, message = _do_arax_query(option_group_commands)
+    assert response.status == 'OK'
+    assert message.results
+    assert all(result.analyses[0].edge_bindings.get("e00") for result in message.results)
+    # At least one result should also carry the "optional" group 1 edge
+    results_with_optional_edge = [result for result in message.results if result.analyses[0].edge_bindings.get("e01")]
+    assert results_with_optional_edge
+
+    # Asking for only the required portion must give the same number of results
+    required_only_commands = [
+        "add_qnode(key=n00, ids=MONDO:0005015)",
+        "add_qnode(key=n01, categories=biolink:ChemicalEntity)",
+        "add_qedge(key=e00, subject=n01, object=n00, predicates=biolink:causes)",
+        "expand(kp=infores:rtx-kg2)",
+        "resultify(debug=true)",
+        "return(message=true, store=false)",
+    ]
+    response, message_without_option_group = _do_arax_query(required_only_commands)
+    assert response.status == 'OK'
+    assert len(message_without_option_group.results) == len(message.results)
+
+    # Querying just the option-group edge for the same n01 nodes must match the optional-edge result count
+    n01_keys_in_kg = {node_key for node_key, node in message.knowledge_graph.nodes.items()
+                      if "n01" in node.qnode_keys}
+    optional_only_commands = [
+        "add_qnode(key=n00, ids=MONDO:0005015)",
+        f"add_qnode(key=n01, ids=[{', '.join(n01_keys_in_kg)}])",
+        "add_qedge(key=e01, subject=n01, object=n00, predicates=biolink:predisposes_to_condition)",
+        "expand(kp=infores:rtx-kg2)",
+        "resultify(debug=true)",
+        "return(message=true, store=false)",
+    ]
+    response, message_option_edge_only = _do_arax_query(optional_only_commands)
+    assert response.status == 'OK'
+    assert len(message_option_edge_only.results) == len(results_with_optional_edge)
+
+
+@pytest.mark.slow
+def test_issue1119_d():
+    # One-hop query combining a required edge, a single-edge option group and a required 'not' edge
+    dsl_commands = [
+        "add_qnode(key=n00, ids=DOID:3312)",
+        "add_qnode(key=n01, categories=biolink:ChemicalEntity)",
+        "add_qedge(key=e00, subject=n01, object=n00, predicates=biolink:affects)",
+        "add_qedge(key=e01, subject=n01, object=n00, predicates=biolink:treats_or_applied_or_studied_to_treat, option_group_id=1)",
+        "add_qedge(key=e03, subject=n01, object=n00, exclude=True, predicates=biolink:predisposes_to_condition)",
+        "expand(kp=infores:rtx-kg2)",
+        "resultify(debug=true)",
+        "return(message=true, store=false)",
+    ]
+    response, message = _do_arax_query(dsl_commands)
+    assert response.status == 'OK'
+    assert message.results
+    # The required edge is bound in every result; the excluded edge in none
+    assert all(result.analyses[0].edge_bindings.get("e00") for result in message.results)
+    assert all(not result.analyses[0].edge_bindings.get("e03") for result in message.results)
+    # Make sure our "optional" edges appear in one or more results
+    assert any(result for result in message.results if result.analyses[0].edge_bindings.get("e01"))
+    # Verify there are some results without any optional portion (happens to be true for this query)
+    assert any(result for result in message.results if "e01" not in set(result.analyses[0].edge_bindings))
+
+
+@pytest.mark.slow
+def test_issue1146_a():  # overlay + result limiting must leave every n1 node attached to both hops
+    dsl_commands = [
+        "add_qnode(key=n0, ids=MONDO:0008380, categories=biolink:Disease)",
+        "add_qnode(key=n2, categories=biolink:ChemicalEntity)",
+        "add_qnode(key=n1, categories=biolink:Protein, is_set=true)",
+        "add_qedge(key=e0, subject=n2, object=n1, predicates=biolink:physically_interacts_with)",
+        "add_qedge(key=e1, subject=n1, object=n0, predicates=biolink:causes)",
+        "expand(kp=infores:rtx-kg2)",
+        "overlay(action=compute_ngd, virtual_relation_label=N2, subject_qnode_key=n0, object_qnode_key=n2)",
+        "resultify(debug=true)",
+        "filter_results(action=limit_number_of_results, max_results=4)",
+        "return(message=true, store=false)",
+    ]
+    response, message = _do_arax_query(dsl_commands)
+    assert response.status == 'OK'
+    assert len(message.results) == 4
+    # Every n1 node in a result must touch at least one e1 edge and one e0 edge of that result
+    for result in message.results:
+        bound_n1_keys = {binding.id for binding in result.node_bindings["n1"]}
+        bound_e1_keys = {binding.id for binding in result.analyses[0].edge_bindings["e1"]}
+        bound_e0_keys = {binding.id for binding in result.analyses[0].edge_bindings["e0"]}
+        for n1_key in bound_n1_keys:
+            touching_edge_keys = _get_kg_edge_keys_using_node(n1_key, message.knowledge_graph)
+            assert bound_e1_keys.intersection(touching_edge_keys)
+            assert bound_e0_keys.intersection(touching_edge_keys)
+
+
+def test_disconnected_qg():
+    # A QG with more than one component (n00 has no qedge here) must fail with an informative error
+    dsl_commands = [
+        "add_qnode(name=ibuprofen, key=n00)",
+        "add_qnode(name=acetaminophen, key=n01)",
+        "add_qnode(categories=biolink:Disease, key=n02)",
+        "add_qedge(key=e00, subject=n01, object=n02)",
+        "expand(kp=infores:rtx-kg2)",
+        "resultify(debug=true)",
+        "return(message=true, store=false)",
+    ]
+    arax_response, _message = _do_arax_query(dsl_commands)
+    assert arax_response.status != 'OK'
+    assert "QG is disconnected" in arax_response.show()
+
+
+def test_recompute_qg_keys():  # recompute_qg_keys() must restore qnode_keys/qedge_keys wiped from the KG
+    shorthand_qnodes = {"n00": "", "n01": ""}
+    shorthand_qedges = {"e00": "n00--n01"}
+    query_graph = _convert_shorthand_to_qg(shorthand_qnodes, shorthand_qedges)
+    shorthand_kg_nodes = {"n00": ["DOID:731"], "n01": ["HP:01", "HP:02", "HP:03", "HP:04"]}
+    shorthand_kg_edges = {"e00": ["DOID:731--HP:01", "DOID:731--HP:02", "DOID:731--HP:03", "DOID:731--HP:04"]}
+    knowledge_graph = _convert_shorthand_to_kg(shorthand_kg_nodes, shorthand_kg_edges)
+    response, message = _run_resultify_directly(query_graph, knowledge_graph)
+    assert response.status == 'OK'
+    assert message.results
+    # Clear all qnode_keys/qedge_keys from the KG
+    for node in message.knowledge_graph.nodes.values():
+        node.qnode_keys = []
+    for edge in message.knowledge_graph.edges.values():
+        edge.qedge_keys = []
+    # Then recompute the qg keys and make sure they look ok
+    resultifier = ARAXResultify()
+    resultifier.recompute_qg_keys(response)
+    assert response.status == 'OK'
+    kg = response.envelope.message.knowledge_graph
+    assert kg.nodes and kg.edges
+    for node_key, node in kg.nodes.items():
+        # The parentheses matter: without them the conditional expression swallows the comparison
+        # and every non-n00 node merely asserts the (always truthy) literal ["n01"]
+        assert node.qnode_keys == (["n00"] if node_key in shorthand_kg_nodes["n00"] else ["n01"])
+    for edge in kg.edges.values():
+        assert edge.qedge_keys == ["e00"]
+
+
+def test_multi_node_edgeless_qg():  # two qnodes and no qedges: expect exactly one result
+    query_graph = _convert_shorthand_to_qg(
+        {"n00": "", "n01": ""},
+        {},
+    )
+    knowledge_graph = _convert_shorthand_to_kg(
+        {"n00": ["CHEMBL.COMPOUND:CHEMBL635"], "n01": ["MESH:D052638"]},
+        {},
+    )
+    response, message = _run_resultify_directly(query_graph, knowledge_graph)
+    assert response.status == 'OK'
+    assert len(message.results) == 1
+
+
+@pytest.mark.slow
+def test_issue1446():
+    # Two separate single-edge option groups (e1 in group 1, e2 in group 2) alongside required edge e0
+    dsl_commands = [
+        "add_qnode(ids=HGNC:6284, key=n0, categories=biolink:Gene)",
+        "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+        "add_qedge(key=e0,subject=n1,object=n0, predicates=biolink:affects)",
+        "add_qedge(key=e1,subject=n1,object=n0, predicates=biolink:associated_with, option_group_id=1)",
+        "add_qedge(key=e2,subject=n1,object=n0, predicates=biolink:related_to, option_group_id=2)",
+        "expand(kp=infores:rtx-kg2)",
+        "overlay(action=compute_ngd, virtual_relation_label=N1, subject_qnode_key=n0, object_qnode_key=n1)",
+        "resultify()",
+        "filter_results(action=limit_number_of_results, max_results=100)",
+        "return(message=true, store=false)",
+    ]
+    arax_response, arax_message = _do_arax_query(dsl_commands)
+    assert arax_response.status == "OK"
+    assert arax_message.results
+
+
+@pytest.mark.slow
+def test_issue1848():
+    # n2/e1 are added after expand(), so only the already-expanded part (e0) should be resultified
+    dsl_commands = [
+        "add_qnode(key=n0, ids=MONDO:0019391)",
+        "add_qnode(key=n1, categories=biolink:Gene)",
+        "add_qedge(key=e0, subject=n1, object=n0, predicates=biolink:causes)",
+        "expand(kp=infores:rtx-kg2)",
+        "add_qnode(key=n2, categories=biolink:Drug)",
+        "add_qedge(key=e1, subject=n1, object=n2)",
+        "resultify()",
+        "return(message=true, store=false)",
+    ]
+    response, message = _do_arax_query(dsl_commands)
+    kg = response.envelope.message.knowledge_graph
+    assert response.status == "OK"
+    assert kg.nodes
+    assert kg.edges
+    assert message.results
+    bound_qedge_keys = {qedge_key for edge in kg.edges.values() for qedge_key in edge.qedge_keys}
+    non_subclass_qedge_keys = {qedge_key for qedge_key in bound_qedge_keys if not qedge_key.startswith("subclass:")}
+    assert non_subclass_qedge_keys == {"e0"}
+
+
+def test_node_binding_query_id_one_hop_single_input_curie():
+    # One-hop query with a single curie pinned on each qnode; checks node binding 'query_id' values
+    dsl_commands = [
+        f"add_qnode(ids={DIABETES_CURIE}, key=n00)",
+        f"add_qnode(ids={INSULIN_CURIE}, key=n01)",
+        # n01 is pinned to INSULIN_CURIE here (not left as an unpinned biolink:Drug qnode)
+        "add_qedge(subject=n01, object=n00, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
+        "expand(kp=infores:rtx-kg2)",
+        "resultify(debug=true)",
+        "return(message=true, store=false)",
+    ]
+    response, message = _do_arax_query(dsl_commands)
+    assert response.status == 'OK'
+    assert len(message.results) > 0
+    kg = response.envelope.message.knowledge_graph
+    # Make sure the input curie and one of its children appear somewhere in the results
+    assert DIABETES_CURIE in kg.nodes
+    assert TYPE_1_DIABETES_CURIE in kg.nodes
+    # A binding for the queried curie itself has no query_id; any other n00 binding points back to it
+    for result in message.results:
+        for n00_binding in result.node_bindings["n00"]:
+            if n00_binding.id != DIABETES_CURIE:
+                assert n00_binding.query_id == DIABETES_CURIE
+            else:
+                assert n00_binding.query_id is None
+        for n01_binding in result.node_bindings["n01"]:
+            assert n01_binding.query_id is None
+    # Make sure we have some subclass edges
+    assert any(edge.predicate == "biolink:subclass_of" for edge in message.knowledge_graph.edges.values())
+    insulin_results = [result for result in message.results
+                       if any(binding.id == INSULIN_CURIE for binding in result.node_bindings["n01"])]
+
+    assert len(insulin_results) == 1
+
+
+def test_node_binding_query_id_one_hop_multiple_input_curies():
+    parent_query_ids = {DIABETES_CURIE, TYPE_1_DIABETES_CURIE}
+    dsl_commands = [
+        f"add_qnode(ids=[{','.join(parent_query_ids)}], key=n00)",
+        "add_qnode(categories=biolink:Drug, key=n01)",
+        "add_qedge(subject=n01, object=n00, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e00)",
+        "expand(kp=infores:rtx-kg2)",
+        "resultify(debug=true)",
+        "return(message=true, store=false)",
+    ]
+    response, message = _do_arax_query(dsl_commands)
+    assert response.status == 'OK'
+    assert len(message.results) > 1
+    kg = response.envelope.message.knowledge_graph
+    # Only one of the two input curies is checked for now (see TODO below)
+    assert DIABETES_CURIE in kg.nodes
+    # TODO: Do the below check after we've figured out the multiple query IDs problem (nodes could fulfill either
+    #  diabetes or type 1 diabetes), but can only have 1 n00 parent specified
+    # assert TYPE_1_DIABETES_CURIE in kg.nodes
+    # Bindings for a queried curie have no query_id; any other n00 binding must point at one of them
+    for result in message.results:
+        for n00_binding in result.node_bindings["n00"]:
+            if n00_binding.id not in parent_query_ids:
+                assert n00_binding.query_id in parent_query_ids
+            else:
+                assert n00_binding.query_id is None
+        for n01_binding in result.node_bindings["n01"]:
+            assert n01_binding.query_id is None
+    # Make sure we have some results with subclass self-edges (Expand assigns such qedges keys like 'subclass:n00-n00')
+    assert any(edge.predicate == "biolink:subclass_of" for edge in message.knowledge_graph.edges.values())
+    insulin_results = [result for result in message.results
+                       if any(binding.id == INSULIN_CURIE for binding in result.node_bindings["n01"])]
+    assert 1 <= len(insulin_results) <= 2
+
+
+@pytest.mark.slow
+def test_node_binding_query_id_two_hop_double_pinned():
+    dsl_commands = [
+        f"add_qnode(ids={DIABETES_CURIE}, key=n00)",
+        f"add_qnode(ids={HEART_DISEASE_CURIE}, key=n01)",
+        "add_qnode(categories=biolink:Drug, key=n02)",
+        "add_qedge(subject=n01, object=n00, predicates=biolink:related_to, key=e00)",
+        "add_qedge(subject=n01, object=n02, predicates=biolink:treats_or_applied_or_studied_to_treat, key=e01)",
+        "expand(kp=infores:rtx-kg2)",
+        "resultify(debug=true)",
+        "return(message=true, store=false)",
+    ]
+    response, message = _do_arax_query(dsl_commands)
+    assert response.status == 'OK'
+    assert len(message.results) > 1
+    kg = response.envelope.message.knowledge_graph
+    # Make sure both pinned input curies appear somewhere in the results
+    assert DIABETES_CURIE in kg.nodes
+    assert HEART_DISEASE_CURIE in kg.nodes
+    # Bindings for a queried curie have no query_id; other bindings of a pinned qnode point back to it
+    for result in message.results:
+        for n00_binding in result.node_bindings["n00"]:
+            if n00_binding.id != DIABETES_CURIE:
+                assert n00_binding.query_id == DIABETES_CURIE
+            else:
+                assert n00_binding.query_id is None
+        for n01_binding in result.node_bindings["n01"]:
+            if n01_binding.id != HEART_DISEASE_CURIE:
+                assert n01_binding.query_id == HEART_DISEASE_CURIE
+            else:
+                assert n01_binding.query_id is None
+        for n02_binding in result.node_bindings["n02"]:
+            assert n02_binding.query_id is None
+    # Make sure there's one result for Dabigatran and its structure is as expected
+    dabigatran_results = [result for result in message.results if result.essence.upper() == "DABIGATRAN"]
+    assert len(dabigatran_results) == 1
+    dabigatran_edge_bindings = dabigatran_results[0].analyses[0].edge_bindings
+    expected_qedge_keys = {"e00", "e01"}
+    assert set(dabigatran_edge_bindings) == expected_qedge_keys
+    for qedge_key in expected_qedge_keys:
+        assert len(dabigatran_edge_bindings[qedge_key])
+
+
+@pytest.mark.external
+def test_missing_chp_results():
+    # Note: for this test to pass, need to use a maturity that CHP has an endpoint for (they don't have dev currently)
+    anatomy_curies = ["UBERON:0009912", "UBERON:0002535", "UBERON:0000019", "UBERON:0002365", "UBERON:0000017",
+                      "UBERON:0000970", "UBERON:0001831", "UBERON:0016410", "UBERON:0001737", "UBERON:0000945"]
+    dsl_commands = [
+        f"add_qnode(ids=[{','.join(anatomy_curies)}], categories=biolink:GrossAnatomicalStructure, key=n1)",
+        "add_qnode(categories=biolink:Gene, key=n2)",
+        "add_qedge(subject=n1, object=n2, predicates=biolink:expresses, key=e1)",
+        "expand(kp=infores:connections-hypothesis)",
+        "resultify(debug=true)",
+        "return(message=true, store=false)",
+    ]
+    arax_response, arax_message = _do_arax_query(dsl_commands)
+    assert arax_response.status == 'OK'
+    assert len(arax_message.results) > 20
+    assert any(edge.predicate == "biolink:subclass_of" for edge in arax_message.knowledge_graph.edges.values())
+
+
+@pytest.mark.slow
+@pytest.mark.external
+def test_too_few_results():
+    # Note: for this test to pass, need to use a maturity that CHP has an endpoint for (they don't have dev currently)
+    dsl_commands = [
+        "add_qnode(key=n0, ids=MONDO:0009061, categories=biolink:Disease)",
+        "add_qnode(key=n1, categories=biolink:GrossAnatomicalStructure)",
+        "add_qnode(key=n2, categories=biolink:Gene)",
+        "add_qnode(key=n3, categories=[biolink:Drug, biolink:SmallMolecule])",
+        "add_qedge(key=e0, subject=n0, object=n1, predicates=biolink:located_in)",
+        "add_qedge(key=e1, subject=n1, object=n2, predicates=biolink:expresses)",
+        "add_qedge(key=e2, subject=n3, object=n2, predicates=biolink:affects)",
+        "expand(edge_key=e0, prune_threshold=1000, kp_timeout=75)",
+        "expand(edge_key=e1, kp=infores:connections-hypothesis, prune_threshold=1000, kp_timeout=75)",
+        "expand(edge_key=e2, prune_threshold=1000, kp_timeout=75)",
+        "resultify()",
+        "return(message=true, store=false)",
+    ]
+    arax_response, arax_message = _do_arax_query(dsl_commands)
+    assert arax_response.status == 'OK'
+    assert len(arax_message.results) > 200
+
+
+@pytest.mark.slow
+@pytest.mark.external
+def test_issue1923_multiple_essence_candidates_subclass():  # only checks that the three-hop query completes OK
+    dsl_commands = [
+        "add_qnode(name=ATP1A3, key=n0)",
+        "add_qnode(categories=biolink:PhenotypicFeature, key=n1)",
+        "add_qnode(categories=biolink:Protein, key=n2)",
+        "add_qnode(categories=biolink:ChemicalSubstance, key=n3)",
+        "add_qedge(subject=n0, object=n1, key=e0)",
+        "add_qedge(subject=n1, object=n2, key=e1)",
+        "add_qedge(subject=n2, object=n3, key=e2)",
+        "expand(prune_threshold=50, kp_timeout=30)",
+        "resultify()",
+        "return(message=true, store=false)",
+    ]
+    arax_response, _message = _do_arax_query(dsl_commands)
+    assert arax_response.status == 'OK'
+
+
+@pytest.mark.slow
+def test_issue2166():  # option group containing its own qnode (n3); only checks the query completes OK
+    dsl_commands = [
+        # Required path: NSCLC -> n2 -> MET
+        "add_qnode(key=nCANCER, ids=OMIM:MTHU063395)",
+        "add_qnode(key=n2)",
+        "add_qnode(key=nMET, ids=NCBIGene:4233)",
+        "add_qedge(key=e1, subject=nCANCER, object=n2)",
+        "add_qedge(key=e2, subject=n2, object=nMET)",
+        # Optional detour: NSCLC -> n2 -[optional]-> n3 -> MET
+        "add_qnode(key=n3, option_group_id=option1)",
+        "add_qedge(key=e3, subject=n2, object=n3, option_group_id=option1)",
+        "add_qedge(key=e4, subject=n3, object=nMET, option_group_id=option1)",
+        # Expand, then resultify without regard to edge direction
+        "expand(kp=infores:rtx-kg2)",
+        "resultify(ignore_edge_direction=true)",
+    ]
+    arax_response, _message = _do_arax_query(dsl_commands)
+    assert arax_response.status == 'OK'
+
+def test_legacy_subclass_of_handling():
+    # Runs Expand then Resultify directly on a hand-built query graph (no DSL actions)
+    # and checks that the resulting KG contains biolink:subclass_of edges
+    treats_qedge = {
+        "knowledge_type": "lookup",
+        "object": "on",
+        "predicates": ["biolink:treats"],
+        "subject": "sn",
+    }
+    object_qnode = {
+        "ids": ["MONDO:0005015"],
+    }
+    subject_qnode = {
+        "ids": ["CHEBI:5931"],
+    }
+    query_graph = {
+        "edges": {
+            "e0": treats_qedge,
+        },
+        "nodes": {
+            "on": object_qnode,
+            "sn": subject_qnode,
+        },
+    }
+
+    response = ARAXResponse(
+        status='OK',
+        logging_level=ARAXResponse.DEBUG,
+    )
+    ARAXMessenger().create_envelope(response)
+    response.envelope.message.query_graph = QueryGraph.from_dict(query_graph)
+    ARAXExpander().apply(response, {"kp": "infores:rtx-kg2"})
+    ARAXResultify().apply(response, {})
+    kg_edges = response.envelope.message.knowledge_graph.edges
+    assert any(edge.predicate == "biolink:subclass_of" for edge in kg_edges.values())
+
+
+if __name__ == '__main__':  # allow running this test module directly as a script
+    pytest.main(['-v', __file__])  # __file__ (not a CWD-relative name) so it works from any directory
diff --git a/code/code-archive/old-arax-tests/test_ARAX_standup_queries.py b/code/code-archive/old-arax-tests/test_ARAX_standup_queries.py
new file mode 100644
index 000000000..86caf86c5
--- /dev/null
+++ b/code/code-archive/old-arax-tests/test_ARAX_standup_queries.py
@@ -0,0 +1,301 @@
+#!/usr/bin/env python3
+
+# Intended to test our more complicated workflows
+
+import sys
+import os
+import pytest
+from collections import Counter
+import copy
+import json
+import ast
+from typing import List, Union, Optional
+
+import numpy as np
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../../ARAXQuery")
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../ARAXQuery")
+from ARAX_query import ARAXQuery
+from ARAX_response import ARAXResponse
+
+PACKAGE_PARENT = '../../UI/OpenAPI/openapi_server'
+sys.path.append(os.path.normpath(os.path.join(os.getcwd(), PACKAGE_PARENT)))
+from openapi_server.models.message import Message
+
+
+def _do_arax_query(query: dict, timeout: Optional[int] = None) -> List[Union[ARAXResponse, Message]]:  # Run `query` through a fresh ARAXQuery; returns [response, message]
+ araxq = ARAXQuery()
+ if timeout:  # a falsy timeout (None or 0) leaves the query untouched
+ query["query_options"] = {"kp_timeout": timeout}  # NOTE: mutates the caller's dict and replaces any existing "query_options"
+ response = araxq.query(query)
+ if response.status != 'OK':
+ print(response.show(level=response.DEBUG))  # print the debug-level log when the query did not succeed
+ return [response, response.envelope.message]
+
+
+def _attribute_tester(message, attribute_name: str, attribute_type: str, num_different_values=2):
+ """
+ Tests attributes of a message
+ message: returned from _do_arax_query
+ attribute_name: the attribute name to test (eg. 'jaccard_index')
+ attribute_type: the attribute type (eg. 'EDAM-DATA:1234')
+ num_different_values: the number of distinct values you wish to see have been added as attributes
+ """
+ edges_of_interest = []
+ values = set()
+ for edge in message.knowledge_graph.edges.values():  # edge keys are not needed, so iterate the values only
+ assert 'primary_knowledge_source' in [attribute.attribute_type_id for attribute in edge.attributes]
+ if hasattr(edge, 'edge_attributes'):  # NOTE(review): the line above reads `edge.attributes`; confirm edges still expose `edge_attributes`
+ for attr in edge.edge_attributes:
+ if attr.name == attribute_name:
+ edges_of_interest.append(edge)
+ assert attr.type == attribute_type
+ values.add(attr.value)
+ assert len(edges_of_interest) > 0
+ assert len(values) >= num_different_values
+
+
+def _virtual_tester(message: Message, edge_predicate: str, relation: str, attribute_name: str, attribute_type: str, num_different_values=2):
+ """
+ Tests overlay functions that add virtual edges
+ message: returned from _do_arax_query
+ edge_predicate: the name of the virtual edge (eg. biolink:has_jaccard_index_with)
+ relation: the relation you picked for the virtual_edge_relation (eg. N1)
+ attribute_name: the attribute name to test (eg. 'jaccard_index')
+ attribute_type: the attribute type (eg. 'EDAM-DATA:1234')
+ num_different_values: the number of distinct values you wish to see have been added as attributes
+ """
+ edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])
+ assert edge_predicate in edge_predicates_in_kg
+ edges_of_interest = [x for x in message.knowledge_graph.edges.values() if x.relation == relation]
+ values = set()
+ assert len(edges_of_interest) > 0
+ for edge in edges_of_interest:
+ assert hasattr(edge, 'attributes')  # guard first: the checks below dereference edge.attributes
+ assert edge.attributes  # must be non-empty before it is iterated or indexed
+ assert 'primary_knowledge_source' in [attribute.attribute_type_id for attribute in edge.attributes]
+ assert edge.attributes[0].name == attribute_name
+ values.add(edge.attributes[0].value)
+ assert edge.attributes[0].type == attribute_type
+ # make sure two or more values were added
+ assert len(values) >= num_different_values
+
+
+def test_gene_to_pathway_issue_9():  # One-hop query: pinned Gene node n0 -> any Pathway node n1; expects at least one result
+ query = {
+ "message": {
+ "query_graph": {
+ "nodes": {
+ "n0": {
+ "ids": ["NCBIGene:1017"],
+ "categories": ["biolink:Gene"]
+ },
+ "n1": {
+
+ "categories": ["biolink:Pathway"]
+ }
+ },
+ "edges": {
+ "e01": {
+ "subject": "n0",
+ "object": "n1"
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query, timeout=30)  # timeout is forwarded as the "kp_timeout" query option
+ assert response.status == 'OK'
+ assert len(message.results) > 0
+
+
+def test_chemicals_to_gene_issue_10():  # One-hop query: pinned n0 (categorized as Gene) -> any ChemicalEntity n1; expects at least one result
+ query = {
+ "message": {
+ "query_graph": {
+ "nodes": {
+ "n0": {
+ "ids": ["UniProtKB:P52788"],
+ "categories":["biolink:Gene"]
+ },
+ "n1": {
+ "categories": ["biolink:ChemicalEntity"]
+ }
+ },
+ "edges": {
+ "e01": {
+ "subject": "n0",
+ "object": "n1"
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query, timeout=30)  # timeout is forwarded as the "kp_timeout" query option
+ assert response.status == 'OK'
+ assert len(message.results) > 0
+
+
+def test_named_thing_associated_with_acrocynaosis_issue_12():  # One-hop query: pinned PhenotypicFeature n0 -> any NamedThing n1; expects at least one result
+ query = {
+ "message": {
+ "query_graph": {
+ "nodes": {
+ "n0": {
+ "ids": ["UMLS:C0221347"],
+ "categories":["biolink:PhenotypicFeature"]
+ },
+ "n1": {
+ "categories": ["biolink:NamedThing"]
+ }
+ },
+ "edges": {
+ "e01": {
+ "subject": "n0",
+ "object": "n1"
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query, timeout=30)  # timeout is forwarded as the "kp_timeout" query option
+ assert response.status == 'OK'
+ assert len(message.results) > 0
+
+
+@pytest.mark.external
+def test_chemical_substances_correlated_with_asthma_issue_18():  # One-hop query: pinned Disease n0 -[correlated_with]-> any ChemicalEntity n1; expects at least one result
+ query = {
+ "message": {
+ "query_graph": {
+ "nodes": {
+ "n0": {
+ "ids": ["MONDO:0004979"],
+ "categories": ["biolink:Disease"]
+ },
+ "n1": {
+ "categories": ["biolink:ChemicalEntity"]
+ }
+ },
+ "edges": {
+ "e01": {
+ "subject": "n0",
+ "object": "n1",
+ "predicates": ["biolink:correlated_with"]
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query, timeout=30)  # timeout is forwarded as the "kp_timeout" query option
+ assert response.status == 'OK'
+ assert len(message.results) > 0
+
+
+@pytest.mark.slow
+def test_diseases_treated_by_drug_issue_20():  # One-hop query: any Disease n1 -[treated_by]-> pinned Drug n0; expects at least one result
+ query = {
+ "message": {
+ "query_graph": {
+ "edges": {
+ "e01": {
+ "object": "n0",
+ "predicates": ["biolink:treated_by"],
+ "subject": "n1"
+ }
+ },
+ "nodes": {
+ "n0": {
+ "categories": ["biolink:Drug"],
+ "ids": ["DRUGBANK:DB00394"]
+ },
+ "n1": {
+ "categories": ["biolink:Disease"]
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query, timeout=30)  # timeout is forwarded as the "kp_timeout" query option
+ assert response.status == 'OK'
+ assert len(message.results) > 0
+
+
+@pytest.mark.skip # TODO: Needs to be rewritten using qualifiers (many of the predicates are no longer valid)
+def test_chemical_substances_that_down_regulate_STK11_issue_28():  # One-hop query: any ChemicalEntity n1 -[one of many predicates]-> pinned n0; currently skipped
+ query = {
+ "message": {
+ "query_graph": {
+ "edges": {
+ "e01": {
+ "object": "n0",
+ "predicates": ["biolink:prevents",
+ "biolink:negatively_regulates",
+ "biolink:decreases_secretion_of",
+ "biolink:decreases_secretion_of",  # NOTE: exact duplicate of the entry above
+ "biolink:decreases_transport_of",
+ "biolink:decreases_activity_of",
+ "biolink:decreases_synthesis_of",
+ "biolink:decreases_expression_of",
+ "biolink:increases_degradation_of",
+ "biolink:entity_negatively_regulates_entity",
+ "biolink:disrupts",
+ "biolink:directly_negatively_regulates",
+ "biolink:inhibits",
+ "biolink:inhibitor",
+ "biolink:channel_blocker",
+ "biolink:disrupts",  # NOTE: duplicate of "biolink:disrupts" listed earlier in this list
+ "biolink:may_inhibit_effect_of"
+ ],
+ "subject": "n1"
+ }
+ },
+ "nodes": {
+ "n0": {
+ "ids": ["HGNC:11389"]
+ },
+ "n1": {
+ "categories": ["biolink:ChemicalEntity"]
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query, timeout=30)  # timeout is forwarded as the "kp_timeout" query option
+ assert response.status == 'OK'
+ assert len(message.results) > 0
+
+
+# This query doesn't find results after conflations were resolved in KG2.6.7
+@pytest.mark.skip
+def test_phenotypes_for_angel_shaped_phalango_epiphyseal_dysplasia_issue_33():  # One-hop query: any PhenotypicFeature n1 -[has_phenotype]-> pinned Disease n0; currently skipped
+ query = {
+ "message": {
+ "query_graph": {
+ "edges": {
+ "e01": {
+ "object": "n0",
+ "subject": "n1",
+ "predicates":["biolink:has_phenotype"]
+ }
+ },
+ "nodes": {
+ "n0": {
+ "ids": ["MONDO:0007114"],
+ "categories":["biolink:Disease"]
+ },
+ "n1": {
+ "categories": ["biolink:PhenotypicFeature"]
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query, timeout=30)  # timeout is forwarded as the "kp_timeout" query option
+ assert response.status == 'OK'
+ assert len(message.results) > 0
+
+
+if __name__ == "__main__":  # allow running this test module directly as a script
+ pytest.main(['-v'])  # verbose pytest run; no explicit test path is passed
+
diff --git a/code/code-archive/old-arax-tests/test_ARAX_synonymizer.py b/code/code-archive/old-arax-tests/test_ARAX_synonymizer.py
new file mode 100644
index 000000000..3f8500e98
--- /dev/null
+++ b/code/code-archive/old-arax-tests/test_ARAX_synonymizer.py
@@ -0,0 +1,442 @@
+#!/usr/bin/env python3
+"""
+Usage:
+ Run all tests: pytest -v test_ARAX_synonymizer.py
+ Run a single test: pytest -v test_ARAX_synonymizer.py -k test_example_9
+"""
+import copy
+import json
+import os
+import sys
+import timeit
+
+import pytest
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../NodeSynonymizer/")
+from node_synonymizer import NodeSynonymizer
+
+ATRIAL_FIBRILLATION_CURIE = "MONDO:0004981"  # CURIE fixtures shared by the tests below
+PARKINSONS_CURIE = "DOID:14330"
+PARKINSONS_CURIE_2 = "MONDO:0005180"  # tests expect this among the equivalents of PARKINSONS_CURIE
+IBUPROFEN_CURIE = "DRUGBANK:DB01050"
+ACETAMINOPHEN_CURIE = "CHEMBL.COMPOUND:CHEMBL112"
+ACETAMINOPHEN_CURIE_2 = "CHEBI:46195"  # tests expect this among the equivalents of ACETAMINOPHEN_CURIE
+SNCA_CURIE = "NCBIGene:6622"
+FAKE_CURIE = "NOTAREALCURIE!"  # tests expect the synonymizer to map this to None
+
+CERVICAL_RIB_NAME = "Cervical rib"  # name fixtures shared by the tests below
+PARKINSONS_NAME = "Parkinson's disease"
+WARFARIN_NAME = "Warfarin"
+BRCA1_NAME = "BRCA1"
+FAKE_NAME = "THISISNOTAREALNODENAME!"  # tests expect the synonymizer to map this to None
+PTGS1_NAME = "PTGS1"
+
+
+# ------------------------------- LEGACY TESTS FROM ORIGINAL SYNONYMIZER ------------------------------------- #
+
+def test_example_6b():  # Smoke test: prints the equivalent nodes for several CURIEs plus timing; makes no assertions
+ synonymizer = NodeSynonymizer()
+
+ print("==== Get all equivalent nodes in a KG for an input curie ============================")
+ tests = [ "DOID:14330", "UMLS:C0031485", "FMA:7203", "MESH:D005199", "CHEBI:5855", "DOID:9281" ]
+
+ t0 = timeit.default_timer()
+ for test in tests:
+ nodes = synonymizer.get_equivalent_nodes(test)  # one lookup per CURIE rather than a single batched call
+ print(f"{test} = " + str(nodes))
+ print()
+ t1 = timeit.default_timer()
+ print("Elapsed time: "+str(t1-t0))
+
+
+def test_example_9():  # Smoke test: times get_canonical_curies for a CURIE list and a name list; makes no assertions
+ synonymizer = NodeSynonymizer()
+
+ print("==== Get canonical curies for a set of input curies ============================")
+ curies = ["DOID:14330", "UMLS:C0031485", "FMA:7203", "MESH:D005199", "CHEBI:5855", "DOID:9281xxxxx",
+ "MONDO:0005520"]
+ names = ["phenylketonuria", "ibuprofen", "P06865", "HEXA", "Parkinson's disease", 'supernovas', "Bob's Uncle",
+ 'double "quotes"', None]  # includes awkward inputs: quote characters and a None entry
+
+ combined_list = copy.copy(curies)  # NOTE: combined_list is built but never used afterwards
+ combined_list.extend(names)
+
+ t0 = timeit.default_timer()
+ canonical_curies = synonymizer.get_canonical_curies(curies=curies, return_all_categories=True)
+ print(f"Canonical curies for input normal curies is: \n{canonical_curies}")
+ t1 = timeit.default_timer()
+ print("Elapsed time: " + str(t1 - t0))
+ canonical_curies2 = synonymizer.get_canonical_curies(names=names, return_all_categories=True)  # NOTE: result is never inspected; only the call is timed
+ t2 = timeit.default_timer()
+ print("Elapsed time: " + str(t2 - t1))
+
+
+def test_example_10():  # Smoke test: canonicalizes identifiers containing commas, slashes, brackets and quotes; makes no assertions
+ synonymizer = NodeSynonymizer()
+ print("==== Complex name query ============================")
+ node_ids = ['CHEMBL.MECHANISM:potassium_channel,_inwardly_rectifying,_subfamily_j,_member_11_opener',
+ 'CHEMBL.MECHANISM:potassium_channel,_inwardly_rectifying,_subfamily_j,_member_8_opener',
+ 'CHEMBL.MECHANISM:endothelin_receptor,_et-a/et-b_antagonist',
+ 'CHEMBL.MECHANISM:amylin_receptor_amy1,_calcr/ramp1_agonist',
+ 'CHEMBL.MECHANISM:sulfonylurea_receptor_2,_kir6.2_opener',
+ 'CHEMBL.MECHANISM:sulfonylurea_receptor_1,_kir6.2_blocker',
+ 'CHEMBL.MECHANISM:amiloride-sensitive_sodium_channel,_enac_blocker',
+ 'CHEMBL.MECHANISM:hepatitis_c_virus_serine_protease,_ns3/ns4a_inhibitor',
+ 'CHEMBL.MECHANISM:1,3-beta-glucan_synthase_inhibitor',
+ "CHEMBL.MECHANISM:3',5'-cyclic_phosphodiesterase_inhibitor",
+ 'CHEMBL.MECHANISM:dna_topoisomerase_i,_mitochondrial_inhibitor',
+ 'CHEMBL.MECHANISM:carbamoyl-phosphate_synthase_[ammonia],_mitochondrial_positive_allosteric_modulator',
+ 'CHEMBL.MECHANISM:parp_1,_2_and_3_inhibitor', 'CHEMBL.MECHANISM:c-jun_n-terminal_kinase,_jnk_inhibitor',
+ 'CHEMBL.MECHANISM:voltage-gated_potassium_channel,_kqt;_kcnq2(kv7.2)/kcnq3(kv7.3)_activator',
+ 'CHEMBL.MECHANISM:hla_class_ii_histocompatibility_antigen,_drb1-10_beta_chain_other',
+ 'CHEMBL.MECHANISM:hla_class_ii_histocompatibility_antigen,_drb1-15_beta_chain_modulator',
+ 'CHEMBL.MECHANISM:indoleamine_2,3-dioxygenase_inhibitor',
+ 'CHEMBL.MECHANISM:5,6-dihydroxyindole-2-carboxylic_acid_oxidase_other',
+ 'CHEMBL.MECHANISM:amine_oxidase,_copper_containing_inhibitor',
+ 'CHEMBL.MECHANISM:carnitine_o-palmitoyltransferase_1,_muscle_isoform_inhibitor',
+ 'CHEMBL.MECHANISM:troponin,_cardiac_muscle_positive_modulator',
+ 'CHEMBL.MECHANISM:isocitrate_dehydrogenase_[nadp],_mitochondrial_inhibitor']
+ t0 = timeit.default_timer()
+ canonical_curies = synonymizer.get_canonical_curies(node_ids)
+ print(canonical_curies)  # this print falls inside the timed interval
+ t1 = timeit.default_timer()
+ print(json.dumps(canonical_curies, sort_keys=True, indent=2))
+ print("Elapsed time: " + str(t1 - t0))
+
+
+def test_example_11():  # Smoke test: times get_equivalent_nodes for a small CURIE list and prints the result; makes no assertions
+ synonymizer = NodeSynonymizer()
+ print("==== Get equivalent curies for a set of input curies ============================")
+ curies = ["DOID:14330", "UMLS:C0031485", "UNICORN"]
+ t0 = timeit.default_timer()
+ canonical_curies = synonymizer.get_equivalent_nodes(curies=curies)  # despite the variable name, this holds equivalent nodes
+ t1 = timeit.default_timer()
+ print(json.dumps(canonical_curies, sort_keys=True, indent=2))
+ print("Elapsed time: " + str(t1 - t0))
+
+
+def test_example_12():  # Smoke test: times get_normalizer_results for a CURIE and two names and prints the result; makes no assertions
+ synonymizer = NodeSynonymizer()
+ print("==== Get full information in nouveau normalizer format ============================")
+ entities = ["DOID:14330", "anemia", "aardvark"]
+ t0 = timeit.default_timer()
+ normalizer_results = synonymizer.get_normalizer_results(entities=entities)
+ t1 = timeit.default_timer()
+ print(json.dumps(normalizer_results, sort_keys=True, indent=2))
+ print("Elapsed time: " + str(t1 - t0))
+
+
+# ------------------------------------------ NEW TESTS ---------------------------------------------------- #
+
+def test_get_canonical_curies_simple():  # get_canonical_curies on three known CURIEs returns one fully populated entry per input
+ curies = [ATRIAL_FIBRILLATION_CURIE, IBUPROFEN_CURIE, SNCA_CURIE]
+ synonymizer = NodeSynonymizer()
+ results = synonymizer.get_canonical_curies(curies)
+ print(results)
+ assert len(results) == 3
+ for curie in curies:
+ assert results.get(curie)
+ assert {"preferred_name", "preferred_category", "preferred_curie"} == set(results[curie])  # exactly these keys, no extras
+ assert results[curie]["preferred_curie"]
+ assert results[curie]["preferred_category"]
+ assert results[curie]["preferred_category"].startswith("biolink:")
+
+
+def test_get_canonical_curies_single_curie():  # get_canonical_curies accepts a bare string CURIE, not only a list
+ synonymizer = NodeSynonymizer()
+ results = synonymizer.get_canonical_curies(ATRIAL_FIBRILLATION_CURIE)
+ print(results)
+ assert len(results) == 1
+ assert ATRIAL_FIBRILLATION_CURIE in results
+ assert results[ATRIAL_FIBRILLATION_CURIE]
+
+
+def test_get_canonical_curies_unrecognized():  # An unrecognized CURIE is still keyed in the results, mapped to None
+ curies = [ATRIAL_FIBRILLATION_CURIE, FAKE_CURIE]
+ synonymizer = NodeSynonymizer()
+ results = synonymizer.get_canonical_curies(curies)
+ print(results)
+ assert results.get(ATRIAL_FIBRILLATION_CURIE)
+ assert FAKE_CURIE in results
+ assert results[FAKE_CURIE] is None
+
+ results = synonymizer.get_canonical_curies(FAKE_CURIE)  # same expectation when the fake CURIE is the only input
+ print(results)
+ assert len(results) == 1
+ assert FAKE_CURIE in results
+ assert results[FAKE_CURIE] is None
+
+
+def test_get_canonical_curies_by_names():  # Name-based lookup: known names resolve to full entries, the fake name maps to None
+ synonymizer = NodeSynonymizer()
+ names = [CERVICAL_RIB_NAME, WARFARIN_NAME, FAKE_NAME]
+ results = synonymizer.get_canonical_curies(names=names)
+ print(results)
+ assert len(results) == 3
+ assert results[FAKE_NAME] is None
+ for name in [CERVICAL_RIB_NAME, WARFARIN_NAME]:
+ assert results.get(name)
+ assert {"preferred_name", "preferred_category", "preferred_curie"} == set(results[name])  # exactly these keys, no extras
+ assert results[name]["preferred_curie"]
+ assert results[name]["preferred_category"]
+ assert results[name]["preferred_category"].startswith("biolink:")
+
+
+def test_get_canonical_curies_single_name():  # The `names` argument accepts a bare string, not only a list
+ synonymizer = NodeSynonymizer()
+ results = synonymizer.get_canonical_curies(names=CERVICAL_RIB_NAME)
+ print(results)
+ assert len(results) == 1
+ assert CERVICAL_RIB_NAME in results
+ assert results[CERVICAL_RIB_NAME]
+
+
+def test_get_canonical_curies_by_names_and_curies():  # CURIEs and names can be passed in one call; results are keyed by every input
+ synonymizer = NodeSynonymizer()
+ curies = [ACETAMINOPHEN_CURIE, SNCA_CURIE]
+ names = [PARKINSONS_NAME, WARFARIN_NAME]
+ results = synonymizer.get_canonical_curies(curies=curies, names=names)
+ print(results)
+ all_input_entities = set(curies + names)
+ assert all_input_entities == set(results)  # result keys are exactly the inputs
+ for input_entity in all_input_entities:
+ assert results[input_entity]
+
+
+def test_get_canonical_curies_return_all_categories():  # return_all_categories=True adds an "all_categories" mapping of category -> count to each entry
+ curies = [ATRIAL_FIBRILLATION_CURIE, IBUPROFEN_CURIE, SNCA_CURIE]
+ synonymizer = NodeSynonymizer()
+ results = synonymizer.get_canonical_curies(curies=curies, names=WARFARIN_NAME, return_all_categories=True)
+ print(results)
+ assert len(results) == 4  # three CURIEs plus one name; plain assert statement, matching the rest of the file
+ input_entities = curies + [WARFARIN_NAME]
+ for input_entity in input_entities:
+ assert results.get(input_entity)
+ assert {"preferred_name", "preferred_category", "preferred_curie", "all_categories"} == set(results[input_entity])
+ assert results[input_entity]["preferred_curie"]
+ assert results[input_entity]["preferred_category"]
+ assert results[input_entity]["preferred_category"].startswith("biolink:")
+ assert results[input_entity]["all_categories"]
+ for category, count in results[input_entity]["all_categories"].items():
+ assert count > 0
+ assert category.startswith("biolink:")
+
+
+def test_get_equivalent_nodes():  # get_equivalent_nodes returns, per input CURIE, a collection that contains the input and its known synonym
+ synonymizer = NodeSynonymizer()
+ curies = [ACETAMINOPHEN_CURIE, PARKINSONS_CURIE]
+ results = synonymizer.get_equivalent_nodes(curies)
+ print(results)
+ assert set(curies) == set(results)  # result keys are exactly the inputs
+ for curie in curies:
+ assert results[curie]
+ assert len(results[curie]) > 1
+ assert ACETAMINOPHEN_CURIE_2 in results[ACETAMINOPHEN_CURIE]
+ assert ACETAMINOPHEN_CURIE in results[ACETAMINOPHEN_CURIE]
+ assert PARKINSONS_CURIE_2 in results[PARKINSONS_CURIE]
+ assert PARKINSONS_CURIE in results[PARKINSONS_CURIE]
+
+
+def test_get_equivalent_nodes_by_name():  # Name-based get_equivalent_nodes: results are keyed by name and include the expected CURIEs
+ synonymizer = NodeSynonymizer()
+ names = [PARKINSONS_NAME, WARFARIN_NAME]
+ results = synonymizer.get_equivalent_nodes(names=names)
+ print(results)
+ assert set(names) == set(results)  # result keys are exactly the inputs
+ for name in names:
+ assert results[name]
+ assert len(results[name]) > 1
+ assert PARKINSONS_CURIE in results[PARKINSONS_NAME]
+ assert PARKINSONS_CURIE_2 in results[PARKINSONS_NAME]
+
+
+def test_get_equivalent_nodes_by_curies_and_names():  # Mixed CURIE + name input to get_equivalent_nodes; the fake CURIE maps to None
+ synonymizer = NodeSynonymizer()
+ curies = [ACETAMINOPHEN_CURIE, FAKE_CURIE]
+ names = [PARKINSONS_NAME, WARFARIN_NAME]
+ results = synonymizer.get_equivalent_nodes(curies=curies, names=names)
+ print(results)
+ input_entities = curies + names
+ assert set(input_entities) == set(results)  # result keys are exactly the inputs
+ assert results[FAKE_CURIE] is None
+ for input_entity in input_entities:
+ if input_entity != FAKE_CURIE:  # the fake CURIE has no equivalents to check
+ assert results[input_entity]
+ assert len(results[input_entity]) > 1
+ assert PARKINSONS_CURIE in results[PARKINSONS_NAME]
+ assert PARKINSONS_CURIE_2 in results[PARKINSONS_NAME]
+ assert ACETAMINOPHEN_CURIE in results[ACETAMINOPHEN_CURIE]
+ assert ACETAMINOPHEN_CURIE_2 in results[ACETAMINOPHEN_CURIE]
+
+
+def test_get_curie_names():  # get_curie_names returns a distinct, non-empty name for each of the two input CURIEs
+ curies = [ACETAMINOPHEN_CURIE, ACETAMINOPHEN_CURIE_2]
+ synonymizer = NodeSynonymizer()
+ results = synonymizer.get_curie_names(curies)
+ print(results)
+ assert len(results) == 2
+ for curie in curies:
+ assert results.get(curie)
+ assert len(set(results.values())) == 2 # Names should be distinct
+
+
+def test_get_preferred_names():  # get_preferred_names returns a distinct, non-empty name for each of three different concepts
+ curies = [ATRIAL_FIBRILLATION_CURIE, IBUPROFEN_CURIE, SNCA_CURIE]
+ synonymizer = NodeSynonymizer()
+ results = synonymizer.get_preferred_names(curies)
+ print(results)
+ assert len(results) == 3
+ for curie in curies:
+ assert results.get(curie)
+ assert len(set(results.values())) == 3 # Preferred names for different concepts should be distinct
+
+
+def test_get_normalizer_results():  # Checks the shape of get_normalizer_results output: "id", "categories" and "nodes" sections per recognized input
+ synonymizer = NodeSynonymizer()
+ input_entities = [PARKINSONS_CURIE, CERVICAL_RIB_NAME, IBUPROFEN_CURIE, FAKE_NAME]
+ results = synonymizer.get_normalizer_results(input_entities)
+ print(json.dumps(results, indent=2))
+ assert len(results) == len(input_entities)
+ assert results[FAKE_NAME] is None  # unrecognized input is keyed but maps to None
+ for input_entity in input_entities:
+ if input_entity != FAKE_NAME:
+ assert results[input_entity]["id"]
+ assert {"identifier", "name", "category", "SRI_normalizer_name",
+ "SRI_normalizer_category", "SRI_normalizer_curie"} == set(results[input_entity]["id"])  # exact key set of the "id" section
+ assert results[input_entity]["id"]["identifier"]
+ assert results[input_entity]["id"]["category"]
+ assert results[input_entity]["id"]["category"].startswith("biolink:")
+ if results[input_entity]["id"]["SRI_normalizer_category"]:  # may be empty, so only checked when present
+ assert results[input_entity]["id"]["SRI_normalizer_category"].startswith("biolink:")
+
+ assert results[input_entity]["categories"]
+ for category, count in results[input_entity]["categories"].items():
+ assert count > 0
+ assert category.startswith("biolink:")
+
+ assert results[input_entity]["nodes"]
+ assert len(results[input_entity]["nodes"]) > 1
+ for equivalent_node in results[input_entity]["nodes"]:
+ assert {"identifier", "category", "label", "major_branch", "in_sri", "name_sri", "category_sri",
+ "in_kg2pre", "name_kg2pre", "category_kg2pre"} == set(equivalent_node)  # exact key set of each equivalent node
+ assert equivalent_node["identifier"]
+ assert equivalent_node["category"]
+ assert equivalent_node["category"].startswith("biolink:")
+ if equivalent_node["category_sri"]:  # may be empty, so only checked when present
+ assert equivalent_node["category_sri"].startswith("biolink:")
+ if equivalent_node["category_kg2pre"]:  # may be empty, so only checked when present
+ assert equivalent_node["category_kg2pre"].startswith("biolink:")
+
+
+def test_improper_curie_prefix_capitalization():  # A CURIE whose prefix is upper-cased must still resolve, keyed by the input exactly as given
+ synonymizer = NodeSynonymizer()
+ improper_curie = "NCBIGENE:1017"  # other fixtures in this file spell the prefix "NCBIGene"
+
+ results = synonymizer.get_canonical_curies(improper_curie)
+ assert results[improper_curie]
+ assert len(results) == 1
+
+ results = synonymizer.get_canonical_curies(improper_curie, return_all_categories=True)
+ assert results[improper_curie]
+ assert len(results) == 1
+
+ results = synonymizer.get_equivalent_nodes(improper_curie)
+ assert results[improper_curie]
+ assert len(results) == 1
+
+ results = synonymizer.get_normalizer_results(improper_curie)
+ assert results[improper_curie]
+ assert len(results) == 1
+
+
+def test_approximate_name_based_matching():  # Names differing in punctuation or case from the fixtures must still resolve, keyed by the input string
+ synonymizer = NodeSynonymizer()
+
+ name_not_exactly_in_synonymizer = "Parkinsons disease"  # PARKINSONS_NAME without the apostrophe
+ results = synonymizer.get_equivalent_nodes(names=name_not_exactly_in_synonymizer)
+ assert results[name_not_exactly_in_synonymizer]
+ assert len(results) == 1
+
+ name_not_exactly_in_synonymizer_2 = "ATRIAL FIBRILLATION"  # all-caps variant, run through each lookup method below
+ results = synonymizer.get_canonical_curies(names=name_not_exactly_in_synonymizer_2)
+ assert results[name_not_exactly_in_synonymizer_2]
+ assert len(results) == 1
+
+ results = synonymizer.get_canonical_curies(names=name_not_exactly_in_synonymizer_2, return_all_categories=True)
+ assert results[name_not_exactly_in_synonymizer_2]
+ assert len(results) == 1
+
+ results = synonymizer.get_equivalent_nodes(names=name_not_exactly_in_synonymizer_2)
+ assert results[name_not_exactly_in_synonymizer_2]
+ assert len(results) == 1
+
+ results = synonymizer.get_normalizer_results(name_not_exactly_in_synonymizer_2)
+ assert results[name_not_exactly_in_synonymizer_2]
+ assert len(results) == 1
+
+
+def test_entity_controller_input_no_format():  # get_normalizer_results accepts a dict with a "terms" list and no "format" key
+ synonymizer = NodeSynonymizer()
+ controller_param = {"terms": [PARKINSONS_CURIE]}
+ results = synonymizer.get_normalizer_results(controller_param)
+ print(json.dumps(results, indent=2))
+ assert PARKINSONS_CURIE in results
+ assert len(results) == 1
+
+
+def test_entity_controller_input_minimal_format():  # get_normalizer_results accepts a dict with "terms" plus "format": "minimal"
+ synonymizer = NodeSynonymizer()
+ controller_param = {"terms": [WARFARIN_NAME], "format": "minimal"}
+ results = synonymizer.get_normalizer_results(controller_param)
+ print(json.dumps(results, indent=2))
+ assert WARFARIN_NAME in results  # only presence is checked, not the shape of the minimal output
+ assert len(results) == 1
+
+
+def test_cluster_graphs():  # The "knowledge_graph" in normalizer results must be internally consistent with the "nodes" list
+ synonymizer = NodeSynonymizer()
+ results = synonymizer.get_normalizer_results(PTGS1_NAME)
+ assert results[PTGS1_NAME]
+ assert results[PTGS1_NAME]["knowledge_graph"]
+ print(json.dumps(results[PTGS1_NAME]["knowledge_graph"], indent=2))
+ assert results[PTGS1_NAME]["knowledge_graph"]["nodes"]
+ assert results[PTGS1_NAME]["knowledge_graph"]["edges"]
+ assert len(results[PTGS1_NAME]["knowledge_graph"]["nodes"]) == len(results[PTGS1_NAME]["nodes"])  # one graph node per listed equivalent node
+
+ for edge in results[PTGS1_NAME]["knowledge_graph"]["edges"].values():
+ assert edge["subject"] in results[PTGS1_NAME]["knowledge_graph"]["nodes"]  # no edge endpoint may be missing from the graph's nodes
+ assert edge["object"] in results[PTGS1_NAME]["knowledge_graph"]["nodes"]
+ assert edge["predicate"].startswith("biolink:")
+ assert edge["sources"]
+ assert edge["attributes"]
+
+ for node in results[PTGS1_NAME]["knowledge_graph"]["nodes"].values():
+ assert node["categories"]
+ for category in node["categories"]:
+ assert category.startswith("biolink:")
+ assert node["attributes"]
+
+
+def test_truncate_cluster():  # max_synonyms=2 truncates the returned nodes and graph while the totals still reflect the full cluster
+ synonymizer = NodeSynonymizer()
+ results = synonymizer.get_normalizer_results([ACETAMINOPHEN_CURIE, PARKINSONS_CURIE], max_synonyms=2)
+
+ print(json.dumps(results[ACETAMINOPHEN_CURIE]["nodes"], indent=2))
+ assert len(results[ACETAMINOPHEN_CURIE]["nodes"]) == 2
+ assert len(results[ACETAMINOPHEN_CURIE]["knowledge_graph"]["nodes"]) == 2
+ assert len(results[ACETAMINOPHEN_CURIE]["knowledge_graph"]["edges"]) < 20
+ assert results[ACETAMINOPHEN_CURIE]["total_synonyms"] > 2  # total is not capped by max_synonyms
+ assert results[ACETAMINOPHEN_CURIE]["categories"]["biolink:Drug"] > 2  # category counts are not capped either
+ assert "biolink:Disease" not in results[ACETAMINOPHEN_CURIE]["categories"]
+
+ print(json.dumps(results[PARKINSONS_CURIE]["nodes"], indent=2))
+ assert len(results[PARKINSONS_CURIE]["nodes"]) == 2
+ assert len(results[PARKINSONS_CURIE]["knowledge_graph"]["nodes"]) == 2
+ assert len(results[PARKINSONS_CURIE]["knowledge_graph"]["edges"]) < 20
+ assert results[PARKINSONS_CURIE]["total_synonyms"] > 2  # total is not capped by max_synonyms
+ assert results[PARKINSONS_CURIE]["categories"]["biolink:Disease"] > 2  # category counts are not capped either
+ assert "biolink:Drug" not in results[PARKINSONS_CURIE]["categories"]
+
+
+if __name__ == "__main__":  # allow running this test module directly as a script
+ pytest.main(['-v', 'test_ARAX_synonymizer.py'])  # verbose pytest run over the named test file
diff --git a/code/code-archive/old-arax-tests/test_ARAX_translate.py b/code/code-archive/old-arax-tests/test_ARAX_translate.py
new file mode 100644
index 000000000..eb2063dd5
--- /dev/null
+++ b/code/code-archive/old-arax-tests/test_ARAX_translate.py
@@ -0,0 +1,707 @@
+#!/usr/bin/env python3
+
+# Intended to test our translate to ARAXi functionality
+
+import sys
+import os
+import pytest
+from collections import Counter
+import copy
+import json
+import ast
+from typing import List, Union
+
+import numpy as np
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../../ARAXQuery")
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../ARAXQuery")
+from ARAX_query import ARAXQuery
+from ARAX_response import ARAXResponse
+
+PACKAGE_PARENT = '../../UI/OpenAPI/openapi_server'
+sys.path.append(os.path.normpath(os.path.join(os.getcwd(), PACKAGE_PARENT)))
+from openapi_server.models.message import Message
+
+
+def _do_arax_query(query: dict) -> List[Union[ARAXResponse, Message]]:  # Run `query` through a fresh ARAXQuery; returns [response, message]
+ araxq = ARAXQuery()
+ response = araxq.query(query)
+ if response.status != 'OK':
+ print(response.show(level=response.DEBUG))  # print the debug-level log when the query did not succeed
+ return [response, response.envelope.message]
+
+
+def _attribute_tester(message, attribute_name: str, attribute_type: str, num_different_values=2):
+ """
+ Tests attributes of a message
+ message: returned from _do_arax_query
+ attribute_name: the attribute name to test (eg. 'jaccard_index')
+ attribute_type: the attribute type (eg. 'EDAM-DATA:1234')
+ num_different_values: the number of distinct values you wish to see have been added as attributes
+ """
+ edges_of_interest = []
+ values = set()
+ for edge in message.knowledge_graph.edges.values():  # edge keys are not needed, so iterate the values only
+ assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+ if hasattr(edge, 'edge_attributes'):  # NOTE(review): _virtual_tester in this file reads `edge.attributes`; confirm `edge_attributes` still exists
+ for attr in edge.edge_attributes:
+ if attr.original_attribute_name == attribute_name:
+ edges_of_interest.append(edge)
+ assert attr.attribute_type_id == attribute_type
+ values.add(attr.value)
+ assert len(edges_of_interest) > 0
+ assert len(values) >= num_different_values
+
+
+def _virtual_tester(message: Message, edge_predicate: str, relation: str, attribute_name: str, attribute_type: str, num_different_values=2):
+ """
+ Tests overlay functions that add virtual edges
+ message: returned from _do_arax_query
+ edge_predicate: the name of the virtual edge (eg. biolink:has_jaccard_index_with)
+ relation: the relation you picked for the virtual_edge_relation (eg. N1)
+ attribute_name: the attribute name to test (eg. 'jaccard_index')
+ attribute_type: the attribute type (eg. 'EDAM-DATA:1234')
+ num_different_values: the number of distinct values you wish to see have been added as attributes
+ """
+ edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])  # only membership is used below, not the counts
+ assert edge_predicate in edge_predicates_in_kg
+ edges_of_interest = [x for x in message.knowledge_graph.edges.values() if x.relation == relation]  # edges whose `relation` equals the given label
+ values = set()
+ assert len(edges_of_interest) > 0
+ for edge in edges_of_interest:
+ assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+ assert hasattr(edge, 'attributes')
+ assert edge.attributes
+ assert edge.attributes[0].original_attribute_name == attribute_name  # only the first attribute of each edge is inspected
+ values.add(edge.attributes[0].value)
+ assert edge.attributes[0].attribute_type_id == attribute_type
+ # make sure two or more values were added
+ assert len(values) >= num_different_values
+
+
+def test_lookup():  # Workflow with a single "lookup" operation: expects results, none of which carries a score
+ query = {
+ "workflow": [
+ {
+ "id": "lookup"
+ }
+
+ ],
+ "message": {
+ "query_graph": {
+ "nodes": {
+ "n0": {
+ "categories": [
+ "biolink:Gene"
+ ]
+ },
+ "n1": {
+ "ids": [
+ "CHEBI:45783"
+ ],
+ "categories": [
+ "biolink:SmallMolecule"
+ ]
+ }
+ },
+ "edges": {
+ "e01": {
+ "subject": "n0",
+ "object": "n1",
+ "predicates": [
+ "biolink:related_to"
+ ]
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ assert len(message.results) > 0
+ for result in message.results:
+ assert result.score is None  # NOTE(review): test_score reads result.analyses[0].score instead; confirm which attribute holds the score
+
+def test_fill_success():  # Workflow "fill" restricted to RTX-KG2 and qedge e01: expects a non-empty knowledge graph
+ query = {
+ "workflow": [
+ {
+ "id": "fill",
+ "parameters": {
+ "allowlist": ["infores:rtx-kg2"],
+ "qedge_keys": ["e01"]
+ }
+ }
+ ],
+ "message": {
+ "query_graph": {
+ "nodes": {
+ "n0": {
+ "categories": [
+ "biolink:Gene"
+ ]
+ },
+ "n1": {
+ "ids": [
+ "CHEBI:45783"
+ ],
+ "categories": [
+ "biolink:ChemicalSubstance"
+ ]
+ }
+ },
+ "edges": {
+ "e01": {
+ "subject": "n0",
+ "object": "n1",
+ "predicates": [
+ "biolink:related_to"
+ ]
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ assert len(message.knowledge_graph.nodes) > 0  # fill populates the knowledge graph; results are not checked here
+ assert len(message.knowledge_graph.edges) > 0
+
+def test_fill_error():  # Workflow "fill" given a qedge key absent from the query graph: expects ERROR and an empty knowledge graph
+ query = {
+ "workflow": [
+ {
+ "id": "fill",
+ "parameters": {
+ "allowlist": ["infores:rtx-kg2"],
+ "qedge_keys": ["asdf"]
+ }
+ }
+ ],
+ "message": {
+ "query_graph": {
+ "nodes": {
+ "n0": {
+ "categories": [
+ "biolink:Gene"
+ ]
+ },
+ "n1": {
+ "ids": [
+ "CHEBI:45783"
+ ],
+ "categories": [
+ "biolink:ChemicalSubstance"
+ ]
+ }
+ },
+ "edges": {
+ "e01": {
+ "subject": "n0",
+ "object": "n1",
+ "predicates": [
+ "biolink:related_to"
+ ]
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'ERROR'  # "asdf" is not an edge key in the query graph above (only "e01" is)
+ assert len(message.knowledge_graph.nodes) == 0
+ assert len(message.knowledge_graph.edges) == 0
+
+def test_score():  # Workflow "lookup" then "score": every result's first analysis must carry a score
+ query = {
+ "workflow": [
+ {
+ "id": "lookup"
+ },
+ {
+ "id": "score"
+ }
+
+ ],
+ "message": {
+ "query_graph": {
+ "nodes": {
+ "n0": {
+ "categories": [
+ "biolink:Gene"
+ ]
+ },
+ "n1": {
+ "ids": [
+ "CHEBI:45783"
+ ],
+ "categories": [
+ "biolink:SmallMolecule"
+ ]
+ }
+ },
+ "edges": {
+ "e01": {
+ "subject": "n0",
+ "object": "n1",
+ "predicates": [
+ "biolink:related_to"
+ ]
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ assert len(message.results) > 0
+ for result in message.results:
+ assert result.analyses[0].score is not None  # only the first analysis of each result is inspected
+
+def test_bind():  # Workflow "fill" (RTX-KG2 only) then "bind": expects at least one result
+ query = {
+ "workflow": [
+ {
+ "id": "fill",
+ "parameters": {
+ "allowlist": ["infores:rtx-kg2"]
+ }
+ },
+ {
+ "id": "bind"
+ }
+
+ ],
+ "message": {
+ "query_graph": {
+ "nodes": {
+ "n0": {
+ "categories": [
+ "biolink:Gene"
+ ]
+ },
+ "n1": {
+ "ids": [
+ "CHEBI:45783"
+ ],
+ "categories": [
+ "biolink:ChemicalSubstance"
+ ]
+ }
+ },
+ "edges": {
+ "e01": {
+ "subject": "n0",
+ "object": "n1",
+ "predicates": [
+ "biolink:related_to"
+ ]
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ assert len(message.results) > 0
+
+def test_complete_results():  # "fill" (allowlisted to infores:rtx-kg2) followed by "complete_results" must yield results
+    query = {
+        "workflow": [
+            {
+                "id": "fill",
+                "parameters": {
+                    "allowlist": ["infores:rtx-kg2"]
+                }
+            },
+            {
+                "id": "complete_results"
+            }
+
+        ],
+        "message": {
+            "query_graph": {
+                "nodes": {
+                    "n0": {
+                        "categories": [
+                            "biolink:Gene"
+                        ]
+                    },
+                    "n1": {
+                        "ids": [
+                            "CHEBI:45783"
+                        ],
+                        "categories": [
+                            "biolink:ChemicalSubstance"
+                        ]
+                    }
+                },
+                "edges": {
+                    "e01": {
+                        "subject": "n0",
+                        "object": "n1",
+                        "predicates": [
+                            "biolink:related_to"
+                        ]
+                    }
+                }
+            }
+        }
+    }
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    assert len(message.results) > 0  # the workflow is expected to have produced at least one result
+
+def test_filter_results_top_n():  # fill -> NGD overlay -> bind -> score -> keep top 20
+    query = {
+        "workflow": [
+            {
+                "id": "fill",
+                "parameters": {
+                    "allowlist": ["infores:rtx-kg2"]
+                }
+            },
+            {
+                "id": "overlay_compute_ngd",
+                "parameters": {
+                    "virtual_relation_label": "NGD1",
+                    "qnode_keys": ["n0", "n1"]
+                }
+            },
+            {
+                "id": "bind"
+            },
+            {
+                "id": "score"
+            },
+            {
+                "id": "filter_results_top_n",
+                "parameters": {
+                    "max_results": 20  # must match the exact result count asserted below
+                }
+            }
+        ],
+        "message": {
+            "query_graph": {
+                "nodes": {
+                    "n0": {
+                        "categories": [
+                            "biolink:Gene"
+                        ]
+                    },
+                    "n1": {
+                        "ids": [
+                            "CHEBI:45783"
+                        ],
+                        "categories": [
+                            "biolink:SmallMolecule"
+                        ]
+                    }
+                },
+                "edges": {
+                    "e01": {
+                        "subject": "n0",
+                        "object": "n1",
+                        "predicates": [
+                            "biolink:related_to"
+                        ]
+                    }
+                }
+            }
+        }
+    }
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    assert len(message.results) == 20  # exactly max_results, so the query must produce at least 20 results
+    for result in message.results:
+        assert result.analyses[0].score is not None  # only the first analysis of each result is checked
+
+def test_overlay_after_lookup():  # lookup -> NGD overlay -> score -> keep top 20; NGD edges must be bound in results
+    query = {
+        "workflow": [
+            {
+                "id": "lookup"
+            },
+            {
+                "id": "overlay_compute_ngd",
+                "parameters": {
+                    "virtual_relation_label": "NGD1",  # prefix searched for in the edge-binding ids below
+                    "qnode_keys": ["n0", "n1"]
+                }
+            },
+            {
+                "id": "score"
+            },
+            {
+                "id": "filter_results_top_n",
+                "parameters": {
+                    "max_results": 20  # must match the exact result count asserted below
+                }
+            }
+        ],
+        "message": {
+            "query_graph": {
+                "nodes": {
+                    "n0": {
+                        "categories": [
+                            "biolink:Gene"
+                        ]
+                    },
+                    "n1": {
+                        "ids": [
+                            "CHEBI:45783"
+                        ],
+                        "categories": [
+                            "biolink:SmallMolecule"
+                        ]
+                    }
+                },
+                "edges": {
+                    "e01": {
+                        "subject": "n0",
+                        "object": "n1",
+                        "predicates": [
+                            "biolink:related_to"
+                        ]
+                    }
+                }
+            }
+        }
+    }
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    assert len(message.results) == 20
+    ngd_bindings = set()  # distinct ids of bound edges whose id starts with "NGD1"
+    for result in message.results:
+        assert result.analyses[0].score is not None
+        for eb_key, edge_bindings in result.analyses[0].edge_bindings.items():
+            for edge_binding in edge_bindings:
+                if edge_binding.id.startswith("NGD1"):
+                    ngd_bindings.add(edge_binding.id)
+    assert len(ngd_bindings) == len(message.results)  # as many distinct NGD edge ids as there are results
+
+@pytest.mark.slow
+def test_connect_knodes_2_nodes():  # overlay_connect_knodes on a 2-node query must bind all four kinds of connect_knodes edges
+    query = {
+        "workflow": [
+            {
+                "id": "fill"
+            },
+            {
+                "id": "overlay_connect_knodes"
+            },
+            {
+                "id": "complete_results"
+            },
+            {
+                "id": "score"
+            },
+            {
+                "id": "sort_results_score",
+                "parameters": {
+                    "ascending_or_descending": "descending"
+                }
+            },
+            {
+                "id": "filter_results_top_n",
+                "parameters": {
+                    "max_results": 30  # must match the exact result count asserted below
+                }
+            }
+        ],
+        "message": {
+            "query_graph": {
+                "nodes": {
+                    "n0": {
+                        "categories": [
+                            "biolink:Disease"
+                        ]
+                    },
+                    "n1": {
+                        "ids": [
+                            "CHEBI:45783"
+                        ],
+                        "categories": [
+                            "biolink:ChemicalEntity"
+                        ]
+                    }
+                },
+                "edges": {
+                    "e01": {
+                        "subject": "n0",
+                        "object": "n1",
+                        "predicates": [
+                            "biolink:treats"
+                        ]
+                    }
+                }
+            }
+        }
+    }
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    assert len(message.results) == 30
+    connected_bindings_ngd = set()
+    connected_bindings_fisher = set()
+    connected_bindings_paired_freq = set()
+    connected_bindings_pred_dtd = set()
+    for result in message.results:
+        assert result.analyses[0].score is not None  # score and edge bindings are read from the first analysis, as in the sibling tests
+        for edge_bindings in result.analyses[0].edge_bindings.values():
+            for edge_binding in edge_bindings:
+                if edge_binding.id.startswith("connect_knodes_fisher"):  # bucket each bound edge by its id prefix
+                    connected_bindings_fisher.add(edge_binding.id)
+                elif edge_binding.id.startswith("connect_knodes_ngd"):
+                    connected_bindings_ngd.add(edge_binding.id)
+                elif edge_binding.id.startswith("connect_knodes_paired_freq"):
+                    connected_bindings_paired_freq.add(edge_binding.id)
+                elif edge_binding.id.startswith("connect_knodes_pred_dtd"):
+                    connected_bindings_pred_dtd.add(edge_binding.id)
+
+    assert len(connected_bindings_ngd) > 0  # every bucket must have received at least one bound edge
+    assert len(connected_bindings_fisher) > 0
+    assert len(connected_bindings_paired_freq) > 0
+    assert len(connected_bindings_pred_dtd) > 0
+
+@pytest.mark.slow
+def test_connect_knodes_3_nodes():  # overlay_connect_knodes on a 3-node, 2-edge query must bind connect_knodes edges
+    query = {
+        "workflow": [
+            {
+                "id": "fill"
+            },
+            {
+                "id": "overlay_connect_knodes"
+            },
+            {
+                "id": "complete_results"
+            },
+            {
+                "id": "score"
+            },
+            {
+                "id": "sort_results_score",
+                "parameters": {
+                    "ascending_or_descending": "descending"
+                }
+            },
+            {
+                "id": "filter_results_top_n",
+                "parameters": {
+                    "max_results": 30  # must match the exact result count asserted below
+                }
+            }
+        ],
+        "message": {
+            "query_graph": {
+                "nodes": {
+                    "n0": {
+                        "categories": [
+                            "biolink:Gene"
+                        ]
+                    },
+                    "n1": {
+                        "ids": [
+                            "CHEBI:45783"
+                        ],
+                        "categories": [
+                            "biolink:ChemicalEntity"
+                        ]
+                    },
+                    "n2": {
+                        "categories": [
+                            "biolink:Disease"
+                        ]
+                    }
+                },
+                "edges": {
+                    "e01": {
+                        "subject": "n1",
+                        "object": "n0",
+                        "predicates": [
+                            "biolink:related_to"
+                        ]
+                    },
+                    "e02": {
+                        "subject": "n0",
+                        "object": "n2",
+                        "predicates": [
+                            "biolink:related_to"
+                        ]
+                    }
+                }
+            }
+        }
+    }
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    assert len(message.results) == 30
+    connected_bindings = set()  # distinct ids of bound edges whose id starts with "connect_knodes"
+    for result in message.results:
+        assert result.analyses[0].score is not None  # score and edge bindings are read from the first analysis, as in the sibling tests
+        for edge_bindings in result.analyses[0].edge_bindings.values():
+            for edge_binding in edge_bindings:
+                if edge_binding.id.startswith("connect_knodes"):
+                    connected_bindings.add(edge_binding.id)
+    assert len(connected_bindings) > 0
+
+
+def test_unknown_operation():
+    # A workflow containing an unrecognized operation id is expected to come back with status ERROR
+    query = {
+        "workflow": [
+            {
+                "id": "gobbledegook"  # deliberately not a real operation id
+            },
+            {
+                "id": "overlay_compute_ngd",
+                "parameters": {
+                    "virtual_relation_label": "NGD1",
+                    "qnode_keys": ["n0", "n1"]
+                }
+            },
+            {
+                "id": "score"
+            },
+            {
+                "id": "filter_results_top_n",
+                "parameters": {
+                    "max_results": 20
+                }
+            }
+        ],
+        "message": {
+            "query_graph": {
+                "nodes": {
+                    "n0": {
+                        "categories": [
+                            "biolink:Gene"
+                        ]
+                    },
+                    "n1": {
+                        "ids": [
+                            "CHEBI:45783"
+                        ],
+                        "categories": [
+                            "biolink:SmallMolecule"
+                        ]
+                    }
+                },
+                "edges": {
+                    "e01": {
+                        "subject": "n0",
+                        "object": "n1",
+                        "predicates": [
+                            "biolink:related_to"
+                        ]
+                    }
+                }
+            }
+        }
+    }
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'ERROR'  # only the status is checked; the message content is not inspected
+
+
+if __name__ == "__main__":  # allow running this test module directly as a script
+    pytest.main(['-v'])  # hand over to pytest in verbose mode
diff --git a/code/code-archive/old-arax-tests/test_ARAX_workflows.py b/code/code-archive/old-arax-tests/test_ARAX_workflows.py
new file mode 100644
index 000000000..213823f1f
--- /dev/null
+++ b/code/code-archive/old-arax-tests/test_ARAX_workflows.py
@@ -0,0 +1,849 @@
+#!/usr/bin/env python3
+
+# Intended to test our more complicated workflows
+
+import sys
+import os
+import pytest
+from collections import Counter
+import copy
+import json
+import ast
+from typing import List, Union
+
+import numpy as np
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../../ARAXQuery")
+sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../ARAXQuery")
+from ARAX_query import ARAXQuery
+from ARAX_response import ARAXResponse
+
+PACKAGE_PARENT = '../../UI/OpenAPI/openapi_server'
+sys.path.append(os.path.normpath(os.path.join(os.getcwd(), PACKAGE_PARENT)))
+from openapi_server.models.message import Message
+
+
+def _do_arax_query(query: dict) -> List[Union[ARAXResponse, Message]]:
+    """Run *query* through a fresh ARAXQuery; return [response, response.envelope.message] (log is printed unless status is 'OK')."""
+    arax_response = ARAXQuery().query(query)
+    if arax_response.status != 'OK':
+        print(arax_response.show(level=arax_response.DEBUG))
+    return [arax_response, arax_response.envelope.message]
+
+
+def _attribute_tester(message, attribute_name: str, attribute_type: str, num_different_values=2):
+    """
+    Asserts that knowledge-graph edges carry the named attribute with the expected type and enough distinct values
+    message: returned from _do_arax_query
+    attribute_name: the attribute name to test (eg. 'jaccard_index')
+    attribute_type: the attribute type (eg. 'EDAM-DATA:1234')
+    num_different_values: the number of distinct values you wish to see have been added as attributes
+    """
+    edges_of_interest = []  # one entry per matching attribute found (an edge may appear more than once)
+    values = set()  # distinct values seen for the attribute
+    for edge in message.knowledge_graph.edges.values():
+        assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+        if getattr(edge, 'attributes', None):  # read `attributes` like the rest of this file; skip edges with none
+            for attr in edge.attributes:
+                if attr.original_attribute_name == attribute_name:
+                    edges_of_interest.append(edge)
+                    assert attr.attribute_type_id == attribute_type
+                    values.add(attr.value)
+    assert len(edges_of_interest) > 0  # the attribute must be present on at least one edge
+    assert len(values) >= num_different_values
+
+
+def _virtual_tester(message: Message, edge_predicate: str, relation: str, attribute_name: str, attribute_type: str, num_different_values=2):
+    """
+    Tests overlay functions that add virtual edges: the predicate must occur in the KG, and every edge whose relation matches must carry the attribute as its first attribute
+    message: returned from _do_arax_query
+    edge_predicate: the name of the virtual edge (eg. biolink:has_jaccard_index_with)
+    relation: the relation you picked for the virtual_edge_relation (eg. N1)
+    attribute_name: the attribute name to test (eg. 'jaccard_index')
+    attribute_type: the attribute type (eg. 'EDAM-DATA:1234')
+    num_different_values: the number of distinct values you wish to see have been added as attributes
+    """
+    edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])
+    assert edge_predicate in edge_predicates_in_kg  # at least one KG edge must use the virtual predicate
+    edges_of_interest = [x for x in message.knowledge_graph.edges.values() if x.relation == relation]  # selected by relation, not by predicate
+    values = set()
+    assert len(edges_of_interest) > 0
+    for edge in edges_of_interest:
+        assert 'primary_knowledge_source' in [source.resource_role for source in edge.sources]
+        assert hasattr(edge, 'attributes')
+        assert edge.attributes
+        assert edge.attributes[0].original_attribute_name == attribute_name  # only the first attribute is inspected
+        values.add(edge.attributes[0].value)
+        assert edge.attributes[0].attribute_type_id == attribute_type
+    # make sure at least num_different_values distinct values were added
+    assert len(values) >= num_different_values
+
+
+def test_option_group_id():  # option_group_key given to add_qedge should surface as option_group_id on the query-graph edges
+    query = {"operations": {"actions": [
+        "create_message",
+        "add_qnode(name=DOID:3312, key=n00)",
+        "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+        "add_qedge(subject=n00, object=n01, predicates=biolink:indicated_for, option_group_key=a, id=e00)",  # NOTE(review): sibling tests name qedges with key=...; confirm id=... is accepted
+        "add_qedge(subject=n00, object=n01, predicates=biolink:contraindicated_for, option_group_key=1, id=e01)",
+        "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+    ]}}
+    [response, message] = _do_arax_query(query)  # NOTE(review): response.status is never asserted in this test
+    for key, edge in message.query_graph.edges.items():
+        if key == 'e01':
+            assert edge.option_group_id == '1'  # compared as a string
+        elif key == 'e00':
+            assert edge.option_group_id == 'a'
+
+def test_exclude():  # exclude=true on add_qedge should be reflected on that query-graph edge only
+    query = {"operations": {"actions": [
+        "create_message",
+        "add_qnode(name=DOID:3312, key=n00)",
+        "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+        "add_qedge(subject=n00, object=n01, predicates=biolink:treats, key=e00)",
+        "add_qedge(subject=n00, object=n01, predicates=biolink:contraindicated_for, exclude=true, key=e01)",
+        "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+    ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    for key, edge in message.query_graph.edges.items():
+        if key == 'e01':
+            assert edge.exclude  # e01 was added with exclude=true
+        if key == 'e00':
+            assert not edge.exclude  # e00 was added without the exclude parameter
+
+@pytest.mark.slow
+def test_example_2():  # two-hop expand, Jaccard overlay (J1), KG filtering, resultify, then sort/limit to 15
+    query = {"operations": {"actions": [
+        "create_message",
+        "add_qnode(name=DOID:7551, key=n00)",
+        "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+        "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+        "add_qedge(subject=n00, object=n01, key=e00)",
+        "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:physically_interacts_with)",
+        "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+        "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",
+        "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=jaccard_index, direction=below, threshold=.2, remove_connected_nodes=t, qnode_keys=[n02])",
+        "filter_kg(action=remove_edges_by_discrete_attribute,edge_attribute=provided_by, value=Pharos)",
+        # "overlay(action=predict_drug_treats_disease, subject_qnode_key=n02, object_qnode_key=n00, virtual_relation_label=P1, threshold=0)",
+        "resultify(ignore_edge_direction=true)",
+        "filter_results(action=sort_by_edge_attribute, edge_attribute=jaccard_index, direction=descending, max_results=15)",
+        "return(message=true, store=false)",
+    ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    assert len(message.results) > 0  # :BUG: sometimes the workflow returns 47 results, sometimes 48 (!?)
+    assert message.results[0].essence is not None
+    # _virtual_tester(message, 'biolink:probably_treats', 'P1', 'probability_treats', 'EDAM-DATA:0951', 2)
+    _virtual_tester(message, 'biolink:has_jaccard_index_with', 'J1', 'jaccard_index', 'EDAM-DATA:1772', 2)  # J1 edges need >= 2 distinct jaccard_index values
+
+
+@pytest.mark.slow
+def test_example_3():  # two-hop expand with clinical (C1) and NGD (N1) overlays, each followed by KG filtering
+    query = {"operations": {"actions": [
+        "add_qnode(name=MONDO:0005301, key=n00)",  # CM: change "DOID:9406" to "MONDO:0005301" because DOID:9406 has no matched OMOP id.
+        "add_qnode(categories=biolink:ChemicalEntity, is_set=true, key=n01)",
+        "add_qnode(categories=biolink:Protein, key=n02)",
+        "add_qedge(subject=n00, object=n01, key=e00)",
+        "add_qedge(subject=n01, object=n02, key=e01)",
+        "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",
+        "overlay(action=overlay_clinical_info, observed_expected_ratio=true, virtual_relation_label=C1, subject_qnode_key=n00, object_qnode_key=n01)",
+        "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=observed_expected_ratio, direction=below, threshold=1, remove_connected_nodes=t, qnode_keys=[n01])",
+        "filter_kg(action=remove_orphaned_nodes, node_category=biolink:Protein)",
+        "overlay(action=compute_ngd, virtual_relation_label=N1, subject_qnode_key=n01, object_qnode_key=n02)",
+        "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=normalized_google_distance, direction=above, threshold=0.85, remove_connected_nodes=t, qnode_keys=[n02])",
+        "resultify(ignore_edge_direction=true, debug=true)",
+        "return(message=true, store=false)"
+    ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    #assert len(message.results) in [47, 48] # :BUG: sometimes the workflow returns 47 results, sometimes 48 (!?)
+    assert len(message.results) >= 60  # lower bound only; the exact-count check above is disabled
+    assert message.results[0].essence is not None
+    _virtual_tester(message, 'biolink:has_observed_expected_ratio_with', 'C1', 'observed_expected_ratio', 'EDAM-DATA:0951', 2)
+    _virtual_tester(message, 'biolink:occurs_together_in_literature_with', 'N1', 'normalized_google_distance', 'EDAM-DATA:2526', 2)
+
+
+def test_FET_example_1():
+    # FET 3-hop example: find the phenotypes of drugs connected to proteins connected to DOID:12889 (NOTE(review): this comment used to name DOID:14330 - confirm which disease is intended)
+    query = {"operations": {"actions": [
+        "add_qnode(ids=DOID:12889, key=n00, categories=biolink:Disease)",
+        "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+        "add_qedge(subject=n00, object=n01, key=e00)",
+        "expand(edge_key=e00, kp=infores:rtx-kg2)",
+        "overlay(action=fisher_exact_test, subject_qnode_key=n00, object_qnode_key=n01, virtual_relation_label=FET1, rel_edge_key=e00)",
+        "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=fisher_exact_test_p-value, direction=above, threshold=0.005, remove_connected_nodes=t, qnode_keys=[n01])",
+        "add_qnode(categories=biolink:ChemicalEntity, is_set=true, key=n02)",
+        "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:physically_interacts_with)",
+        "expand(edge_key=e01, kp=infores:rtx-kg2)",
+        "overlay(action=fisher_exact_test, subject_qnode_key=n01, object_qnode_key=n02, virtual_relation_label=FET2, rel_edge_key=e01)",
+        "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=fisher_exact_test_p-value, direction=above, threshold=0.005, remove_connected_nodes=t, qnode_keys=[n02])",
+        "add_qnode(categories=biolink:PhenotypicFeature, key=n03)",
+        "add_qedge(subject=n02, object=n03, key=e02)",
+        "expand(edge_key=e02, kp=infores:rtx-kg2)",
+        "resultify()",
+        "return(message=true, store=false)"
+    ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    assert message.n_results > 0
+    edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])
+    assert 'biolink:has_fisher_exact_test_p_value_with' in edge_predicates_in_kg
+    FET_edges = [x for x in message.knowledge_graph.edges.values() if x.predicate.find("fisher_exact_test") != -1]  # edges whose predicate mentions fisher_exact_test
+    FET_edge_labels = set([attr.value for edge in FET_edges for attr in edge.attributes if attr.original_attribute_name == 'virtual_relation_label'])
+    assert len(FET_edge_labels) == 2  # the two overlay labels, FET1 and FET2
+    for edge in FET_edges:
+        assert hasattr(edge, 'attributes')
+        FET_edge_attribute = [attr for attr in edge.attributes if attr.original_attribute_name == 'fisher_exact_test_p-value']
+        assert 0 <= float(FET_edge_attribute[0].value) < 0.005  # same threshold as the filter_kg steps above
+        assert FET_edge_attribute[0].attribute_type_id == 'EDAM-DATA:1669'
+
+
+def test_FET_example_2():
+    # FET 2-hop example: find the diseases that share proteins with CHEMBL.COMPOUND:CHEMBL1472 (NOTE(review): this comment used to say ibuprofen / CHEMBL521 - confirm which compound is intended)
+    query = {"operations": {"actions": [
+        "add_qnode(key=n00, ids=CHEMBL.COMPOUND:CHEMBL1472, categories=biolink:ChemicalEntity)",
+        "add_qnode(key=n01, categories=biolink:Protein)",
+        "add_qedge(key=e00, subject=n00, object=n01)",
+        "expand(edge_key=e00, kp=infores:rtx-kg2)",
+        "overlay(action=fisher_exact_test, subject_qnode_key=n00, object_qnode_key=n01, virtual_relation_label=FET1)",
+        "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=fisher_exact_test_p-value, direction=above, threshold=0.01, remove_connected_nodes=t, qnode_keys=[n01])",
+        "add_qnode(categories=biolink:Disease, key=n02)",
+        "add_qedge(subject=n01, object=n02, key=e01)",
+        "expand(edge_key=e01, kp=infores:rtx-kg2)",
+        "resultify()",
+        "filter_results(action=limit_number_of_results, max_results=50)",
+        "return(message=true, store=false)"
+    ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    assert message.n_results > 0
+    edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])
+    assert 'biolink:has_fisher_exact_test_p_value_with' in edge_predicates_in_kg
+    FET_edges = [x for x in message.knowledge_graph.edges.values() if x.predicate.find("fisher_exact_test") != -1]  # edges whose predicate mentions fisher_exact_test
+    FET_edge_labels = set([attr.value for edge in FET_edges for attr in edge.attributes if attr.original_attribute_name == 'virtual_relation_label'])
+    assert len(FET_edge_labels) == 1  # only one overlay label (FET1) is used in this query
+    for edge in FET_edges:
+        assert hasattr(edge, 'attributes')
+        FET_edge_attribute = [attr for attr in edge.attributes if attr.original_attribute_name == 'fisher_exact_test_p-value']
+        assert 0 <= float(FET_edge_attribute[0].value) < 0.01  # same threshold as the filter_kg step above
+        assert FET_edge_attribute[0].attribute_type_id == 'EDAM-DATA:1669'
+
+
+def test_FET_example_3():
+    # FET 3-hop example: find the proteins connected to diseases that share phenotypes with age-related macular degeneration (MONDO:0005150)
+    query = {"operations": {"actions": [
+        "add_qnode(ids=MONDO:0005150, key=n00, categories=biolink:Disease)",
+        "add_qnode(categories=biolink:PhenotypicFeature, key=n01)",
+        "add_qedge(subject=n00, object=n01, key=e00)",
+        "expand(edge_key=e00, kp=infores:rtx-kg2)",
+        "overlay(action=fisher_exact_test, subject_qnode_key=n00, object_qnode_key=n01, virtual_relation_label=FET1, rel_edge_key=e00)",
+        "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=fisher_exact_test_p-value, direction=above, threshold=0.001, remove_connected_nodes=t, qnode_keys=[n01])",
+        "add_qnode(categories=biolink:Disease, key=n02)",
+        "add_qedge(subject=n01,object=n02,key=e01)",
+        "expand(edge_key=e01, kp=infores:rtx-kg2)",
+        "overlay(action=fisher_exact_test, subject_qnode_key=n01, object_qnode_key=n02, virtual_relation_label=FET2, rel_edge_key=e01)",
+        "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=fisher_exact_test_p-value, direction=above, threshold=0.001, remove_connected_nodes=t, qnode_keys=[n02])",
+        "add_qnode(categories=biolink:Protein, key=n03)",
+        "add_qedge(subject=n02, object=n03, key=e02)",
+        "expand(edge_key=e02, kp=infores:rtx-kg2)",
+        "resultify()",
+        "return(message=true, store=false)"
+    ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    assert message.n_results > 0
+    edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])
+    assert 'biolink:has_fisher_exact_test_p_value_with' in edge_predicates_in_kg
+    FET_edges = [x for x in message.knowledge_graph.edges.values() if x.predicate.find("fisher_exact_test") != -1]  # edges whose predicate mentions fisher_exact_test
+    FET_edge_labels = set([attr.value for edge in FET_edges for attr in edge.attributes if attr.original_attribute_name == 'virtual_relation_label'])
+    assert len(FET_edge_labels) == 2  # the two overlay labels, FET1 and FET2
+    for edge in FET_edges:
+        assert hasattr(edge, 'attributes')
+        FET_edge_attribute = [attr for attr in edge.attributes if attr.original_attribute_name == 'fisher_exact_test_p-value']
+        assert 0 <= float(FET_edge_attribute[0].value) < 0.001  # same threshold as the filter_kg steps above
+        assert FET_edge_attribute[0].attribute_type_id == 'EDAM-DATA:1669'
+
+
+def test_FET_example_4():
+    # FET 2-hop example collecting nodes and edges from KG2: find the diseases that share proteins with DOID:10718 (NOTE(review): this comment used to say Parkinson disease / DOID:14330 - confirm which disease is intended)
+    query = {"operations": {"actions": [
+        "add_qnode(ids=DOID:10718, key=n00, categories=biolink:Disease)",
+        "add_qnode(categories=biolink:Protein, key=n01)",
+        "add_qedge(subject=n00, object=n01, key=e00)",
+        "expand(edge_key=e00, kp=infores:rtx-kg2)",
+        "overlay(action=fisher_exact_test, subject_qnode_key=n00, virtual_relation_label=FET1, object_qnode_key=n01,rel_edge_id=e00)",  # NOTE(review): the other FET tests pass rel_edge_key; confirm rel_edge_id is accepted
+        "filter_kg(action=remove_edges_by_continuous_attribute,edge_attribute=fisher_exact_test_p-value,direction=above,threshold=0.001,remove_connected_nodes=t,qnode_keys=[n01])",
+        "add_qnode(categories=biolink:Disease, key=n02)",
+        "add_qedge(subject=n01, object=n02, key=e01)",
+        "expand(edge_key=e01, kp=infores:rtx-kg2)",
+        "resultify()",
+        "return(message=true, store=false)"
+    ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    assert message.n_results > 0
+    edge_predicates_in_kg = Counter([x.predicate for x in message.knowledge_graph.edges.values()])
+    assert 'biolink:has_fisher_exact_test_p_value_with' in edge_predicates_in_kg
+    FET_edges = [x for x in message.knowledge_graph.edges.values() if x.predicate.find("fisher_exact_test") != -1]  # edges whose predicate mentions fisher_exact_test
+    FET_edge_labels = set([attr.value for edge in FET_edges for attr in edge.attributes if attr.original_attribute_name == 'virtual_relation_label'])
+    assert len(FET_edge_labels) == 1  # only one overlay label (FET1) is used in this query
+    for edge in FET_edges:
+        assert hasattr(edge, 'attributes')
+        FET_edge_attribute = [attr for attr in edge.attributes if attr.original_attribute_name == 'fisher_exact_test_p-value']
+        assert 0 <= float(FET_edge_attribute[0].value) < 0.001  # same threshold as the filter_kg step above
+        assert FET_edge_attribute[0].attribute_type_id == 'EDAM-DATA:1669'
+
+
+def test_FET_ranking_1():  # after a fisher_exact_test overlay, the results must not all carry the same value
+    query = {"operations": { "actions": [
+        "create_message",
+        "add_qnode(key=n00,ids=UniProtKB:P14136,categories=biolink:Protein)",
+        "add_qnode(categories=biolink:BiologicalProcess, key=n01)",
+        "add_qedge(subject=n00, object=n01, key=e00)",
+        "expand(edge_key=e00,kp=infores:rtx-kg2)",
+        "overlay(action=fisher_exact_test, subject_qnode_key=n00, object_qnode_key=n01, virtual_relation_label=FET)",
+        "resultify()",
+        "return(message=true, store=false)",
+    ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    scores = [result.row_data[0] for result in message.results]  # presumably row_data[0] holds the result score - TODO confirm
+    assert min(scores) != max(scores)  # at least two distinct values are required
+
+@pytest.mark.slow
+def test_example_2_kg2():  # two-hop KG2 expand, Jaccard overlay (J1), KG filter, resultify, then sort/limit to 15
+    query = {"operations": { "actions": [
+        "create_message",
+        "add_qnode(name=DOID:14330, key=n00)",
+        "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+        "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+        "add_qedge(subject=n00, object=n01, key=e00)",
+        "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:molecularly_interacts_with)",
+        "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",  # the KP is passed as kp=..., as in the other tests of this file
+        "overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)",  # seems to work just fine
+        "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=jaccard_index, direction=below, threshold=.008, remove_connected_nodes=t, qnode_keys=[n02])",
+        "resultify(ignore_edge_direction=true)",
+        "filter_results(action=sort_by_edge_attribute, edge_attribute=jaccard_index, direction=descending, max_results=15)",
+        "return(message=true, store=false)",
+    ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    assert len(message.results) == 15  # exactly max_results of the filter_results step
+    assert message.results[0].essence is not None
+    _virtual_tester(response.envelope.message, 'biolink:has_jaccard_index_with', 'J1', 'jaccard_index', 'EDAM-DATA:1772', 2)
+
+
+@pytest.mark.slow
+def test_clinical_overlay_example1():
+    """
+    Gives an example of a KG that does not have edges that COHD can decorate, but does have pairs of nodes that COHD
+    could decorate (eg here is drug and chemical_substance), so add the info in as a virtual edge.
+    """
+    query = {"operations": {"actions": [
+        "create_message",
+        "add_qnode(name=DOID:11830, key=n00)",
+        "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+        "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+        "add_qedge(subject=n00, object=n01, key=e00)",
+        "add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:molecularly_interacts_with)",
+        "expand(edge_key=[e00,e01], kp=infores:rtx-kg2)",  # the KP is passed as kp=..., as in the other tests of this file
+        # overlay a bunch of clinical info
+        "overlay(action=overlay_clinical_info, paired_concept_frequency=true, subject_qnode_key=n00, object_qnode_key=n02, virtual_relation_label=C1)",
+        "overlay(action=overlay_clinical_info, observed_expected_ratio=true, subject_qnode_key=n00, object_qnode_key=n02, virtual_relation_label=C2)",
+        "overlay(action=overlay_clinical_info, chi_square=true, subject_qnode_key=n00, object_qnode_key=n02, virtual_relation_label=C3)",
+        # filter some stuff out for the fun of it
+        "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=paired_concept_frequency, direction=above, threshold=0.5, remove_connected_nodes=true, qnode_keys=[n02])",
+        "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=observed_expected_ratio, direction=above, threshold=1, remove_connected_nodes=true, qnode_keys=[n02])",
+        "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=chi_square, direction=below, threshold=0.05, remove_connected_nodes=true, qnode_keys=[n02])",
+        # return results
+        "resultify(ignore_edge_direction=true)",
+        "return(message=true, store=false)",
+    ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    _virtual_tester(message, 'biolink:has_paired_concept_frequency_with', 'C1', 'paired_concept_frequency', 'EDAM-DATA:0951', 2)  # one check per virtual relation label
+    _virtual_tester(message, 'biolink:has_observed_expected_ratio_with', 'C2', 'observed_expected_ratio', 'EDAM-DATA:0951', 2)
+    _virtual_tester(message, 'biolink:has_chi_square_with', 'C3', 'chi_square', 'EDAM-DATA:0951', 2)
+
+
+@pytest.mark.skip(reason="redundant if the test_clinical_overlay_example() passes and test_ARAX_overlay passes")
+def test_clinical_overlay_example2():
+    """
+    Gives an example of overlaying (and filtering) clinical attributes when there exist edges in the KG that COHD can decorate
+    """
+    query = {"operations": {"actions": [
+        "create_message",
+        "add_qnode(name=DOID:11830, key=n00)",
+        "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+        "add_qedge(subject=n00, object=n01, key=e00)",
+        "expand(edge_key=e00, kp=infores:rtx-kg2)",  # the KP is passed as kp=..., as in the other tests of this file
+        # overlay a bunch of clinical info
+        "overlay(action=overlay_clinical_info, paired_concept_frequency=true)",
+        "overlay(action=overlay_clinical_info, observed_expected_ratio=true)",
+        "overlay(action=overlay_clinical_info, chi_square=true)",
+        # filter some stuff out for the fun of it
+        "filter_kg(action=remove_edges_by_attribute_default, edge_attribute=paired_concept_frequency, type=std, remove_connected_nodes=F)",
+        "filter_kg(action=remove_edges_by_attribute_default, edge_attribute=observed_expected_ratio, type=std, remove_connected_nodes=F)",
+        "filter_kg(action=remove_edges_by_attribute_default, edge_attribute=chi_square, type=std, remove_connected_nodes=F)",
+        # return results
+        "resultify(ignore_edge_direction=true)",
+        "return(message=true, store=false)",
+    ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    _attribute_tester(message, 'paired_concept_frequency', 'EDAM-DATA:0951', 1)  # one distinct value per attribute is enough here
+    _attribute_tester(message, 'observed_expected_ratio', 'EDAM-DATA:0951', 1)
+    _attribute_tester(message, 'chi_square', 'EDAM-DATA:0951', 1)
+
+
+@pytest.mark.skip(reason="redundant if test_one_hop_based_on_types_1() and test_ARAX_overlay() passes")
+def test_two_hop_based_on_types_1():
+    """
+    Example DSL for a two hop question that is based on types
+    """
+    #doid_list = {"DOID:11830", "DOID:5612", "DOID:2411", "DOID:8501", "DOID:174"}
+    doid_list = {"DOID:11830"}
+    for doid in doid_list:
+        query = {"operations": {"actions": [
+            "create_message",
+            f"add_qnode(name={doid}, key=n00, categories=biolink:Disease)",
+            "add_qnode(categories=biolink:Protein, is_set=true, key=n01)",
+            "add_qnode(categories=biolink:ChemicalEntity, key=n02)",
+            "add_qedge(subject=n00, object=n01, key=e00)",
+            "add_qedge(subject=n01, object=n02, key=e01)",
+            "expand(edge_key=e00, kp=infores:rtx-kg2)",  # the KP is passed as kp=..., as in the other tests of this file
+            #"expand(edge_key=e00, kp=infores:biothings-explorer)",
+            "expand(edge_key=e01, kp=infores:rtx-kg2)",
+            "overlay(action=overlay_clinical_info, paired_concept_frequency=true, subject_qnode_key=n00, object_qnode_key=n02, virtual_relation_label=C1)",
+            "overlay(action=overlay_clinical_info, observed_expected_ratio=true, subject_qnode_key=n00, object_qnode_key=n02, virtual_relation_label=C2)",
+            "overlay(action=overlay_clinical_info, chi_square=true, subject_qnode_key=n00, object_qnode_key=n02, virtual_relation_label=C3)",
+            # "overlay(action=predict_drug_treats_disease, subject_qnode_key=n02, object_qnode_key=n00, virtual_relation_label=P1)",
+            "overlay(action=compute_ngd)",
+            "resultify(ignore_edge_direction=true)",
+            "filter_results(action=limit_number_of_results, max_results=50)",
+            "return(message=false, store=true)",
+        ]}}
+        [response, message] = _do_arax_query(query)
+        print(message.id)
+        assert response.status == 'OK'
+        _virtual_tester(message, 'biolink:has_paired_concept_frequency_with', 'C1', 'paired_concept_frequency', 'EDAM-DATA:0951', 1)  # one check per virtual relation label
+        _virtual_tester(message, 'biolink:has_observed_expected_ratio_with', 'C2', 'observed_expected_ratio', 'EDAM-DATA:0951', 1)
+        _virtual_tester(message, 'biolink:has_chi_square_with', 'C3', 'chi_square', 'EDAM-DATA:0951', 1)
+        assert len(message.results) > 1
+
+
+@pytest.mark.external
+@pytest.mark.slow
+def test_one_hop_based_on_types_1():
+    """
+    Example DSL for a one hop question that is based on types
+    """
+    #doid_list = {"DOID:11830", "DOID:5612", "DOID:2411", "DOID:8501", "DOID:174"}
+    doid_list = {"DOID:11830"}
+    for doid in doid_list:
+        query = {"operations": {"actions": [
+            "create_message",
+            f"add_qnode(ids={doid}, key=n00, categories=biolink:Disease)",
+            "add_qnode(categories=biolink:ChemicalEntity, key=n01)",
+            "add_qedge(subject=n00, object=n01, key=e00)",
+            "expand(edge_key=e00, kp=infores:rtx-kg2)",  # the KP is passed as kp=..., like the expand call on the next line
+            "expand(edge_key=e00, kp=infores:biothings-explorer)",
+            "overlay(action=overlay_clinical_info, observed_expected_ratio=true)",
+            # "overlay(action=predict_drug_treats_disease)",
+            "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=probability_treats, direction=below, threshold=0.75, remove_connected_nodes=true, qnode_keys=[n01])",  # NOTE(review): the overlay that would add probability_treats is commented out above
+            "overlay(action=compute_ngd)",
+            "resultify(ignore_edge_direction=true)",
+            "filter_results(action=limit_number_of_results, max_results=50)",
+            "return(message=true, store=false)",
+        ]}}
+        [response, message] = _do_arax_query(query)
+        assert response.status == 'OK'
+        assert len(message.results) > 1  # more than one result is required
+
+
+@pytest.mark.skip(reason="Work in progress (and takes a very long time)")
+def test_one_hop_kitchen_sink_BTE_1():
+    """
+    Example of throwing everything at a simple BTE query
+    """
+    query = {"operations": {"actions": [
+        "create_message",
+        "add_qnode(ids=DOID:11830, key=n0, categories=biolink:Disease)",
+        "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+        "add_qedge(subject=n0, object=n1, key=e1)",
+        #"expand(edge_key=e00, infores:rtx-kg2)",
+        "expand(edge_key=e1, kp=infores:biothings-explorer)",
+        "overlay(action=overlay_clinical_info, paired_concept_frequency=true)",
+        "overlay(action=overlay_clinical_info, observed_expected_ratio=true)",
+        "overlay(action=overlay_clinical_info, chi_square=true)",
+        # "overlay(action=predict_drug_treats_disease)",
+        "overlay(action=compute_ngd)",
+        "resultify(ignore_edge_direction=true)",
+        "filter_results(action=limit_number_of_results, max_results=50)",
+        "return(message=true, store=true)",
+    ]}}
+    [response, message] = _do_arax_query(query)
+    print(message.id)
+    assert response.status == 'OK'
+    _attribute_tester(message, 'paired_concept_frequency', 'EDAM-DATA:0951', 1)
+    _attribute_tester(message, 'observed_expected_ratio', 'EDAM-DATA:0951', 1)
+    _attribute_tester(message, 'chi_square', 'EDAM-DATA:0951', 1)
+
+
+@pytest.mark.skip(reason="Work in progress (and takes a very long time)")
+def test_one_hop_kitchen_sink_BTE_2():
+    """
+    Example of throwing everything at a simple BTE query, but with node types that aren't appropriate for some reasoning capabilities
+    """
+    query = {"operations": {"actions": [
+        "create_message",
+        "add_qnode(ids=DOID:11830, key=n0, categories=biolink:Disease)",
+        "add_qnode(categories=biolink:Gene, key=n1)",
+        "add_qedge(subject=n0, object=n1, key=e1)",
+        #"expand(edge_key=e00, infores:rtx-kg2)",
+        "expand(edge_key=e1, kp=infores:biothings-explorer)",
+        "overlay(action=overlay_clinical_info, paired_concept_frequency=true)",
+        "overlay(action=overlay_clinical_info, observed_expected_ratio=true)",
+        "overlay(action=overlay_clinical_info, chi_square=true)",
+        # "overlay(action=predict_drug_treats_disease)",
+        "overlay(action=compute_ngd)",
+        "resultify(ignore_edge_direction=true)",
+        "filter_results(action=limit_number_of_results, max_results=50)",
+        "return(message=true, store=true)",
+    ]}}
+    [response, message] = _do_arax_query(query)
+    print(message.id)
+    assert response.status == 'OK'
+    _attribute_tester(message, 'paired_concept_frequency', 'EDAM-DATA:0951', 1)
+    _attribute_tester(message, 'observed_expected_ratio', 'EDAM-DATA:0951', 1)
+    _attribute_tester(message, 'chi_square', 'EDAM-DATA:0951', 1)
+
+def test_FET_ranking_2():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(key=n00,ids=[UniProtKB:P14136,UniProtKB:P35579],is_set=true,categories=biolink:Protein)",
+ "add_qnode(categories=biolink:BiologicalProcess, key=n01)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "expand(edge_key=e00,kp=infores:rtx-kg2)",
+ "overlay(action=fisher_exact_test, subject_qnode_key=n00, object_qnode_key=n01, virtual_relation_label=FET)",
+ "resultify()",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ fet_ranking_value = {}
+ for result in message.results:
+ for key, edge_bindings in result.analyses[0].edge_bindings.items():
+ if key.startswith('FET'):
+ for edge in edge_bindings:
+ for attribute in message.knowledge_graph.edges[edge.id].attributes:
+ if attribute.original_attribute_name == "fisher_exact_test_p-value":
+ if str(result.score) in fet_ranking_value:
+ fet_ranking_value[str(result.score)].append(float(attribute.value))
+ else:
+ fet_ranking_value[str(result.score)] = [float(attribute.value)]
+
+ for fet_val, conf_list in fet_ranking_value.items():
+ if len(conf_list) > 1:
+ for diff in [abs(x - y) for i,x in enumerate(conf_list) for j,y in enumerate(conf_list) if i < j]:
+ assert diff == 0
+
+
+@pytest.mark.external
+def test_genetics_kp_ranking():
+    """Check that at least one scored result binds a Genetics KP edge that carries a ``pValue`` attribute."""
+    query = {"operations": {"actions": [
+        "create_message",
+        "add_qnode(name=type 2 diabetes mellitus, categories=biolink:Disease, key=n0)",
+        "add_qnode(categories=biolink:Gene, key=n1)",
+        "add_qedge(subject=n0, object=n1, key=e0, predicates=biolink:condition_associated_with_gene)",
+        "expand(edge_key=e0,kp=infores:genetics-data-provider)",
+        "resultify()",
+        "filter_results(action=limit_number_of_results, max_results=30)",
+        "return(message=true, store=false)"
+    ]}}
+    [response, message] = _do_arax_query(query)
+    assert response.status == 'OK'
+    # Collect the pValue attributes of bound Genetics KP edges, grouped by the score of the result binding them
+    gen_kp_ranking_value = {}
+    for result in message.results:
+        # Edge bindings are read from the result's first analysis, the same way test_FET_ranking_2 reads them
+        for key, edge_bindings in result.analyses[0].edge_bindings.items():
+            for edge in edge_bindings:
+                if edge.id.startswith('infores:genetics-data-provider'):
+                    if message.knowledge_graph.edges[edge.id].attributes is not None:
+                        for attribute in message.knowledge_graph.edges[edge.id].attributes:
+                            if attribute.original_attribute_name == "pValue":
+                                gen_kp_ranking_value.setdefault(str(result.score), []).append(float(attribute.value))
+    assert len(gen_kp_ranking_value) > 0
+
+@pytest.mark.slow
+def test_ranker_float_error_ex1():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(ids=CHEMBL.COMPOUND:CHEMBL112, key=n0, categories=biolink:ChemicalEntity)",
+ "add_qnode(categories=biolink:DiseaseOrPhenotypicFeature, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand()",
+ "overlay(action=compute_ngd, virtual_relation_label=N1, subject_qnode_key=n0, object_qnode_key=n1)",
+ "overlay(action=fisher_exact_test,subject_qnode_key=n0,virtual_relation_label=F1,object_qnode_key=n1)",
+ "overlay(action=overlay_clinical_info,COHD_method=paired_concept_frequency,virtual_relation_label=C1,subject_qnode_key=n0,object_qnode_key=n1)",
+ # "overlay(action=predict_drug_treats_disease,virtual_relation_label=P1,subject_qnode_key=n0,object_qnode_key=n1,threshold=0.8,slow_mode=false)",
+ "resultify()",
+ "filter_results(action=limit_number_of_results, max_results=30)",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+
+@pytest.mark.external
+def test_ranker_float_error_ex2():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(ids=MONDO:0005301, key=n0)",
+ "add_qnode(categories=biolink:ChemicalEntity, key=n1)",
+ "add_qedge(subject=n0, object=n1, key=e0)",
+ "expand(kp=infores:cohd)",
+ "resultify()",
+ "filter_results(action=limit_number_of_results, max_results=30)",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ result_scores = [x.score for x in message.results]
+ assert max(result_scores) == 1
+
+@pytest.mark.external
+def test_cmap_ranking():
+ query = {"operations": {"actions": [
+ "create_message",
+ "add_qnode(key=n00,categories=biolink:Gene,ids=HGNC:321)",
+ "add_qnode(categories=biolink:ChemicalEntity)",
+ "add_qedge(subject=n00,object=n01)",
+ "expand(kp=infores:molepro)",
+ "resultify()",
+ "filter_results(action=limit_number_of_results, max_results=500)",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ result_scores = {x.score for x in message.results}
+ assert len(result_scores) > 1
+
+@pytest.mark.slow
+def test_ranker_float_error_ex3():
+ query={"message": {
+ "query_graph": {
+ "edges": {
+ "e00": {
+ "object": "n01",
+ "subject": "n00"
+ },
+ "e01": {
+ "object": "n02",
+ "subject": "n01"
+ }
+ },
+ "nodes": {
+ "n00": {
+ "categories": [
+ "biolink:Disease"
+ ],
+ "ids": [
+ "DOID:14330"
+ ]
+ },
+ "n01": {
+ "categories": [
+ "biolink:NamedThing"
+ ]
+ },
+ "n02": {
+ "categories": [
+ "biolink:Disease"
+ ],
+ "ids": [
+ "DOID:8778"
+ ]
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+
+
+@pytest.mark.external
+def test_issue_1848():
+ query = {
+ "workflow": [
+ {
+ "id": "fill",
+ "parameters": { "allowlist": ["infores:cohd"],
+ "qedge_keys": [
+ "e0"
+ ]
+ }
+ },
+ {
+ "id": "overlay_compute_ngd",
+ "parameters": {
+ "virtual_relation_label": "N1",
+ "qnode_keys": [
+ "n0",
+ "n1"
+ ]
+ }
+ },
+ {
+ "id": "bind"
+ },
+ {
+ "id": "score"
+ },
+ {
+ "id": "filter_results_top_n",
+ "parameters": {
+ "max_results": 3
+ }
+ },
+ {
+ "id": "fill",
+ "parameters": { "allowlist": ["infores:rtx-kg2"],
+ "qedge_keys": [
+ "e1",
+ "e2",
+ "e3",
+ "e4"
+ ]
+ }
+ },
+ {
+ "id": "bind"
+ },
+ {
+ "id": "score"
+ }
+ ],
+ "message": {
+ "query_graph": {
+ "edges": {
+ "e0": {
+ "subject": "n0",
+ "object": "n1",
+ "predicates": [
+ "biolink:associated_with"
+ ]
+ },
+ "e1": {
+ "subject": "n1",
+ "object": "n2",
+ "predicates": [
+ "biolink:increases_activity_of"
+ ]
+ },
+ "e2": {
+ "subject": "n3",
+ "object": "n2",
+ "predicates": [
+ "biolink:increases_activity_of"
+ ]
+ },
+ "e3": {
+ "subject": "n1",
+ "object": "n2",
+ "predicates": [
+ "biolink:decreases_activity_of"
+ ],
+ "option_group_id": "decr"
+ },
+ "e4": {
+ "subject": "n3",
+ "object": "n2",
+ "predicates": [
+ "biolink:decreases_activity_of"
+ ],
+ "option_group_id": "decr"
+ }
+ },
+ "nodes": {
+ "n0": {
+ "ids": [
+ "MONDO:0009061"
+ ],
+ "name": "MONDO:0009061"
+ },
+ "n1": {
+ "categories": [
+ "biolink:ChemicalEntity"
+ ]
+ },
+ "n2": {
+ "categories": [
+ "biolink:Gene",
+ "biolink:Protein"
+ ]
+ },
+ "n3": {
+ "categories": [
+ "biolink:ChemicalEntity"
+ ]
+ }
+ }
+ }
+ }
+ }
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+
+# Not working yet
+# def test_example_3_kg2():
+# query = {"operations": { "actions": [
+# "create_message",
+# #"add_qnode(key=n00, curie=DOID:0050156)", # idiopathic pulmonary fibrosis
+# "add_qnode(curie=DOID:9406, key=n00)", # hypopituitarism, original demo example
+# "add_qnode(key=n01, categories=chemical_substance, is_set=true)",
+# "add_qnode(key=n02, categories=protein)",
+# "add_qedge(key=e00, subject=n00, object=n01)",
+# "add_qedge(key=e01, subject=n01, object=n02)",
+# "expand(edge_key=[e00,e01], infores:rtx-kg2)",
+# "overlay(action=overlay_clinical_info, observed_expected_ratio=true, virtual_relation_label=C1, subject_qnode_key=n00, object_qnode_key=n01)",
+# "overlay(action=compute_ngd, virtual_relation_label=N1, subject_qnode_key=n01, object_qnode_key=n02)",
+# "filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=observed_expected_ratio, direction=below, threshold=2, remove_connected_nodes=t, qnode_keys=[n01])",
+# "filter_kg(action=remove_orphaned_nodes, node_category=protein)",
+# "return(message=true, store=false)"
+# ]}}
+# [response, message] = _do_arax_query(query)
+# assert response.status == 'OK'
+# #assert len(message.results) == ?
+# assert message.results[0].essence is not None
+# _virtual_tester(message, 'biolink:has_observed_expected_ratio_with', 'C1', 'observed_expected_ratio', 'EDAM-DATA:0951', 2)
+# _virtual_tester(message, 'biolink:occurs_together_in_literature_with', 'N1', 'normalized_google_distance', 'EDAM-DATA:2526', 2)
+
+@pytest.mark.slow
+def test_example_3_issue_679():
+ query = {"operations": { "actions": [
+ "create_message",
+ "add_qnode(name=DOID:9406, key=n00)",
+ "add_qnode(categories=[biolink:ChemicalEntity], is_set=true, key=n01)",
+ "add_qnode(categories=[biolink:Protein], key=n02)",
+ "add_qedge(subject=n00, object=n01, key=e00)",
+ "add_qedge(subject=n01, object=n02, key=e01)",
+ "expand()",
+ "overlay(action=overlay_clinical_info, COHD_method=observed_expected_ratio, virtual_relation_label=C1, subject_qnode_key=n00, object_qnode_key=n01)",
+ "filter_kg(action=remove_edges_by_continuous_attribute,edge_attribute=observed_expected_ratio,direction=below,threshold=3,remove_connected_nodes=true,qnode_keys=n01)",
+ "filter_kg(action=remove_orphaned_nodes,node_category=biolink:Protein)",
+ "overlay(action=compute_ngd, virtual_relation_label=N1, subject_qnode_key=n01, object_qnode_key=n02)",
+ "filter_kg(action=remove_edges_by_continuous_attribute,edge_attribute=ngd,direction=above,threshold=0.85,remove_connected_nodes=true,qnode_keys=n02)",
+ "resultify(ignore_edge_direction=true, debug=true)",
+ "return(message=true, store=false)"
+ ]}}
+ [response, message] = _do_arax_query(query)
+ assert response.status == 'OK'
+ assert message.results[0].essence is not None
+
+
+if __name__ == "__main__":
+ pytest.main(['-v'])
diff --git a/notes/arax-maintenance-sop.md b/notes/arax-maintenance-sop.md
index 0f531d322..e689d2c0b 100644
--- a/notes/arax-maintenance-sop.md
+++ b/notes/arax-maintenance-sop.md
@@ -250,10 +250,10 @@ ARAX Flask server locally.
#### Running the ARAX unit tests
Running the unit tests involves these steps:
```
-cd ARAX_DEV_DIR/issue-XXX/RTX/code/ARAX
-../../../venv/bin/pytest --cache-clear --nodatabases -v test/
+cd ARAX_DEV_DIR/issue-XXX/RTX
+../venv/bin/pytest --cache-clear -v
```
-The procedure will take about 15 minutes to complete. All 153 standard unit tests should pass, or your
+The procedure will take about 15 minutes to complete. All standard unit tests should pass, or your
locally installed ARAX is not in a "known good" state (and you should work on troubleshooting
the broken unit test before proceeding).
@@ -448,8 +448,8 @@ the local code repository is on `master`. Staying in the same `RTX` directory:
7. `git fetch origin`
8. `git checkout issue-XXX`
9. `git pull origin issue-XXX`
-10. Run all the pytests, using your updated code: `cd code/ARAX && pytest -v --cache-clear`
-All 153 standard ARAX unit tests should pass, when run in the `arax.ncats.io/beta` devarea.
+10. Run all the pytests from the top-level `RTX` directory, using your updated code: `pytest -v --cache-clear`
+All standard ARAX unit tests should pass, when run in the `arax.ncats.io/beta` devarea.
11. Next is to run the example queries, using your updated code. You will need to restart ARAX.
Exit out of the shell session for user `rt`,
by typing `exit`. You should see the root account prompt `#`:
@@ -516,9 +516,9 @@ tail -f /tmp/RTX_OpenAPI_beta.elog
If you had to fix merge conflicts or if your issue
branch was behind `master` when you merged, rerun all the pytests.
In any case, you will need to test the ARAX User Interface again with the
-Example 1 query. If everything
-is working, then proceed.
-2. Change your local `ARAX_DEV_DIR/issue-XXX/RTX` to `master`:
+Example 1 query. If everything is working, then proceed.
+2. Change your local `ARAX_DEV_DIR/issue-XXX/RTX` code branch to the
+`master` branch:
```
cd ARAX_DEV_DIR/issue-XXX/RTX
git fetch origin
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 000000000..2a852e04c
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+testpaths = code/ARAX/test
+python_files = test_*.py