Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
27b5e1a
#2712 fixes to get test_ARAX_expand.py unit tests working again
saramsey Apr 3, 2026
68b4e5f
#2712 fixing broken unit tests in test_ARAX_resultify.py
saramsey Apr 3, 2026
0cd0b55
#2712 fixing broken unit tests in test_ARAX_overlay.py
saramsey Apr 3, 2026
733ed4e
#2712 fixing broken unit tests in test_ARAX_workflows.py
saramsey Apr 3, 2026
dde36c3
#2712 fixing broken unit tests in test_ARAX_translate.py
saramsey Apr 3, 2026
c197c75
#2712 fixing broken tests in test_ARAX_filter_kg.py, plus cleanup of …
saramsey Apr 3, 2026
629992d
#2712 fixing broken unit tests in test_ARAX_filter_results.py
saramsey Apr 3, 2026
8da015b
#2712 fixing broken unit test in test_ARAX_json_queries.py
saramsey Apr 3, 2026
bdf3860
#2712 updating ARAX maintenance SOP doc
saramsey Apr 3, 2026
73c19cb
#2712 adding old archived ARAX tests to folder code/code-archive/old-…
saramsey Apr 3, 2026
dfc0392
#2712 adding pytest.ini to avoid collecting non-pytest modules contai…
saramsey Apr 3, 2026
3cc34df
#2712 fixing broken unit test test_ARAX_workflows.py::test_FET_example_3
saramsey Apr 3, 2026
7282f04
#2712 still trying to fix test_ARAX_workflows.py::test_FET_example_3
saramsey Apr 3, 2026
fa8cee1
#2712 speeding up test_ARAX_workflows.py::test_FET_example_3
saramsey Apr 3, 2026
02df4b6
#2712 fixing issues raised by Copilot
saramsey Apr 4, 2026
c4022a6
#2712 abolishing references to rtx-kg2 in test_ARAX_resultify.py
saramsey Apr 4, 2026
392d301
#2712 fixing issues raised by copilot
saramsey Apr 4, 2026
842e689
#2712 fixing issues raised by Copilot in the PR review
saramsey Apr 4, 2026
50b7bc4
#2712
saramsey Apr 4, 2026
647a924
#2712 reverting previous commit before we merge to master
saramsey Apr 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 21 additions & 99 deletions code/ARAX/ARAXQuery/ARAX_expander.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ def trim_to_size(input_list, length):
else:
return input_list

# KPs whose answers already use preferred (canonical) node CURIEs, so Expand
# skips re-canonicalization for them; any other KP triggers a warning asking
# that it be checked and, if compliant, added to this whitelist.
KPS_THAT_RETURN_PREFERRED_NODE_CURIES = {'infores:retriever'}
# KPs able to answer single-node (edgeless) queries; all others are rejected
# with an InvalidKP error.  NOTE(review): name reads singular but holds a set
# that could grow — renaming to KPS_... would touch its callers, so left as-is.
KP_THAT_CAN_HANDLE_SINGLE_NODE_QUERIES = {'infores:rtx-kg2'}

class ARAXExpander:

def __init__(self):
Expand Down Expand Up @@ -1101,22 +1104,9 @@ async def expand_edge_async(

# Do some post-processing (deduplicate nodes, remove self-edges..)
# KG2c and retriever are already deduplicated and uses canonical predicates
if kp_to_use != 'infores:rtx-kg2' and kp_to_use != 'infores:retriever':
qg_org_kg = eu.check_for_canonical_predicates(qg_org_kg, kp_to_use, log)
qg_org_kg,\
dropped_edge_counts = self._deduplicate_nodes(qg_org_kg,
kp_to_use,
log)
for qedge_key, count in dropped_edge_counts.items():
if count > 0:
# update query plan here
done_str = log.query_plan['qedge_keys'][qedge_key][kp_to_use]['description']
log.update_query_plan(qedge_key,
kp_to_use,
"Warning",
done_str + "; "
f"{count} edges dropped due "
"to node reference failure")
if kp_to_use not in KPS_THAT_RETURN_PREFERRED_NODE_CURIES:
log.warning(f"{kp_to_use}: this KP may not return preferred CURIEs; please check, and if it does return only preferred CURIEs, add to the Expand whitelist")

if any(edges for edges in qg_org_kg.edges_by_qg_id.values()): # Make sure the KP actually returned something
qg_org_kg = self._remove_self_edges(qg_org_kg, kp_to_use, log)

Expand All @@ -1136,6 +1126,8 @@ def _expand_node(qnode_key: str,
# This function expands a single node using the specified knowledge provider (for now only KG2 is supported)
log.debug(f"Expanding node {qnode_key} using {kps_to_use}")
qnode = query_graph.nodes[qnode_key]
if qnode.ids:
qnode.ids = eu.get_canonical_curies_list(qnode.ids, log)
single_node_qg = QueryGraph(nodes={qnode_key: qnode}, edges={})
answer_kg = QGOrganizedKnowledgeGraph()
if log.status != 'OK':
Expand All @@ -1145,18 +1137,21 @@ def _expand_node(qnode_key: str,
return answer_kg

# Answer the query using the proper KP (only our own KP answers single-node queries for now)
if kps_to_use == ["infores:rtx-kg2"]:
kp_querier = TRAPIQuerier(response_object=log,
kp_name=kps_to_use[0],
user_specified_kp=user_specified_kp,
kp_timeout=kp_timeout)
answer_kg = kp_querier.answer_single_node_query(single_node_qg)
log.info(f"Query for node {qnode_key} returned results ({eu.get_printable_counts_by_qg_id(answer_kg)})")
return answer_kg
else:
log.error("Only infores:rtx-kg2 can answer single-node queries currently", error_code="InvalidKP")
kps_to_use_that_cannot_handle_single_node_queries = set(kps_to_use) - KP_THAT_CAN_HANDLE_SINGLE_NODE_QUERIES
if kps_to_use_that_cannot_handle_single_node_queries:
log.error("these KPs cannot answer single-node queries: "
f"{kps_to_use_that_cannot_handle_single_node_queries}",
error_code="InvalidKP")
return answer_kg

kp_querier = TRAPIQuerier(response_object=log,
kp_name=KP_THAT_CAN_HANDLE_SINGLE_NODE_QUERIES,
user_specified_kp=user_specified_kp,
kp_timeout=kp_timeout)
answer_kg = kp_querier.answer_single_node_query(single_node_qg)
log.info(f"Query for node {qnode_key} returned results ({eu.get_printable_counts_by_qg_id(answer_kg)})")
return answer_kg

def _get_query_graph_for_edge(self, qedge_key: str, full_qg: QueryGraph, overarching_kg: QGOrganizedKnowledgeGraph, log: ARAXResponse) -> QueryGraph:
# This function creates a query graph for the specified qedge, updating its qnodes' curies as needed
edge_qg = QueryGraph(nodes={}, edges={})
Expand Down Expand Up @@ -1205,79 +1200,6 @@ def _get_query_graph_for_edge(self, qedge_key: str, full_qg: QueryGraph, overarc
f"{qedge.predicates if qedge.predicates else ''}-({output_qnode_key}:{output_qnode.categories}{output_curie_summary})")
return edge_qg

@staticmethod
def _deduplicate_nodes(
    answer_kg: QGOrganizedKnowledgeGraph,
    kp_name: str,
    log: ARAXResponse
) -> tuple[QGOrganizedKnowledgeGraph, dict[str, int]]:
    """Collapse synonym nodes in a KP's answer KG onto their preferred CURIEs.

    Looks up every node CURIE (bound and unbound) in the NodeSynonymizer,
    rewrites nodes under their preferred CURIE/name/categories, then rewrites
    edge subject/object references to match.  Edges whose subject or object
    cannot be mapped are dropped (with a warning logged per edge).

    :param answer_kg: the KG returned by the KP, organized by qnode/qedge key
    :param kp_name: the KP's infores curie (used only for log prefixes)
    :param log: the current ARAXResponse for logging/status
    :return: (deduplicated KG, per-qedge-key count of dropped edges)
    """
    log.debug(f"{kp_name}: Deduplicating nodes")
    deduplicated_kg = QGOrganizedKnowledgeGraph(nodes={qnode_key: {} for qnode_key in answer_kg.nodes_by_qg_id},
                                                edges={qedge_key: {} for qedge_key in answer_kg.edges_by_qg_id})
    deduplicated_kg.unbound_edges = answer_kg.unbound_edges
    curie_mappings = {}  # maps each original node CURIE -> its preferred CURIE

    # Deduplicate the nodes (both bound and unbound, via the sentinel key)
    for qnode_key, nodes in {**answer_kg.nodes_by_qg_id, UNBOUND_NODES_KEY: answer_kg.unbound_nodes}.items():
        # Load preferred curie info from NodeSynonymizer
        log.debug(f"{kp_name}: Getting preferred curies for {qnode_key} nodes returned in this step")
        canonicalized_nodes = eu.get_canonical_curies_dict(list(nodes), log) if nodes else {}
        if log.status != 'OK':
            # BUGFIX: must return a 2-tuple to match the declared return type;
            # callers unpack (kg, dropped_edge_counts), so a bare KG here would
            # blow up downstream.  No edges were processed yet, so counts={}.
            return deduplicated_kg, {}

        for node_key in nodes:
            # Figure out the preferred curie/name/categories for this node
            node = nodes.get(node_key)
            canonicalized_node = canonicalized_nodes.get(node_key)
            if canonicalized_node:
                preferred_curie = canonicalized_node.get('preferred_curie', node_key)
                preferred_name = canonicalized_node.get('preferred_name', node.name)
                preferred_type = canonicalized_node.get('preferred_type')
                preferred_categories = eu.convert_to_list(preferred_type) if preferred_type else node.categories
                curie_mappings[node_key] = preferred_curie
            else:
                # Means the NodeSynonymizer didn't recognize this curie; keep it as-is
                preferred_curie = node_key
                preferred_name = node.name
                preferred_categories = node.categories
                curie_mappings[node_key] = preferred_curie

            # Add this node into our deduplicated KG as necessary (first synonym seen wins)
            if qnode_key != UNBOUND_NODES_KEY:
                if preferred_curie not in deduplicated_kg.nodes_by_qg_id[qnode_key]:
                    node_key = preferred_curie
                    node.name = preferred_name
                    node.categories = preferred_categories
                    deduplicated_kg.add_node(node_key, node, qnode_key)
            else:  # this is an unbound node
                if preferred_curie not in deduplicated_kg.unbound_nodes:
                    node.name = preferred_name
                    node.categories = preferred_categories
                    deduplicated_kg.unbound_nodes[preferred_curie] = node

    # Then update the edges to reflect changes made to the nodes; drop edges
    # whose endpoints cannot be mapped to a preferred CURIE
    dropped_edge_count = {}
    for qedge_key, edges in answer_kg.edges_by_qg_id.items():
        dropped_edge_count[qedge_key] = 0
        for edge_key, edge in edges.items():
            drop_edge = False
            if edge.subject not in curie_mappings:
                log.warning(f"{kp_name}: edge subject not in curie mappings; qedge key: {qedge_key}; subject ID: {edge.subject}")
                drop_edge = True
                dropped_edge_count[qedge_key] += 1
            else:
                edge.subject = curie_mappings.get(edge.subject)
            if edge.object not in curie_mappings:
                log.warning(f"{kp_name}: edge object not in curie mappings; qedge key: {qedge_key}; object ID: {edge.object}")
                drop_edge = True
                dropped_edge_count[qedge_key] += 1
            else:
                edge.object = curie_mappings.get(edge.object)
            if not drop_edge:
                deduplicated_kg.add_edge(edge_key, edge, qedge_key)
    log.debug(f"{kp_name}: After deduplication, answer KG counts are: {eu.get_printable_counts_by_qg_id(deduplicated_kg)}")
    return deduplicated_kg, dropped_edge_count

@staticmethod
def _extract_query_subgraph(qedge_keys_to_expand: list[str], query_graph: QueryGraph, log: ARAXResponse) -> QueryGraph:
# This function extracts a sub-query graph containing the provided qedge IDs from a larger query graph
Expand Down
28 changes: 24 additions & 4 deletions code/ARAX/ARAXQuery/ARAX_filter_kg.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,13 +575,24 @@ def __remove_edges_by_predicate(self, describe=False):
"""
message = self.message
parameters = self.parameters
kg = message.knowledge_graph
# make a list of the allowable parameters (keys), and their possible values (values). Note that the action and corresponding name will always be in the allowable parameters
if message and parameters and hasattr(message, 'query_graph') and hasattr(message.query_graph, 'edges'):
allowable_parameters = {'action': {'remove_edges_by_predicate'},
'edge_predicate': set([x.predicate for x in self.message.knowledge_graph.edges.values()]),
'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
'qnode_keys': set([t for x in self.message.knowledge_graph.nodes.values() if x.qnode_keys is not None for t in x.qnode_keys]),
'qedge_keys': set([t for x in self.message.knowledge_graph.edges.values() if x.qedge_keys is not None for t in x.qedge_keys])
'qnode_keys': {
qnode_key
for node in kg.nodes.values()
for qnode_key in (getattr(node, "qnode_keys", None) or [])
},
'qedge_keys': {
qedge_key
for edge in kg.edges.values()
for qedge_key in (getattr(edge, "qedge_keys", None) or [])
}
# 'qnode_keys': set([t for x in self.message.knowledge_graph.nodes.values() if x.qnode_keys is not None for t in x.qnode_keys]),
# 'qedge_keys': set([t for x in self.message.knowledge_graph.edges.values() if x.qedge_keys is not None for t in x.qedge_keys])
}
else:
allowable_parameters = {'action': {'remove_edges_by_predicate'},
Expand Down Expand Up @@ -849,6 +860,7 @@ def __remove_edges_by_std_dev(self, describe=False):
:return:
"""
message = self.message
kg = message.knowledge_graph
parameters = self.parameters
# make a list of the allowable parameters (keys), and their possible values (values). Note that the action and corresponding name will always be in the allowable parameters
if message and parameters and hasattr(message, 'knowledge_graph') and hasattr(message.knowledge_graph, 'edges'):
Expand All @@ -868,8 +880,16 @@ def __remove_edges_by_std_dev(self, describe=False):
'threshold': {float()},
'top': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
'qnode_keys':set([t for x in self.message.knowledge_graph.nodes.values() if x.qnode_keys is not None for t in x.qnode_keys]),
'qedge_keys': set([t for x in self.message.knowledge_graph.edges.values() if x.qedge_keys is not None for t in x.qedge_keys])
'qnode_keys': {
qnode_key
for node in kg.nodes.values()
for qnode_key in (getattr(node, "qnode_keys", None) or [])
},
'qedge_keys': {
qedge_key
for edge in kg.edges.values()
for qedge_key in (getattr(edge, "qedge_keys", None) or [])
}
}
else:
allowable_parameters = {'action': {'remove_edges_by_std_dev'},
Expand Down
2 changes: 1 addition & 1 deletion code/ARAX/ARAXQuery/Expand/kp_info_cacher.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def load_kp_info_caches(self, log: ARAXResponse):
log.error(f"Unable to load KP info caches: {e}")

# The caches MUST be up to date at this point, so we just load them
log.debug("Loading cached Smart API amd meta map info")
log.debug("Loading cached Smart API and meta map info")
with open(self.smart_api_and_meta_map_cache, "rb") as cache:
cache = pickle.load(cache)
smart_api_info = cache['smart_api_cache']
Expand Down
Loading
Loading