diff --git a/workers/aragorn_pathfinder/worker.py b/workers/aragorn_pathfinder/worker.py index 3828cfc..a766b72 100644 --- a/workers/aragorn_pathfinder/worker.py +++ b/workers/aragorn_pathfinder/worker.py @@ -76,6 +76,17 @@ async def shadowfax(task, logger: logging.Logger): intermediate_categories = ["biolink:NamedThing"] # Create 3-hop query + gandalf_parameters = { + "min_information_content": message.get("parameters", {}) + .get("gandalf_parameters", {}) + .get("min_information_content", 69), + "max_node_degree": message.get("parameters", {}) + .get("gandalf_parameters", {}) + .get("max_node_degree", 5000), + "dehydrated": message.get("parameters", {}) + .get("gandalf_parameters", {}) + .get("dehydrated", True), + } threehop = { "message": { "query_graph": { @@ -180,6 +191,7 @@ async def shadowfax(task, logger: logging.Logger): }, }, }, + "parameters": {"gandalf_parameters": gandalf_parameters}, } callback_id = str(uuid.uuid4())[:8] diff --git a/workers/gandalf/Dockerfile b/workers/gandalf/Dockerfile index f910fe3..3ab12c4 100644 --- a/workers/gandalf/Dockerfile +++ b/workers/gandalf/Dockerfile @@ -21,6 +21,7 @@ COPY ./workers/gandalf/requirements.txt . RUN pip install -r requirements.txt # switch to the non-root user (nru). 
def _gandalf_parameters(in_message) -> dict:
    """Return the ``parameters.gandalf_parameters`` mapping of *in_message*.

    An empty dict is returned when either level is missing *or* explicitly
    ``None``. ``dict.get(key, {})`` only substitutes the default for a missing
    key, so a message carrying ``"parameters": null`` would otherwise raise
    ``AttributeError`` on the next ``.get``.
    """
    parameters = in_message.get("parameters") or {}
    return parameters.get("gandalf_parameters") or {}


def gandalf_lookup(graph, bmt, in_message, task_logger: logging.Logger):
    """Run a Gandalf lookup for a single task.

    Optional tuning values are read from
    ``in_message["parameters"]["gandalf_parameters"]`` and forwarded to
    ``lookup`` as keyword arguments:

    * ``max_node_degree`` -- forwarded as ``None`` when absent.
    * ``min_information_content`` -- forwarded as ``None`` when absent.
    * ``dehydrated`` -- forwarded as ``False`` when absent.

    Args:
        graph: Graph object handed to ``lookup`` unchanged.
        bmt: Toolkit object handed to ``lookup`` as ``bmt=``.
        in_message: Dict-like request message; passed to ``lookup`` unchanged.
        task_logger: Logger used to record the start of the lookup.

    Returns:
        Whatever ``lookup`` returns.
    """
    task_logger.info("Starting Gandalf lookup")
    # Resolve the nested parameter mapping once instead of once per option.
    params = _gandalf_parameters(in_message)
    # NOTE(review): `lookup` is resolved from module scope; presumably the
    # gandalf package's lookup, whose keyword names are taken as-is from the
    # existing call -- confirm against the pinned gandalf-csr version.
    return lookup(
        graph,
        in_message,
        bmt=bmt,
        max_node_degree=params.get("max_node_degree"),
        min_information_content=params.get("min_information_content"),
        dehydrated=params.get("dehydrated", False),
    )
defined in the base image +ENV PYSTOW_HOME=/tmp/pystow USER nru # Copy in files diff --git a/workers/score_paths/Dockerfile b/workers/score_paths/Dockerfile index 300e43c..6b2019d 100644 --- a/workers/score_paths/Dockerfile +++ b/workers/score_paths/Dockerfile @@ -27,6 +27,7 @@ COPY ./workers/score_paths . RUN chmod -R 777 . # switch to the non-root user (nru). defined in the base image +ENV PYSTOW_HOME=/tmp/pystow USER nru # Variables that can be overriden diff --git a/workers/score_paths/worker.py b/workers/score_paths/worker.py index 71b265b..63085c9 100644 --- a/workers/score_paths/worker.py +++ b/workers/score_paths/worker.py @@ -24,31 +24,6 @@ tracer = setup_tracer(STREAM) -def get_most_specific_category(categories, logger): - valid = [] - for cat in categories: - element = bmt.get_element(cat) - if not element: - logger.error(f"Category {cat} doesn't exist.") - continue - valid.append(cat) - - if not valid: - return None - - most_specific = [] - for cat in valid: - dominated = any( - cat in bmt.get_ancestors(other, reflexive=False) - for other in valid - if other != cat - ) - if not dominated: - most_specific.append(cat) - - return bmt.get_element(most_specific[0]) - - def convert_path_to_sentence(source, target, path, knowledge_graph, logger): path_node_list = [source] @@ -99,13 +74,10 @@ def convert_path_to_sentence(source, target, path, knowledge_graph, logger): path_predicate_list[hop_num].add(inv) current_node = next_node - source_cat = get_most_specific_category( - knowledge_graph["nodes"][source]["categories"], logger - ) + source_cat = knowledge_graph["nodes"][source]["categories"][0] if not source_cat: raise ValueError(f"Could not determine category for source node {source}.") - - path_sentence = f"{knowledge_graph['nodes'][source]['name']} (a {source_cat.name}) " + path_sentence = f"{knowledge_graph['nodes'][source]['name']} (a {source_cat.removeprefix('biolink:')}) " first_hop = True for path_node, hop_predicates in zip(path_node_list[1:], 
path_predicate_list): hop_preds = list(hop_predicates) @@ -122,14 +94,10 @@ def convert_path_to_sentence(source, target, path, knowledge_graph, logger): for hop_pred in hop_preds[:-1]: path_sentence += f"{hop_pred} or " path_sentence += f"{hop_preds[-1]}]" - node_cat = get_most_specific_category( - knowledge_graph["nodes"][path_node]["categories"], logger - ) + node_cat = knowledge_graph["nodes"][path_node]["categories"][0] if not node_cat: raise ValueError(f"Could not determine category for node {path_node}.") - path_sentence += ( - f" {knowledge_graph['nodes'][path_node]['name']} (a {node_cat.name})" - ) + path_sentence += f" {knowledge_graph['nodes'][path_node]['name']} (a {node_cat.removeprefix('biolink:')})" return path_sentence