-
Notifications
You must be signed in to change notification settings - Fork 3
Arax pathfinder #64
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Arax pathfinder #64
Changes from 3 commits
cba10f7
5539e3d
cf94d6d
4999dff
767e51b
540f220
cd4cf5a
ce1a5e6
7cd663a
8ec57a9
82852c4
6b537e5
9919306
ab7049e
4771f90
107614b
268d226
c6b79c3
f98c6e9
298aee5
17d4187
70758cb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
# Use RENCI python base image
FROM ghcr.io/translatorsri/renci-python-image:3.11.5

# Add image info.
# Use the key=value form: the space-separated "LABEL key value" syntax is
# legacy and deprecated in current Dockerfile reference docs.
LABEL org.opencontainers.image.source=https://github.com/BioPack-team/shepherd

# Deterministic hashing across container restarts
ENV PYTHONHASHSEED=0

# set up requirements
WORKDIR /app

# make sure all is writeable for the nru USER later on
RUN chmod -R 777 .

# Install shared utils + project first so these layers cache across workers
COPY ./shepherd_utils ./shepherd_utils
COPY ./pyproject.toml .
RUN pip install .

# Worker-specific requirements
COPY ./workers/arax_pathfinder/requirements.txt .
RUN pip install -r requirements.txt

# switch to the non-root user (nru). defined in the base image
USER nru

# Copy in files
COPY ./workers/arax_pathfinder ./

# Set up base for command and any variables
# that shouldn't be modified
# ENTRYPOINT ["uvicorn", "shepherd_server.server:APP"]

# Variables that can be overriden
CMD ["python", "worker.py"]
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| catrax-pathfinder==1.0.2 | ||
| biolink-helper-pkg==1.0.0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,175 @@ | ||
| """Arax ARA Pathfinder module.""" | ||
|
|
||
| import requests | ||
| import asyncio | ||
| import json | ||
| import logging | ||
| import time | ||
| import uuid | ||
| from pathlib import Path | ||
| from pathfinder.Pathfinder import Pathfinder | ||
| from biolink_helper_pkg import BiolinkHelper | ||
|
|
||
| from shepherd_utils.config import settings | ||
| from shepherd_utils.db import ( | ||
| get_message, | ||
| save_message, | ||
| ) | ||
| from shepherd_utils.otel import setup_tracer | ||
| from shepherd_utils.shared import ( | ||
| get_tasks, | ||
| wrap_up_task, | ||
| ) | ||
|
|
||
# Queue name
STREAM = "arax.pathfinder"
# Consumer group, most likely you don't need to change this.
GROUP = "consumer"
# Short random suffix so each worker process registers as a distinct consumer.
CONSUMER = str(uuid.uuid4())[:8]
# Maximum number of tasks this worker will run concurrently.
TASK_LIMIT = 100
tracer = setup_tracer(STREAM)

# Upper bound on path length (hops) handed to Pathfinder.get_paths.
NUM_TOTAL_HOPS = 4
# Cap on the number of paths Pathfinder may return per query.
MAX_PATHFINDER_PATHS = 500

# Local cache file for the downloaded blocked-concepts list.
OUT_PATH = Path("general_concepts.json")
|
|
||
def download_file(url: str, out_path: Path, overwrite: bool = False) -> Path:
    """Fetch ``url`` into ``out_path``, reusing an existing copy when present.

    Args:
        url: HTTP(S) location to download.
        out_path: Destination file path (created along with parent dirs).
        overwrite: When True, re-download even if the file already exists.

    Returns:
        The path to the local file.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    destination = Path(out_path)

    # Cache hit: skip the network entirely unless a refresh was requested.
    if not overwrite and destination.exists():
        return destination

    destination.parent.mkdir(parents=True, exist_ok=True)

    response = requests.get(url, timeout=60)
    response.raise_for_status()

    destination.write_bytes(response.content)
    return destination
|
|
||
|
|
||
def get_blocked_list():
    """Return ``(blocked_curies, blocked_synonyms)`` from the ARAX block list.

    Downloads the blocked-concepts JSON to ``OUT_PATH`` on first use and
    reuses the cached file afterwards. Synonyms are lower-cased so callers
    can do case-insensitive membership checks; curies are kept as-is.
    """
    download_file(settings.arax_blocked_list_url, OUT_PATH, False)

    block_list = json.loads(Path(OUT_PATH).read_text())
    lowered_synonyms = {entry.lower() for entry in block_list["synonyms"]}
    return set(block_list["curies"]), lowered_synonyms
|
|
||
|
|
||
async def pathfinder(task, logger: logging.Logger):
    """Run an ARAX Pathfinder lookup for one queued task.

    Loads the query message, validates that it is a two-pinned-node
    pathfinder query with at most one intermediate-category constraint,
    runs Pathfinder, and saves the resulting (or error) message under the
    task's response id.

    Args:
        task: Stream entry; ``task[1]`` holds ``query_id``, ``workflow``
            (a JSON string), and ``response_id``.
        logger: Task-scoped logger.

    Returns:
        ``(message, 500)`` on validation failure, otherwise ``None``.
    """
    start = time.time()
    query_id = task[1]["query_id"]
    workflow = json.loads(task[1]["workflow"])
    response_id = task[1]["response_id"]
    message = await get_message(query_id, logger)

    # Fill in default lookup parameters without clobbering caller-supplied ones.
    parameters = message.get("parameters") or {}
    parameters["timeout"] = parameters.get("timeout", settings.lookup_timeout)
    parameters["tiers"] = parameters.get("tiers") or [0]
    message["parameters"] = parameters

    qgraph = message["message"]["query_graph"]
    pinned_node_keys = []
    pinned_node_ids = []
    # NOTE(review): assumes the query graph has exactly two nodes and that
    # dict order lines up pinned_node_keys[i] with pinned_node_ids[i] —
    # confirm upstream validation guarantees this.
    for node_key, node in qgraph["nodes"].items():
        pinned_node_keys.append(node_key)
        if node.get("ids", None) is not None:
            pinned_node_ids.append(node["ids"][0])
    if len(set(pinned_node_ids)) != 2:
        # NOTE(review): validation failures return without wrap_up_task and
        # without saving an error response — confirm the task is
        # re-queued/expired elsewhere, otherwise it is left pending.
        logger.error("Pathfinder queries require two pinned nodes.")
        return message, 500

    intermediate_categories = []
    path_key = next(iter(qgraph["paths"].keys()))
    qpath = qgraph["paths"][path_key]
    if qpath.get("constraints", None) is not None:
        constraints = qpath["constraints"]
        if len(constraints) > 1:
            logger.error("Pathfinder queries do not support multiple constraints.")
            return message, 500
        if len(constraints) > 0:
            intermediate_categories = (
                constraints[0].get("intermediate_categories", None) or []
            )
            if len(intermediate_categories) > 1:
                logger.error(
                    "Pathfinder queries do not support multiple intermediate categories"
                )
                return message, 500
    # Fall back to the most general category when no constraint supplied one.
    # (Previously an empty "constraints" list — or a constraint without
    # "intermediate_categories" — left this list empty, and
    # intermediate_categories[0] below raised IndexError.)
    if not intermediate_categories:
        intermediate_categories = ["biolink:NamedThing"]

    blocked_curies, blocked_synonyms = get_blocked_list()

    pathfinder = Pathfinder(
        "MLRepo",
        settings.plover_url,
        settings.curie_ngd_addr,
        settings.node_degree_addr,
        blocked_curies,
        blocked_synonyms,
        logger,
    )

    # Expand the single intermediate category to all Biolink descendants.
    biolink_cache_dir = "/tmp/biolink"
    Path(biolink_cache_dir).mkdir(parents=True, exist_ok=True)
    biolink_helper = BiolinkHelper(settings.arax_biolink_version, biolink_cache_dir)
    descendants = set(biolink_helper.get_descendants(intermediate_categories[0]))

    try:
        result, aux_graphs, knowledge_graph = pathfinder.get_paths(
            pinned_node_ids[0],
            pinned_node_ids[1],
            pinned_node_keys[0],
            pinned_node_keys[1],
            NUM_TOTAL_HOPS,
            NUM_TOTAL_HOPS,
            MAX_PATHFINDER_PATHS,
            descendants,
        )
        res = []
        if result is not None:
            res.append({
                "id": result["id"],
                "analyses": result['analyses'],
                "node_bindings": result['node_bindings'],
                "essence": "result"
            })
        # Normalize optional outputs so the saved message is well-formed TRAPI.
        if aux_graphs is None:
            aux_graphs = {}
        if knowledge_graph is None:
            knowledge_graph = {}
        message["message"]["knowledge_graph"] = knowledge_graph
        message["message"]["auxiliary_graphs"] = aux_graphs
        message["message"]["results"] = res
        await save_message(response_id, message, logger)
    except Exception as e:
        # Best-effort: persist the failure so the caller sees an error
        # response instead of a silently missing message.
        logger.error(f"PathFinder failed to find paths between {pinned_node_keys[0]} and {pinned_node_keys[1]}. "
                     f"Error message is: {e}")
        message = {"status": "error", "error": str(e)}
        await save_message(response_id, message, logger)

    await wrap_up_task(STREAM, GROUP, task, workflow, logger)

    logger.info(f"Task took {time.time() - start}")
|
|
||
|
|
||
async def process_task(task, parent_ctx, logger, limiter):
    """Run one pathfinder task inside a tracing span.

    The concurrency-limiter slot is always released and the span always
    closed, even when the task raises.
    """
    span = tracer.start_span(STREAM, context=parent_ctx)
    try:
        await pathfinder(task, logger)
    finally:
        limiter.release()
        span.end()
|
|
||
|
|
||
# Strong references to in-flight tasks. asyncio's event loop only keeps a
# weak reference to tasks, so an unreferenced fire-and-forget task can be
# garbage-collected before it finishes (see asyncio.create_task docs).
_background_tasks: set = set()


async def poll_for_tasks():
    """Continuously pull tasks from the stream and process each concurrently.

    Concurrency is bounded by the limiter handed out by ``get_tasks``;
    ``process_task`` releases it when the task completes.
    """
    async for task, parent_ctx, logger, limiter in get_tasks(
        STREAM, GROUP, CONSUMER, TASK_LIMIT
    ):
        background = asyncio.create_task(
            process_task(task, parent_ctx, logger, limiter)
        )
        # Hold a reference until completion, then drop it to avoid growth.
        _background_tasks.add(background)
        background.add_done_callback(_background_tasks.discard)


if __name__ == "__main__":
    asyncio.run(poll_for_tasks())
Uh oh!
There was an error while loading. Please reload this page.