From 58fa1d87b3858cc067dc1701b9869e68880b72c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20Carre=C3=B1o?= Date: Tue, 10 Mar 2026 11:13:38 +0100 Subject: [PATCH 1/7] feat: add port for the download_manager package as a replacement to Pooch --- ontograph/downloader.py | 134 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 131 insertions(+), 3 deletions(-) diff --git a/ontograph/downloader.py b/ontograph/downloader.py index 9797974..886341d 100644 --- a/ontograph/downloader.py +++ b/ontograph/downloader.py @@ -213,8 +213,136 @@ def _get_resource_url( class DownloadManagerAdapter(DownloaderPort): """Alternative downloader implementation. - Placeholder class for a implement the adapter using the - `downloader-manager` by Saezlab. + Adapter for the `download_manager` package by Saezlab. """ - pass + def __init__( + self, + cache_dir: Path, + *, + backend: str = 'requests', + **kwargs: object, + ) -> None: + """Initialize the download-manager adapter. + + Args: + cache_dir: Directory to store downloaded files. + backend: Backend for download_manager ('requests' or 'curl'). + **kwargs: Extra keyword args forwarded to DownloadManager. + """ + try: + import download_manager as dm + except ModuleNotFoundError as exc: + raise ModuleNotFoundError( + 'download_manager is not installed. ' + 'Install it to use DownloadManagerAdapter.' + ) from exc + + self._cache_dir = cache_dir + self._cache_dir.mkdir(parents=True, exist_ok=True) + self._manager = dm.DownloadManager( + path=str(self._cache_dir), + backend=backend, + **kwargs, + ) + self._resources_paths: dict[str, Path] = {} + + def get_paths(self) -> dict[str, Path]: + """Get paths of all downloaded resources. + + Returns: + dict[str, Path]: dictionary mapping resource IDs to file paths + """ + return self._resources_paths + + def fetch_from_url(self, url_ontology: str, filename: str | None) -> Path: + """Download an ontology file from a specified URL. 
+ + Args: + url_ontology: URL pointing to the ontology file + filename: Name to save the file as + + Returns: + Path: Path to the downloaded file + + Raises: + ValueError: If the URL or filename is empty + RequestException: If the download fails + IOError: If saving the file fails + """ + self._validate_download_parameters(url_ontology, filename) + + dest = self._cache_dir / filename + logging.info(f'Downloading ontology from {url_ontology} as {dest}') + result_path = self._manager.download(url_ontology, dest=str(dest)) + if not result_path: + raise OSError('Download manager did not return a file path.') + + result = Path(result_path) + self._resources_paths[dest.stem] = result + return result + + def fetch_from_catalog( + self, resources: list[dict[str, str]], catalog: CatalogOntologies + ) -> dict[str, Path]: + """Download multiple ontology files defined in a catalog. + + Args: + resources: list of dictionaries with resource information + catalog: Catalog object containing download URLs + + Returns: + dict[str, Path]: dictionary mapping resource IDs to file paths + + Raises: + ValueError: If the resources list is empty or URL not found + KeyError: If a resource is missing required fields + """ + if not resources: + raise ValueError('Resources list for batch download is empty.') + + results = {} + for resource in resources: + name_id, format_type = self._extract_resource_info(resource) + url = self._get_resource_url(name_id, format_type, catalog) + + filename = f'{name_id}.{format_type}' + local_path = self.fetch_from_url( + url_ontology=url, filename=filename + ) + results[name_id] = local_path + + self._resources_paths.update(results) + return results + + def _validate_download_parameters( + self, url_ontology: str, filename: str | None + ) -> None: + if not url_ontology or not url_ontology.strip(): + raise ValueError('URL cannot be empty') + + if not filename or not filename.strip(): + raise ValueError('Filename cannot be empty') + + def _extract_resource_info( 
+ self, resource: dict[str, str] + ) -> tuple[str, str]: + name_id = resource.get('name_id') + if not name_id: + raise KeyError("Resource dictionary must contain 'name_id' key") + + format_type = resource.get( + 'format', DEFAULT_FORMAT_ONTOLOGY + ) # Default to OBO format + return name_id, format_type + + def _get_resource_url( + self, name_id: str, format_type: str, catalog: CatalogOntologies + ) -> str: + url = catalog.get_download_url(name_id, format_type) + if not url: + raise ValueError( + f'Cannot find download URL for ontology {name_id} ' + f'in format {format_type}' + ) + return url From b1b6b2fb68cce1b1c5a9e10b295fb14e0512a044 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20Carre=C3=B1o?= Date: Tue, 10 Mar 2026 11:22:19 +0100 Subject: [PATCH 2/7] chores: change dependency branch for the download_manager, this dependency is an alternative branch not main --- pyproject.toml | 16 ++++++++-------- uv.lock | 43 ++++++++++++++++++++++--------------------- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ffbcd14..5131c52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,11 @@ build-backend = "hatchling.build" requires = ["hatchling"] +[dependency-groups] +dev = [ + "pkg_infra" +] + # =================================== # ======= PROJECT ======== # =================================== @@ -33,7 +38,7 @@ dependencies = [ "pyyaml", "appdirs>=1.4.4", "python-graphblas[default]>=2025.2.0", - "download-manager", + "download-manager" ] description = "A lightweight Python package for loading, representing, and efficiently querying biological ontologies as graph structures." 
license = "MIT" @@ -345,12 +350,7 @@ python = [ ] [tool.uv.sources] +download-manager = {git = "https://github.com/saezlab/download-manager.git", rev = "feat/migration-logger"} jupyter-contrib-nbextensions = {git = "https://github.com/deeenes/jupyter_contrib_nbextensions.git", branch = "master"} nbsphinx = {git = "https://github.com/deeenes/nbsphinx", branch = "timings"} -saezlab-core = { git = "https://github.com/saezlab/saezlab_core.git", rev = "feat/mvp" } -download-manager = { git = "https://github.com/saezlab/download-manager.git", rev = "/fix/cache-manager-numpy" } - -[dependency-groups] -dev = [ - "saezlab-core", -] +pkg_infra = {git = "https://github.com/saezlab/pkg_infra.git", rev = "main"} diff --git a/uv.lock b/uv.lock index 3728b82..c361edb 100644 --- a/uv.lock +++ b/uv.lock @@ -108,7 +108,7 @@ wheels = [ [[package]] name = "cachemanager" version = "0.1.1" -source = { git = "https://github.com/saezlab/cache-manager.git?rev=fix%2Fnumpy-dependency-version#a48300c9d9536ce0a6adcb7f35a8057120ca266d" } +source = { git = "https://github.com/saezlab/cache-manager.git?rev=main#48a83eb03e9bb5b790dbcc7c23e1c71c2477fb23" } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "numpy", version = "2.3.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, @@ -510,13 +510,14 @@ wheels = [ [[package]] name = "download-manager" version = "0.0.1" -source = { git = "https://github.com/saezlab/download-manager.git?rev=%2Ffix%2Fcache-manager-numpy#a5013789f66c1bf47ff1a5c63f6d5e15ffca0362" } +source = { git = "https://github.com/saezlab/download-manager.git?rev=feat%2Fmigration-logger#1a12ad05a04f1f83097412bb1d13b5e014e9361d" } dependencies = [ { name = "cachemanager" }, { name = "certifi" }, { name = "pycurl" }, { name = "requests" }, { name = "toml" }, + { name = "tqdm" }, ] [[package]] @@ -1522,7 +1523,7 @@ tests = [ 
[package.dev-dependencies] dev = [ - { name = "saezlab-core" }, + { name = "pkg-infra" }, ] [package.metadata] @@ -1534,7 +1535,7 @@ requires-dist = [ { name = "coverage", marker = "extra == 'tests'", specifier = ">=6.0" }, { name = "diff-cover", marker = "extra == 'tests'" }, { name = "distlib", marker = "extra == 'dev'" }, - { name = "download-manager", git = "https://github.com/saezlab/download-manager.git?rev=%2Ffix%2Fcache-manager-numpy" }, + { name = "download-manager", git = "https://github.com/saezlab/download-manager.git?rev=feat%2Fmigration-logger" }, { name = "ipykernel", marker = "extra == 'dev'" }, { name = "mkdocs-material", marker = "extra == 'docs'", specifier = ">=9.6.14" }, { name = "mkdocstrings", extras = ["python"], marker = "extra == 'docs'", specifier = ">=0.29.1,<0.30" }, @@ -1555,7 +1556,7 @@ requires-dist = [ provides-extras = ["dev", "docs", "security", "semantic", "tests"] [package.metadata.requires-dev] -dev = [{ name = "saezlab-core", git = "https://github.com/saezlab/saezlab_core.git?rev=feat%2Fmvp" }] +dev = [{ name = "pkg-infra", git = "https://github.com/saezlab/pkg_infra.git?rev=main" }] [[package]] name = "packaging" @@ -1679,6 +1680,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772, upload-time = "2023-11-25T06:56:14.81Z" }, ] +[[package]] +name = "pkg-infra" +version = "0.0.1" +source = { git = "https://github.com/saezlab/pkg_infra.git?rev=main#966990b5e4f2a76edccfc84a2c553c057289ad09" } +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.3.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "omegaconf" }, + { name = "pandas" }, + 
{ name = "platformdirs" }, + { name = "pydantic" }, + { name = "python-json-logger" }, + { name = "pyyaml" }, + { name = "toml" }, +] + [[package]] name = "platformdirs" version = "4.9.2" @@ -2403,22 +2420,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/30/f3eaf6563c637b6e66238ed6535f6775480db973c836336e4122161986fc/ruff-0.12.3-py3-none-win_arm64.whl", hash = "sha256:5f9c7c9c8f84c2d7f27e93674d27136fbf489720251544c4da7fb3d742e011b1", size = 10805855, upload-time = "2025-07-11T13:21:13.547Z" }, ] -[[package]] -name = "saezlab-core" -version = "0.0.1" -source = { git = "https://github.com/saezlab/saezlab_core.git?rev=feat%2Fmvp#778b74902c753e4163ed3910882b4dab9de8b183" } -dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.3.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "omegaconf" }, - { name = "pandas" }, - { name = "platformdirs" }, - { name = "pydantic" }, - { name = "python-json-logger" }, - { name = "pyyaml" }, - { name = "toml" }, -] - [[package]] name = "scipy" version = "1.15.3" From 5304ee92524691d65224ec19665c2b0edfc34505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20Carre=C3=B1o?= Date: Tue, 10 Mar 2026 14:16:31 +0100 Subject: [PATCH 3/7] feat: modify main code to let the user decide which downloader to use at the end, Pooch or Download Manager --- ontograph/client.py | 37 ++++++++++++++---- ontograph/config/settings.py | 4 ++ ontograph/downloader.py | 44 ++++++++++++++++++--- ontograph/loader.py | 74 +++++++++++++++++++++++++++++------- ontograph/models.py | 41 +++++++++++++++----- 5 files changed, 164 insertions(+), 36 deletions(-) diff --git a/ontograph/client.py b/ontograph/client.py index 3a4fc3c..5100bf7 100644 --- a/ontograph/client.py +++ b/ontograph/client.py @@ -73,13 +73,22 @@ class ClientCatalog: {'id': 'ado', 'title': 
"Alzheimer's Disease Ontology"} """ - def __init__(self, cache_dir: str = DEFAULT_CACHE_DIR) -> None: + def __init__( + self, + cache_dir: str = DEFAULT_CACHE_DIR, + downloader: DownloaderPort | None = None, + ) -> None: """Initialize the ClientCatalog. Args: cache_dir (str, optional): Directory for caching catalog data. Defaults to DEFAULT_CACHE_DIR. + downloader (DownloaderPort | None, optional): Downloader adapter for remote resources. Defaults to None. """ - self.__catalog_adapter = CatalogOntologies(cache_dir=Path(cache_dir)) + self.__catalog_adapter = CatalogOntologies( + cache_dir=Path(cache_dir), + downloader=downloader, + ) + self._downloader = downloader def load_catalog(self, force_download: bool = False) -> None: """Load the ontology catalog. @@ -92,7 +101,8 @@ def load_catalog(self, force_download: bool = False) -> None: >>> catalog.load_catalog() """ return self.__catalog_adapter.load_catalog( - force_download=force_download + force_download=force_download, + downloader=self._downloader, ) def catalog_as_dict(self) -> dict: @@ -218,14 +228,20 @@ class ClientOntology: [Term('Z', name='root')] """ - def __init__(self, cache_dir: str = DEFAULT_CACHE_DIR) -> None: + def __init__( + self, + cache_dir: str = DEFAULT_CACHE_DIR, + downloader: DownloaderPort | None = None, + ) -> None: """Initialize the ClientOntology. Args: cache_dir (str, optional): Directory for caching ontology data. Defaults to DEFAULT_CACHE_DIR. + downloader (DownloaderPort | None, optional): Downloader adapter for remote resources. Defaults to None. 
""" self._cache_dir = Path(cache_dir) self._ontology = None + self._downloader = downloader self._lookup_tables = None self._navigator = None self._relations = None @@ -346,7 +362,9 @@ def load( >>> client.load(source="./tests/resources/dummy_ontology.obo") """ logger.info(f'Loading ontology from source: {source} ...') - loader = ProntoLoaderAdapter(cache_dir=self._cache_dir) + loader = ProntoLoaderAdapter( + cache_dir=self._cache_dir, downloader=self._downloader + ) path = Path(source) ontology = None @@ -368,7 +386,10 @@ def load( # 3. Case 3: Try OBO catalog (if file missing or simple ID) else: - catalog_client = ClientCatalog(cache_dir=self._cache_dir) + catalog_client = ClientCatalog( + cache_dir=self._cache_dir, + downloader=self._downloader, + ) catalog_client.load_catalog() available = [ o['id'] for o in catalog_client.list_available_ontologies() @@ -380,7 +401,9 @@ def load( f"Ontology '{name_id}' found in catalog, downloading..." ) ontology = loader.load_from_catalog( - name_id=name_id, format='obo' + name_id=name_id, + format='obo', + downloader=self._downloader, ) else: msg = f"Ontology '{source}' not found as file, URL, or catalog entry." 
diff --git a/ontograph/config/settings.py b/ontograph/config/settings.py index cc776de..95ca59b 100644 --- a/ontograph/config/settings.py +++ b/ontograph/config/settings.py @@ -11,6 +11,7 @@ 'PACKAGE_VERSION', 'SUPPORTED_FORMATS_ONTOGRAPH', 'DEFAULT_FORMAT_ONTOLOGY', + 'DEFAULT_DOWNLOADER', ] # Package metadata from installed package @@ -31,4 +32,7 @@ SUPPORTED_FORMATS_ONTOGRAPH = ['obo', 'owl'] DEFAULT_FORMAT_ONTOLOGY = 'obo' +# Default downloader backend for remote resources ('pooch' or 'download_manager') +DEFAULT_DOWNLOADER = 'pooch' + # TODO: Ready for improvement diff --git a/ontograph/downloader.py b/ontograph/downloader.py index 886341d..65e36ab 100644 --- a/ontograph/downloader.py +++ b/ontograph/downloader.py @@ -5,15 +5,18 @@ """ from abc import ABC, abstractmethod +from typing import TYPE_CHECKING import logging from pathlib import Path from pooch import retrieve import requests -from ontograph.models import CatalogOntologies from ontograph.config.settings import DEFAULT_FORMAT_ONTOLOGY +if TYPE_CHECKING: + from ontograph.models import CatalogOntologies + logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) @@ -22,6 +25,7 @@ 'DownloadManagerAdapter', 'DownloaderPort', 'PoochDownloaderAdapter', + 'get_default_downloader', ] @@ -55,7 +59,9 @@ def fetch_from_url(self, url_ontology: str, filename: str | None) -> Path: @abstractmethod def fetch_from_catalog( - self, resources: list[dict[str, str]], catalog: CatalogOntologies + self, + resources: list[dict[str, str]], + catalog: 'CatalogOntologies', ) -> dict[str, Path]: """Download multiple ontology files defined in a catalog. @@ -73,6 +79,28 @@ def fetch_from_catalog( pass +def get_default_downloader( + cache_dir: Path, *, backend: str | None = None +) -> DownloaderPort: + """Return the default downloader adapter. + + Args: + cache_dir: Directory to store downloaded files. + backend: Override the configured default ('pooch' or 'download_manager'). 
+ """ + from ontograph.config.settings import DEFAULT_DOWNLOADER + + selection = (backend or DEFAULT_DOWNLOADER).strip().lower() + if selection == 'pooch': + return PoochDownloaderAdapter(cache_dir=cache_dir) + if selection == 'download_manager': + return DownloadManagerAdapter(cache_dir=cache_dir) + + raise ValueError( + "Unknown downloader backend. Use 'pooch' or 'download_manager'." + ) + + # ---------------------------------------------------------------------- # ---- Pooch Downloader Adapter (concrete implementation) ---- # ---------------------------------------------------------------------- @@ -151,7 +179,9 @@ def _perform_download(self, url_ontology: str, filename: str) -> Path: return result_path def fetch_from_catalog( - self, resources: list[dict[str, str]], catalog: CatalogOntologies + self, + resources: list[dict[str, str]], + catalog: 'CatalogOntologies', ) -> dict[str, Path]: """Download multiple ontology files defined in a catalog. @@ -196,7 +226,7 @@ def _extract_resource_info( return name_id, format_type def _get_resource_url( - self, name_id: str, format_type: str, catalog: CatalogOntologies + self, name_id: str, format_type: str, catalog: 'CatalogOntologies' ) -> str: url = catalog.get_download_url(name_id, format_type) if not url: @@ -283,7 +313,9 @@ def fetch_from_url(self, url_ontology: str, filename: str | None) -> Path: return result def fetch_from_catalog( - self, resources: list[dict[str, str]], catalog: CatalogOntologies + self, + resources: list[dict[str, str]], + catalog: 'CatalogOntologies', ) -> dict[str, Path]: """Download multiple ontology files defined in a catalog. 
@@ -337,7 +369,7 @@ def _extract_resource_info( return name_id, format_type def _get_resource_url( - self, name_id: str, format_type: str, catalog: CatalogOntologies + self, name_id: str, format_type: str, catalog: 'CatalogOntologies' ) -> str: url = catalog.get_download_url(name_id, format_type) if not url: diff --git a/ontograph/loader.py b/ontograph/loader.py index b817850..50e8205 100644 --- a/ontograph/loader.py +++ b/ontograph/loader.py @@ -17,10 +17,7 @@ from charset_normalizer import from_path from ontograph.models import Ontology, CatalogOntologies -from ontograph.downloader import ( - DownloaderPort, - PoochDownloaderAdapter, -) +from ontograph.downloader import DownloaderPort, get_default_downloader from ontograph.config.settings import ( DEFAULT_CACHE_DIR, SUPPORTED_FORMATS_ONTOGRAPH, @@ -55,12 +52,18 @@ def load_from_file(self, file_path_ontology: str | Path) -> Ontology: pass @abstractmethod - def load_from_catalog(self, name_id: str, format: str = 'obo') -> Ontology: + def load_from_catalog( + self, + name_id: str, + format: str = 'obo', + downloader: DownloaderPort | None = None, + ) -> Ontology: """Load ontology from a catalog. Args: name_id (str): Ontology identifier. format (str, optional): Ontology format. Defaults to 'obo'. + downloader (DownloaderPort | None, optional): Downloader implementation. Defaults to None. Returns: Ontology: Loaded ontology object. @@ -93,16 +96,22 @@ class ProntoLoaderAdapter(OntologyLoaderPort): Loads ontologies from files, catalogs, and URLs using the Pronto library. """ - def __init__(self, cache_dir: str | Path | None = None) -> None: + def __init__( + self, + cache_dir: str | Path | None = None, + downloader: DownloaderPort | None = None, + ) -> None: """Initialize the ProntoLoaderAdapter. Args: cache_dir (str | Path | None, optional): Directory for cached files. Defaults to None. + downloader (DownloaderPort | None, optional): Downloader adapter for remote resources. Defaults to None. 
""" self._cache_dir: Path | None = ( Path(cache_dir) if cache_dir else DEFAULT_CACHE_DIR ) self._ontology: Ontology | None = None + self._downloader: DownloaderPort | None = downloader @cached_property def catalog(self) -> CatalogOntologies: @@ -112,7 +121,10 @@ def catalog(self) -> CatalogOntologies: CatalogOntologies: The ontology catalog instance. """ logger.debug('Initializing ontology catalog') - return CatalogOntologies(cache_dir=self._cache_dir) + return CatalogOntologies( + cache_dir=self._cache_dir, + downloader=self._downloader, + ) @property def cache_dir(self) -> Path: @@ -265,6 +277,18 @@ def _load_ontology( ontology: pronto.Ontology = pronto.Ontology( fixed_path, encoding=self.find_file_encoding(fixed_path) ) + except PermissionError: + # Fallback for restricted environments where multiprocessing locks + # are not permitted (pronto ThreadPool uses multiprocessing pool). + try: + from pronto.utils import pool as pronto_pool + + pronto_pool._ThreadPool = None + except (AttributeError, ImportError): + pass + ontology = pronto.Ontology( + fixed_path, encoding=self.find_file_encoding(fixed_path) + ) except (TypeError, ValueError) as e: error_msg = f'Failed to load ontology from {path_file}: {str(e)}' logger.exception(error_msg) @@ -359,12 +383,18 @@ def load_from_file(self, file_path_ontology: str | Path) -> Ontology: ) raise - def _download_ontology(self, name_id: str, format: str) -> Path: + def _download_ontology( + self, + name_id: str, + format: str, + downloader: DownloaderPort | None = None, + ) -> Path: """Download ontology from catalog. Args: name_id (str): Ontology identifier. format (str): Ontology format. + downloader (DownloaderPort | None, optional): Downloader implementation. Defaults to None. Returns: Path: Path to downloaded file. @@ -373,8 +403,13 @@ def _download_ontology(self, name_id: str, format: str) -> Path: FileNotFoundError: If file can't be downloaded. NotImplementedError: If download functionality is not implemented. 
""" - downloader = PoochDownloaderAdapter(cache_dir=self.cache_dir) - logger.debug(f'Created default downloader: {type(downloader).__name__}') + if downloader is None: + downloader = self._downloader + if downloader is None: + downloader = get_default_downloader(cache_dir=self.cache_dir) + logger.debug( + f'Created default downloader: {type(downloader).__name__}' + ) resources = [{'name_id': name_id, 'format': format}] try: @@ -396,12 +431,18 @@ def _download_ontology(self, name_id: str, format: str) -> Path: return path_download[name_id] - def load_from_catalog(self, name_id: str, format: str = 'obo') -> Ontology: + def load_from_catalog( + self, + name_id: str, + format: str = 'obo', + downloader: DownloaderPort | None = None, + ) -> Ontology: """Load ontology from the OBO Foundry catalog, downloading if needed. Args: name_id (str): Ontology identifier. format (str, optional): Ontology format. Defaults to "obo". + downloader (DownloaderPort | None, optional): Downloader implementation. Defaults to None. Returns: Ontology: Loaded Ontology object. @@ -424,7 +465,12 @@ def load_from_catalog(self, name_id: str, format: str = 'obo') -> Ontology: logger.debug( f'Ontology file not found locally, downloading: {name_id}.{format}' ) - file_path = self._download_ontology(name_id, format) + if downloader is None: + file_path = self._download_ontology(name_id, format) + else: + file_path = self._download_ontology( + name_id, format, downloader=downloader + ) logger.debug(f'Loading ontology from file: {file_path}') ontology_source, _ = self._load_ontology(file_path) @@ -462,7 +508,9 @@ def load_from_url( ValueError: If parsing fails. 
""" if downloader is None: - downloader = PoochDownloaderAdapter(cache_dir=self.cache_dir) + downloader = self._downloader + if downloader is None: + downloader = get_default_downloader(cache_dir=self.cache_dir) logger.debug( f'Created default downloader: {type(downloader).__name__}' ) diff --git a/ontograph/models.py b/ontograph/models.py index de7b448..0d821d2 100644 --- a/ontograph/models.py +++ b/ontograph/models.py @@ -1,5 +1,6 @@ import re import pprint +from typing import TYPE_CHECKING import logging from pathlib import Path from dataclasses import dataclass @@ -7,17 +8,20 @@ from tqdm import tqdm import yaml import numpy as np -from pooch import retrieve import pandas as pd import pronto import graphblas as gb +from ontograph.downloader import get_default_downloader from ontograph.config.settings import ( DEFAULT_CACHE_DIR, NAME_OBO_FOUNDRY_CATALOG, OBO_FOUNDRY_REGISTRY_URL, ) +if TYPE_CHECKING: + from ontograph.downloader import DownloaderPort + __all__ = ['CatalogOntologies', 'Ontology'] logger = logging.getLogger(__name__) @@ -33,43 +37,60 @@ class CatalogOntologies: Provides methods to download, load, and query the ontology registry. """ - def __init__(self, cache_dir: Path = DEFAULT_CACHE_DIR) -> None: + def __init__( + self, + cache_dir: Path = DEFAULT_CACHE_DIR, + downloader: 'DownloaderPort | None' = None, + ) -> None: """Initialize the catalog manager. Args: cache_dir (Path): Directory for caching registry files. + downloader (DownloaderPort | None, optional): Downloader adapter for remote resources. Defaults to None. """ self.cache_dir = cache_dir self._catalog: dict | None = None + self._downloader = downloader # Create cache directory if this one doesn't exist. self.cache_dir.mkdir(parents=True, exist_ok=True) - def _download_registry(self) -> Path: + def _download_registry( + self, downloader: 'DownloaderPort | None' = None + ) -> Path: """Download the latest catalog file. Returns: Path: Path to the downloaded catalog file. 
""" + catalog_path = self.cache_dir / NAME_OBO_FOUNDRY_CATALOG - retrieve( - url=OBO_FOUNDRY_REGISTRY_URL, - known_hash=None, - fname=NAME_OBO_FOUNDRY_CATALOG, - path=self.cache_dir, + if downloader is None: + downloader = self._downloader + if downloader is None: + downloader = get_default_downloader(cache_dir=self.cache_dir) + + downloader.fetch_from_url( + url_ontology=OBO_FOUNDRY_REGISTRY_URL, + filename=NAME_OBO_FOUNDRY_CATALOG, ) return catalog_path - def load_catalog(self, force_download: bool = False) -> None: + def load_catalog( + self, + force_download: bool = False, + downloader: 'DownloaderPort | None' = None, + ) -> None: """Load the ontology catalog from disk or download if needed. Args: force_download (bool): If True, force download the catalog file. + downloader (DownloaderPort | None, optional): Downloader implementation. Defaults to None. """ catalog_path = self.cache_dir / NAME_OBO_FOUNDRY_CATALOG if force_download or not catalog_path.exists(): - catalog_path = self._download_registry() + catalog_path = self._download_registry(downloader=downloader) with open(catalog_path) as f: self._catalog = yaml.safe_load(f) From 5c269884a11017d0d76694666e8c144b4b2b3274 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20Carre=C3=B1o?= Date: Tue, 10 Mar 2026 14:17:11 +0100 Subject: [PATCH 4/7] test: update test to verify the introduced changes. 
--- tests/test_downloader.py | 37 ++++++++++++++++++++++++++++++++++++- tests/test_loader.py | 30 +++++++++++++++++++++++++----- tests/test_models.py | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 6 deletions(-) diff --git a/tests/test_downloader.py b/tests/test_downloader.py index 13c1f15..efc1f95 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -5,7 +5,12 @@ import requests import responses -from ontograph.downloader import DownloaderPort, PoochDownloaderAdapter +from ontograph.downloader import ( + DownloaderPort, + PoochDownloaderAdapter, + get_default_downloader, +) +from ontograph.config import settings __all__ = [ 'MockCatalog', @@ -57,6 +62,36 @@ def test_abstract_methods_implementation(self): assert isinstance(catalog_results, dict) +def test_get_default_downloader_pooch(tmp_path, monkeypatch): + monkeypatch.setattr(settings, 'DEFAULT_DOWNLOADER', 'pooch') + downloader = get_default_downloader(cache_dir=tmp_path) + assert isinstance(downloader, PoochDownloaderAdapter) + + +def test_get_default_downloader_download_manager(tmp_path, monkeypatch): + class DummyDownloadManagerAdapter: + def __init__(self, cache_dir): + self.cache_dir = cache_dir + + import ontograph.downloader as downloader_module + + monkeypatch.setattr(settings, 'DEFAULT_DOWNLOADER', 'download_manager') + monkeypatch.setattr( + downloader_module, + 'DownloadManagerAdapter', + DummyDownloadManagerAdapter, + ) + downloader = get_default_downloader(cache_dir=tmp_path) + assert isinstance(downloader, DummyDownloadManagerAdapter) + assert downloader.cache_dir == tmp_path + + +def test_get_default_downloader_invalid_backend(tmp_path, monkeypatch): + monkeypatch.setattr(settings, 'DEFAULT_DOWNLOADER', 'invalid') + with pytest.raises(ValueError): + get_default_downloader(cache_dir=tmp_path) + + class MockCatalog: """Mock catalog class for testing.""" diff --git a/tests/test_loader.py b/tests/test_loader.py index 213147a..094854c 100644 --- 
a/tests/test_loader.py +++ b/tests/test_loader.py @@ -2,6 +2,7 @@ import pytest +import ontograph.loader as loader_module from ontograph.loader import ( OntologyLoaderPort, ProntoLoaderAdapter, @@ -88,6 +89,27 @@ def test_load_from_catalog_file_not_found(pronto_loader, monkeypatch): pronto_loader.load_from_catalog('ado', format='obo') +def test_download_ontology_uses_default_downloader( + pronto_loader, tmp_path, monkeypatch +): + class DummyDownloader: + def fetch_from_catalog(self, resources, catalog): + return {'ado': tmp_path / 'ado.obo'} + + calls = {'count': 0} + + def fake_get_default(cache_dir): + calls['count'] += 1 + return DummyDownloader() + + monkeypatch.setattr( + loader_module, 'get_default_downloader', fake_get_default + ) + path = pronto_loader._download_ontology('ado', 'obo') + assert calls['count'] == 1 + assert path == tmp_path / 'ado.obo' + + def test_load_from_catalog_metadata_missing( pronto_loader, monkeypatch, tmp_path ): @@ -172,12 +194,10 @@ class DummyDownloader: def fetch_from_catalog(self, resources, catalog): raise Exception('fail') - monkeypatch.setattr( - 'ontograph.loader.PoochDownloaderAdapter', - lambda cache_dir: DummyDownloader(), - ) with pytest.raises(RuntimeError): - pronto_loader._download_ontology('ado', 'obo') + pronto_loader._download_ontology( + 'ado', 'obo', downloader=DummyDownloader() + ) def test_cache_dir_property_value_error(): diff --git a/tests/test_models.py b/tests/test_models.py index 01e9e99..b58f682 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -4,6 +4,7 @@ Ontology, CatalogOntologies, ) +import ontograph.models as models_module @pytest.fixture @@ -111,6 +112,39 @@ def test_print_catalog_schema_tree(catalogontologies, capsys): assert 'OBO Foundry Registry Schema Structure' in out +def test_load_catalog_uses_default_downloader(tmp_path, monkeypatch): + catalog_file = tmp_path / 'registry.yml' + catalog_data = {'ontologies': []} + + def write_catalog(): + import yaml + + with 
open(catalog_file, 'w') as f: + yaml.safe_dump(catalog_data, f) + + class DummyDownloader: + def fetch_from_url(self, url_ontology, filename): + write_catalog() + return catalog_file + + calls = {'count': 0} + + def fake_get_default(cache_dir): + calls['count'] += 1 + return DummyDownloader() + + monkeypatch.setattr( + models_module, 'NAME_OBO_FOUNDRY_CATALOG', catalog_file.name + ) + monkeypatch.setattr( + models_module, 'get_default_downloader', fake_get_default + ) + + obo_reg = CatalogOntologies(cache_dir=tmp_path) + obo_reg.load_catalog(force_download=True) + assert calls['count'] == 1 + + def test_ontology_model(): ontology = Ontology( ontology_source='dummy', ontology_id='chebi', metadata={'foo': 'bar'} From b34c68e35adb6cf7168a81968039d7fe95f9067f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20Carre=C3=B1o?= Date: Tue, 10 Mar 2026 14:18:16 +0100 Subject: [PATCH 5/7] docs: update documentation that showcase different downloaders that can be used in OntoGraph --- README.md | 26 +++++++++++++++++-- docs/index.md | 2 +- docs/learn/tutorials/quickstart.md | 14 +++++++++- .../source/ontograph/client-catalog.md | 14 ++++++++++ .../source/ontograph/client-ontology.md | 14 ++++++++++ docs/reference/source/ontograph/downloader.md | 13 ++++++++++ 6 files changed, 79 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 8802cae..47371ed 100644 --- a/README.md +++ b/README.md @@ -30,10 +30,16 @@ uv pip install -e . 
```python from ontograph.client import ClientCatalog +from ontograph.downloader import DownloadManagerAdapter +from ontograph.config.settings import DEFAULT_CACHE_DIR # Instantiate a client for your catalog client_catalog = ClientCatalog(cache_dir="./data/out") +# Optional: choose a downloader adapter explicitly +# downloader = DownloadManagerAdapter(cache_dir=DEFAULT_CACHE_DIR, backend="requests") +# client_catalog = ClientCatalog(cache_dir="./data/out", downloader=downloader) + # Load the catalog, in case this one doesn't exist it will be downloaded automatically in the cache folder you specify. client_catalog.load_catalog() ``` @@ -56,12 +62,18 @@ metadata_go = client_catalog.get_ontology_metadata(ontology_id="go", show_metada #### Create a client for your ontology ```python from ontograph.client import ClientOntology +from ontograph.downloader import DownloadManagerAdapter +from ontograph.config.settings import DEFAULT_CACHE_DIR # Instantiate a client for your ontology client_dummy_ontology = ClientOntology(cache_dir="./data/out") # Load a dummy ontology, we prepare a simple one to try out this package. -client_dummy_ontology.load(file_path_ontology="./tests/resources/dummy_ontology.obo") +client_dummy_ontology.load(source="./tests/resources/dummy_ontology.obo") + +# Optional: choose a downloader adapter explicitly +# downloader = DownloadManagerAdapter(cache_dir=DEFAULT_CACHE_DIR, backend="requests") +# client_dummy_ontology = ClientOntology(cache_dir="./data/out", downloader=downloader) ``` #### Queries for your ontology @@ -138,7 +150,17 @@ If you are interested in loading an ontology from the catalog, just use the `nam ```bash client_go = ClientOntology() -client_go.load(name_id="go", format="obo") +client_go.load(source="go") +``` + +### Downloader configuration + +By default, the project uses a configurable downloader backend. 
You can set a global default in `ontograph/config/settings.py`: + +```python +DEFAULT_DOWNLOADER = "pooch" +# or +DEFAULT_DOWNLOADER = "download_manager" ``` ## Contributing diff --git a/docs/index.md b/docs/index.md index c4ea155..cf07b6f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -24,7 +24,7 @@ Analyze ontology structure, calculate paths and trajectories, and visualize term hierarchies. - **Caching & Download Management** - Efficiently download and cache ontology files for offline use. + Efficiently download and cache ontology files for offline use with configurable downloader backends. --- diff --git a/docs/learn/tutorials/quickstart.md b/docs/learn/tutorials/quickstart.md index 55a9286..75be3b5 100644 --- a/docs/learn/tutorials/quickstart.md +++ b/docs/learn/tutorials/quickstart.md @@ -11,12 +11,18 @@ First, let's interact with the OBO Foundry catalog to discover available ontolog ```python from ontograph.client import ClientCatalog +from ontograph.downloader import DownloadManagerAdapter +from ontograph.config.settings import DEFAULT_CACHE_DIR # Create a catalog client (specify a cache directory for downloads) client_catalog = ClientCatalog(cache_dir="./data/out") # Load the catalog (downloads if not cached) client_catalog.load_catalog() + +# Optional: choose a downloader adapter explicitly +# downloader = DownloadManagerAdapter(cache_dir=DEFAULT_CACHE_DIR, backend="requests") +# client_catalog = ClientCatalog(cache_dir="./data/out", downloader=downloader) ``` ### List Available Ontologies @@ -48,12 +54,18 @@ Now, let's load an ontology and explore its structure. 
```python from ontograph.client import ClientOntology +from ontograph.downloader import DownloadManagerAdapter +from ontograph.config.settings import DEFAULT_CACHE_DIR # Create an ontology client client_ontology = ClientOntology(cache_dir="./data/out") # Load a sample ontology (provided in the repo for testing) -client_ontology.load(file_path_ontology="./tests/resources/dummy_ontology.obo") +client_ontology.load(source="./tests/resources/dummy_ontology.obo") + +# Optional: choose a downloader adapter explicitly +# downloader = DownloadManagerAdapter(cache_dir=DEFAULT_CACHE_DIR, backend="requests") +# client_ontology = ClientOntology(cache_dir="./data/out", downloader=downloader) ``` --- diff --git a/docs/reference/source/ontograph/client-catalog.md b/docs/reference/source/ontograph/client-catalog.md index 81ba35e..bb0cee9 100644 --- a/docs/reference/source/ontograph/client-catalog.md +++ b/docs/reference/source/ontograph/client-catalog.md @@ -15,6 +15,20 @@ This class is ideal for users who want to explore the catalog of available ontol - Get download URLs and available formats for each ontology - Print the catalog schema tree for exploration +## Usage + +```python +from ontograph.client import ClientCatalog +from ontograph.downloader import DownloadManagerAdapter +from ontograph.config.settings import DEFAULT_CACHE_DIR + +client_catalog = ClientCatalog(cache_dir="./data/out") + +# Optional: use Download Manager for all catalog downloads +# downloader = DownloadManagerAdapter(cache_dir=DEFAULT_CACHE_DIR, backend="requests") +# client_catalog = ClientCatalog(cache_dir="./data/out", downloader=downloader) +``` + --- ## API Reference diff --git a/docs/reference/source/ontograph/client-ontology.md b/docs/reference/source/ontograph/client-ontology.md index aa152c6..723680a 100644 --- a/docs/reference/source/ontograph/client-ontology.md +++ b/docs/reference/source/ontograph/client-ontology.md @@ -15,6 +15,20 @@ This class is ideal for users who want to work directly with 
a specific ontology - Introspect ontology structure: calculate paths, trajectories, and visualize term hierarchies - Modular query adapters for navigation, relations, and introspection +## Usage + +```python +from ontograph.client import ClientOntology +from ontograph.downloader import DownloadManagerAdapter +from ontograph.config.settings import DEFAULT_CACHE_DIR + +client_ontology = ClientOntology(cache_dir="./data/out") + +# Optional: use Download Manager for all remote downloads +# downloader = DownloadManagerAdapter(cache_dir=DEFAULT_CACHE_DIR, backend="requests") +# client_ontology = ClientOntology(cache_dir="./data/out", downloader=downloader) +``` + --- ## API Reference diff --git a/docs/reference/source/ontograph/downloader.md b/docs/reference/source/ontograph/downloader.md index ecb5294..41b6fca 100644 --- a/docs/reference/source/ontograph/downloader.md +++ b/docs/reference/source/ontograph/downloader.md @@ -2,6 +2,19 @@ This module provides interfaces and adapters for downloading ontology files from URLs and catalogs. +## Default Downloader + +OntoGraph selects a default downloader backend via `DEFAULT_DOWNLOADER` in +`ontograph/config/settings.py`. You can also override this per client by +passing a downloader adapter. 
+ +```python +from ontograph.downloader import get_default_downloader +from ontograph.config.settings import DEFAULT_CACHE_DIR + +downloader = get_default_downloader(cache_dir=DEFAULT_CACHE_DIR) +``` + --- ## API Reference From 9cccf57e27de998ab6dbf0387c886f2bab201a04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20Carre=C3=B1o?= Date: Tue, 10 Mar 2026 14:57:57 +0100 Subject: [PATCH 6/7] chores: add more logging messages to relevant parts of the client, downloader, loader and models modules --- ontograph/client.py | 7 +++++++ ontograph/downloader.py | 27 ++++++++++++++++++++++----- ontograph/loader.py | 14 ++++++++++++++ ontograph/models.py | 6 ++++++ 4 files changed, 49 insertions(+), 5 deletions(-) diff --git a/ontograph/client.py b/ontograph/client.py index 5100bf7..9f18e48 100644 --- a/ontograph/client.py +++ b/ontograph/client.py @@ -362,6 +362,10 @@ def load( >>> client.load(source="./tests/resources/dummy_ontology.obo") """ logger.info(f'Loading ontology from source: {source} ...') + logger.debug( + 'Using downloader: %s', + type(downloader).__name__ if downloader else 'default', + ) loader = ProntoLoaderAdapter( cache_dir=self._cache_dir, downloader=self._downloader ) @@ -371,6 +375,7 @@ def load( # 1. Case 1: Local file exists if path.exists(): + logger.debug('Resolved source type: file') logger.info( f'Found local file at {path}, loading with ProntoLoaderAdapter...' ) @@ -378,6 +383,7 @@ def load( # 2. Case 2: Provided source is a URL elif re.match(r'^https?://', source): + logger.debug('Resolved source type: url') logger.info( f'Detected URL source, downloading ontology from {source}' ) @@ -386,6 +392,7 @@ def load( # 3. 
Case 3: Try OBO catalog (if file missing or simple ID) else: + logger.debug('Resolved source type: catalog') catalog_client = ClientCatalog( cache_dir=self._cache_dir, downloader=self._downloader, diff --git a/ontograph/downloader.py b/ontograph/downloader.py index 65e36ab..a418b44 100644 --- a/ontograph/downloader.py +++ b/ontograph/downloader.py @@ -91,9 +91,20 @@ def get_default_downloader( from ontograph.config.settings import DEFAULT_DOWNLOADER selection = (backend or DEFAULT_DOWNLOADER).strip().lower() + logger.info('Selected default downloader backend: %s', selection) if selection == 'pooch': + logger.debug( + 'Instantiating downloader: %s (cache_dir=%s)', + PoochDownloaderAdapter.__name__, + cache_dir, + ) return PoochDownloaderAdapter(cache_dir=cache_dir) if selection == 'download_manager': + logger.debug( + 'Instantiating downloader: %s (cache_dir=%s)', + DownloadManagerAdapter.__name__, + cache_dir, + ) return DownloadManagerAdapter(cache_dir=cache_dir) raise ValueError( @@ -145,16 +156,17 @@ def fetch_from_url(self, url_ontology: str, filename: str | None) -> Path: """ self._validate_download_parameters(url_ontology, filename) - logging.info(f'Downloading ontology from {url_ontology} as {filename}') + logger.info('Pooch download started: %s -> %s', url_ontology, filename) try: result_path = self._perform_download(url_ontology, filename) self._resources_paths[filename.split('.')[0]] = result_path + logger.debug('Pooch download completed: %s', result_path) return result_path except requests.RequestException as e: - logging.error(f'Failed to download ontology: {e}') + logger.error('Failed to download ontology: %s', e) raise except OSError as e: - logging.error(f'Failed to save downloaded ontology: {e}') + logger.error('Failed to save downloaded ontology: %s', e) raise def _validate_download_parameters( @@ -175,7 +187,7 @@ def _perform_download(self, url_ontology: str, filename: str) -> Path: progressbar=True, ) result_path = Path(resource_path) - 
logging.info(f'Successfully downloaded ontology to {result_path}') + logger.debug('Successfully downloaded ontology to %s', result_path) return result_path def fetch_from_catalog( @@ -199,6 +211,7 @@ def fetch_from_catalog( if not resources: raise ValueError('Resources list for batch download is empty.') + logger.debug('Fetching %s resources from catalog', len(resources)) results = {} for resource in resources: name_id, format_type = self._extract_resource_info(resource) @@ -303,13 +316,16 @@ def fetch_from_url(self, url_ontology: str, filename: str | None) -> Path: self._validate_download_parameters(url_ontology, filename) dest = self._cache_dir / filename - logging.info(f'Downloading ontology from {url_ontology} as {dest}') + logger.info( + 'DownloadManager download started: %s -> %s', url_ontology, dest + ) result_path = self._manager.download(url_ontology, dest=str(dest)) if not result_path: raise OSError('Download manager did not return a file path.') result = Path(result_path) self._resources_paths[dest.stem] = result + logger.debug('DownloadManager download completed: %s', result) return result def fetch_from_catalog( @@ -333,6 +349,7 @@ def fetch_from_catalog( if not resources: raise ValueError('Resources list for batch download is empty.') + logger.debug('Fetching %s resources from catalog', len(resources)) results = {} for resource in resources: name_id, format_type = self._extract_resource_info(resource) diff --git a/ontograph/loader.py b/ontograph/loader.py index 50e8205..a0b980c 100644 --- a/ontograph/loader.py +++ b/ontograph/loader.py @@ -286,6 +286,9 @@ def _load_ontology( pronto_pool._ThreadPool = None except (AttributeError, ImportError): pass + logger.warning( + 'Pronto ThreadPool disabled due to PermissionError; retrying without multiprocessing' + ) ontology = pronto.Ontology( fixed_path, encoding=self.find_file_encoding(fixed_path) ) @@ -411,6 +414,12 @@ def _download_ontology( f'Created default downloader: {type(downloader).__name__}' ) + 
logger.info( + 'Downloading ontology %s.%s using %s (catalog)', + name_id, + format, + type(downloader).__name__, + ) resources = [{'name_id': name_id, 'format': format}] try: path_download = downloader.fetch_from_catalog( @@ -515,6 +524,11 @@ def load_from_url( f'Created default downloader: {type(downloader).__name__}' ) + logger.info( + 'Downloading ontology from URL using %s: %s', + type(downloader).__name__, + url_ontology, + ) file_path: Path = downloader.fetch_from_url( url_ontology=url_ontology, filename=filename, diff --git a/ontograph/models.py b/ontograph/models.py index 0d821d2..9ce89d4 100644 --- a/ontograph/models.py +++ b/ontograph/models.py @@ -70,6 +70,12 @@ def _download_registry( if downloader is None: downloader = get_default_downloader(cache_dir=self.cache_dir) + logger.info( + 'Downloading OBO Foundry registry using %s: %s', + type(downloader).__name__, + OBO_FOUNDRY_REGISTRY_URL, + ) + logger.debug('Catalog file path: %s', catalog_path) downloader.fetch_from_url( url_ontology=OBO_FOUNDRY_REGISTRY_URL, filename=NAME_OBO_FOUNDRY_CATALOG, From d9fe60b934cefc884109825cffdb39d7f104ba36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20Carre=C3=B1o?= Date: Tue, 10 Mar 2026 14:59:06 +0100 Subject: [PATCH 7/7] docs: add a basic tutorial for using the catalog and clients depending on the downloader (Pooch and DownloadManager) --- docs/learn/tutorials/tutorial0001_basics.md | 74 +++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/docs/learn/tutorials/tutorial0001_basics.md b/docs/learn/tutorials/tutorial0001_basics.md index e69de29..f824908 100644 --- a/docs/learn/tutorials/tutorial0001_basics.md +++ b/docs/learn/tutorials/tutorial0001_basics.md @@ -0,0 +1,74 @@ +# Basics: Downloading Ontologies + +This minimal tutorial shows how to download ontologies using either Pooch or Download Manager, and how to load from the catalog or a URL. + +--- + +## 1. 
Choose a Global Default Downloader + +Set the default once in `ontograph/config/settings.py`: + +```python +DEFAULT_DOWNLOADER = "pooch" +# or +DEFAULT_DOWNLOADER = "download_manager" +``` + +Now any client will use the configured backend unless you pass a downloader explicitly. + +--- + +## 2. Use Pooch Explicitly + +```python +from ontograph.client import ClientCatalog, ClientOntology +from ontograph.downloader import PoochDownloaderAdapter +from ontograph.config.settings import DEFAULT_CACHE_DIR + +downloader = PoochDownloaderAdapter(cache_dir=DEFAULT_CACHE_DIR) + +catalog = ClientCatalog(cache_dir=DEFAULT_CACHE_DIR, downloader=downloader) +catalog.load_catalog() + +client = ClientOntology(cache_dir=DEFAULT_CACHE_DIR, downloader=downloader) +client.load(source="go") # catalog download +``` + +--- + +## 3. Use Download Manager Explicitly + +```python +from ontograph.client import ClientCatalog, ClientOntology +from ontograph.downloader import DownloadManagerAdapter +from ontograph.config.settings import DEFAULT_CACHE_DIR + +downloader = DownloadManagerAdapter( + cache_dir=DEFAULT_CACHE_DIR, + backend="requests", +) + +catalog = ClientCatalog(cache_dir=DEFAULT_CACHE_DIR, downloader=downloader) +catalog.load_catalog() + +client = ClientOntology(cache_dir=DEFAULT_CACHE_DIR, downloader=downloader) +client.load(source="go") # catalog download +``` + +--- + +## 4. Download From a URL + +```python +from ontograph.client import ClientOntology +from ontograph.downloader import PoochDownloaderAdapter +from ontograph.config.settings import DEFAULT_CACHE_DIR + +downloader = PoochDownloaderAdapter(cache_dir=DEFAULT_CACHE_DIR) + +# URL to GO ontology +source_go = "https://purl.obolibrary.org/obo/go.obo" + +client = ClientOntology(cache_dir=DEFAULT_CACHE_DIR, downloader=downloader) +client.load(source=source_go) +```