Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ dependencies = [
"spacy>=3.8.11",
"en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl",
"flask-sqlalchemy>=3.1.1",
"pyrate-limiter>=4.1.0",
]
7 changes: 6 additions & 1 deletion src/db/linkers/elink_dataset_linker.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import logging
from typing import List, Dict
import re
from typing import List, Dict

import requests
import tenacity
from pyrate_limiter.limiter_factory import create_inmemory_limiter

from src.db.linkers.paper_dataset_linker import PaperDatasetLinker
from src.exception.entrez_error import EntrezError

logger = logging.getLogger(__name__)
# Module-level in-memory rate limiter shared by the E-utilities request methods
# in this module (attached via ``as_decorator(name="e-utilities", weight=1)``).
# It is created with no arguments, so the factory's default rate applies.
# NOTE(review): confirm that default matches the request rate NCBI permits for
# this client (the limit may differ with/without an API key).
eutilities_rate_limiter = create_inmemory_limiter()


class ELinkDatasetLinker(PaperDatasetLinker):
Expand Down Expand Up @@ -53,6 +56,7 @@ def link_to_datasets_mapped(self, pubmed_ids: List[str]) -> Dict[str, List[str]]

@tenacity.retry(wait=tenacity.wait_exponential(max=10), stop=tenacity.stop_after_attempt(NUMBER_OF_RETRIES),
before_sleep=tenacity.before_sleep_log(logger, logging.WARNING), reraise=True)
@eutilities_rate_limiter.as_decorator(name="e-utilities", weight=1)
def _fetch_geo_ids(self, pubmed_ids: List[str]) -> List[str]:
"""
Fetches GEO dataset ids for papers with the specified PubMed IDs.
Expand Down Expand Up @@ -95,6 +99,7 @@ def _fetch_geo_ids(self, pubmed_ids: List[str]) -> List[str]:

@tenacity.retry(wait=tenacity.wait_exponential(max=10), stop=tenacity.stop_after_attempt(NUMBER_OF_RETRIES),
before_sleep=tenacity.before_sleep_log(logger, logging.WARNING), reraise=True)
@eutilities_rate_limiter.as_decorator(name="e-utilities", weight=1)
def _fetch_geo_accessions(self, geo_ids: List[str]) -> List[str]:
"""
Fetches GEO accessions for the given GEO IDs from the NCBI E-Utilities.
Expand Down
6 changes: 6 additions & 0 deletions src/db/linkers/europepmc_dataset_linker.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from typing import List, Dict
import requests
from pyrate_limiter import Duration
from pyrate_limiter.limiter_factory import create_inmemory_limiter

from src.exception.europepmc_error import EuropePMCError
from src.db.linkers.paper_dataset_linker import PaperDatasetLinker

# Module-level in-memory rate limiter for EuropePMC requests, attached to
# ``_fetch_geo_accession_batch_mapped`` via ``as_decorator(name="EuropePMC", weight=1)``.
# NOTE(review): the arguments presumably mean "10 acquisitions per second" —
# confirm against the pyrate-limiter factory signature.
europepmc_rate_limiter = create_inmemory_limiter(10, Duration.SECOND)


class EuropePMCDatasetLinker(PaperDatasetLinker):
EUROPEPMC_URL = (
Expand Down Expand Up @@ -68,6 +73,7 @@ def link_to_datasets_mapped(self, pubmed_ids: List[str]) -> Dict[str, List[str]]

return result

@europepmc_rate_limiter.as_decorator(name="EuropePMC", weight=1)
def _fetch_geo_accession_batch_mapped(self, pubmed_ids: List[str]) -> Dict[str, List[str]]:
"""
Fetches GEO references in a list of papers (max 8 papers) from EuropePMC's
Expand Down
11 changes: 11 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.