Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
3d37f2e
Renamed LOGGER to logger.
gaurav Dec 18, 2025
4d271de
Added logging to /synonyms API endpoint.
gaurav Dec 18, 2025
096f79c
Added logging to Lookup.
gaurav Dec 18, 2025
923e079
Added logging for bulk_lookup().
gaurav Dec 18, 2025
c8ae693
Added NameRes version to /status.
gaurav Feb 20, 2026
bc02650
Added BIOLINK_MODEL_TAG.
gaurav Feb 20, 2026
9c255ec
Added basic tests for /status.
gaurav Feb 20, 2026
27f9f4b
Improved tests.
gaurav Feb 20, 2026
d4d23b1
Attempted to fix issues.
gaurav Feb 20, 2026
b19316c
Fixed key name.
gaurav Feb 20, 2026
7f02500
Update api/server.py
gaurav Feb 20, 2026
3a2bbc9
Update api/server.py
gaurav Feb 20, 2026
f7267dc
Added a "recent times" that allows us to track query times.
gaurav Dec 18, 2025
9c89abf
Improved name.
gaurav Dec 18, 2025
a0707a6
Apply suggestion from @gaurav
gaurav Apr 7, 2026
a3e00f8
Add Solr performance diagnostics to /status and logging
gaurav Apr 7, 2026
fc168b8
Added on:pull_request trigger for testing.
gaurav Apr 7, 2026
3979968
Add query rate estimation to /status
gaurav Apr 7, 2026
688737e
Incremented version to v1.5.2.
gaurav Apr 7, 2026
a05e4b8
Group Solr metrics under a single 'solr' key in /status
gaurav Apr 7, 2026
c9367b6
Merge query_log and expand rate statistics in /status
gaurav Apr 7, 2026
a211880
Remove recent_times_ms from /status response
gaurav Apr 7, 2026
66b5654
Fix negative inter_arrival_ms from out-of-order log entries
gaurav Apr 7, 2026
03672b0
Fix three bugs identified in PR review
gaurav Apr 7, 2026
9fcb4e8
Add physical_memory_used_pct to /status Solr OS metrics
gaurav Apr 8, 2026
c9fc44c
Extract Solr logic from server.py into api/solr.py with OO design
gaurav Apr 8, 2026
3624ca6
Added on:pull_request trigger for testing.
gaurav Apr 8, 2026
f1d3b2e
Add latency bucket proportions to recent_queries in /status
gaurav Apr 8, 2026
82e9e1a
Add ?full=true parameter to /status to reduce Solr load
gaurav Apr 8, 2026
36c7f11
Document ?full query parameter in FastAPI schema for /status
gaurav Apr 8, 2026
ddcd29e
Simplify latency bucket counting: single pass + named constants
gaurav Apr 8, 2026
85cb011
Fixed bug: changed default infores to NameRes, not NodeNorm.
gaurav Apr 8, 2026
05e0a0a
Fix doc inaccuracies found during sync-docs audit
gaurav Apr 8, 2026
da3f360
Replaced time_ns() with perf_counter_ns().
gaurav Apr 8, 2026
9a805d8
Removed on:pull_request trigger after testing.
gaurav Apr 8, 2026
ba2a9b9
Tweaked tests to get them to pass.
gaurav Apr 8, 2026
0e9b80e
Oops, messed with the wrong test.
gaurav Apr 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/resources/openapi.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
openapi: 3.0.2
info:
title: Name Resolver
version: 1.5.1
version: 1.5.2
email: bizon@renci.org
name: Chris Bizon
x-id: https://github.com/cbizon
Expand Down
233 changes: 193 additions & 40 deletions api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@
* Matching names are returned first, followed by non-matching names
"""
import json
import logging, warnings
import logging
import statistics
import time
import warnings
import os
import re
from collections import deque
from typing import Dict, List, Union, Annotated, Optional

from fastapi import Body, FastAPI, Query
Expand All @@ -20,12 +24,16 @@
from starlette.middleware.cors import CORSMiddleware

from .apidocs import get_app_info, construct_open_api_schema
from .solr import SolrClient

LOGGER = logging.getLogger(__name__)
SOLR_HOST = os.getenv("SOLR_HOST", "localhost")
SOLR_PORT = os.getenv("SOLR_PORT", "8983")

solr_client = SolrClient(SOLR_HOST, int(SOLR_PORT))

app = FastAPI(**get_app_info())
logger = logging.getLogger(__name__)
logging.basicConfig(level=os.getenv("LOGLEVEL", logging.INFO))

app.add_middleware(
CORSMiddleware,
Expand All @@ -35,6 +43,16 @@
allow_headers=["*"],
)

# We track (timestamp_s, duration_ms) for recent queries to compute both latency and rate stats.
# A large default covers ~100s at 500 qps, giving meaningful rate windows even under heavy load.
QUERY_LOG_SIZE = int(os.getenv("QUERY_LOG_SIZE", 50000))
query_log: deque = deque(maxlen=QUERY_LOG_SIZE)

# Queries slower than this threshold will be logged at WARNING level (see documentation/Performance.md).
SLOW_QUERY_THRESHOLD_MS = float(os.getenv("SLOW_QUERY_THRESHOLD_MS", "500"))
IDEAL_QUERY_THRESHOLD_MS = 100 # below this: "ideal"
VERY_SLOW_QUERY_THRESHOLD_MS = 1000 # at or above this: "very slow"

# ENDPOINT /
# If someone tries accessing /, we should redirect them to the Swagger interface.
@app.get("/", include_in_schema=False)
Expand All @@ -50,53 +68,159 @@ async def docs_redirect():
description="This endpoint will return status information and a list of counts from the underlying Solr "
"instance for this NameRes instance."
)
async def status_get() -> Dict:
async def status_get(
full: bool = Query(
False,
description=(
"When false (default), only the Solr cores endpoint is called, returning basic index stats "
"(numDocs, startTime, etc.) with jvm, os, and cache as null. "
"Set to true to also fetch JVM memory, OS metrics, and cache statistics — "
"useful for diagnostics but should not be used for frequent liveness probes."
),
)
) -> Dict:
    """ Return status and count information from the underlying Solr instance. """
return await status()
return await status(full=full)


async def status() -> Dict:
async def status(full: bool = False) -> Dict:
""" Return a dictionary containing status and count information for the underlying Solr instance. """
query_url = f"http://{SOLR_HOST}:{SOLR_PORT}/solr/admin/cores"
async with httpx.AsyncClient(timeout=None) as client:
response = await client.get(query_url, params={
'action': 'STATUS'
})
if response.status_code >= 300:
LOGGER.error("Solr error on accessing /solr/admin/cores?action=STATUS: %s", response.text)
response.raise_for_status()
result = response.json()
solr = await solr_client.fetch_status(full=full)

    # Do we know the Babel version and version URL? It will be stored in an environment variable if we do.
babel_version = os.environ.get("BABEL_VERSION", "unknown")
babel_version_url = os.environ.get("BABEL_VERSION_URL", "")

# We should have a status for name_lookup_shard1_replica_n1.
if 'status' in result and 'name_lookup_shard1_replica_n1' in result['status']:
core = result['status']['name_lookup_shard1_replica_n1']
# Look up the BIOLINK_MODEL_TAG.
# Note: this should be a tag from the Biolink Model repo, e.g. "master" or "v4.3.6".
biolink_model_tag = os.environ.get("BIOLINK_MODEL_TAG", "master")
biolink_model_url = f"https://github.com/biolink/biolink-model/tree/{biolink_model_tag}"
biolink_model_download_url = f"https://raw.githubusercontent.com/biolink/biolink-model/{biolink_model_tag}/biolink-model.yaml"

# Figure out the NameRes version.
nameres_version = "master"
app_info = get_app_info()
if 'version' in app_info and app_info['version']:
nameres_version = 'v' + app_info['version']
# Unpack query_log into parallel lists for latency and rate computations.
log_snapshot = list(query_log) # snapshot to avoid mutation during computation
# Sort by timestamp: concurrent requests complete in a different order than they started,
# so insertion order does not reflect arrival order.
log_snapshot.sort(key=lambda x: x[0])
timestamps = [ts for ts, _ in log_snapshot]
durations = [dur for _, dur in log_snapshot]

# Latency percentiles.
if len(durations) >= 2:
qs = statistics.quantiles(durations, n=100)
p50, p95, p99 = qs[49], qs[94], qs[98]
else:
p50 = p95 = p99 = None

# Inter-arrival times (gaps between consecutive query start timestamps, in ms).
# Requires >= 3 timestamps (>= 2 gaps) because statistics.quantiles needs at least 2 data points.
inter_arrival_ms = None
if len(timestamps) >= 3:
gaps = [(timestamps[i] - timestamps[i - 1]) * 1000 for i in range(1, len(timestamps))]
gaps_sorted = sorted(gaps)
inter_arrival_ms = {
'mean': round(sum(gaps) / len(gaps), 2),
'median': round(statistics.median(gaps), 2),
'min': round(gaps_sorted[0], 2),
'max': round(gaps_sorted[-1], 2),
'p95': round(statistics.quantiles(gaps, n=100)[94], 2),
}

# Latency buckets: fraction of queries in each performance tier.
total = len(durations)
if total:
n_ideal = n_fine = n_slow = n_very_slow = 0
for d in durations:
if d < IDEAL_QUERY_THRESHOLD_MS:
n_ideal += 1
elif d < SLOW_QUERY_THRESHOLD_MS:
n_fine += 1
elif d < VERY_SLOW_QUERY_THRESHOLD_MS:
n_slow += 1
else:
n_very_slow += 1
latency_buckets = {
'slow_threshold_ms': SLOW_QUERY_THRESHOLD_MS,
'ideal_pct': round(n_ideal / total, 4),
'fine_pct': round(n_fine / total, 4),
'slow_pct': round(n_slow / total, 4),
'very_slow_pct': round(n_very_slow / total, 4),
}
else:
latency_buckets = None

# Windowed query rates. Scan from newest to oldest, stopping at the largest window.
now = time.time()
count_10s = count_60s = count_300s = 0
for ts in reversed(timestamps):
age = now - ts
if age <= 300:
count_300s += 1
if age <= 60:
count_60s += 1
if age <= 10:
count_10s += 1
else:
break

history_span = (timestamps[-1] - timestamps[0]) if len(timestamps) >= 2 else 0
time_since_last = (now - timestamps[-1]) if timestamps else None

recent_queries = {
'count': len(durations),
'mean_time_ms': round(sum(durations) / len(durations), 2) if durations else -1,
'p50_ms': p50,
'p95_ms': p95,
'p99_ms': p99,
'latency_buckets': latency_buckets,
'rate': {
'history_span_seconds': round(history_span, 1),
'time_since_last_query_seconds': round(time_since_last, 2) if time_since_last is not None else None,
'queries_last_10s': count_10s,
'queries_per_second_last_10s': round(count_10s / 10, 2),
'queries_last_60s': count_60s,
'queries_per_second_last_60s': round(count_60s / 60, 2),
'queries_last_300s': count_300s,
'queries_per_second_last_300s': round(count_300s / 300, 2),
'inter_arrival_ms': inter_arrival_ms,
},
}

index = {}
if 'index' in core:
index = core['index']
biolink_model = {
'tag': biolink_model_tag,
'url': biolink_model_url,
'download_url': biolink_model_download_url,
}

if solr['found']:
solr_dict = {k: v for k, v in solr.items() if k != 'found'}
return {
'status': 'ok',
'message': 'Reporting results from primary core.',
'babel_version': babel_version,
'babel_version_url': babel_version_url,
'startTime': core['startTime'],
'numDocs': index.get('numDocs', ''),
'maxDoc': index.get('maxDoc', ''),
'deletedDocs': index.get('deletedDocs', ''),
'version': index.get('version', ''),
'segmentCount': index.get('segmentCount', ''),
'lastModified': index.get('lastModified', ''),
'size': index.get('size', ''),
'biolink_model': biolink_model,
'nameres_version': nameres_version,
'recent_queries': recent_queries,
'solr': solr_dict,
}
else:
return {
'status': 'error',
'message': 'Expected core not found.'
'message': 'Expected core not found.',
'babel_version': babel_version,
'babel_version_url': babel_version_url,
'biolink_model': biolink_model,
'nameres_version': nameres_version,
'solr': {
'jvm': solr['jvm'],
'os': solr['os'],
},
}


Expand Down Expand Up @@ -125,7 +249,7 @@ async def reverse_lookup_get(
)
) -> Dict[str, Dict]:
"""Returns a list of synonyms for a particular CURIE."""
return await reverse_lookup(curies)
return await curie_lookup(curies)


@app.get(
Expand All @@ -135,14 +259,14 @@ async def reverse_lookup_get(
response_model=Dict[str, Dict],
tags=["lookup"],
)
async def lookup_names_get(
async def synonyms_get(
preferred_curies: List[str]= Query(
example=["MONDO:0005737", "MONDO:0009757"],
description="A list of CURIEs to look up synonyms for."
)
) -> Dict[str, Dict]:
"""Returns a list of synonyms for a particular CURIE."""
return await reverse_lookup(preferred_curies)
return await curie_lookup(preferred_curies)


@app.post(
Expand All @@ -159,7 +283,7 @@ async def lookup_names_post(
}),
) -> Dict[str, Dict]:
"""Returns a list of synonyms for a particular CURIE."""
return await reverse_lookup(request.curies)
return await curie_lookup(request.curies)


@app.post(
Expand All @@ -169,17 +293,18 @@ async def lookup_names_post(
response_model=Dict[str, Dict],
tags=["lookup"],
)
async def lookup_names_post(
async def synonyms_post(
request: SynonymsRequest = Body(..., example={
"preferred_curies": ["MONDO:0005737", "MONDO:0009757"],
}),
) -> Dict[str, Dict]:
"""Returns a list of synonyms for a particular CURIE."""
return await reverse_lookup(request.preferred_curies)
return await curie_lookup(request.preferred_curies)


async def reverse_lookup(curies) -> Dict[str, Dict]:
async def curie_lookup(curies) -> Dict[str, Dict]:
"""Returns a list of synonyms for a particular CURIE."""
time_start = time.perf_counter_ns()
query = f"http://{SOLR_HOST}:{SOLR_PORT}/solr/name_lookup/select"
curie_filter = " OR ".join(
f"curie:\"{curie}\""
Expand All @@ -199,6 +324,10 @@ async def reverse_lookup(curies) -> Dict[str, Dict]:
}
for doc in response_json["response"]["docs"]:
output[doc["curie"]] = doc
time_end = time.perf_counter_ns()

logger.info(f"CURIE Lookup on {len(curies)} CURIEs {json.dumps(curies)} took {(time_end - time_start)/1_000_000:.2f}ms")

return output

class LookupResult(BaseModel):
Expand Down Expand Up @@ -351,6 +480,8 @@ async def lookup(string: str,
will be returned, rather than filtering to concepts that are both PhenotypicFeature and Disease.
"""

time_start = time.perf_counter_ns()

# First, we strip and lowercase the query since all our indexes are case-insensitive.
string_lc = string.strip().lower()

Expand Down Expand Up @@ -459,16 +590,18 @@ async def lookup(string: str,
"fields": "*, score",
"params": inner_params,
}
logging.debug(f"Query: {json.dumps(params, indent=2)}")
logger.debug(f"Query: {json.dumps(params, indent=2)}")

time_solr_start = time.perf_counter_ns()
query_url = f"http://{SOLR_HOST}:{SOLR_PORT}/solr/name_lookup/select"
async with httpx.AsyncClient(timeout=None) as client:
response = await client.post(query_url, json=params)
if response.status_code >= 300:
LOGGER.error("Solr REST error: %s", response.text)
logger.error("Solr REST error: %s", response.text)
response.raise_for_status()
response = response.json()
logging.debug(f"Solr response: {json.dumps(response, indent=2)}")
time_solr_end = time.perf_counter_ns()
logger.debug(f"Solr response: {json.dumps(response, indent=2)}")

# Associate highlighting information with search results.
highlighting_response = response.get("highlighting", {})
Expand Down Expand Up @@ -511,6 +644,22 @@ async def lookup(string: str,
clique_identifier_count=doc.get("clique_identifier_count", 0),
types=[f"biolink:{d}" for d in doc.get("types", [])]))

time_end = time.perf_counter_ns()
time_taken_ms = (time_end - time_start)/1_000_000
solr_ms = (time_solr_end - time_solr_start)/1_000_000
query_log.append((time_start / 1_000_000_000, time_taken_ms))
log_msg = (
f"Lookup query to Solr for {json.dumps(string)} "
f"(autocomplete={autocomplete}, highlighting={highlighting}, offset={offset}, limit={limit}, "
f"biolink_types={biolink_types}, only_prefixes={only_prefixes}, exclude_prefixes={exclude_prefixes}, "
f"only_taxa={only_taxa}) "
f"took {time_taken_ms:.2f}ms (with {solr_ms:.2f}ms waiting for Solr)"
)
if time_taken_ms > SLOW_QUERY_THRESHOLD_MS:
logger.warning("SLOW QUERY: " + log_msg)
else:
logger.info(log_msg)

return outputs

## BULK ENDPOINT
Expand Down Expand Up @@ -579,6 +728,7 @@ class NameResQuery(BaseModel):
tags=["lookup"]
)
async def bulk_lookup(query: NameResQuery) -> Dict[str, List[LookupResult]]:
time_start = time.perf_counter_ns()
result = {}
for string in query.strings:
result[string] = await lookup(
Expand All @@ -591,6 +741,9 @@ async def bulk_lookup(query: NameResQuery) -> Dict[str, List[LookupResult]]:
query.only_prefixes,
query.exclude_prefixes,
query.only_taxa)
time_end = time.perf_counter_ns()
logger.info(f"Bulk lookup query for {len(query.strings)} strings ({query}) took {(time_end - time_start)/1_000_000:.2f}ms")

return result


Expand All @@ -614,7 +767,7 @@ async def bulk_lookup(query: NameResQuery) -> Dict[str, List[LookupResult]]:
# this suppresses such warnings.
logging.captureWarnings(capture=True)
warnings.filterwarnings("ignore", category=ResourceWarning)
otel_service_name = os.environ.get('SERVER_NAME', 'infores:sri-node-normalizer')
otel_service_name = os.environ.get('SERVER_NAME', 'infores:sri-name-resolver')
assert otel_service_name and isinstance(otel_service_name, str)

otlp_host = os.environ.get("JAEGER_HOST", "http://localhost/").rstrip('/')
Expand Down
Loading
Loading