diff --git a/api/resources/openapi.yml b/api/resources/openapi.yml index 8522980c..a4b7a29d 100644 --- a/api/resources/openapi.yml +++ b/api/resources/openapi.yml @@ -1,7 +1,7 @@ openapi: 3.0.2 info: title: Name Resolver - version: 1.5.1 + version: 1.5.2 email: bizon@renci.org name: Chris Bizon x-id: https://github.com/cbizon diff --git a/api/server.py b/api/server.py index 3c3ec88a..8fc60a97 100755 --- a/api/server.py +++ b/api/server.py @@ -8,9 +8,13 @@ * Matching names are returned first, followed by non-matching names """ import json -import logging, warnings +import logging +import statistics +import time +import warnings import os import re +from collections import deque from typing import Dict, List, Union, Annotated, Optional from fastapi import Body, FastAPI, Query @@ -20,12 +24,16 @@ from starlette.middleware.cors import CORSMiddleware from .apidocs import get_app_info, construct_open_api_schema +from .solr import SolrClient -LOGGER = logging.getLogger(__name__) SOLR_HOST = os.getenv("SOLR_HOST", "localhost") SOLR_PORT = os.getenv("SOLR_PORT", "8983") +solr_client = SolrClient(SOLR_HOST, int(SOLR_PORT)) + app = FastAPI(**get_app_info()) +logger = logging.getLogger(__name__) +logging.basicConfig(level=os.getenv("LOGLEVEL", logging.INFO)) app.add_middleware( CORSMiddleware, @@ -35,6 +43,16 @@ allow_headers=["*"], ) +# We track (timestamp_s, duration_ms) for recent queries to compute both latency and rate stats. +# A large default covers ~100s at 500 qps, giving meaningful rate windows even under heavy load. +QUERY_LOG_SIZE = int(os.getenv("QUERY_LOG_SIZE", 50000)) +query_log: deque = deque(maxlen=QUERY_LOG_SIZE) + +# Queries slower than this threshold will be logged at WARNING level (see documentation/Performance.md). 
+SLOW_QUERY_THRESHOLD_MS = float(os.getenv("SLOW_QUERY_THRESHOLD_MS", "500")) +IDEAL_QUERY_THRESHOLD_MS = 100 # below this: "ideal" +VERY_SLOW_QUERY_THRESHOLD_MS = 1000 # at or above this: "very slow" + # ENDPOINT / # If someone tries accessing /, we should redirect them to the Swagger interface. @app.get("/", include_in_schema=False) @@ -50,53 +68,159 @@ async def docs_redirect(): description="This endpoint will return status information and a list of counts from the underlying Solr " "instance for this NameRes instance." ) -async def status_get() -> Dict: +async def status_get( + full: bool = Query( + False, + description=( + "When false (default), only the Solr cores endpoint is called, returning basic index stats " + "(numDocs, startTime, etc.) with jvm, os, and cache as null. " + "Set to true to also fetch JVM memory, OS metrics, and cache statistics — " + "useful for diagnostics but should not be used for frequent liveness probes." + ), + ) +) -> Dict: """ Return status and count information from the underyling Solr instance. """ - return await status() + return await status(full=full) -async def status() -> Dict: +async def status(full: bool = False) -> Dict: """ Return a dictionary containing status and count information for the underlying Solr instance. """ - query_url = f"http://{SOLR_HOST}:{SOLR_PORT}/solr/admin/cores" - async with httpx.AsyncClient(timeout=None) as client: - response = await client.get(query_url, params={ - 'action': 'STATUS' - }) - if response.status_code >= 300: - LOGGER.error("Solr error on accessing /solr/admin/cores?action=STATUS: %s", response.text) - response.raise_for_status() - result = response.json() + solr = await solr_client.fetch_status(full=full) # Do we know the Babel version and version URL? It will be stored in an environmental variable if we do. 
babel_version = os.environ.get("BABEL_VERSION", "unknown") babel_version_url = os.environ.get("BABEL_VERSION_URL", "") - # We should have a status for name_lookup_shard1_replica_n1. - if 'status' in result and 'name_lookup_shard1_replica_n1' in result['status']: - core = result['status']['name_lookup_shard1_replica_n1'] + # Look up the BIOLINK_MODEL_TAG. + # Note: this should be a tag from the Biolink Model repo, e.g. "master" or "v4.3.6". + biolink_model_tag = os.environ.get("BIOLINK_MODEL_TAG", "master") + biolink_model_url = f"https://github.com/biolink/biolink-model/tree/{biolink_model_tag}" + biolink_model_download_url = f"https://raw.githubusercontent.com/biolink/biolink-model/{biolink_model_tag}/biolink-model.yaml" + + # Figure out the NameRes version. + nameres_version = "master" + app_info = get_app_info() + if 'version' in app_info and app_info['version']: + nameres_version = 'v' + app_info['version'] + # Unpack query_log into parallel lists for latency and rate computations. + log_snapshot = list(query_log) # snapshot to avoid mutation during computation + # Sort by timestamp: concurrent requests complete in a different order than they started, + # so insertion order does not reflect arrival order. + log_snapshot.sort(key=lambda x: x[0]) + timestamps = [ts for ts, _ in log_snapshot] + durations = [dur for _, dur in log_snapshot] + + # Latency percentiles. + if len(durations) >= 2: + qs = statistics.quantiles(durations, n=100) + p50, p95, p99 = qs[49], qs[94], qs[98] + else: + p50 = p95 = p99 = None + + # Inter-arrival times (gaps between consecutive query start timestamps, in ms). + # Requires >= 3 timestamps (>= 2 gaps) because statistics.quantiles needs at least 2 data points. 
+ inter_arrival_ms = None + if len(timestamps) >= 3: + gaps = [(timestamps[i] - timestamps[i - 1]) * 1000 for i in range(1, len(timestamps))] + gaps_sorted = sorted(gaps) + inter_arrival_ms = { + 'mean': round(sum(gaps) / len(gaps), 2), + 'median': round(statistics.median(gaps), 2), + 'min': round(gaps_sorted[0], 2), + 'max': round(gaps_sorted[-1], 2), + 'p95': round(statistics.quantiles(gaps, n=100)[94], 2), + } + + # Latency buckets: fraction of queries in each performance tier. + total = len(durations) + if total: + n_ideal = n_fine = n_slow = n_very_slow = 0 + for d in durations: + if d < IDEAL_QUERY_THRESHOLD_MS: + n_ideal += 1 + elif d < SLOW_QUERY_THRESHOLD_MS: + n_fine += 1 + elif d < VERY_SLOW_QUERY_THRESHOLD_MS: + n_slow += 1 + else: + n_very_slow += 1 + latency_buckets = { + 'slow_threshold_ms': SLOW_QUERY_THRESHOLD_MS, + 'ideal_pct': round(n_ideal / total, 4), + 'fine_pct': round(n_fine / total, 4), + 'slow_pct': round(n_slow / total, 4), + 'very_slow_pct': round(n_very_slow / total, 4), + } + else: + latency_buckets = None + + # Windowed query rates. Scan from newest to oldest, stopping at the largest window. 
+ now = time.time() + count_10s = count_60s = count_300s = 0 + for ts in reversed(timestamps): + age = now - ts + if age <= 300: + count_300s += 1 + if age <= 60: + count_60s += 1 + if age <= 10: + count_10s += 1 + else: + break + + history_span = (timestamps[-1] - timestamps[0]) if len(timestamps) >= 2 else 0 + time_since_last = (now - timestamps[-1]) if timestamps else None + + recent_queries = { + 'count': len(durations), + 'mean_time_ms': round(sum(durations) / len(durations), 2) if durations else -1, + 'p50_ms': p50, + 'p95_ms': p95, + 'p99_ms': p99, + 'latency_buckets': latency_buckets, + 'rate': { + 'history_span_seconds': round(history_span, 1), + 'time_since_last_query_seconds': round(time_since_last, 2) if time_since_last is not None else None, + 'queries_last_10s': count_10s, + 'queries_per_second_last_10s': round(count_10s / 10, 2), + 'queries_last_60s': count_60s, + 'queries_per_second_last_60s': round(count_60s / 60, 2), + 'queries_last_300s': count_300s, + 'queries_per_second_last_300s': round(count_300s / 300, 2), + 'inter_arrival_ms': inter_arrival_ms, + }, + } - index = {} - if 'index' in core: - index = core['index'] + biolink_model = { + 'tag': biolink_model_tag, + 'url': biolink_model_url, + 'download_url': biolink_model_download_url, + } + if solr['found']: + solr_dict = {k: v for k, v in solr.items() if k != 'found'} return { 'status': 'ok', 'message': 'Reporting results from primary core.', 'babel_version': babel_version, 'babel_version_url': babel_version_url, - 'startTime': core['startTime'], - 'numDocs': index.get('numDocs', ''), - 'maxDoc': index.get('maxDoc', ''), - 'deletedDocs': index.get('deletedDocs', ''), - 'version': index.get('version', ''), - 'segmentCount': index.get('segmentCount', ''), - 'lastModified': index.get('lastModified', ''), - 'size': index.get('size', ''), + 'biolink_model': biolink_model, + 'nameres_version': nameres_version, + 'recent_queries': recent_queries, + 'solr': solr_dict, } else: return { 'status': 
'error', - 'message': 'Expected core not found.' + 'message': 'Expected core not found.', + 'babel_version': babel_version, + 'babel_version_url': babel_version_url, + 'biolink_model': biolink_model, + 'nameres_version': nameres_version, + 'solr': { + 'jvm': solr['jvm'], + 'os': solr['os'], + }, } @@ -125,7 +249,7 @@ async def reverse_lookup_get( ) ) -> Dict[str, Dict]: """Returns a list of synonyms for a particular CURIE.""" - return await reverse_lookup(curies) + return await curie_lookup(curies) @app.get( @@ -135,14 +259,14 @@ async def reverse_lookup_get( response_model=Dict[str, Dict], tags=["lookup"], ) -async def lookup_names_get( +async def synonyms_get( preferred_curies: List[str]= Query( example=["MONDO:0005737", "MONDO:0009757"], description="A list of CURIEs to look up synonyms for." ) ) -> Dict[str, Dict]: """Returns a list of synonyms for a particular CURIE.""" - return await reverse_lookup(preferred_curies) + return await curie_lookup(preferred_curies) @app.post( @@ -159,7 +283,7 @@ async def lookup_names_post( }), ) -> Dict[str, Dict]: """Returns a list of synonyms for a particular CURIE.""" - return await reverse_lookup(request.curies) + return await curie_lookup(request.curies) @app.post( @@ -169,17 +293,18 @@ async def lookup_names_post( response_model=Dict[str, Dict], tags=["lookup"], ) -async def lookup_names_post( +async def synonyms_post( request: SynonymsRequest = Body(..., example={ "preferred_curies": ["MONDO:0005737", "MONDO:0009757"], }), ) -> Dict[str, Dict]: """Returns a list of synonyms for a particular CURIE.""" - return await reverse_lookup(request.preferred_curies) + return await curie_lookup(request.preferred_curies) -async def reverse_lookup(curies) -> Dict[str, Dict]: +async def curie_lookup(curies) -> Dict[str, Dict]: """Returns a list of synonyms for a particular CURIE.""" + time_start = time.perf_counter_ns() query = f"http://{SOLR_HOST}:{SOLR_PORT}/solr/name_lookup/select" curie_filter = " OR ".join( f"curie:\"{curie}\"" @@ 
-199,6 +324,10 @@ async def reverse_lookup(curies) -> Dict[str, Dict]: } for doc in response_json["response"]["docs"]: output[doc["curie"]] = doc + time_end = time.perf_counter_ns() + + logger.info(f"CURIE Lookup on {len(curies)} CURIEs {json.dumps(curies)} took {(time_end - time_start)/1_000_000:.2f}ms") + return output class LookupResult(BaseModel): @@ -351,6 +480,8 @@ async def lookup(string: str, will be returned, rather than filtering to concepts that are both PhenotypicFeature and Disease. """ + time_start = time.perf_counter_ns() + # First, we strip and lowercase the query since all our indexes are case-insensitive. string_lc = string.strip().lower() @@ -459,16 +590,18 @@ async def lookup(string: str, "fields": "*, score", "params": inner_params, } - logging.debug(f"Query: {json.dumps(params, indent=2)}") + logger.debug(f"Query: {json.dumps(params, indent=2)}") + time_solr_start = time.perf_counter_ns() query_url = f"http://{SOLR_HOST}:{SOLR_PORT}/solr/name_lookup/select" async with httpx.AsyncClient(timeout=None) as client: response = await client.post(query_url, json=params) if response.status_code >= 300: - LOGGER.error("Solr REST error: %s", response.text) + logger.error("Solr REST error: %s", response.text) response.raise_for_status() response = response.json() - logging.debug(f"Solr response: {json.dumps(response, indent=2)}") + time_solr_end = time.perf_counter_ns() + logger.debug(f"Solr response: {json.dumps(response, indent=2)}") # Associate highlighting information with search results. 
highlighting_response = response.get("highlighting", {}) @@ -511,6 +644,22 @@ async def lookup(string: str, clique_identifier_count=doc.get("clique_identifier_count", 0), types=[f"biolink:{d}" for d in doc.get("types", [])])) + time_end = time.perf_counter_ns() + time_taken_ms = (time_end - time_start)/1_000_000 + solr_ms = (time_solr_end - time_solr_start)/1_000_000 + query_log.append((time_start / 1_000_000_000, time_taken_ms)) + log_msg = ( + f"Lookup query to Solr for {json.dumps(string)} " + f"(autocomplete={autocomplete}, highlighting={highlighting}, offset={offset}, limit={limit}, " + f"biolink_types={biolink_types}, only_prefixes={only_prefixes}, exclude_prefixes={exclude_prefixes}, " + f"only_taxa={only_taxa}) " + f"took {time_taken_ms:.2f}ms (with {solr_ms:.2f}ms waiting for Solr)" + ) + if time_taken_ms > SLOW_QUERY_THRESHOLD_MS: + logger.warning("SLOW QUERY: " + log_msg) + else: + logger.info(log_msg) + return outputs ## BULK ENDPOINT @@ -579,6 +728,7 @@ class NameResQuery(BaseModel): tags=["lookup"] ) async def bulk_lookup(query: NameResQuery) -> Dict[str, List[LookupResult]]: + time_start = time.perf_counter_ns() result = {} for string in query.strings: result[string] = await lookup( @@ -591,6 +741,9 @@ async def bulk_lookup(query: NameResQuery) -> Dict[str, List[LookupResult]]: query.only_prefixes, query.exclude_prefixes, query.only_taxa) + time_end = time.perf_counter_ns() + logger.info(f"Bulk lookup query for {len(query.strings)} strings ({query}) took {(time_end - time_start)/1_000_000:.2f}ms") + return result @@ -614,7 +767,7 @@ async def bulk_lookup(query: NameResQuery) -> Dict[str, List[LookupResult]]: # these supresses such warnings. 
class SolrClient:
    """HTTP client for Solr admin APIs with static parsing utilities.

    Async ``fetch_*`` methods make HTTP calls and return raw JSON (or None
    on failure). Static ``parse_*`` methods accept raw JSON dicts and can
    be unit-tested directly without a running Solr instance.
    """

    # Primary core name we monitor. Kept on the class (rather than as a
    # module global) so the parsers are self-contained and testable.
    _CORE_NAME = "name_lookup_shard1_replica_n1"
    _logger = logging.getLogger(__name__)

    def __init__(self, host: str, port: int):
        """Remember the Solr base URL, e.g. ``http://localhost:8983/solr``."""
        self._base = f"http://{host}:{port}/solr"

    # ------------------------------------------------------------------ #
    # Static parsers — pure functions, independently unit-testable       #
    # ------------------------------------------------------------------ #

    @staticmethod
    def parse_jvm(sysinfo_data: dict) -> dict:
        """Parse JVM heap stats from a /admin/info/system response.

        Returns ``heap_used_bytes``, ``heap_max_bytes`` and
        ``heap_used_pct`` (a 0.0-1.0 fraction); each is None when the
        corresponding data is absent.
        """
        jvm_raw = sysinfo_data.get("jvm", {}).get("memory", {}).get("raw", {})
        heap_used = jvm_raw.get("used")
        heap_max = jvm_raw.get("max")
        # BUG FIX: a legitimate heap_used of 0 is falsy, so the previous
        # `if heap_used and heap_max` wrongly reported heap_used_pct as
        # None. Test for None explicitly; heap_max must still be truthy
        # so we never divide by zero.
        if heap_used is not None and heap_max:
            heap_used_pct = round(heap_used / heap_max, 4)
        else:
            heap_used_pct = None
        return {
            "heap_used_bytes": heap_used,
            "heap_max_bytes": heap_max,
            "heap_used_pct": heap_used_pct,
        }

    @staticmethod
    def parse_os(sysinfo_data: dict) -> dict:
        """Parse OS / memory stats from a /admin/info/system response.

        ``physical_memory_used_pct`` is only computed when both memory
        figures are present and mutually consistent (0 <= free <= total,
        total > 0); otherwise it is None.
        """
        system = sysinfo_data.get("system", {})
        free_mem = system.get("freePhysicalMemorySize")
        total_mem = system.get("totalPhysicalMemorySize")
        if free_mem is not None and total_mem and total_mem > 0 and 0 <= free_mem <= total_mem:
            physical_memory_used_pct = round((total_mem - free_mem) / total_mem, 4)
        else:
            physical_memory_used_pct = None
        return {
            "process_cpu_load": system.get("processCpuLoad"),
            "system_cpu_load": system.get("systemCpuLoad"),
            "free_physical_memory_bytes": free_mem,
            "total_physical_memory_bytes": total_mem,
            "physical_memory_used_pct": physical_memory_used_pct,
        }

    @staticmethod
    def parse_cache(mbeans_data: dict, name: str) -> dict | None:
        """Parse stats for one named cache from a /admin/mbeans?cat=CACHE response.

        Solr stores stats under fully-qualified keys such as
        ``CACHE.searcher.<name>.hitratio``, not bare ``hitratio``.
        Returns None when the cache is absent from the response.
        """
        # solr-mbeans is an alternating [category-string, {entries...}, ...]
        # list, so skip the string elements.
        for entry in mbeans_data.get("solr-mbeans", []):
            if isinstance(entry, dict) and name in entry:
                stats = entry[name].get("stats", {})
                p = f"CACHE.searcher.{name}."
                return {
                    "hitratio": stats.get(f"{p}hitratio"),
                    "evictions": stats.get(f"{p}evictions"),
                    "size": stats.get(f"{p}size"),
                    "lookups": stats.get(f"{p}lookups"),
                    "hits": stats.get(f"{p}hits"),
                }
        return None

    @staticmethod
    def parse_index(cores_data: dict) -> dict | None:
        """Parse core index info from a /admin/cores?action=STATUS response.

        Returns None when the expected core is not present.
        """
        core = cores_data.get("status", {}).get(SolrClient._CORE_NAME)
        if core is None:
            return None
        index = core.get("index", {})
        return {
            "startTime": core.get("startTime"),
            "numDocs": index.get("numDocs", ""),
            "maxDoc": index.get("maxDoc", ""),
            "deletedDocs": index.get("deletedDocs", ""),
            "version": index.get("version", ""),
            "segmentCount": index.get("segmentCount", ""),
            "lastModified": index.get("lastModified", ""),
            "size": index.get("size", ""),
        }

    # ------------------------------------------------------------------ #
    # Async HTTP fetchers                                                #
    # ------------------------------------------------------------------ #

    async def fetch_sysinfo(self, client: httpx.AsyncClient) -> dict | None:
        """Fetch /admin/info/system JSON; None (with a warning) on any failure."""
        try:
            r = await client.get(f"{self._base}/admin/info/system", params={"wt": "json"})
            r.raise_for_status()
            return r.json()
        except Exception as e:
            self._logger.warning("Could not fetch Solr system info: %s", e)
            return None

    async def fetch_mbeans(self, client: httpx.AsyncClient) -> dict | None:
        """Fetch cache MBeans JSON; None (with a warning) on any failure."""
        try:
            r = await client.get(
                f"{self._base}/name_lookup/admin/mbeans",
                params={"cat": "CACHE", "stats": "true", "wt": "json"},
            )
            r.raise_for_status()
            return r.json()
        except Exception as e:
            self._logger.warning("Could not fetch Solr cache MBeans: %s", e)
            return None

    async def fetch_cores(self, client: httpx.AsyncClient) -> dict:
        """Fetch /admin/cores?action=STATUS JSON.

        Unlike the other fetchers this raises (httpx.HTTPStatusError) on
        failure: without the cores endpoint the status report is useless.
        """
        r = await client.get(f"{self._base}/admin/cores", params={"action": "STATUS"})
        if r.status_code >= 300:
            self._logger.error("Solr error on /solr/admin/cores?action=STATUS: %s", r.text)
            r.raise_for_status()
        return r.json()

    # ------------------------------------------------------------------ #
    # High-level: fetch everything and return a parsed snapshot          #
    # ------------------------------------------------------------------ #

    async def fetch_status(self, full: bool = False) -> dict:
        """Fetch and parse Solr monitoring data.

        When ``full=False`` (default), only the cores endpoint is called,
        returning basic index stats with ``jvm``, ``os``, and ``cache`` as
        ``None``. Pass ``full=True`` to also fetch JVM, OS, and cache metrics
        (three concurrent requests instead of one).

        Returns a dict with a ``found`` flag plus parsed fields. Callers
        should check ``result["found"]`` before accessing index-level keys.

        Raises ``httpx.HTTPStatusError`` if the cores endpoint is unavailable.
        """
        async with httpx.AsyncClient(timeout=None) as client:
            if full:
                cores_data, sysinfo_data, mbeans_data = await asyncio.gather(
                    self.fetch_cores(client),
                    self.fetch_sysinfo(client),
                    self.fetch_mbeans(client),
                )
            else:
                cores_data = await self.fetch_cores(client)
                sysinfo_data = mbeans_data = None

        jvm_info = self.parse_jvm(sysinfo_data) if sysinfo_data else None
        os_info = self.parse_os(sysinfo_data) if sysinfo_data else None
        cache_info = {
            "filterCache": self.parse_cache(mbeans_data, "filterCache"),
            "queryResultCache": self.parse_cache(mbeans_data, "queryResultCache"),
        } if mbeans_data else None

        index_info = self.parse_index(cores_data)
        if index_info is None:
            return {"found": False, "jvm": jvm_info, "os": os_info}

        return {
            "found": True,
            "startTime": index_info["startTime"],
            "numDocs": index_info["numDocs"],
            "maxDoc": index_info["maxDoc"],
            "deletedDocs": index_info["deletedDocs"],
            "version": index_info["version"],
            "segmentCount": index_info["segmentCount"],
            "lastModified": index_info["lastModified"],
            "size": index_info["size"],
            "jvm": jvm_info,
            "os": os_info,
            "cache": cache_info,
        }
a/data-loading/README.md b/data-loading/README.md index 376c9e9f..ab5ac060 100644 --- a/data-loading/README.md +++ b/data-loading/README.md @@ -58,7 +58,7 @@ automatically. This is a seven-step process: Note the double-quotes: setup-and-load-solr.sh requires a glob pattern as its first argument, not a list of files to process! 5. Generate a backup of the Solr instance. The first command will create a directory at - `solrdata/data/name_lookup_shard1_repical_n1/data/snapshot.backup` -- you can track its progress by comparing the + `solrdata/data/name_lookup_shard1_replica_n1/data/snapshot.backup` -- you can track its progress by comparing the number of files in that directory to the number of files in `../data/index` (as I write this, it has 513 files). ```shell diff --git a/documentation/Performance.md b/documentation/Performance.md new file mode 100644 index 00000000..7a52a1cc --- /dev/null +++ b/documentation/Performance.md @@ -0,0 +1,194 @@ +# NameRes Performance Diagnostics + +This document explains how to diagnose why Solr may be slow or under strain, using the +observability built into NameRes. It covers what the metrics in `/status` mean, how to +read the log messages, and a decision tree for identifying CPU pressure, memory pressure, +high query rate, and other causes. + +--- + +## 1. `/status` response fields + +The response has two main diagnostic sections: `recent_queries` (Python frontend metrics) +and `solr` (everything from the Solr database itself). + +### Frontend query metrics (`recent_queries`) + +These are tracked by the Python process and reflect the full round-trip time seen by callers. + +#### Latency + +| Field | What it means | +|---|---| +| `mean_time_ms` | Average round-trip time (Python → Solr → Python) over the entries in `query_log`. Rising mean = sustained slowdown. | +| `p50_ms` / `p95_ms` / `p99_ms` | Latency percentiles over the same window. p50 rising = every query is slow. 
p99 spiking but p50 stable = occasional GC pauses or one-off expensive queries. | + +#### Rate (`recent_queries.rate`) + +Query start timestamps and durations are stored together in a single `query_log` deque +(up to `QUERY_LOG_SIZE` entries, default 50,000). The large size ensures rate estimates +stay meaningful even at high query rates (e.g., 500 qps fills 1,000 entries in 2 seconds, +but 50,000 entries covers 100 seconds). + +| Field | What it means | +|---|---| +| `history_span_seconds` | Time from the oldest to newest entry in the log. Shows how much history backs the rate estimates. | +| `time_since_last_query_seconds` | Seconds since the most recent query. Large values mean the service is idle and windowed rates are stale. | +| `queries_last_10s` / `queries_per_second_last_10s` | 10-second window. Use this to catch the onset of a spike before the 60s average catches up. | +| `queries_last_60s` / `queries_per_second_last_60s` | 1-minute average rate. Use this for current load. | +| `queries_last_300s` / `queries_per_second_last_300s` | 5-minute average rate. Use this to smooth over short bursts. | +| `inter_arrival_ms.mean` | Average gap between consecutive queries in ms. Equals 1000 / mean_qps; cross-checks the windowed rates. | +| `inter_arrival_ms.median` | Median gap. More robust than mean under burst traffic. | +| `inter_arrival_ms.min` | Tightest burst observed — how closely packed the busiest queries were. | +| `inter_arrival_ms.max` | Longest idle gap in the log window. | +| `inter_arrival_ms.p95` | 95% of queries arrive within this gap. | + +The key diagnostic use: **if Solr is slow AND the query rate is high**, the cause is likely +load rather than an internal Solr problem. If the rate is normal but Solr is slow, look at +the `solr` fields below. + +### Solr database metrics (`solr`) + +All fields under `solr` come from Solr admin endpoints fetched in parallel when `/status` +is called. 
They are `null` if the relevant Solr endpoint is unreachable (a warning is logged). + +#### Index health + +| Field | What it means | +|---|---| +| `solr.segmentCount` | Number of Lucene segments. Above ~20 means Solr does more per-segment work per query. Consider triggering an optimize (`POST /solr/name_lookup/update?optimize=true`). | +| `solr.numDocs` / `solr.maxDoc` | `maxDoc - numDocs` = soft-deleted docs not yet merged. High deleted count + high segment count amplifies query cost. | +| `solr.size` | Index size on disk. Unexpectedly small may indicate an incomplete data load. | + +#### JVM and OS (`solr.jvm`, `solr.os`) + +Fetched from Solr's `/solr/admin/info/system` endpoint. + +| Field | What it means | +|---|---| +| `solr.jvm.heap_used_pct` | Fraction of JVM heap in use (0.0–1.0). **>0.80 = memory pressure.** | +| `solr.jvm.heap_used_bytes` / `solr.jvm.heap_max_bytes` | Absolute heap figures. Max is set by `-Xmx` in Solr's JVM config. | +| `solr.os.process_cpu_load` | Solr process CPU (0.0–1.0). **>0.80 = CPU saturation.** | +| `solr.os.system_cpu_load` | Host-wide CPU. If higher than process load, other processes are competing. | +| `solr.os.free_physical_memory_bytes` | OS RAM available. If low, the OS may be swapping. | + +#### Cache statistics (`solr.cache`) + +Fetched from Solr's MBeans endpoint. Reports `filterCache` and `queryResultCache`. + +| Field | What it means | +|---|---| +| `hitratio` | Fraction of cache lookups that were hits. Should be >0.90. Below 0.50 = Solr is re-computing filters on nearly every query. | +| `evictions` | Rising count = cache too small for the working set (a symptom of memory pressure). | +| `size` | Current number of entries in the cache. If evictions are rising, the working set is larger than the cache. | +| `lookups` | Total cache lookups. | +| `hits` | Cache lookups that were served from cache (rather than recomputed). | + +Cache sizes are configured in Solr's `solrconfig.xml`. 
If evictions are high, increase +`` for the affected cache — or investigate whether requests use many distinct +filter combinations that defeat caching. + +--- + +## 2. Log messages + +Every call to `lookup()` emits a line at INFO (or WARNING if slow): + +``` +INFO: Lookup query to Solr for "diabetes" (autocomplete=False, highlighting=False, offset=0, + limit=10, biolink_types=['biolink:Disease'], only_prefixes=None, exclude_prefixes=None, + only_taxa=None) took 123.45ms (with 100.12ms waiting for Solr) +``` + +``` +WARNING: SLOW QUERY: Lookup query to Solr for "..." ... took 850.12ms (with 840.00ms waiting for Solr) +``` + +Key interpretation: +- **"waiting for Solr" ≈ total** → the bottleneck is inside Solr (JVM, index, caches). +- **"waiting for Solr" is small, total is high** → the bottleneck is Python result processing + (large result sets being deserialized or filtered). +- A WARNING is emitted when total time exceeds `SLOW_QUERY_THRESHOLD_MS` (default 500ms). + Set `LOGLEVEL=DEBUG` to also log the full Solr request and response JSON. + +--- + +## 3. Diagnostic decision tree + +``` +Solr seems slow or the service is unresponsive +│ +├─ Step 1: Check recent_queries.rate in /status +│ │ +│ ├─ queries_per_second_last_60s is unusually high (e.g. 10x normal) +│ │ → HIGH QUERY RATE is driving the load +│ │ Check: are requests batching correctly? (use /reverse_lookup or /bulk_lookup) +│ │ Check: is a client in a retry loop? (look for repeated identical queries in logs) +│ │ Fix: rate-limit upstream callers; scale horizontally +│ │ +│ └─ Rate is normal → the problem is internal to Solr → continue +│ +├─ Step 2: Check log messages for "waiting for Solr" vs total time +│ │ +│ ├─ "waiting for Solr" is small, total is high +│ │ → Python result-processing bottleneck +│ │ Check: is limit very large? 
High result counts = expensive deserialization +│ │ Check: Python process CPU (scale horizontally if saturated) +│ │ +│ └─ "waiting for Solr" is most of total → problem is INSIDE Solr → continue +│ +├─ Step 3: Check solr.jvm.heap_used_pct in /status +│ │ +│ ├─ >0.80 → MEMORY PRESSURE +│ │ │ +│ │ ├─ Check solr.cache.filterCache.evictions +│ │ │ ├─ Rising evictions → cache is too small for the working set +│ │ │ │ Fix: increase in solrconfig.xml for filterCache +│ │ │ └─ Evictions low but heap still high → data or fieldCache is large +│ │ │ Fix: increase JVM -Xmx (SOLR_JAVA_MEM in Solr's environment config) +│ │ │ or add more RAM to the host +│ │ │ +│ │ └─ Heap high AND evictions low → not a cache-size problem +│ │ Consider: Solr fieldCache warming on startup; large stored fields +│ │ +│ └─ <0.50 → NOT a memory issue → continue +│ +├─ Step 4: Check solr.os.process_cpu_load in /status +│ │ +│ ├─ >0.80 → CPU SATURATION +│ │ │ +│ │ ├─ Check solr.segmentCount in /status +│ │ │ ├─ >20 → run Solr optimize to merge segments +│ │ │ │ POST http://solr-host:8983/solr/name_lookup/update?optimize=true +│ │ │ └─ Low segmentCount → CPU is busy with query evaluation itself +│ │ │ +│ │ ├─ Check SLOW QUERY WARNINGs in logs +│ │ │ Are specific queries (many filters, wildcard-heavy) driving the load? +│ │ │ Fix: cache common filter combinations; avoid leading wildcards +│ │ │ +│ │ └─ Even load across all queries → scale horizontally (add Solr replicas) +│ │ +│ └─ Low CPU and low memory but slow queries → likely JVM GC pauses → continue +│ +└─ Step 5: Check p99 vs p50 in recent_queries + │ + ├─ p99 >> p50 (e.g. p50=50ms, p99=5000ms) → GC pause signature + │ Fix: tune JVM GC settings in Solr's JVM config: + │ -XX:+UseG1GC -XX:MaxGCPauseMillis=200 + │ Check Solr GC logs (solr-gc.log) for Full GC frequency and duration + │ + └─ p50 and p99 both high → sustained overload at all percentiles + → All of the above apply; start with memory (Step 3) +``` + +--- + +## 4. 
Environment variables + +| Variable | Default | Effect | +|---|---|---| +| `SOLR_HOST` | `localhost` | Solr hostname | +| `SOLR_PORT` | `8983` | Solr port | +| `QUERY_LOG_SIZE` | `50000` | How many `(timestamp, duration)` pairs to retain. Covers both latency percentiles and rate estimation. At 500 qps this covers ~100 seconds; lower to reduce memory on low-traffic instances. | +| `SLOW_QUERY_THRESHOLD_MS` | `500` | Queries slower than this are logged at WARNING level | +| `LOGLEVEL` | `INFO` | Set to `DEBUG` to log full Solr request/response JSON for every query | diff --git a/tests/test_solr.py b/tests/test_solr.py new file mode 100644 index 00000000..c9fe923f --- /dev/null +++ b/tests/test_solr.py @@ -0,0 +1,189 @@ +"""Unit tests for SolrClient static parsing methods. + +These tests exercise the pure parsing logic without any HTTP calls or a +running Solr instance. The mock payloads mirror the actual structure +returned by Solr's admin endpoints. +""" +from api.solr import SolrClient + + +# ------------------------------------------------------------------ # +# parse_cache # +# ------------------------------------------------------------------ # + +# Minimal mbeans payload matching real Solr structure: +# solr-mbeans is an alternating [category-string, {entries...}, ...] list. 
+_MBEANS = { + "solr-mbeans": [ + "CACHE", + { + "filterCache": { + "class": "org.apache.solr.search.CaffeineCache", + "description": "Caffeine Cache(maxSize=512, initialSize=512)", + "stats": { + "CACHE.searcher.filterCache.hitratio": 0.9, + "CACHE.searcher.filterCache.evictions": 10, + "CACHE.searcher.filterCache.size": 500, + "CACHE.searcher.filterCache.lookups": 1000, + "CACHE.searcher.filterCache.hits": 900, + }, + }, + "queryResultCache": { + "class": "org.apache.solr.search.CaffeineCache", + "description": "Caffeine Cache(maxSize=512, initialSize=512)", + "stats": { + "CACHE.searcher.queryResultCache.hitratio": 0.5756, + "CACHE.searcher.queryResultCache.evictions": 2310, + "CACHE.searcher.queryResultCache.size": 512, + "CACHE.searcher.queryResultCache.lookups": 6649, + "CACHE.searcher.queryResultCache.hits": 3827, + }, + }, + }, + ] +} + + +def test_parse_cache_queryResultCache(): + result = SolrClient.parse_cache(_MBEANS, "queryResultCache") + assert result == { + "hitratio": 0.5756, + "evictions": 2310, + "size": 512, + "lookups": 6649, + "hits": 3827, + } + + +def test_parse_cache_filterCache(): + result = SolrClient.parse_cache(_MBEANS, "filterCache") + assert result == { + "hitratio": 0.9, + "evictions": 10, + "size": 500, + "lookups": 1000, + "hits": 900, + } + + +def test_parse_cache_missing_name(): + assert SolrClient.parse_cache(_MBEANS, "perSegFilter") is None + + +def test_parse_cache_empty_mbeans(): + assert SolrClient.parse_cache({}, "queryResultCache") is None + + +def test_parse_cache_empty_stats(): + """Cache present but no stats entries — all values None, not an error.""" + data = {"solr-mbeans": ["CACHE", {"queryResultCache": {"stats": {}}}]} + result = SolrClient.parse_cache(data, "queryResultCache") + assert result == { + "hitratio": None, + "evictions": None, + "size": None, + "lookups": None, + "hits": None, + } + + +# ------------------------------------------------------------------ # +# parse_jvm # +# 
# ------------------------------------------------------------------ #

def test_parse_jvm():
    payload = {"jvm": {"memory": {"raw": {"used": 500_000_000, "max": 1_000_000_000}}}}
    parsed = SolrClient.parse_jvm(payload)
    # Heap figures pass straight through; the pct is used/max.
    assert parsed["heap_used_bytes"] == 500_000_000
    assert parsed["heap_max_bytes"] == 1_000_000_000
    assert parsed["heap_used_pct"] == 0.5


def test_parse_jvm_missing_data():
    """An empty payload degrades to all-None heap fields rather than raising."""
    parsed = SolrClient.parse_jvm({})
    for field in ("heap_used_bytes", "heap_max_bytes", "heap_used_pct"):
        assert parsed[field] is None


# ------------------------------------------------------------------ #
# parse_os                                                           #
# ------------------------------------------------------------------ #

def test_parse_os():
    payload = {
        "system": {
            "processCpuLoad": 0.25,
            "systemCpuLoad": 0.40,
            "freePhysicalMemorySize": 2_000_000_000,
            "totalPhysicalMemorySize": 8_000_000_000,
        }
    }
    parsed = SolrClient.parse_os(payload)
    expected = {
        "process_cpu_load": 0.25,
        "system_cpu_load": 0.40,
        "free_physical_memory_bytes": 2_000_000_000,
        "total_physical_memory_bytes": 8_000_000_000,
        # (8 GB total - 2 GB free) / 8 GB total
        "physical_memory_used_pct": 0.75,
    }
    assert {key: parsed[key] for key in expected} == expected


def test_parse_os_invalid_memory_free_greater_than_total():
    # Free memory exceeding total is nonsensical, so no pct is computed.
    payload = {
        "system": {
            "freePhysicalMemorySize": 9_000_000_000,
            "totalPhysicalMemorySize": 8_000_000_000,
        }
    }
    assert SolrClient.parse_os(payload)["physical_memory_used_pct"] is None


def test_parse_os_zero_total():
    # Guard against division by zero when total memory is reported as 0.
    payload = {"system": {"freePhysicalMemorySize": 0, "totalPhysicalMemorySize": 0}}
    assert SolrClient.parse_os(payload)["physical_memory_used_pct"] is None


def test_parse_os_missing_data():
    parsed = SolrClient.parse_os({})
    assert parsed["process_cpu_load"] is None
    assert parsed["physical_memory_used_pct"] is None


# ------------------------------------------------------------------ #
# parse_index                                                        #
# ------------------------------------------------------------------ #

# Mock CoreAdmin STATUS payload: one shard replica core with index stats.
_CORES = {
    "status": {
        "name_lookup_shard1_replica_n1": {
            "startTime": "2025-12-19T21:20:02.292Z",
            "index": {
                "numDocs": 89,
                "maxDoc": 89,
                "deletedDocs": 0,
                "version": 42,
                "segmentCount": 3,
                "lastModified": "2025-12-19T21:21:00.000Z",
                "size": "10 MB",
            },
        }
    }
}


def test_parse_index():
    parsed = SolrClient.parse_index(_CORES)
    expected = {
        "startTime": "2025-12-19T21:20:02.292Z",
        "numDocs": 89,
        "maxDoc": 89,
        "deletedDocs": 0,
        "size": "10 MB",
    }
    assert {key: parsed[key] for key in expected} == expected


def test_parse_index_core_not_found():
    # An empty status map means there is no core to report on.
    assert SolrClient.parse_index({"status": {}}) is None


def test_parse_index_empty():
    assert SolrClient.parse_index({}) is None


# ================================================================== #
# tests/test_status.py — /status endpoint tests against the test app #
# ================================================================== #
import logging

from api.server import app
from fastapi.testclient import TestClient

# Turn on debugging for tests.
logging.basicConfig(level=logging.DEBUG)


def test_status():
    response = TestClient(app).get("/status")
    status = response.json()

    assert status['status'] == 'ok'
    assert status['message'] != ''
    for field in ('babel_version', 'babel_version_url', 'biolink_model', 'nameres_version'):
        assert field in status
    assert 'tag' in status['biolink_model']

    solr = status['solr']
    assert solr['version'] != ''
    assert solr['size'] != ''
    assert solr['startTime']

    # We load a specific, known number of test documents into this index.
    assert solr['numDocs'] == 88
    assert solr['maxDoc'] == 88
    assert solr['deletedDocs'] == 0


def test_status_default_excludes_metrics():
    """Default /status omits the expensive metrics — only one Solr call is made."""
    solr = TestClient(app).get("/status").json()['solr']
    assert solr['numDocs'] is not None
    for metric in ('jvm', 'os', 'cache'):
        assert solr[metric] is None


def test_status_full_includes_metrics():
    """?full=true additionally fetches JVM, OS, and cache metrics."""
    solr = TestClient(app).get("/status?full=true").json()['solr']
    assert solr['jvm'] is not None
    assert solr['jvm']['heap_used_bytes'] is not None
    assert solr['os'] is not None
    # assert solr['cache'] is not None -- TODO: figure out why this doesn't work on our little Docker image.