From 4fe38638a2fbe7feeedf720f4805b1816337e9d5 Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Thu, 11 Sep 2025 12:03:37 -0400
Subject: [PATCH 1/8] added networkx

---
 poetry.lock    | 20 +++++++++++++++++++-
 pyproject.toml |  1 +
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/poetry.lock b/poetry.lock
index ba098cc..8224fb5 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -583,6 +583,24 @@ files = [
 [package.dependencies]
 traitlets = "*"
 
+[[package]]
+name = "networkx"
+version = "3.1"
+description = "Python package for creating and manipulating graphs and networks"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "networkx-3.1-py3-none-any.whl", hash = "sha256:4f33f68cb2afcf86f28a45f43efc27a9386b535d567d2127f8f61d51dec58d36"},
+    {file = "networkx-3.1.tar.gz", hash = "sha256:de346335408f84de0eada6ff9fafafff9bcda11f0a0dfaa931133debb146ab61"},
+]
+
+[package.extras]
+default = ["matplotlib (>=3.4)", "numpy (>=1.20)", "pandas (>=1.3)", "scipy (>=1.8)"]
+developer = ["mypy (>=1.1)", "pre-commit (>=3.2)"]
+doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.13)", "sphinx (>=6.1)", "sphinx-gallery (>=0.12)", "texext (>=0.6.7)"]
+extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.10)", "sympy (>=1.10)"]
+test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"]
+
 [[package]]
 name = "numpy"
 version = "1.24.4"
@@ -1370,4 +1388,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.10,<3.12"
-content-hash = "f06e2e5bcc2170425b33b4887b1d42d216812771cbb34c01b3e79a6c962f0524"
+content-hash = "7ef5e6a3bec2bcef8429f74816408f554f0d021da19349481077a67065489833"
diff --git a/pyproject.toml b/pyproject.toml
index 5f63a93..7600f55 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,7 @@ ipytree = "^0.2.2"
 ipywidgets = "^8.1.5"
 jinja2 = "3.1.6"
 tqdm = "4.67.1"
+networkx = "3.1"
 
 [tool.poetry.dev-dependencies]
 pytest = "^8.3.3"

From 372f9cbe0e28366d4d66c08274cbd683df3c0e93 Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Thu, 11 Sep 2025 23:17:20 -0400
Subject: [PATCH 2/8] intermediate work: add ConceptHierarchy, drop include_hierarchy

---
 biasanalyzer/api.py                           |  23 ++++
 biasanalyzer/cohort.py                        |  22 ++-
 biasanalyzer/cohort_query_builder.py          |   7 +-
 biasanalyzer/concept.py                       | 125 ++++++++++++++++++
 biasanalyzer/database.py                      |   5 +-
 .../cohort_concept_prevalence_query.sql.j2    |   1 -
 .../BiasAnalyzerCohortConceptTutorial.ipynb   |   9 +-
 .../test_hierarchical_prevalence.py           |  51 +++----
 8 files changed, 196 insertions(+), 47 deletions(-)
 create mode 100644 biasanalyzer/concept.py

diff --git a/biasanalyzer/api.py b/biasanalyzer/api.py
index d09163f..3832617 100644
--- a/biasanalyzer/api.py
+++ b/biasanalyzer/api.py
@@ -1,5 +1,6 @@
 import time
 from pydantic import ValidationError
+from typing import List
 from biasanalyzer.database import OMOPCDMDatabase, BiasDatabase
 from biasanalyzer.cohort import CohortAction
 from biasanalyzer.config import load_config
@@ -158,6 +159,28 @@ def create_cohort(self, cohort_name: str, cohort_desc: str, query_or_yaml_file:
             return None
 
 
+    def get_cohorts_concept_stats(self, cohorts: List[int],
+                                  concept_type: str='condition_occurrence',
+                                  filter_count: int=0,
+                                  vocab=None):
+        """
+        compute concept statistics such as concept prevalence in a union of multiple cohorts
+        :param cohorts: list of cohort ids
+        :param concept_type: concept type to consider with default "condition_occurrence"
+        :param filter_count: filtering out those concepts with less than this count. Default is 0 meaning no filtering
+        :param vocab: vocabulary to consider with default None meaning using the default vocabulary corresponding to
+        the domain instead as defined in DOMAIN_MAPPING variable in models.py
+        :return: ConceptHierarchy object
+        """
+        c_action = self._set_cohort_action()
+        if c_action:
+            return c_action.get_cohorts_concept_stats(cohorts, concept_type=concept_type, filter_count=filter_count,
+                                              vocab=vocab)
+        else:
+            notify_users('failed to get concept prevalence stats for the union of cohorts')
+            return None
+
+
     def compare_cohorts(self, cohort_id1, cohort_id2):
         c_action = self._set_cohort_action()
         if c_action:
diff --git a/biasanalyzer/cohort.py b/biasanalyzer/cohort.py
index 83aa2e8..7f0cf1c 100644
--- a/biasanalyzer/cohort.py
+++ b/biasanalyzer/cohort.py
@@ -4,11 +4,13 @@
 from datetime import datetime
 from tqdm.auto import tqdm
 from pydantic import ValidationError
+from typing import List
 from biasanalyzer.models import CohortDefinition
 from biasanalyzer.config import load_cohort_creation_config
 from biasanalyzer.database import OMOPCDMDatabase, BiasDatabase
 from biasanalyzer.utils import hellinger_distance, clean_string, notify_users
 from biasanalyzer.cohort_query_builder import CohortQueryBuilder
+from biasanalyzer.concept import build_concept_hierarchy_from_results
 
 
 class CohortData:
@@ -51,7 +53,7 @@ def get_distributions(self, variable):
         return self.bias_db.get_cohort_distributions(self.cohort_id, variable)
 
     def get_concept_stats(self, concept_type='condition_occurrence', filter_count=0,
-                          vocab=None, include_hierarchy=False):
+                          vocab=None):
         """
         Get cohort concept statistics such as concept prevalence
         """
@@ -59,9 +61,8 @@ def get_concept_stats(self, concept_type='condition_occurrence', filter_count=0,
                                                              self.query_builder,
                                                              concept_type=concept_type,
                                                              filter_count=filter_count,
-                                                             vocab=vocab,
-                                                             include_hierarchy=include_hierarchy)
-        return cohort_stats
+                                                             vocab=vocab)
+        return build_concept_hierarchy_from_results(cohort_stats[concept_type], self.cohort_id)
 
 
     def __del__(self):
@@ -148,6 +149,19 @@ def create_cohort(self, cohort_name: str, description: str, query_or_yaml_file:
                 omop_session.close()
             return None
 
+    def get_cohorts_concept_stats(self, cohorts: List[int],
+                          concept_type: str = 'condition_occurrence',
+                          filter_count: int = 0,
+                          vocab=None):
+        hierarchies = [self.bias_db.get_cohort_concept_stats(c, self._query_builder,
+                                                             concept_type=concept_type,
+                                                             filter_count=filter_count,
+                                                             vocab=vocab) for c in cohorts]
+        union_h = hierarchies[0]
+        for h in hierarchies[1:]:
+            union_h = union_h.union(h)
+        return union_h
+
     def compare_cohorts(self, cohort_id_1: int, cohort_id_2: int):
         """
         Compare the distributions of two cohorts in BiasDatabase.
diff --git a/biasanalyzer/cohort_query_builder.py b/biasanalyzer/cohort_query_builder.py
index 79fda66..4e3ca46 100644
--- a/biasanalyzer/cohort_query_builder.py
+++ b/biasanalyzer/cohort_query_builder.py
@@ -71,15 +71,13 @@ def build_query_cohort_creation(self, cohort_config: dict) -> str:
             temporal_events=temporal_events
         )
 
-    def build_concept_prevalence_query(self, concept_type: str, cid: int, filter_count: int, vocab: str,
-                                       include_hierarchy: bool) -> str:
+    def build_concept_prevalence_query(self, concept_type: str, cid: int, filter_count: int, vocab: str) -> str:
         """
         Build a SQL query for concept prevalence statistics for a given domain and cohort.
         :param concept_type: Domain from DOMAIN_MAPPING (e.g., 'condition_occurrence').
         :param cid: Cohort definition ID.
         :param filter_count: Minimum count threshold for concepts with 0 meaning no filtering
         :param vocab: Vocabulary ID. Defaults to domain-specific vocabulary as defined in DOMAIN_MAPPING if set to None
-        :param include_hierarchy: Include concept hierarchy in results or not
         :return: The rendered SQL query
         :raises ValueError if concept_type is not invalid
         """
@@ -100,8 +98,7 @@ def build_concept_prevalence_query(self, concept_type: str, cid: int, filter_cou
             start_date_column=DOMAIN_MAPPING[concept_type]["start_date"],
             cid=cid,
             filter_count=filter_count,
-            vocab=effective_vocab,
-            include_hierarchy=include_hierarchy
+            vocab=effective_vocab
         )
 
     @staticmethod
diff --git a/biasanalyzer/concept.py b/biasanalyzer/concept.py
new file mode 100644
index 0000000..e97e5b0
--- /dev/null
+++ b/biasanalyzer/concept.py
@@ -0,0 +1,125 @@
+from collections import defaultdict
+import networkx as nx
+from typing import List, Any, Dict
+
+
+class ConceptNode:
+    def __init__(self, concept_id: int, graph: "ConceptHierarchy"):
+        self.id = concept_id
+        self._ch = graph  # reference back to ConceptHierarchy
+
+    @property
+    def name(self) -> str:
+        return self._ch.graph.nodes[self.id]["concept_name"]
+
+    @property
+    def code(self) -> str:
+        return self._ch.graph.nodes[self.id]["concept_code"]
+
+    def get_metrics(self, cohort_id: int) -> dict:
+        metrics = self._ch.graph.nodes[self.id].get("metrics", {})
+        return metrics.get(cohort_id, {})
+
+    def get_union_metrics(self) -> dict:
+        # simple aggregation example
+        metrics = self._ch.graph.nodes[self.id].get("metrics", {})
+        counts = [m["count"] for m in metrics.values()]
+        prevalences = [m["prevalence"] for m in metrics.values()]
+        return {
+            "count": sum(counts),
+            "prevalence": sum(prevalences) / len(prevalences) if prevalences else 0.0,
+        }
+
+    def parents(self) -> List["ConceptNode"]:
+        return [ConceptNode(p, self._ch) for p in self._ch.graph.predecessors(self.id)]
+
+    def children(self) -> List["ConceptNode"]:
+        return [ConceptNode(c, self._ch) for c in self._ch.graph.successors(self.id)]
+
+
+class ConceptHierarchy:
+    def __init__(self, input_g: nx.DiGraph):
+        self.graph = input_g
+
+    def get_node(self, concept_id: int):
+        if concept_id in self.graph.nodes:
+            return ConceptNode(concept_id, self)
+        return None
+
+    def get_root_nodes(self) -> List[ConceptNode]:
+        roots = [n for n in self.graph.nodes if self.graph.in_degree(n) == 0]
+        return [ConceptNode(r, self) for r in roots]
+
+    def subtree(self, concept_id: int):
+        """Yield all nodes in the subtree rooted at concept_id."""
+        descendants = nx.descendants(self.graph, concept_id) | {concept_id}
+        for d in descendants:
+            yield ConceptNode(d, self)
+
+    def union(self, other: "ConceptHierarchy") -> "ConceptHierarchy":
+        """Merge two hierarchies into a new one, aggregating metrics."""
+        composed_graph = nx.compose(self.graph, other.graph)
+        # merge node metrics
+        for n in composed_graph.nodes:
+            metrics_self = self.graph.nodes.get(n, {}).get("metrics", {})
+            metrics_other = other.graph.nodes.get(n, {}).get("metrics", {})
+            merged = {**metrics_self, **metrics_other}
+            composed_graph.nodes[n]["metrics"] = merged
+        return ConceptHierarchy(composed_graph)
+
+    def to_dict(self) -> dict:
+        roots = self.get_root_nodes()
+        return {"roots": [self._node_to_dict(r) for r in roots]}
+
+    def _node_to_dict(self, node: ConceptNode) -> dict:
+        data = {
+            "concept_id": node.id,
+            "concept_name": node.name,
+            "concept_code": node.code,
+            "metrics": {
+                "union": node.get_union_metrics(),
+                "cohorts": self.graph.nodes[node.id].get("metrics", {}),
+                },
+            "children": [
+                self._node_to_dict(c) for c in node.children()
+                ]
+        }
+        return data
+
+
+def build_concept_hierarchy_from_results(results, cohort_id: int) -> ConceptHierarchy:
+    """
+    build concept hierarchy tree managed by networkx from list of dicts returned from the concept prevalence SQL
+    :param results: list of dicts from prevalence SQL
+    :param cohort_id: cohort id to get concept hierarchy for
+    :return: ConceptHierarchy object
+    """
+    # node metrics
+    metrics_by_concept = defaultdict(lambda: {"count": 0, "prevalence": 0.0})
+    node_metadata = {}
+
+    for row in results:
+        cid = row["descendant_concept_id"]
+        if cid not in node_metadata:
+            node_metadata[cid] = {
+                "concept_name": row["concept_name"],
+                "concept_code": row["concept_code"],
+            }
+            metrics_by_concept[cid] = {
+                "count": row["count_in_cohort"],
+                "prevalence": row["prevalence"],
+            }
+
+    graph = nx.DiGraph()
+    # add nodes with metadata + metrics
+    for cid, meta in node_metadata.items():
+        graph.add_node(cid, **meta, metrics={cohort_id: metrics_by_concept[cid]})
+
+    # add parent-child edges
+    for row in results:
+        anc = row["ancestor_concept_id"]
+        desc = row["descendant_concept_id"]
+        if anc and desc and anc != desc:
+            graph.add_edge(anc, desc)
+
+    return ConceptHierarchy(graph)
diff --git a/biasanalyzer/database.py b/biasanalyzer/database.py
index 03efa6e..0f9b3f4 100644
--- a/biasanalyzer/database.py
+++ b/biasanalyzer/database.py
@@ -228,8 +228,7 @@ def get_cohort_distributions(self, cohort_definition_id: int, variable: str):
             return None
 
     def get_cohort_concept_stats(self, cohort_definition_id: int, qry_builder,
-                                 concept_type='condition_occurrence', filter_count=0, vocab=None,
-                                 include_hierarchy=False):
+                                 concept_type='condition_occurrence', filter_count=0, vocab=None):
         """
         Get concept statistics for a cohort from the cohort table.
         """
@@ -249,7 +248,7 @@ def get_cohort_concept_stats(self, cohort_definition_id: int, qry_builder,
                         return concept_stats
 
                 query = qry_builder.build_concept_prevalence_query(concept_type, cohort_definition_id,
-                                                                   filter_count, vocab, include_hierarchy)
+                                                                   filter_count, vocab)
                 concept_stats[concept_type] = self._execute_query(query)
                 cs_df = pd.DataFrame(concept_stats[concept_type])
                 # Combine concept_name and prevalence into a "details" column
diff --git a/biasanalyzer/sql_templates/cohort_concept_prevalence_query.sql.j2 b/biasanalyzer/sql_templates/cohort_concept_prevalence_query.sql.j2
index ff3be54..b832cd8 100644
--- a/biasanalyzer/sql_templates/cohort_concept_prevalence_query.sql.j2
+++ b/biasanalyzer/sql_templates/cohort_concept_prevalence_query.sql.j2
@@ -56,6 +56,5 @@ JOIN
     concept c ON ac.concept_id = c.concept_id
 WHERE
     ac.count_in_cohort > {{ filter_count }}
-    AND ({{ include_hierarchy }} = True OR ch.ancestor_concept_id = ch.descendant_concept_id)
 ORDER BY
     prevalence DESC;
\ No newline at end of file
diff --git a/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb b/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb
index 2b284eb..166d228 100644
--- a/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb
+++ b/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb
@@ -157,11 +157,10 @@
     "---\n",
     "\n",
     "### Exploring cohort concept prevalence\n",
-    "You can retrieve concept prevalence statistics for a cohort using the `get_concept_stats(concept_type='condition_occurrence', filter_count=0, vocab=None, include_hierarchy=False)` method on the `cohort_data` object. Each input argument to this method has a default value, so you can call the method without specifying all parameters.\n",
+    "You can retrieve concept prevalence statistics for a cohort using the `get_concept_stats(concept_type='condition_occurrence', filter_count=0, vocab=None)` method on the `cohort_data` object. Each input argument to this method has a default value, so you can call the method without specifying all parameters.\n",
     "- The `concept_type` input argument specifies the OMOP domain to analyze. It must be one of the OMOP domain names: `condition_occurrence`, `drug_exposure`, `procedure_occurrence`, `visit_occurrence`, `measurement`, or `observation`.\n",
     "- The `vocab` input argument specifies the OMOP vocabulary ID to filter concepts by. If set to `None`, a default vocabulary is used based on the domain: `RxNorm` for `drug_exposure`, `LOINC` for `measurement`, and `SNOMED` for all other domains.\n",
     "- The `filter_count` input argument filters out concepts with fewer than this number of patients in the cohort. Set it to `0` to include all without filtering.\n",
-    "- The `include_hierarchy` input argument specifies whether to include concept hierarchical relationship. If set to `True`, ancestor concepts using the OMOP concept hierarchy are included when calculating prevalence.\n",
     "This method helps identify the most prevalent clinical concepts in your cohort, which can reveal patterns or potential sources of selection bias in the cohort data.\n",
     "\n",
     "**Cohort condition occurrence concept prevalence**: \n",
@@ -854,11 +853,11 @@
     "———————————————\n",
     "\n",
     "**Cohort drug exposure concept prevalence**: \n",
-    "The code block below demonstrates how to use `get_concept_stats(concept_type='drug_exposure', filter_count=500, include_hierarchy=True)` method to retrieve concept prevalence for the `drug_exposure` domain. By default, this uses the `RxNorm` vocabulary. Concepts with fewer than 500 patients are excluded, and hierarchical relationships are included in the results. The method returns a dictionary where the **key** is the `concept_type` (in this case, `drug_exposure`) and the **value** is a list of concept dictionaries. Each concept dictionary in the list contains the following fields: `concept_name`, `concept_code`, `count_in_cohort`, `prevalence`, `ancestor_concept_id`, and `descendant_concept_id`. These values allow you to explore which clinical concepts are most prevalent in your cohort and suppoert deeper investigations into potential sources of selection bias.\n",
+    "The code block below demonstrates how to use `get_concept_stats(concept_type='drug_exposure', filter_count=500)` method to retrieve concept prevalence for the `drug_exposure` domain. By default, this uses the `RxNorm` vocabulary. Concepts with fewer than 500 patients are excluded, and hierarchical relationships are included in the results. The method returns a dictionary where the **key** is the `concept_type` (in this case, `drug_exposure`) and the **value** is a list of concept dictionaries. Each concept dictionary in the list contains the following fields: `concept_name`, `concept_code`, `count_in_cohort`, `prevalence`, `ancestor_concept_id`, and `descendant_concept_id`. These values allow you to explore which clinical concepts are most prevalent in your cohort and suppoert deeper investigations into potential sources of selection bias.\n",
     "\n",
     "**Note**: Prevalence computation may take some time, especially for large cohorts or when hierarchical relationships are included. A progress bar will appear to indicate the progress of the computation. \n",
     "\n",
-    "When `include_hierarchy=True`, the output also includes a text-based, indented representation of the concept hierarchy. Each concept is displayed along with its **concept code**, **patient count**, and **prevalence** in parentheses, providing a quick summary of both the structure and frequency of clinical concepts in the cohort."
+    "The output also includes a text-based, indented representation of the concept hierarchy. Each concept is displayed along with its **concept code**, **patient count**, and **prevalence** in parentheses, providing a quick summary of both the structure and frequency of clinical concepts in the cohort."
    ]
   },
   {
@@ -981,7 +980,7 @@
    ],
    "source": [
     "t1 = time.time()\n",
-    "cohort_de_concepts = cohort_data.get_concept_stats(concept_type='drug_exposure', filter_count=500, include_hierarchy=True)\n",
+    "cohort_de_concepts = cohort_data.get_concept_stats(concept_type='drug_exposure', filter_count=500)\n",
     "print(pd.DataFrame(cohort_de_concepts[\"drug_exposure\"]))\n",
     "print(f'the time taken to get cohort concept stats for drug_exposure is {time.time() - t1}s')"
    ]
diff --git a/tests/query_based/test_hierarchical_prevalence.py b/tests/query_based/test_hierarchical_prevalence.py
index e0ad55c..8300f1c 100644
--- a/tests/query_based/test_hierarchical_prevalence.py
+++ b/tests/query_based/test_hierarchical_prevalence.py
@@ -35,32 +35,25 @@ def test_cohort_concept_hierarchical_prevalence(test_db, caplog):
     concept_stats = cohort.get_concept_stats(concept_type='procedure_occurrence')
     assert concept_stats == {'procedure_occurrence': []}
 
-    include_hierarchy_flags = [True, False]
-    for flag in include_hierarchy_flags:
-        concept_stats = cohort.get_concept_stats(vocab='ICD10CM', include_hierarchy=flag)
-        assert concept_stats is not None, "Failed to fetch concept stats"
-        assert len(concept_stats) > 0, "No concept stats returned"
-        # check returned data with different include_hierarchy flag
-        if flag is True:
-            assert not all(s['ancestor_concept_id'] == s['descendant_concept_id']
-                           for s in concept_stats['condition_occurrence']), \
-                "Some ancestor_concept_id and descendant_concept_id should differ when include_hierarchy is True"
-        else:
-            assert all(s['ancestor_concept_id'] == s['descendant_concept_id'] for s in
-                       concept_stats['condition_occurrence']), \
-                "ancestor_concept_id and descendant_concept_id must be equal when include_hierarchy is False"
-        # Check concept prevalence for overlaps
-        diabetes_prevalence = next((c for c in concept_stats['condition_occurrence']
-                                    if c['ancestor_concept_id'] == 1 and c['descendant_concept_id'] == 1), None)
-        assert diabetes_prevalence is not None, "Parent diabetes concept prevalence missing"
-        type1_prevalence = next((c for c in concept_stats['condition_occurrence']
-                                    if c['ancestor_concept_id'] == 2 and c['descendant_concept_id'] == 2), None)
-        assert type1_prevalence is not None, "Child type 1 diabetes concept prevalence missing"
-        type2_prevalence = next((c for c in concept_stats['condition_occurrence']
-                                 if c['ancestor_concept_id'] == 3 and c['descendant_concept_id'] == 3), None)
-        assert type2_prevalence is not None, "Child type 2 diabetes concept prevalence missing"
-        print(f"type1_prevalence: {type1_prevalence['prevalence']}, type2_prevalence: {type2_prevalence['prevalence']}, "
-              f"diabetes_prevalence: {diabetes_prevalence['prevalence']}")
-        assert diabetes_prevalence['prevalence'] < type1_prevalence['prevalence'] + type2_prevalence['prevalence'], \
-            ("Parent diabetes concept prevalence does not reflect overlap between type 1 and type 2 diabetes "
-             "children concept prevalence")
+    concept_stats = cohort.get_concept_stats(vocab='ICD10CM')
+    assert concept_stats is not None, "Failed to fetch concept stats"
+    assert len(concept_stats) > 0, "No concept stats returned"
+    # check returned data
+    assert not all(s['ancestor_concept_id'] == s['descendant_concept_id']
+                   for s in concept_stats['condition_occurrence']), \
+        "Some ancestor_concept_id and descendant_concept_id should differ"
+    # Check concept prevalence for overlaps
+    diabetes_prevalence = next((c for c in concept_stats['condition_occurrence']
+                                if c['ancestor_concept_id'] == 1 and c['descendant_concept_id'] == 1), None)
+    assert diabetes_prevalence is not None, "Parent diabetes concept prevalence missing"
+    type1_prevalence = next((c for c in concept_stats['condition_occurrence']
+                                if c['ancestor_concept_id'] == 2 and c['descendant_concept_id'] == 2), None)
+    assert type1_prevalence is not None, "Child type 1 diabetes concept prevalence missing"
+    type2_prevalence = next((c for c in concept_stats['condition_occurrence']
+                             if c['ancestor_concept_id'] == 3 and c['descendant_concept_id'] == 3), None)
+    assert type2_prevalence is not None, "Child type 2 diabetes concept prevalence missing"
+    print(f"type1_prevalence: {type1_prevalence['prevalence']}, type2_prevalence: {type2_prevalence['prevalence']}, "
+          f"diabetes_prevalence: {diabetes_prevalence['prevalence']}")
+    assert diabetes_prevalence['prevalence'] < type1_prevalence['prevalence'] + type2_prevalence['prevalence'], \
+        ("Parent diabetes concept prevalence does not reflect overlap between type 1 and type 2 diabetes "
+         "children concept prevalence")

From bf447b78cbc7c4f128ec155971b170b5015066fc Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Fri, 12 Sep 2025 20:27:13 -0400
Subject: [PATCH 3/8] get single cohort concept stats endpoint working

---
 biasanalyzer/cohort.py      |   4 +-
 biasanalyzer/concept.py     | 213 +++++++++++++++++++++++-------------
 biasanalyzer/module_test.py |  35 ++++--
 3 files changed, 168 insertions(+), 84 deletions(-)

diff --git a/biasanalyzer/cohort.py b/biasanalyzer/cohort.py
index 7f0cf1c..e207fb6 100644
--- a/biasanalyzer/cohort.py
+++ b/biasanalyzer/cohort.py
@@ -10,7 +10,7 @@
 from biasanalyzer.database import OMOPCDMDatabase, BiasDatabase
 from biasanalyzer.utils import hellinger_distance, clean_string, notify_users
 from biasanalyzer.cohort_query_builder import CohortQueryBuilder
-from biasanalyzer.concept import build_concept_hierarchy_from_results
+from biasanalyzer.concept import ConceptHierarchy
 
 
 class CohortData:
@@ -62,7 +62,7 @@ def get_concept_stats(self, concept_type='condition_occurrence', filter_count=0,
                                                              concept_type=concept_type,
                                                              filter_count=filter_count,
                                                              vocab=vocab)
-        return build_concept_hierarchy_from_results(cohort_stats[concept_type], self.cohort_id)
+        return ConceptHierarchy.build_concept_hierarchy_from_results(self.cohort_id, cohort_stats[concept_type])
 
 
     def __del__(self):
diff --git a/biasanalyzer/concept.py b/biasanalyzer/concept.py
index e97e5b0..cd2953f 100644
--- a/biasanalyzer/concept.py
+++ b/biasanalyzer/concept.py
@@ -1,12 +1,13 @@
 from collections import defaultdict
 import networkx as nx
-from typing import List, Any, Dict
+from typing import List, Optional
+from _collections import deque
 
 
 class ConceptNode:
-    def __init__(self, concept_id: int, graph: "ConceptHierarchy"):
+    def __init__(self, concept_id: int, ch: "ConceptHierarchy"):
         self.id = concept_id
-        self._ch = graph  # reference back to ConceptHierarchy
+        self._ch = ch  # reference back to ConceptHierarchy
 
     @property
     def name(self) -> str:
@@ -36,25 +37,132 @@ def parents(self) -> List["ConceptNode"]:
     def children(self) -> List["ConceptNode"]:
         return [ConceptNode(c, self._ch) for c in self._ch.graph.successors(self.id)]
 
+    def to_dict(self, include_children: bool = True) -> dict:
+        """
+        Serialize this node into a dict. Optionally include nested children.
+        """
+        data = {
+            "concept_id": self.id,
+            "concept_name": self.name,
+            "concept_code": self.code,
+            "metrics": {
+                "union": self.get_union_metrics(),
+                "cohorts": self._ch.graph.nodes[self.id].get("metrics", {}),
+            },
+            "parent_ids": list(self._ch.graph.predecessors(self.id)),
+        }
+        if include_children:
+            data["children"] = [c.to_dict(include_children=True) for c in self.children()]
+        return data
 
-class ConceptHierarchy:
-    def __init__(self, input_g: nx.DiGraph):
-        self.graph = input_g
 
-    def get_node(self, concept_id: int):
-        if concept_id in self.graph.nodes:
-            return ConceptNode(concept_id, self)
-        return None
+class ConceptHierarchy:
+    _graph_cache = {}
 
-    def get_root_nodes(self) -> List[ConceptNode]:
+    def __init__(self, input_g: nx.DiGraph, cohort_id: int):
+        self.graph = input_g
+        self.cohort_id = cohort_id
+
+    @classmethod
+    def build_concept_hierarchy_from_results(cls, cohort_id: int, results: List[dict]):
+        """
+        build concept hierarchy tree managed by networkx from list of dicts returned from the concept prevalence SQL
+        with cache management
+        :param results: list of dicts from prevalence SQL
+        :param cohort_id: cohort id to get concept hierarchy for
+        :return: ConceptHierarchy object
+        """
+        if cohort_id in cls._graph_cache:
+            return cls._graph_cache[cohort_id]
+
+        # node metrics
+        metrics_by_concept = defaultdict(lambda: {"count": 0, "prevalence": 0.0})
+        node_metadata = {}
+
+        for row in results:
+            cid = row["descendant_concept_id"]
+            if cid not in node_metadata:
+                node_metadata[cid] = {
+                    "concept_name": row["concept_name"],
+                    "concept_code": row["concept_code"],
+                }
+                metrics_by_concept[cid] = {
+                    "count": row["count_in_cohort"],
+                    "prevalence": row["prevalence"],
+                }
+
+        graph = nx.DiGraph()
+        # add nodes with metadata + metrics
+        for cid, meta in node_metadata.items():
+            graph.add_node(cid, **meta, metrics={cohort_id: metrics_by_concept[cid]})
+
+        # add parent-child edges
+        for row in results:
+            anc = row["ancestor_concept_id"]
+            desc = row["descendant_concept_id"]
+            if anc and desc and anc != desc:
+                graph.add_edge(anc, desc)
+
+        hierarchy = ConceptHierarchy(graph, cohort_id)
+        cls._graph_cache[cohort_id] = hierarchy
+        return hierarchy
+
+    @classmethod
+    def clear_cache(cls):
+        cls._graph_cache.clear()
+
+    def get_node(self, concept_id: int, serialization: bool = False):
+        concept_node = ConceptNode(concept_id, self) if concept_id in self.graph.nodes else None
+        return concept_node.to_dict(include_children=False) if serialization else concept_node
+
+    def get_root_nodes(self, serialization: bool = False) -> List:
         roots = [n for n in self.graph.nodes if self.graph.in_degree(n) == 0]
-        return [ConceptNode(r, self) for r in roots]
-
-    def subtree(self, concept_id: int):
-        """Yield all nodes in the subtree rooted at concept_id."""
-        descendants = nx.descendants(self.graph, concept_id) | {concept_id}
-        for d in descendants:
-            yield ConceptNode(d, self)
+        root_nodes = [ConceptNode(r, self) for r in roots]
+        if serialization:
+            return [rn.to_dict(include_children=False)  for rn in root_nodes]
+        else:
+            return root_nodes
+
+    def get_leaf_nodes(self, serialization: bool = False) -> List:
+        leaves = [n for n in self.graph.nodes if self.graph.out_degree(n) == 0]
+        leave_nodes = [ConceptNode(l, self) for l in leaves]
+        if serialization:
+            return [rl.to_dict(include_children=False) for rl in leave_nodes]
+        else:
+            return leave_nodes
+
+    def iter_nodes(self, root_id: int, order: str = "bfs", include_root: bool = True,
+                   serialization: bool = False):
+        """Iterate nodes in BFS or DFS order from a given root."""
+        if root_id not in self.graph:
+            raise ValueError(f"Root node {root_id} not found in graph.")
+
+        if order == "bfs":
+            queue = deque([root_id])
+            while queue:
+                node = queue.popleft()
+                if not include_root and node == root_id:
+                    queue.extend(self.graph.successors(node))
+                    continue
+                if serialization:
+                    yield ConceptNode(node, self).to_dict(include_children=False)
+                else:
+                    yield ConceptNode(node, self)
+                queue.extend(self.graph.successors(node))
+        elif order == "dfs":
+            stack = [root_id]
+            while stack:
+                node = stack.pop()
+                if not include_root and node == root_id:
+                    stack.extend(self.graph.successors(node))
+                    continue
+                if serialization:
+                    yield ConceptNode(node, self).to_dict(include_children=False)
+                else:
+                    yield ConceptNode(node, self)
+                stack.extend(self.graph.successors(node))
+        else:
+            raise ValueError("order must be 'bfs' or 'dfs'")
 
     def union(self, other: "ConceptHierarchy") -> "ConceptHierarchy":
         """Merge two hierarchies into a new one, aggregating metrics."""
@@ -67,59 +175,16 @@ def union(self, other: "ConceptHierarchy") -> "ConceptHierarchy":
             composed_graph.nodes[n]["metrics"] = merged
         return ConceptHierarchy(composed_graph)
 
-    def to_dict(self) -> dict:
-        roots = self.get_root_nodes()
-        return {"roots": [self._node_to_dict(r) for r in roots]}
-
-    def _node_to_dict(self, node: ConceptNode) -> dict:
-        data = {
-            "concept_id": node.id,
-            "concept_name": node.name,
-            "concept_code": node.code,
-            "metrics": {
-                "union": node.get_union_metrics(),
-                "cohorts": self.graph.nodes[node.id].get("metrics", {}),
-                },
-            "children": [
-                self._node_to_dict(c) for c in node.children()
-                ]
-        }
-        return data
-
-
-def build_concept_hierarchy_from_results(results, cohort_id: int) -> ConceptHierarchy:
-    """
-    build concept hierarchy tree managed by networkx from list of dicts returned from the concept prevalence SQL
-    :param results: list of dicts from prevalence SQL
-    :param cohort_id: cohort id to get concept hierarchy for
-    :return: ConceptHierarchy object
-    """
-    # node metrics
-    metrics_by_concept = defaultdict(lambda: {"count": 0, "prevalence": 0.0})
-    node_metadata = {}
-
-    for row in results:
-        cid = row["descendant_concept_id"]
-        if cid not in node_metadata:
-            node_metadata[cid] = {
-                "concept_name": row["concept_name"],
-                "concept_code": row["concept_code"],
-            }
-            metrics_by_concept[cid] = {
-                "count": row["count_in_cohort"],
-                "prevalence": row["prevalence"],
-            }
-
-    graph = nx.DiGraph()
-    # add nodes with metadata + metrics
-    for cid, meta in node_metadata.items():
-        graph.add_node(cid, **meta, metrics={cohort_id: metrics_by_concept[cid]})
-
-    # add parent-child edges
-    for row in results:
-        anc = row["ancestor_concept_id"]
-        desc = row["descendant_concept_id"]
-        if anc and desc and anc != desc:
-            graph.add_edge(anc, desc)
-
-    return ConceptHierarchy(graph)
+    def to_dict(self, root_id: Optional[int] = None) -> dict:
+        """
+        Convert the concept hierarchy, or the sub-hierarchy under one concept, to a nested dict.
+        :param root_id: concept_id to use as the single root; if None, all root nodes are included.
+        :return: dict with one key, "hierarchy", holding a list of nested node dicts
+        :raises ValueError: if root_id is given but is not a node in the graph
+        """
+        if root_id is not None:
+            if root_id not in self.graph:
+                raise ValueError(f"Input concept id {root_id} not found in the concept hierarchy graph")
+            return {"hierarchy": [ConceptNode(root_id, self).to_dict()]}
+
+        return {"hierarchy": [r.to_dict() for r in self.get_root_nodes()]}
diff --git a/biasanalyzer/module_test.py b/biasanalyzer/module_test.py
index 1c4de4e..13f0a88 100644
--- a/biasanalyzer/module_test.py
+++ b/biasanalyzer/module_test.py
@@ -1,3 +1,4 @@
+import pprint
 from biasanalyzer.api import BIAS
 import time
 import os
@@ -8,7 +9,10 @@ def cohort_creation_template_test(bias_obj):
     cohort_data = bias_obj.create_cohort('COVID-19 patients', 'COVID-19 patients',
                                          os.path.join(os.path.dirname(__file__), '..', 'tests', 'assets',
                                                       'cohort_creation',
-                                                      'test_cohort_creation_condition_occurrence_config.yaml'),
+                                                      # 'extras',
+                                                      # 'covid_example3',
+                                                      # 'cohort_creation_config_baseline_example3.yaml'),
+                                                      'test_cohort_creation_condition_occurrence_config_study.yaml'),
                                          'system')
     if cohort_data:
         md = cohort_data.metadata
@@ -33,7 +37,7 @@ def condition_cohort_test(bias_obj):
                              'WHERE c.condition_concept_id = 37311061 '
                              'AND p.gender_concept_id = 8532 AND p.year_of_birth > 2000')
     cohort_data = bias_obj.create_cohort('COVID-19 patients', 'COVID-19 patients',
-                                     baseline_cohort_query, 'system')
+                                         baseline_cohort_query, 'system')
     if cohort_data:
         md = cohort_data.metadata
         print(f'cohort_definition: {md}')
@@ -45,12 +49,27 @@ def condition_cohort_test(bias_obj):
         print(f'the cohort ethnicity stats: {cohort_data.get_stats("ethnicity")}')
         print(f'the cohort age distributions: {cohort_data.get_distributions("age")}')
         t1 = time.time()
-        cohort_concepts = cohort_data.get_concept_stats(concept_type='condition_occurrence', filter_count=5000)
-        # print('the cohort concept condition occurrence stats:')
-        # print(pd.DataFrame(cohort_concepts["condition_occurrence"]))
-        cohort_de_concepts = cohort_data.get_concept_stats(concept_type='drug_exposure', filter_count=500)
-        # print(f'the cohort concept drug exposure stats: \n{pd.DataFrame(cohort_de_concepts["drug_exposure"])}')
-        print(f'the time taken to get cohort concept stats is {time.time() - t1}s')
+        cohort_concept_hierarchy = cohort_data.get_concept_stats(concept_type='condition_occurrence', filter_count=5000)
+        concept_node = cohort_concept_hierarchy.get_node(concept_id=37311061)
+        print(f'concept_node 37311061 metric: {concept_node.get_metrics(md["id"])}')
+
+        # Print the root node
+        root_nodes = cohort_concept_hierarchy.get_root_nodes()
+        root = [(n.name, n.code, n.get_metrics(md["id"])) for n in root_nodes]
+        leave_nodes = cohort_concept_hierarchy.get_leaf_nodes()
+        leaves = [(n.name, n.code, n.get_metrics(md["id"])) for n in leave_nodes]
+        print(f"Root: {root}", flush=True)
+        print(f"Leaves: {leaves}", flush=True)
+        for node in cohort_concept_hierarchy.iter_nodes(root_nodes[0].id, serialization=True):
+            print(node)
+
+        hier_dict = cohort_concept_hierarchy.to_dict()
+        pprint.pprint(hier_dict, indent=2)
+
+
+        cohort_de_concept_hierarchy = cohort_data.get_concept_stats(concept_type='drug_exposure', filter_count=500)
+        de_hier_dict = cohort_de_concept_hierarchy.to_dict()
+        pprint.pprint(de_hier_dict, indent=2)
         compare_stats = bias_obj.compare_cohorts(cohort_data.metadata['id'], cohort_data.metadata['id'])
         print(f'compare_stats: {compare_stats}')
     return

From f9d8d583b922482997a72a3dacdc7f2f8b6a7f3e Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Sat, 13 Sep 2025 17:23:02 -0400
Subject: [PATCH 4/8] get single cohort concept stats working

---
 biasanalyzer/api.py                           |   3 +
 biasanalyzer/cohort.py                        |  19 +-
 biasanalyzer/concept.py                       |  61 +++---
 biasanalyzer/database.py                      |  16 +-
 .../test_hierarchical_prevalence.py           | 188 ++++++++++++++++--
 tests/test_database.py                        |  12 +-
 6 files changed, 236 insertions(+), 63 deletions(-)

diff --git a/biasanalyzer/api.py b/biasanalyzer/api.py
index 3832617..df009ac 100644
--- a/biasanalyzer/api.py
+++ b/biasanalyzer/api.py
@@ -172,6 +172,9 @@ def get_cohorts_concept_stats(self, cohorts: List[int],
         the domain instead as defined in DOMAIN_MAPPING variable in models.py
         :return: ConceptHierarchy object
         """
+        if not cohorts:
+            notify_users('The input cohorts list is empty. At least one cohort id must be provided.')
+            return None
         c_action = self._set_cohort_action()
         if c_action:
             return c_action.get_cohorts_concept_stats(cohorts, concept_type=concept_type, filter_count=filter_count,
diff --git a/biasanalyzer/cohort.py b/biasanalyzer/cohort.py
index e207fb6..577f95b 100644
--- a/biasanalyzer/cohort.py
+++ b/biasanalyzer/cohort.py
@@ -1,4 +1,5 @@
 from sqlalchemy.exc import SQLAlchemyError
+from functools import reduce
 import duckdb
 import pandas as pd
 from datetime import datetime
@@ -62,7 +63,8 @@ def get_concept_stats(self, concept_type='condition_occurrence', filter_count=0,
                                                              concept_type=concept_type,
                                                              filter_count=filter_count,
                                                              vocab=vocab)
-        return ConceptHierarchy.build_concept_hierarchy_from_results(self.cohort_id, cohort_stats[concept_type])
+        return (cohort_stats,
+                ConceptHierarchy.build_concept_hierarchy_from_results(self.cohort_id, cohort_stats[concept_type]))
 
 
     def __del__(self):
@@ -153,14 +155,13 @@ def get_cohorts_concept_stats(self, cohorts: List[int],
                           concept_type: str = 'condition_occurrence',
                           filter_count: int = 0,
                           vocab=None):
-        hierarchies = [self.bias_db.get_cohort_concept_stats(c, self._query_builder,
-                                                             concept_type=concept_type,
-                                                             filter_count=filter_count,
-                                                             vocab=vocab) for c in cohorts]
-        union_h = hierarchies[0]
-        for h in hierarchies[1:]:
-            union_h = union_h.union(h)
-        return union_h
+        cohort_concept_stats = [self.bias_db.get_cohort_concept_stats(c, self._query_builder,
+                                                                      concept_type=concept_type,
+                                                                      filter_count=filter_count,
+                                                                      vocab=vocab) for c in cohorts]
+        hierarchies = [ConceptHierarchy.build_concept_hierarchy_from_results(c, c_stats.get(concept_type, []))
+                       for c, c_stats in zip(cohorts, cohort_concept_stats)]
+        return reduce(lambda h1, h2: h1.union(h2), hierarchies).to_dict()
 
     def compare_cohorts(self, cohort_id_1: int, cohort_id_2: int):
         """
diff --git a/biasanalyzer/concept.py b/biasanalyzer/concept.py
index cd2953f..e208be5 100644
--- a/biasanalyzer/concept.py
+++ b/biasanalyzer/concept.py
@@ -1,6 +1,5 @@
-from collections import defaultdict
 import networkx as nx
-from typing import List, Optional
+from typing import List, Optional, Union
 from _collections import deque
 
 
@@ -17,9 +16,9 @@ def name(self) -> str:
     def code(self) -> str:
         return self._ch.graph.nodes[self.id]["concept_code"]
 
-    def get_metrics(self, cohort_id: int) -> dict:
+    def get_metrics(self, identifier: Union[int, str]) -> dict:
         metrics = self._ch.graph.nodes[self.id].get("metrics", {})
-        return metrics.get(cohort_id, {})
+        return metrics.get(str(identifier), {})
 
     def get_union_metrics(self) -> dict:
         # simple aggregation example
@@ -59,9 +58,20 @@ def to_dict(self, include_children: bool = True) -> dict:
 class ConceptHierarchy:
     _graph_cache = {}
 
-    def __init__(self, input_g: nx.DiGraph, cohort_id: int):
+    def __init__(self, input_g: nx.DiGraph, identifier: str):
         self.graph = input_g
-        self.cohort_id = cohort_id
+        self.identifier = ConceptHierarchy._normalize_identifier(identifier)
+
+    @staticmethod
+    def _normalize_identifier(identifier: str) -> str:
+        """Return the canonical form of a single or "+"-joined union identifier."""
+        if "+" not in identifier:
+            return identifier.strip()
+        # Union identifier: drop blank parts, then de-duplicate and sort, so that
+        # "2+1" and "1+2+2" both normalize to "1+2" (string sort, not numeric).
+        stripped_parts = (part.strip() for part in identifier.split("+"))
+        unique_parts = sorted({part for part in stripped_parts if part})
+        return "+".join(unique_parts)
 
     @classmethod
     def build_concept_hierarchy_from_results(cls, cohort_id: int, results: List[dict]):
@@ -72,11 +82,12 @@ def build_concept_hierarchy_from_results(cls, cohort_id: int, results: List[dict
         :param cohort_id: cohort id to get concept hierarchy for
         :return: ConceptHierarchy object
         """
-        if cohort_id in cls._graph_cache:
-            return cls._graph_cache[cohort_id]
+        identifer = str(cohort_id)
+        if identifer in cls._graph_cache:
+            return cls._graph_cache[identifer]
 
         # node metrics
-        metrics_by_concept = defaultdict(lambda: {"count": 0, "prevalence": 0.0})
+        metrics_by_concept = {}
         node_metadata = {}
 
         for row in results:
@@ -94,7 +105,7 @@ def build_concept_hierarchy_from_results(cls, cohort_id: int, results: List[dict
         graph = nx.DiGraph()
         # add nodes with metadata + metrics
         for cid, meta in node_metadata.items():
-            graph.add_node(cid, **meta, metrics={cohort_id: metrics_by_concept[cid]})
+            graph.add_node(cid, **meta, metrics={identifer: metrics_by_concept[cid]})
 
         # add parent-child edges
         for row in results:
@@ -103,8 +114,8 @@ def build_concept_hierarchy_from_results(cls, cohort_id: int, results: List[dict
             if anc and desc and anc != desc:
                 graph.add_edge(anc, desc)
 
-        hierarchy = ConceptHierarchy(graph, cohort_id)
-        cls._graph_cache[cohort_id] = hierarchy
+        hierarchy = ConceptHierarchy(graph, identifer)
+        cls._graph_cache[identifer] = hierarchy
         return hierarchy
 
     @classmethod
@@ -127,23 +138,20 @@ def get_leaf_nodes(self, serialization: bool = False) -> List:
         leaves = [n for n in self.graph.nodes if self.graph.out_degree(n) == 0]
         leave_nodes = [ConceptNode(l, self) for l in leaves]
         if serialization:
-            return [rl.to_dict(include_children=False) for rl in leave_nodes]
+            return [ln.to_dict(include_children=False) for ln in leave_nodes]
         else:
             return leave_nodes
 
-    def iter_nodes(self, root_id: int, order: str = "bfs", include_root: bool = True,
+    def iter_nodes(self, root_id: int, order: str = "bfs",
                    serialization: bool = False):
         """Iterate nodes in BFS or DFS order from a given root."""
-        if root_id not in self.graph:
+        if root_id not in self.graph.nodes:
             raise ValueError(f"Root node {root_id} not found in graph.")
 
         if order == "bfs":
             queue = deque([root_id])
             while queue:
                 node = queue.popleft()
-                if not include_root and node == root_id:
-                    queue.extend(self.graph.successors(node))
-                    continue
                 if serialization:
                     yield ConceptNode(node, self).to_dict(include_children=False)
                 else:
@@ -153,9 +161,6 @@ def iter_nodes(self, root_id: int, order: str = "bfs", include_root: bool = True
             stack = [root_id]
             while stack:
                 node = stack.pop()
-                if not include_root and node == root_id:
-                    stack.extend(self.graph.successors(node))
-                    continue
                 if serialization:
                     yield ConceptNode(node, self).to_dict(include_children=False)
                 else:
@@ -165,15 +170,23 @@ def iter_nodes(self, root_id: int, order: str = "bfs", include_root: bool = True
             raise ValueError("order must be 'bfs' or 'dfs'")
 
     def union(self, other: "ConceptHierarchy") -> "ConceptHierarchy":
         """Merge two hierarchies into a new one, aggregating metrics."""
+        # Unions are cached under a normalized (sorted, de-duplicated) identifier,
+        # so a.union(b) and b.union(a) resolve to the same cache entry.
+        new_ident = ConceptHierarchy._normalize_identifier(f"{self.identifier}+{other.identifier}")
+        if new_ident in ConceptHierarchy._graph_cache:
+            return ConceptHierarchy._graph_cache[new_ident]
+
         composed_graph = nx.compose(self.graph, other.graph)
         # merge node metrics
         for n in composed_graph.nodes:
             metrics_self = self.graph.nodes.get(n, {}).get("metrics", {})
             metrics_other = other.graph.nodes.get(n, {}).get("metrics", {})
-            merged = {**metrics_self, **metrics_other}
-            composed_graph.nodes[n]["metrics"] = merged
-        return ConceptHierarchy(composed_graph)
+            composed_graph.nodes[n]["metrics"] = {**metrics_self, **metrics_other}
+
+        new_hierarchy = ConceptHierarchy(composed_graph, new_ident)
+        ConceptHierarchy._graph_cache[new_ident] = new_hierarchy
+        return new_hierarchy
 
     def to_dict(self, root_id: Optional[int] = None) -> dict:
         """
diff --git a/biasanalyzer/database.py b/biasanalyzer/database.py
index 0f9b3f4..6aa7dfa 100644
--- a/biasanalyzer/database.py
+++ b/biasanalyzer/database.py
@@ -242,10 +242,10 @@ def get_cohort_concept_stats(self, cohort_definition_id: int, qry_builder,
                     valid_vocabs = self._execute_query("SELECT distinct vocabulary_id FROM concept")
                     valid_vocab_ids = [row['vocabulary_id'] for row in valid_vocabs]
                     if vocab not in valid_vocab_ids:
-                        notify_users(f"input {vocab} is not a valid vocabulary in OMOP. "
-                                     f"Supported vocabulary ids are: {valid_vocab_ids}",
-                                     level='error')
-                        return concept_stats
+                        err_msg = (f"input {vocab} is not a valid vocabulary in OMOP. "
+                                   f"Supported vocabulary ids are: {valid_vocab_ids}")
+                        notify_users(err_msg, level='error')
+                        raise ValueError(err_msg)
 
                 query = qry_builder.build_concept_prevalence_query(concept_type, cohort_definition_id,
                                                                    filter_count, vocab)
@@ -265,11 +265,11 @@ def get_cohort_concept_stats(self, cohort_definition_id: int, qry_builder,
                     print_hierarchy(hierarchy, parent=root, level=0, parent_details=root_detail)
                 return concept_stats
             else:
-                notify_users("Cannot connect to the OMOP database to query concept table")
-                return concept_stats
+                err_msg = "Cannot connect to the OMOP database to query concept table"
+                raise ValueError(err_msg)
         except Exception as e:
-            notify_users(f"Error computing cohort concept stats: {e}", level='error')
-            return concept_stats
+            err_msg = f"Error computing cohort concept stats: {e}"
+            raise ValueError(err_msg)
 
     def close(self):
         if self.conn:
diff --git a/tests/query_based/test_hierarchical_prevalence.py b/tests/query_based/test_hierarchical_prevalence.py
index 8300f1c..220bf98 100644
--- a/tests/query_based/test_hierarchical_prevalence.py
+++ b/tests/query_based/test_hierarchical_prevalence.py
@@ -1,4 +1,7 @@
-import logging
+import pytest
+from functools import reduce
+from biasanalyzer.concept import ConceptHierarchy, ConceptNode
+
 
 def test_cohort_concept_hierarchical_prevalence(test_db, caplog):
     bias = test_db
@@ -18,24 +21,18 @@ def test_cohort_concept_hierarchical_prevalence(test_db, caplog):
     # Test cohort object and methods
     assert cohort is not None, "Cohort creation failed"
     # test concept_type must be one of the supported OMOP domain name
-    caplog.clear()
-    with caplog.at_level(logging.ERROR):
-        concept_stats = cohort.get_concept_stats(concept_type='dummy_invalid')
-    assert 'Invalid concept_type' in caplog.text
-    assert concept_stats == {}
+    with pytest.raises(ValueError):
+        cohort.get_concept_stats(concept_type='dummy_invalid')
 
     # test vocab must be None to use the default vocab or one of the supported OMOP vocabulary id
-    caplog.clear()
-    with caplog.at_level(logging.ERROR):
-        concept_stats = cohort.get_concept_stats(vocab='dummy_invalid_vocab')
-    assert 'is not a valid vocabulary' in caplog.text
-    assert concept_stats == {}
+    with pytest.raises(ValueError):
+        cohort.get_concept_stats(vocab='dummy_invalid_vocab')
 
     # test the cohort does not have procedure_occurrence related concepts
-    concept_stats = cohort.get_concept_stats(concept_type='procedure_occurrence')
-    assert concept_stats == {'procedure_occurrence': []}
+    with pytest.raises(ValueError):
+        cohort.get_concept_stats(concept_type='procedure_occurrence')
 
-    concept_stats = cohort.get_concept_stats(vocab='ICD10CM')
+    concept_stats, _ = cohort.get_concept_stats(vocab='ICD10CM')
     assert concept_stats is not None, "Failed to fetch concept stats"
     assert len(concept_stats) > 0, "No concept stats returned"
     # check returned data
@@ -57,3 +54,166 @@ def test_cohort_concept_hierarchical_prevalence(test_db, caplog):
     assert diabetes_prevalence['prevalence'] < type1_prevalence['prevalence'] + type2_prevalence['prevalence'], \
         ("Parent diabetes concept prevalence does not reflect overlap between type 1 and type 2 diabetes "
          "children concept prevalence")
+
+def test_identifier_normalization_and_cache():
+    ConceptHierarchy.clear_cache()
+    # identifiers are normalized
+    assert ConceptHierarchy._normalize_identifier("2+1") == "1+2"
+    assert ConceptHierarchy._normalize_identifier("1+2+2") == "1+2"
+
+    # fake minimal results to build hierarchy
+    results = [
+        {"ancestor_concept_id": 1, "descendant_concept_id": 1,
+         "concept_name": "Diabetes", "concept_code": "DIA",
+         "count_in_cohort": 5, "prevalence": 0.5}
+    ]
+    h1 = ConceptHierarchy.build_concept_hierarchy_from_results(1, results)
+    h2 = ConceptHierarchy.build_concept_hierarchy_from_results(1, results)
+    assert h1 is h2  # cache reuse
+    assert h1.identifier == "1"
+
+def test_union_and_cache_behavior():
+    ConceptHierarchy.clear_cache()
+    results1 = [
+        {"ancestor_concept_id": 1, "descendant_concept_id": 1,
+         "concept_name": "Diabetes", "concept_code": "DIA",
+         "count_in_cohort": 5, "prevalence": 0.5}
+    ]
+    results2 = [
+        {"ancestor_concept_id": 2, "descendant_concept_id": 2,
+         "concept_name": "Hypertension", "concept_code": "HYP",
+         "count_in_cohort": 3, "prevalence": 0.3}
+    ]
+
+    h1 = ConceptHierarchy.build_concept_hierarchy_from_results(1, results1)
+    h2 = ConceptHierarchy.build_concept_hierarchy_from_results(2, results2)
+    assert "1" in ConceptHierarchy._graph_cache
+    assert "2" in ConceptHierarchy._graph_cache
+    h12 = h1.union(h2)
+    h21 = h2.union(h1)
+    assert h12.identifier == "1+2"
+    assert h21.identifier == "1+2"
+    assert h12 is h21
+
+def test_traversal_and_serialization():
+    ConceptHierarchy.clear_cache()
+    results = [
+        {"ancestor_concept_id": 1, "descendant_concept_id": 1,
+         "concept_name": "Root", "concept_code": "R",
+         "count_in_cohort": 5, "prevalence": 0.5},
+        {"ancestor_concept_id": 1, "descendant_concept_id": 2,
+         "concept_name": "Child", "concept_code": "C",
+         "count_in_cohort": 2, "prevalence": 0.2}
+    ]
+    h = ConceptHierarchy.build_concept_hierarchy_from_results(1, results)
+
+    # roots
+    roots = h.get_root_nodes()
+    assert len(roots) == 1
+    assert roots[0].name == "Root"
+    assert roots[0].get_metrics(1) == {"count": 5, "prevalence": 0.5}
+    children = roots[0].children()
+    ch_names = [ch.name for ch in children]
+    assert ch_names == ["Child"]
+    # leaves
+    assert h.get_leaf_nodes(serialization=True) == [
+        {
+            'concept_id': 2,
+            'concept_name': 'Child',
+            'concept_code': 'C',
+            'metrics': {
+                'union': {
+                    'count': 2,
+                    'prevalence': 0.2
+                },
+                'cohorts': {
+                    '1': {
+                        'count': 2, 'prevalence': 0.2
+                    }
+                }
+            },
+            'parent_ids': [1]
+        }
+    ]
+    leaves = h.get_leaf_nodes()
+    assert len(leaves) == 1
+    assert leaves[0].name == "Child"
+    parents = leaves[0].parents()
+    par_names = [par.name for par in parents]
+    assert par_names == ["Root"]
+
+    assert h.get_node(1, serialization=True) == {
+        "concept_id": 1,
+        "concept_name": "Root",
+        "concept_code": "R",
+        "metrics": {
+            "union": {
+                "count": 5,
+                "prevalence": 0.5
+            },
+            "cohorts": {
+                "1": {
+                    "count": 5,
+                    "prevalence": 0.5
+                }
+            }
+        },
+        "parent_ids": []
+    }
+
+    # graph traversal
+    with pytest.raises(ValueError):
+        # make sure to use list() to force generator execution
+        # test invalid root_id raises ValueError
+        list(h.iter_nodes(111, order="bfs"))
+
+    with pytest.raises(ValueError):
+        # make sure to use list() to force generator execution
+        # test invalid order raises ValueError
+        list(h.iter_nodes(1, order="dummy"))
+
+    bfs_nodes = [n.id for n in h.iter_nodes(1, order="bfs")]
+    assert bfs_nodes == [1, 2]
+
+    # DFS traversal
+    dfs_nodes = [n.id for n in h.iter_nodes(1, order="dfs")]
+    assert set(dfs_nodes) == {1, 2}
+
+    dfs_nodes = [n['concept_id'] for n in h.iter_nodes(1, order="dfs", serialization=True)]
+    assert set(dfs_nodes) == {1, 2}
+
+    # serialization
+    serialized_root = h.get_root_nodes(serialization=True)[0]
+    assert serialized_root["concept_name"] == "Root"
+    assert "metrics" in serialized_root
+
+    serialized_iter = list(h.iter_nodes(1, serialization=True))
+    assert all(isinstance(n, dict) for n in serialized_iter)
+    assert serialized_iter[0]["concept_id"] == 1
+
+    with pytest.raises(ValueError):
+        h.to_dict(111)
+
+    h_dict = h.to_dict(1)
+    assert h_dict == {'hierarchy': [{
+        'concept_id': 1, 'concept_name': 'Root', 'concept_code': 'R',
+        'metrics': {'union': {'count': 5, 'prevalence': 0.5},
+                    'cohorts': {'1': {'count': 5, 'prevalence': 0.5}}},
+        'parent_ids': [],
+        'children': [{'concept_id': 2, 'concept_name': 'Child', 'concept_code': 'C',
+                      'metrics': {'union': {'count': 2, 'prevalence': 0.2},
+                                  'cohorts': {'1': {'count': 2, 'prevalence': 0.2}}},
+                      'parent_ids': [1], 'children': []}]}
+    ]}
+
+    h_dict = h.to_dict()
+    assert h_dict == {'hierarchy': [{
+        'concept_id': 1, 'concept_name': 'Root', 'concept_code': 'R',
+        'metrics': {'union': {'count': 5, 'prevalence': 0.5},
+                    'cohorts': {'1': {'count': 5, 'prevalence': 0.5}}},
+        'parent_ids': [],
+        'children': [{'concept_id': 2, 'concept_name': 'Child', 'concept_code': 'C',
+                      'metrics': {'union': {'count': 2, 'prevalence': 0.2},
+                                  'cohorts': {'1': {'count': 2, 'prevalence': 0.2}}},
+                      'parent_ids': [1], 'children': []}]}
+    ]}
diff --git a/tests/test_database.py b/tests/test_database.py
index d2cb1ff..9944965 100644
--- a/tests/test_database.py
+++ b/tests/test_database.py
@@ -156,11 +156,8 @@ def test_get_cohort_concept_stats_handles_exception(caplog):
     db = BiasDatabase(":memory:")
     db.omop_cdm_db_url = 'duckdb'
     qry_builder = CohortQueryBuilder(cohort_creation=False)
-    caplog.clear()
-    with caplog.at_level(logging.ERROR):
-        result = db.get_cohort_concept_stats(123, qry_builder)
-    assert 'Error computing cohort concept stats' in caplog.text
-    assert result == {}
+    with pytest.raises(ValueError):
+        db.get_cohort_concept_stats(123, qry_builder)
 
 def test_get_cohort_attributes_handles_exception():
     BiasDatabase._instance = None
@@ -171,6 +168,5 @@ def test_get_cohort_attributes_handles_exception():
     assert result_stats is None
     result = db.get_cohort_distributions(123, 'age')
     assert result is None
-    result = db.get_cohort_concept_stats(123, qry_builder)
-    assert result == {}
-
+    with pytest.raises(ValueError):
+        db.get_cohort_concept_stats(123, qry_builder)

From 5b9d43b6f603d25e33543d48ea32b7c752d318e4 Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Sat, 13 Sep 2025 22:22:23 -0400
Subject: [PATCH 5/8] added more tests

---
 tests/test_biasanalyzer_api.py | 74 +++++++++++++++++++++++++++++++++-
 1 file changed, 73 insertions(+), 1 deletion(-)

diff --git a/tests/test_biasanalyzer_api.py b/tests/test_biasanalyzer_api.py
index 0ed8ae3..6efcf5f 100644
--- a/tests/test_biasanalyzer_api.py
+++ b/tests/test_biasanalyzer_api.py
@@ -3,7 +3,7 @@
 import logging
 import pytest
 from ipytree import Node
-
+from biasanalyzer.concept import ConceptHierarchy
 from biasanalyzer import __version__
 
 
@@ -106,6 +106,78 @@ def test_compare_cohort_with_no_action(caplog, fresh_bias_obj):
         fresh_bias_obj.compare_cohorts(1, 2)
     assert 'failed to create a valid cohort action object' in caplog.text
 
+def test_cohorts_concept_stats_empty_input_cohorts(caplog, fresh_bias_obj):
+    caplog.clear()
+    with caplog.at_level(logging.INFO):
+        fresh_bias_obj.get_cohorts_concept_stats([])
+    assert 'The input cohorts list is empty. At least one cohort id must be provided.' in caplog.text
+
+def test_cohorts_concept_stats_no_cohort_action(caplog, fresh_bias_obj):
+    caplog.clear()
+    with caplog.at_level(logging.INFO):
+        fresh_bias_obj.get_cohorts_concept_stats([1])
+    assert 'failed to get concept prevalence stats for the union of cohorts' in caplog.text
+
+def test_cohorts_union_concept_stats(test_db):
+    ConceptHierarchy.clear_cache()
+    # Show what cohorts exist in the test DB and print cohorts and stats so we know what raw data looks like
+    cohorts_df = test_db.bias_db.conn.execute("""
+                                             SELECT cohort_definition_id, COUNT(*) as n_subjects
+                                             FROM cohort
+                                             WHERE cohort_definition_id = 1 or cohort_definition_id = 2     
+                                             GROUP BY cohort_definition_id
+                                             ORDER BY cohort_definition_id
+                                             """).fetchdf()
+    print("Cohorts in DB:\n", cohorts_df.to_string(index=False), flush=True)
+
+    # Show concept stats per cohort (aggregated for clarity)
+    stats_df = test_db.bias_db.conn.execute("""
+                                           SELECT c.cohort_definition_id,
+                                                  co.condition_concept_id,
+                                                  COUNT(*) as n
+                                           FROM cohort c
+                                                    JOIN condition_occurrence co
+                                                         ON c.subject_id = co.person_id
+                                           WHERE c.cohort_definition_id = 1 or c.cohort_definition_id = 2    
+                                           GROUP BY c.cohort_definition_id, co.condition_concept_id
+                                           ORDER BY c.cohort_definition_id, co.condition_concept_id
+                                           """).fetchdf()
+    print("Concept stats per cohort:\n", stats_df.to_string(index=False), flush=True)
+
+    union_result = test_db.get_cohorts_concept_stats([1, 2])
+    print(f'union_result: {union_result}', flush=True)
+    union_result['hierarchy'] = sorted(union_result['hierarchy'], key=lambda x: x['concept_id'])
+    # NOTE: The union_result takes cohort_start_date and cohort_end_date into account
+    # when joining cohort with condition_occurrence for inclusion/exclusion criteria.
+    # That means counts may differ from the raw numbers above. For example:
+    #   - Concept 4041664 appears 5 times in cohort 1 raw, but only 4 fall within
+    #     the cohort window → {'1': {'count': 4}}
+    #   - Concept 4041664 appears 5 times in cohort 2 raw, but only 1 falls within
+    #     the window → {'2': {'count': 1}}
+    #   - Concept 5 disappears entirely, because its single occurrence is outside
+    #     the cohort date window.
+    # This explains why union_result values differ from the raw stats above.
+    assert union_result == {'hierarchy': [
+        {'concept_id': 316139, 'concept_name': 'Heart failure', 'concept_code': '84114007',
+         'metrics': {'union': {'count': 4, 'prevalence': 0.45},
+                     'cohorts': {'1': {'count': 2, 'prevalence': 0.4},
+                                 '2': {'count': 2, 'prevalence': 0.5}}},
+         'parent_ids': [], 'children': []},
+        {'concept_id': 4041664, 'concept_name': 'Difficulty breathing', 'concept_code': '230145002',
+        'metrics': {
+            'union': {'count': 5, 'prevalence': 0.525},
+            'cohorts': {'1': {'count': 4, 'prevalence': 0.8},
+                        '2': {'count': 1, 'prevalence': 0.25}
+                        }
+        },
+        'parent_ids': [], 'children': []},
+        {'concept_id': 37311061, 'concept_name': 'COVID-19', 'concept_code': '840539006',
+         'metrics': {'union': {'count': 8, 'prevalence': 0.9},
+                     'cohorts': {'1': {'count': 4, 'prevalence': 0.8},
+                                 '2': {'count': 4, 'prevalence': 1.0}}},
+         'parent_ids': [], 'children': []},
+    ]}
+
 def test_get_domains_and_vocabularies_invalid(caplog, fresh_bias_obj):
     caplog.clear()
     with caplog.at_level(logging.INFO):

From fe4a97655fa69a33faf06415dd629bfa1a611406 Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Sat, 13 Sep 2025 22:23:18 -0400
Subject: [PATCH 6/8] minor update to module_test.py

---
 biasanalyzer/module_test.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/biasanalyzer/module_test.py b/biasanalyzer/module_test.py
index 13f0a88..9fad973 100644
--- a/biasanalyzer/module_test.py
+++ b/biasanalyzer/module_test.py
@@ -49,7 +49,8 @@ def condition_cohort_test(bias_obj):
         print(f'the cohort ethnicity stats: {cohort_data.get_stats("ethnicity")}')
         print(f'the cohort age distributions: {cohort_data.get_distributions("age")}')
         t1 = time.time()
-        cohort_concept_hierarchy = cohort_data.get_concept_stats(concept_type='condition_occurrence', filter_count=5000)
+        _, cohort_concept_hierarchy = cohort_data.get_concept_stats(concept_type='condition_occurrence',
+                                                                    filter_count=5000)
         concept_node = cohort_concept_hierarchy.get_node(concept_id=37311061)
         print(f'concept_node 37311061 metric: {concept_node.get_metrics(md["id"])}')
 
@@ -67,7 +68,8 @@ def condition_cohort_test(bias_obj):
         pprint.pprint(hier_dict, indent=2)
 
 
-        cohort_de_concept_hierarchy = cohort_data.get_concept_stats(concept_type='drug_exposure', filter_count=500)
+        _, cohort_de_concept_hierarchy = cohort_data.get_concept_stats(concept_type='drug_exposure',
+                                                                       filter_count=500)
         de_hier_dict = cohort_de_concept_hierarchy.to_dict()
         pprint.pprint(de_hier_dict, indent=2)
         compare_stats = bias_obj.compare_cohorts(cohort_data.metadata['id'], cohort_data.metadata['id'])

From ee2a8533ca75c1ea317c7c2a8d242bd0d8c51743 Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Tue, 23 Sep 2025 22:34:08 -0400
Subject: [PATCH 7/8] code improvements

---
 biasanalyzer/cohort.py                        |  8 ++++---
 biasanalyzer/concept.py                       | 20 ++++++++++--------
 biasanalyzer/database.py                      | 21 +++++++++++--------
 .../test_hierarchical_prevalence.py           |  6 +++---
 4 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/biasanalyzer/cohort.py b/biasanalyzer/cohort.py
index 577f95b..d6b9698 100644
--- a/biasanalyzer/cohort.py
+++ b/biasanalyzer/cohort.py
@@ -54,7 +54,7 @@ def get_distributions(self, variable):
         return self.bias_db.get_cohort_distributions(self.cohort_id, variable)
 
     def get_concept_stats(self, concept_type='condition_occurrence', filter_count=0,
-                          vocab=None):
+                          vocab=None, print_concept_hierarchy=False):
         """
         Get cohort concept statistics such as concept prevalence
         """
@@ -62,7 +62,8 @@ def get_concept_stats(self, concept_type='condition_occurrence', filter_count=0,
                                                              self.query_builder,
                                                              concept_type=concept_type,
                                                              filter_count=filter_count,
-                                                             vocab=vocab)
+                                                             vocab=vocab,
+                                                             print_concept_hierarchy=print_concept_hierarchy)
         return (cohort_stats,
                 ConceptHierarchy.build_concept_hierarchy_from_results(self.cohort_id, cohort_stats[concept_type]))
 
@@ -158,7 +159,8 @@ def get_cohorts_concept_stats(self, cohorts: List[int],
         cohort_concept_stats = [self.bias_db.get_cohort_concept_stats(c, self._query_builder,
                                                                       concept_type=concept_type,
                                                                       filter_count=filter_count,
-                                                                      vocab=vocab) for c in cohorts]
+                                                                      vocab=vocab)
+                                for c in cohorts]
         hierarchies = [ConceptHierarchy.build_concept_hierarchy_from_results(c, c_stats.get(concept_type, []))
                        for c, c_stats in zip(cohorts, cohort_concept_stats)]
         return reduce(lambda h1, h2: h1.union(h2), hierarchies).to_dict()
diff --git a/biasanalyzer/concept.py b/biasanalyzer/concept.py
index e208be5..90a179f 100644
--- a/biasanalyzer/concept.py
+++ b/biasanalyzer/concept.py
@@ -16,9 +16,17 @@ def name(self) -> str:
     def code(self) -> str:
         return self._ch.graph.nodes[self.id]["concept_code"]
 
-    def get_metrics(self, identifier: Union[int, str]) -> dict:
+    @property
+    def parents(self) -> List["ConceptNode"]:
+        return [ConceptNode(p, self._ch) for p in self._ch.graph.predecessors(self.id)]
+
+    @property
+    def children(self) -> List["ConceptNode"]:
+        return [ConceptNode(c, self._ch) for c in self._ch.graph.successors(self.id)]
+
+    def get_metrics(self, cohort_id: Union[int, str]) -> dict:
         metrics = self._ch.graph.nodes[self.id].get("metrics", {})
-        return metrics.get(str(identifier), {})
+        return metrics.get(str(cohort_id), {})
 
     def get_union_metrics(self) -> dict:
         # simple aggregation example
@@ -30,12 +38,6 @@ def get_union_metrics(self) -> dict:
             "prevalence": sum(prevalences) / len(prevalences) if prevalences else 0.0,
         }
 
-    def parents(self) -> List["ConceptNode"]:
-        return [ConceptNode(p, self._ch) for p in self._ch.graph.predecessors(self.id)]
-
-    def children(self) -> List["ConceptNode"]:
-        return [ConceptNode(c, self._ch) for c in self._ch.graph.successors(self.id)]
-
     def to_dict(self, include_children: bool = True) -> dict:
         """
         Serialize this node into a dict. Optionally include nested children.
@@ -51,7 +53,7 @@ def to_dict(self, include_children: bool = True) -> dict:
             "parent_ids": list(self._ch.graph.predecessors(self.id)),
         }
         if include_children:
-            data["children"] = [c.to_dict(include_children=True) for c in self.children()]
+            data["children"] = [c.to_dict(include_children=True) for c in self.children]
         return data
 
 
diff --git a/biasanalyzer/database.py b/biasanalyzer/database.py
index 6aa7dfa..e6e30e2 100644
--- a/biasanalyzer/database.py
+++ b/biasanalyzer/database.py
@@ -228,7 +228,8 @@ def get_cohort_distributions(self, cohort_definition_id: int, variable: str):
             return None
 
     def get_cohort_concept_stats(self, cohort_definition_id: int, qry_builder,
-                                 concept_type='condition_occurrence', filter_count=0, vocab=None):
+                                 concept_type='condition_occurrence', filter_count=0, vocab=None,
+                                 print_concept_hierarchy=False):
         """
         Get concept statistics for a cohort from the cohort table.
         """
@@ -255,14 +256,16 @@ def get_cohort_concept_stats(self, cohort_definition_id: int, qry_builder,
                 cs_df["details"] = cs_df.apply(
                     lambda row: f"{row['concept_name']} (Code: {row['concept_code']}, "
                                 f"Count: {row['count_in_cohort']}, Prevalence: {row['prevalence']:.3%})", axis=1)
-                filtered_cs_df = cs_df[cs_df['ancestor_concept_id'] != cs_df['descendant_concept_id']]
-                roots = find_roots(filtered_cs_df)
-                hierarchy = build_concept_hierarchy(filtered_cs_df)
-                notify_users(f'cohort concept hierarchy for {concept_type} with root concept ids {roots}:')
-                for root in roots:
-                    root_detail = cs_df[(cs_df['ancestor_concept_id'] == root)
-                              & (cs_df['descendant_concept_id'] == root)]['details'].iloc[0]
-                    print_hierarchy(hierarchy, parent=root, level=0, parent_details=root_detail)
+
+                if print_concept_hierarchy:
+                    filtered_cs_df = cs_df[cs_df['ancestor_concept_id'] != cs_df['descendant_concept_id']]
+                    roots = find_roots(filtered_cs_df)
+                    hierarchy = build_concept_hierarchy(filtered_cs_df)
+                    notify_users(f'cohort concept hierarchy for {concept_type} with root concept ids {roots}:')
+                    for root in roots:
+                        root_detail = cs_df[(cs_df['ancestor_concept_id'] == root)
+                                  & (cs_df['descendant_concept_id'] == root)]['details'].iloc[0]
+                        print_hierarchy(hierarchy, parent=root, level=0, parent_details=root_detail)
                 return concept_stats
             else:
                 err_msg = "Cannot connect to the OMOP database to query concept table"
diff --git a/tests/query_based/test_hierarchical_prevalence.py b/tests/query_based/test_hierarchical_prevalence.py
index 220bf98..c0a7022 100644
--- a/tests/query_based/test_hierarchical_prevalence.py
+++ b/tests/query_based/test_hierarchical_prevalence.py
@@ -32,7 +32,7 @@ def test_cohort_concept_hierarchical_prevalence(test_db, caplog):
     with pytest.raises(ValueError):
         cohort.get_concept_stats(concept_type='procedure_occurrence')
 
-    concept_stats, _ = cohort.get_concept_stats(vocab='ICD10CM')
+    concept_stats, _ = cohort.get_concept_stats(vocab='ICD10CM', print_concept_hierarchy=True)
     assert concept_stats is not None, "Failed to fetch concept stats"
     assert len(concept_stats) > 0, "No concept stats returned"
     # check returned data
@@ -112,7 +112,7 @@ def test_traversal_and_serialization():
     assert len(roots) == 1
     assert roots[0].name == "Root"
     assert roots[0].get_metrics(1) == {"count": 5, "prevalence": 0.5}
-    children = roots[0].children()
+    children = roots[0].children
     ch_names = [ch.name for ch in children]
     assert ch_names == ["Child"]
     # leaves
@@ -138,7 +138,7 @@ def test_traversal_and_serialization():
     leaves = h.get_leaf_nodes()
     assert len(leaves) == 1
     assert leaves[0].name == "Child"
-    parents = leaves[0].parents()
+    parents = leaves[0].parents
     par_names = [par.name for par in parents]
     assert par_names == ["Root"]
 

From 5a1c43d83b7faf3aae196aab643d40233efc9133 Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Wed, 24 Sep 2025 22:51:05 -0400
Subject: [PATCH 8/8] updated notebook tutorial and readme

---
 README.md                                     |   15 +-
 biasanalyzer/cohort_query_builder.py          |    2 +-
 .../BiasAnalyzerCohortConceptTutorial.ipynb   | 1000 +++++------------
 3 files changed, 307 insertions(+), 710 deletions(-)

diff --git a/README.md b/README.md
index fb444c1..0e75733 100644
--- a/README.md
+++ b/README.md
@@ -94,9 +94,22 @@ concept ID. All clinical events in OMOP, such as conditions, drug exposures, pro
 represented as concepts. You can get patient counts and prevalence associated with each concept by accessing 
 the method `get_concept_stats()` with a code snippet example shown below.
   ```angular2html
-    cohort_concepts = baseline_cohort_data.get_concept_stats(concept_type='condition_occurrence')
+    cohort_concepts, cohort_concept_hierarchy = baseline_cohort_data.get_concept_stats(concept_type='condition_occurrence')
     print(pd.DataFrame(cohort_concepts["condition_occurrence"]))
+    print(f"returned cohort_concept_hierarchy object converted to dict: {cohort_concept_hierarchy.to_dict()}")
   ```
+  The returned cohort_concept_hierarchy object stores concept hierarchical relationships with concept nodes indexed 
+to allow quick information retrieval of a concept node and provides hierarchy traversal methods for concept hierarchy 
+navigation. For more details, refer to the corresponding tutorial notebook [BiasAnalyzerCohortConceptTutorial.ipynb](https://github.com/VACLab/BiasAnalyzer/blob/main/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb).
+- There is also an API method `get_cohorts_concept_stats(list_of_cohort_ids, concept_type='condition_occurrence', filter_count=0, vocab=None)` 
+that enables users to explore union of concept prevalences over multiple cohorts to facilitate potential cohort 
+selection bias exploration. An example code snippet is shown below to illustrate how to use this method.
+   ```angular2html
+   cohort_list = [baseline_cohort_data.cohort_id, study_cohort_data.cohort_id]
+   aggregated_cohort_metrics_dict = bias.get_cohorts_concept_stats(cohort_list)
+   print('Aggregated concept prevalence metrics over the baseline and study cohorts are:')
+   print(aggregated_cohort_metrics_dict)
+   ```
 - There is also an API method that enables users to compare distributions of two cohorts by calling `bias.compare_cohorts(cohort1_id, cohort2_id)` 
 where cohort1_id and cohort2_id are integers and can be obtained from metadata of a cohort object. Currently, 
 only hellinger distances between distributions of two cohorts are computed.
diff --git a/biasanalyzer/cohort_query_builder.py b/biasanalyzer/cohort_query_builder.py
index 4e3ca46..c0a4e80 100644
--- a/biasanalyzer/cohort_query_builder.py
+++ b/biasanalyzer/cohort_query_builder.py
@@ -19,7 +19,7 @@ def __init__(self, cohort_creation=True):
         except ModuleNotFoundError: # pragma: no cover
             template_path = os.path.join(os.path.dirname(__file__), "sql_templates")
 
-        print(f'template_path: {template_path}, cohort_creation: {cohort_creation}')
+        print(f'template_path: {template_path}')
         self.env = Environment(loader=FileSystemLoader(template_path), extensions=['jinja2.ext.do'])
         if cohort_creation:
             self.env.globals.update(
diff --git a/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb b/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb
index 166d228..0ee4282 100644
--- a/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb
+++ b/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb
@@ -96,12 +96,12 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b99aedde4936451e9c0b8e75f2bcc620",
+       "model_id": "af2f7ec9bd3544b486882c38ba0aa738",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
-       "Cohort creation:   0%|                                 | 0/3 [00:00<?, ?stage/s]"
+       "Cohort creation:   0%|                                                                                        …"
       ]
      },
      "metadata": {},
@@ -149,22 +149,26 @@
   },
   {
    "cell_type": "markdown",
-   "id": "22edda35",
+   "id": "53b4d2df-d18b-4312-8df3-fd69073c5eca",
    "metadata": {},
    "source": [
     "**Now that you have connected to your OMOP CDM database and created the `cohort_data` cohort object, you are ready to explore cohort concept prevalence.** \n",
     "\n",
     "---\n",
     "\n",
-    "### Exploring cohort concept prevalence\n",
+    "### Exploring cohort concept prevalence with concept hierarchy taken into account\n",
     "You can retrieve concept prevalence statistics for a cohort using the `get_concept_stats(concept_type='condition_occurrence', filter_count=0, vocab=None)` method on the `cohort_data` object. Each input argument to this method has a default value, so you can call the method without specifying all parameters.\n",
     "- The `concept_type` input argument specifies the OMOP domain to analyze. It must be one of the OMOP domain names: `condition_occurrence`, `drug_exposure`, `procedure_occurrence`, `visit_occurrence`, `measurement`, or `observation`.\n",
     "- The `vocab` input argument specifies the OMOP vocabulary ID to filter concepts by. If set to `None`, a default vocabulary is used based on the domain: `RxNorm` for `drug_exposure`, `LOINC` for `measurement`, and `SNOMED` for all other domains.\n",
     "- The `filter_count` input argument filters out concepts with fewer than this number of patients in the cohort. Set it to `0` to include all without filtering.\n",
-    "This method helps identify the most prevalent clinical concepts in your cohort, which can reveal patterns or potential sources of selection bias in the cohort data.\n",
+    "This method helps identify the most prevalent clinical concepts in your cohort with concept hierarchy taken into account, which can reveal patterns or potential sources of selection bias in the cohort data.\n",
     "\n",
     "**Cohort condition occurrence concept prevalence**: \n",
-    "The code block below demonstrates how to use the default parameters of the `get_concept_stats()` method to retrieve concept prevalence for the `condition occurrence` domain. By default, it uses the `SNOMED` vocabulary, excludes hierarchical relationships, and applies no filtering. The method returns a dictionary where the **key** is the `concept_type` (e.g., `condition_occurrence`) and the **value** is a list of concept dictionaries. Each concept dictionary in the list contains `concept_name`, `concept_code`, `count_in_cohort`, `prevalence`, `ancestor_concept_id`, and `descendant_concept_id`. These values allow you to explore which clinical concepts are most prevalent in your cohort and support deeper investigations into potential sources of selection bias.\n",
+    "The code block below demonstrates how to use the default `concept_type` and `vocab` parameters of the `get_concept_stats(filter_count=5000)` method to retrieve concept prevalence with concept hierarchical relationships taken into account. By default, it uses the `SNOMED` vocabulary for the `condition_occurrence` domain. Concepts with fewer than 5000 patients are excluded as specified in the `filter_count` input parameter. The method returns a dictionary and a `ConceptHierarchy` object as detailed below:\n",
+    "- The returned dictionary contains a key-value pair where the **key** is the `concept_type` (e.g., `condition_occurrence`) and the **value** is a list of concept dictionaries. Each concept dictionary in the list contains `concept_name`, `concept_code`, `count_in_cohort`, `prevalence`, `ancestor_concept_id`, and `descendant_concept_id` (i.e., the concept id corresponding to the item's concept name, code, count, and prevalence). These values allow you to explore which clinical concepts are most prevalent in your cohort with concept hierarchy taken into account and support deeper investigations into potential sources of selection bias.\n",
+    "- The returned `ConceptHierarchy` object stores concept hierarchical relationships with concept nodes indexed to allow quick information retrieval of a concept node and provides hierarchy traversal methods for concept hierarchy navigation. This `ConceptHierarchy` object can be serialized into a dictionary via the `to_dict()` method of the `ConceptHierarchy` class to be loaded easily into a JSON object by downstream apps. The `to_dict(root_id: Optional[int] = None)` method enables downstream apps to retrieve the concept hierarchical information rooted at a specific input concept id or at the root of the concept hierarchy if no input concept id is provided. The returned dictionary contains a key-value pair where `hierarchy` is the key and the value is a list of concept dictionaries with each concept dictionary corresponding to each root of the concept hierarchy. Each concept dictionary in the list contains values corresponding to `concept_id`, `concept_name`, `concept_code`, `metrics`, `parent_ids`, and `children` keys, where `parent_ids` contains a list of parent concept ids, `children` contains a list of serialized, potentially nested, children concept nodes, and `metrics` is a dictionary with the following two keys:\n",
+    "    - `cohorts`: a dictionary with a cohort id as key and a sub-dictionary as value that includes the count and prevalence corresponding to the cohort. For example, `'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}`. The `cohorts` dictionary may include metrics information for multiple cohorts.\n",
+    "    - `union`: a dictionary including unioned counts and prevalence for the concept node over all cohorts included in the `cohorts` dictionary. For example, `'union': {'count': 10208, 'prevalence': 1.0}`. When there is only one cohort included, the union metric dictionary is the same as the single cohort metric dictionary. Refer to [Union of Cohort Concepts Over Multiple Cohorts Tutorial](./BiasAnalyzerMultipleCohortConceptUnionTutorial.ipynb) for how to use the `union` method of the `ConceptHierarchy` class for aggregating concept metrics over multiple cohorts.\n",
     "\n",
     "**Note** that this prevalence computation may take some time, especially for large cohorts. A progress bar will appear to indicate the progress of the prevalence calculation."
    ]
@@ -180,7 +184,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "80ccef000a6743b7ace1499c7fcdc414",
+       "model_id": "474658acc1bc4faebb1c6992d8a26c69",
        "version_major": 2,
        "version_minor": 0
       },
@@ -194,7 +198,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "21ddc9a433b940bc90a48e2c5177769a",
+       "model_id": "925c9eae056b4a29982d9593f91f935b",
        "version_major": 2,
        "version_minor": 0
       },
@@ -208,7 +212,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "7b6d2b2e54a24b55b6d9a1ed39affefb",
+       "model_id": "7c444aec3bef49479feed30d3b6e4178",
        "version_major": 2,
        "version_minor": 0
       },
@@ -223,610 +227,69 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "cohort concept hierarchy for condition_occurrence with root concept ids []:\n",
-      "                                          concept_name       concept_code  count_in_cohort  prevalence  ancestor_concept_id  descendant_concept_id\n",
-      "0                                     Clinical finding          404684003            10208    1.000000               441840                 441840\n",
-      "1                         Disease due to Coronaviridae           27619001            10208    1.000000              4100065                4100065\n",
-      "2                                        Viral disease           34014006            10208    1.000000               440029                 440029\n",
-      "3                                              Disease           64572001            10208    1.000000              4274025                4274025\n",
-      "4                                Coronavirus infection          186747009            10208    1.000000               439676                 439676\n",
-      "5                                             COVID-19          840539006            10208    1.000000             37311061               37311061\n",
-      "6                            Disorder due to infection           40733004            10208    1.000000               432250                 432250\n",
-      "7            Clinical history and observation findings          250171008             9150    0.896356              4094294                4094294\n",
-      "8            General finding of observation of patient          118222006             9149    0.896258              4041283                4041283\n",
-      "9                                      Finding by site          118234003             9120    0.893417              4042140                4042140\n",
-      "10                          General body state finding           82832008             9080    0.889498              4221108                4221108\n",
-      "11                      Temperature-associated finding          301343009             8769    0.859032              4103474                4103474\n",
-      "12                                               Fever          386661006             8650    0.847375               437663                 437663\n",
-      "13              Body temperature above reference range           50177009             8650    0.847375              4178904                4178904\n",
-      "14                                 Vital signs finding          118227000             8650    0.847375              4042138                4042138\n",
-      "15                            Body temperature finding          105723007             8650    0.847375              4022230                4022230\n",
-      "16                           Abnormal body temperature          123979008             8650    0.847375              4047791                4047791\n",
-      "17                                 Respiratory finding          106048009             7755    0.759698              4024567                4024567\n",
-      "18                              Finding of body region          301857004             7115    0.697002              4199402                4199402\n",
-      "19                                Neurological finding          102957003             6659    0.652332              4011630                4011630\n",
-      "20                      Sensory nervous system finding          106147001             6659    0.652332              4024013                4024013\n",
-      "21                        Finding of sensation by site          699697007             6657    0.652136             44783587               44783587\n",
-      "22                                               Cough           49727002             6596    0.646160               254761                 254761\n",
-      "23                        Respiratory function finding          365852007             6596    0.646160              4267789                4267789\n",
-      "24                     Finding of head and neck region          118254002             6423    0.629212               255919                 255919\n",
-      "25                                        Head finding          406122000             6391    0.626078              4247371                4247371\n",
-      "26                            Digestive system finding          386617003             5953    0.583170              4302537                4302537\n",
-      "27                        Mouth and/or pharynx finding          249376008             5619    0.550451              4091363                4091363\n",
-      "28                              Finding of head region          298364001             5194    0.508817              4182161                4182161\n",
-      "29                             Finding of mouth region          423066003             4895    0.479526              4307122                4307122\n",
-      "30                                 Oral cavity finding          116337000             4895    0.479526              4022570                4022570\n",
-      "31                                       Loss of taste           36955009             4893    0.479330              4289517                4289517\n",
-      "32                           Finding of sense of taste           76489005             4893    0.479330              4296465                4296465\n",
-      "33                    General problem AND/OR complaint          105721009             3776    0.369906              4022830                4022830\n",
-      "34                                   Metabolic finding          106089007             3776    0.369906               432455                 432455\n",
-      "35                                             Fatigue           84229001             3776    0.369906              4223659                4223659\n",
-      "36                          Energy and stamina finding          359752005             3776    0.369906              4230389                4230389\n",
-      "37                          General well-being finding          365275006             3776    0.369906              4272867                4272867\n",
-      "38                                                Pain           22253000             3559    0.348648              4329041                4329041\n",
-      "39                            Pain / sensation finding          276435006             3559    0.348648              4170962                4170962\n",
-      "40                     Pain finding at anatomical site          279001004             3555    0.348256              4132926                4132926\n",
-      "41                                      Sputum finding          248595008             3233    0.316712              4089228                4089228\n",
-      "42     Finding reported by subject or history provider          418799008             2040    0.199843              4303401                4303401\n",
-      "43                        Ear, nose and throat finding          297268004             1992    0.195141              4178545                4178545\n",
-      "44                              Finding of respiration          301282008             1990    0.194945              4115386                4115386\n",
-      "45                       Ease of respiration - finding          366139009             1990    0.194945              4271505                4271505\n",
-      "46                                Difficulty breathing          230145002             1990    0.194945              4041664                4041664\n",
-      "47                     Upper respiratory tract finding          301186004             1967    0.192692              4103320                4103320\n",
-      "48                      General finding of soft tissue          248402002             1797    0.176038              4093991                4093991\n",
-      "49                                             Dyspnea          267036007             1624    0.159091               312437                 312437\n",
-      "50                       Finding of sound of breathing          301285005             1624    0.159091              4115387                4115387\n",
-      "51                                            Wheezing           56018004             1624    0.159091               314754                 314754\n",
-      "52               Finding related to respiratory sounds          106051002             1624    0.159091              4021770                4021770\n",
-      "53                             Musculoskeletal finding          106028002             1487    0.145670               135930                 135930\n",
-      "54                                       Joint finding          118952005             1472    0.144201                77960                  77960\n",
-      "55                                  Pharyngeal finding          116338005             1447    0.141752              4022571                4022571\n",
-      "56                                Musculoskeletal pain          279069000             1445    0.141556              4150129                4150129\n",
-      "57                                      Muscle finding          106030000             1445    0.141556              4024566                4024566\n",
-      "58                                         Muscle pain           68962001             1445    0.141556               442752                 442752\n",
-      "59                                          Joint pain           57676002             1445    0.141556                77074                  77074\n",
-      "60                       Finding of sensation of joint          298249004             1445    0.141556              4179167                4179167\n",
-      "61             Finding of sensation of skeletal muscle          298287007             1445    0.141556              4184117                4184117\n",
-      "62                              Finding of neck region          298378000             1379    0.135090              4184252                4184252\n",
-      "63                         Pain of digestive structure          301362007             1376    0.134796              4116809                4116809\n",
-      "64                        Pain of head and neck region          301365009             1376    0.134796              4116810                4116810\n",
-      "65                                      Pain in throat          162397003             1376    0.134796               259153                 259153\n",
-      "66                                           Neck pain           81680005             1376    0.134796                24134                  24134\n",
-      "67                     Finding of sensation of pharynx          300275004             1376    0.134796              4114487                4114487\n",
-      "68                       Pain of respiratory structure          301355003             1376    0.134796              4115406                4115406\n",
-      "69                                 Sore throat symptom          267102003             1376    0.134796              4147326                4147326\n",
-      "70                                            Headache           25064002             1304    0.127743               378253                 378253\n",
-      "71                                 Shivering or rigors          248456009             1079    0.105701              4087630                4087630\n",
-      "72                                               Chill           43724002             1079    0.105701               434490                 434490\n",
-      "73                               Disorder by body site          123946008              914    0.089538              4047779                4047779\n",
-      "74                             Disorder of body system          362965005              899    0.088068              4180628                4180628\n",
-      "75                               Inflammatory disorder          128139000              813    0.079643              4027384                4027384\n",
-      "76   Inflammation of specific body structures or ti...          363170005              813    0.079643              4180169                4180169\n",
-      "77               Inflammation of specific body systems          363171009              813    0.079643              4178818                4178818\n",
-      "78                      Disorder of respiratory system           50043002              783    0.076705               320136                 320136\n",
-      "79     Inflammatory disorder of the respiratory system          373405005              781    0.076509              4162282                4162282\n",
-      "80      Inflammatory disorder of the respiratory tract          363180009              781    0.076509              4180170                4180170\n",
-      "81        Functional finding of gastrointestinal tract          300358007              766    0.075039              4101343                4101343\n",
-      "82                      Gastrointestinal tract finding          386618008              766    0.075039              4304916                4304916\n",
-      "83                Inflammation of specific body organs          363169009              730    0.071513              4181063                4181063\n",
-      "84                          Finding of trunk structure          302292003              609    0.059659              4117930                4117930\n",
-      "85                                   Disorder of trunk          128121009              609    0.059659              4028071                4028071\n",
-      "86                              Finding of upper trunk          609623002              608    0.059561             43531057               43531057\n",
-      "87               Disorder of thoracic segment of trunk          609622007              608    0.059561             43531056               43531056\n",
-      "88                         Finding of region of thorax          298705000              608    0.059561              4185503                4185503\n",
-      "89                                  Disorder of thorax          118946009              608    0.059561              4043346                4043346\n",
-      "90                Disorder of lower respiratory system          128272009              607    0.059463              4027553                4027553\n",
-      "91                     Lower respiratory tract finding          301226008              607    0.059463              4115259                4115259\n",
-      "92    Inflammatory disorder of lower respiratory tract          128997002              607    0.059463              4028876                4028876\n",
-      "93                            Viscus structure finding          406123005              583    0.057112              4227253                4227253\n",
-      "94                                            Distress           69328002              582    0.057014              4239819                4239819\n",
-      "95                                        Lung finding          301230006              582    0.057014              4115260                4115260\n",
-      "96                               Disorder of blood gas          238157005              582    0.057014              4080012                4080012\n",
-      "97                      General clinical state finding          365860008              582    0.057014               432453                 432453\n",
-      "98                                    Disorder of lung           19829001              582    0.057014               257907                 257907\n",
-      "99                                Respiratory distress          271825005              582    0.057014              4158346                4158346\n",
-      "100                                  Metabolic disease           75934005              582    0.057014               436670                 436670\n",
-      "101                                 Lung consolidation           95436008              582    0.057014              4318404                4318404\n",
-      "102                                          Pneumonia          233604007              582    0.057014               255848                 255848\n",
-      "103                                          Hypoxemia          389087006              582    0.057014               437390                 437390\n",
-      "104                                        Pneumonitis          205237003              582    0.057014               253506                 253506\n",
-      "105                                    Finding of face          301310005              519    0.050842              4103352                4103352\n",
-      "106                                       Nose finding          118237005              518    0.050745              4042142                4042142\n",
-      "107                               Nasal airway finding          249342004              518    0.050745              4096565                4096565\n",
-      "108                                      Acute disease            2704003              516    0.050549               443883                 443883\n",
-      "109                                   Nasal congestion           68235000              513    0.050255              4195085                4195085\n",
-      "110                                Finding of vomiting          300359004              443    0.043397              4101344                4101344\n",
-      "111                                   Vomiting symptom          249497008              443    0.043397              4096715                4096715\n",
-      "112                                             Nausea          422587007              443    0.043397                31967                  31967\n",
-      "113                            Disorder of soft tissue           19660004              373    0.036540               376208                 376208\n",
-      "114                                 Soft tissue lesion          239953001              361    0.035364              4344497                4344497\n",
-      "115                             Cardiovascular finding          106063007              355    0.034777              4023995                4023995\n",
-      "116                  Disorder of cardiovascular system           49601007              355    0.034777               134057                 134057\n",
-      "117                                           Diarrhea           62315008              350    0.034287               196523                 196523\n",
-      "118                             Altered bowel function           88111009              350    0.034287              4338120                4338120\n",
-      "119                            Finding of bowel action          366256008              350    0.034287              4182633                4182633\n",
-      "120                              Finding of defecation          300373008              350    0.034287              4113563                4113563\n",
-      "121                                   Diarrhea symptom          267060006              350    0.034287              4145808                4145808\n",
-      "122                                  Digestive symptom          308925008              350    0.034287               192731                 192731\n",
-      "123                                  Vascular disorder           27550009              334    0.032719               443784                 443784\n",
-      "124                               Blood vessel finding           21829004              334    0.032719              4071689                4071689\n",
-      "125             Acute disease of cardiovascular system          128487001              331    0.032426              4028367                4028367\n",
-      "126                          Acute respiratory disease          111273006              293    0.028703              4006969                4006969\n",
-      "127                      Ear, nose and throat disorder          232208008              208    0.020376              4339468                4339468\n",
-      "128                                Respiratory failure          409622000              205    0.020082              4256228                4256228\n",
-      "129                          Acute respiratory failure           65710008              205    0.020082               319049                 319049\n",
-      "130                          Respiratory insufficiency          409623005              205    0.020082               318459                 318459\n",
-      "131   Inflammatory disorder of upper respiratory tract          129134004              177    0.017339              4043671                4043671\n",
-      "132               Disorder of upper respiratory system          201060008              177    0.017339               254068                 254068\n",
-      "133                         Thrombosis of blood vessel          439129009              169    0.016556              4208466                4208466\n",
-      "134                                     Venous finding          248727005              169    0.016556              4095634                4095634\n",
-      "135                             Deep venous thrombosis          128053003              169    0.016556              4133004                4133004\n",
-      "136                                         Thrombosis          439127006              169    0.016556              4231363                4231363\n",
-      "137                       Acute deep venous thrombosis    132281000119108              169    0.016556             44782746               44782746\n",
-      "138                                  Venous thrombosis          111293003              169    0.016556               444247                 444247\n",
-      "139                                   Disorder of vein           90507008              169    0.016556              4234997                4234997\n",
-      "140                        Respiratory tract infection          275498002              164    0.016066              4170143                4170143\n",
-      "141                        Upper respiratory infection           54150009              164    0.016066              4181583                4181583\n",
-      "142                                  Infection by site          301810000              164    0.016066              4200532                4200532\n",
-      "143                           Pulmonary artery finding          251039005              162    0.015870              4108173                4108173\n",
-      "144                                  Arterial embolism           54687002              162    0.015870               312339                 312339\n",
-      "145                                           Embolism          414086009              162    0.015870              4185607                4185607\n",
-      "146                                   Arterial finding          248718009              162    0.015870              4095631                4095631\n",
-      "147                Disorder of blood vessels of thorax          373434004              162    0.015870              4190192                4190192\n",
-      "148                                 Disorder of artery          359557001              162    0.015870               321887                 321887\n",
-      "149                  Disorder of pulmonary circulation           39785005              162    0.015870               433208                 433208\n",
-      "150                             Trunk arterial embolus          312593004              162    0.015870              4194610                4194610\n",
-      "151                           Acute pulmonary embolism          706870000              162    0.015870             45768439               45768439\n",
-      "152                                 Pulmonary embolism           59282003              162    0.015870               440417                 440417\n",
-      "153                        Disorder of immune function          414029004              145    0.014205               440371                 440371\n",
-      "154                                   Disorder of head          118934005              141    0.013813              4042836                4042836\n",
-      "155                            Viral infection by site          312130009              137    0.013421              4207186                4207186\n",
-      "156            Viral upper respiratory tract infection          281794004              137    0.013421              4085100                4085100\n",
-      "157                        Viral respiratory infection          312133006              137    0.013421              4193169                4193169\n",
-      "158                                             Sepsis           91302008              131    0.012833               132797                 132797\n",
-      "159                             Sepsis caused by virus          770349000              131    0.012833             36674642               36674642\n",
-      "160                         Organ dysfunction syndrome          238147009              131    0.012833              4080011                4080011\n",
-      "161                      Inflammatory disorder of head          363176004              125    0.012245              4181187                4181187\n",
-      "162                                          Sinusitis           36971009               88    0.008621              4283893                4283893\n",
-      "163                            Disorder of nasal sinus            7393007               88    0.008621               256440                 256440\n",
-      "164                               Facial sinus finding          271745005               88    0.008621              4158326                4158326\n",
-      "165                         Acute inflammatory disease          128482007               88    0.008621              4134294                4134294\n",
-      "166                        Disorder of digestive organ           76712006               86    0.008425              4297887                4297887\n",
-      "167                       Disorder of digestive system           53619000               86    0.008425              4201745                4201745\n",
-      "168                        Disorder of digestive tract           84410009               86    0.008425              4309188                4309188\n",
-      "169                  Disorder of upper digestive tract           50410009               86    0.008425              4198525                4198525\n",
-      "170                                Disorder of pharynx           75860007               83    0.008131                31057                  31057\n",
-      "171          Inflammatory disorder of digestive system          373407002               83    0.008131              4190185                4190185\n",
-      "172                      Infection of digestive system          312158001               83    0.008131              4193990                4193990\n",
-      "173              Infectious disease of digestive tract          128398001               83    0.008131              4134887                4134887\n",
-      "174                                        Pharyngitis          405737000               83    0.008131              4226263                4226263\n",
-      "175           Inflammatory disorder of digestive tract          128999004               83    0.008131              4043371                4043371\n",
-      "176                              Infective pharyngitis          312422001               83    0.008131              4193318                4193318\n",
-      "177                         Infective disorder of head          363166002               81    0.007935              4176944                4176944\n",
-      "178                                    Viral sinusitis          444814009               78    0.007641             40481087               40481087\n",
-      "179  Traumatic and/or non-traumatic injury of anato...          609411003               73    0.007151             43530877               43530877\n",
-      "180              Traumatic AND/OR non-traumatic injury          417163006               73    0.007151               432795                 432795\n",
-      "181                                   Traumatic injury          417746004               71    0.006955               440921                 440921\n",
-      "182                           Traumatic injury by site          609336008               71    0.006955             43530815               43530815\n",
-      "183                         Skin AND/OR mucosa finding          415531008               65    0.006368              4212577                4212577\n",
-      "184                           Acute infectious disease           63171007               62    0.006074              4271450                4271450\n",
-      "185                  Acute upper respiratory infection           54398005               62    0.006074               257011                 257011\n",
-      "186                                         Hemoptysis           66857006               62    0.006074               261687                 261687\n",
-      "187                       Acute respiratory infections          195647007               62    0.006074              4112341                4112341\n",
-      "188                                           Bleeding          131148009               62    0.006074               437312                 437312\n",
-      "189                            Acute viral pharyngitis          195662009               59    0.005780              4112343                4112343\n",
-      "190                                Acute viral disease          409631000               59    0.005780              4252853                4252853\n",
-      "191                    Acute digestive system disorder          127321000               59    0.005780              4132552                4132552\n",
-      "192                                    Mucosal finding          128145008               59    0.005780              4028076                4028076\n",
-      "193                                  Acute pharyngitis          363746003               59    0.005780                25297                  25297\n",
-      "194                                  Viral pharyngitis            1532007               59    0.005780              4035987                4035987\n",
-      "195             Viral infection of the digestive tract          312131008               59    0.005780              4193875                4193875\n",
-      "196                              Disorder of extremity          128605003               56    0.005486               133468                 133468\n",
-      "197                          Finding of limb structure          302293008               56    0.005486               138239                 138239\n",
-      "198                    Passive conjunctival congestion          246677007               53    0.005192              4080695                4080695\n",
-      "199                               Conjunctival finding          246875002               53    0.005192              4080857                4080857\n",
-      "200                                      Globe finding          246915008               53    0.005192              4080992                4080992\n",
-      "201                             Ocular surface finding          246869006               53    0.005192              4087936                4087936\n",
-      "202                               Eye / vision finding          118235002               53    0.005192              4038502                4038502\n",
-      "203                                      Orbit finding          246912006               53    0.005192              4087949                4087949\n",
-      "204                           Anterior segment finding          418727003               53    0.005192              4303380                4303380\n",
-      "205                 Disorder of musculoskeletal system             928000               49    0.004800              4244662                4244662\n",
-      "206                        Disorder of skeletal system           88230002               48    0.004702              4339410                4339410\n",
-      "207                   Injury of musculoskeletal system          105606008               46    0.004506              4022201                4022201\n",
-      "208                           Disorder of joint region          785875003               39    0.003821             37206233               37206233\n",
-      "209                      Traumatic injury due to event          419945001               37    0.003625               439215                 439215\n",
-      "210                        Disorder of lower extremity          118937003               36    0.003527               193460                 193460\n",
-      "211                              Finding of lower limb          116312005               36    0.003527              4022922                4022922\n",
-      "212                                Injury by mechanism          282745002               36    0.003527              4154161                4154161\n",
-      "213                       Bacterial infectious disease           87628006               35    0.003429               432545                 432545\n",
-      "214                          Injury of lower extremity          127279002               34    0.003331              4130852                4130852\n",
-      "215                                 Middle ear finding          300162007               33    0.003233              4101079                4101079\n",
-      "216                                       Otitis media           65363002               33    0.003233               372328                 372328\n",
-      "217                           Ear and auditory finding          118236001               33    0.003233              4042141                4042141\n",
-      "218                             Disorder of middle ear           68996008               33    0.003233               374364                 374364\n",
-      "219                                        Ear finding          247234006               33    0.003233              4082416                4082416\n",
-      "220                                    Disorder of ear           25906001               33    0.003233               378161                 378161\n",
-      "221                                             Otitis           43275000               33    0.003233              4183452                4183452\n",
-      "222                        Disorder of free lower limb          700012005               33    0.003233             44782620               44782620\n",
-      "223                        Disorder of auditory system          362966006               33    0.003233              4176644                4176644\n",
-      "224                                        Arthropathy          399269003               32    0.003135                73553                  73553\n",
-      "225                          Injury of free lower limb          700010002               31    0.003037             44784105               44784105\n",
-      "226                              Traumatic arthropathy           58188004               30    0.002939                74124                  74124\n",
-      "227                                 Soft tissue injury          282026002               30    0.002939              4083964                4083964\n",
-      "228                      Disorder of connective tissue          105969002               30    0.002939               253549                 253549\n",
-      "229     Musculoskeletal and connective tissue disorder          312225001               29    0.002841              4208786                4208786\n",
-      "230                                    Lesion of joint          298149009               28    0.002743              4179141                4179141\n",
-      "231                           Finding of ankle or foot          419518009               27    0.002645              4305027                4305027\n",
-      "232                    Bacterial respiratory infection          312117008               27    0.002645              4207184                4207184\n",
-      "233                                   Ligament finding          250132005               27    0.002645              4094284                4094284\n",
-      "234              Bacterial upper respiratory infection          312118003               27    0.002645              4207185                4207185\n",
-      "235                               Disorder of ligament           60492000               27    0.002645               442628                 442628\n",
-      "236                        Bacterial infection by site          301811001               27    0.002645              4200533                4200533\n",
-      "237                                       Joint injury          125610000               26    0.002547              4054054                4054054\n",
-      "238                          Tracheobronchial disorder          233776003               26    0.002547               252662                 252662\n",
-      "239                        Injury of connective tissue          385424001               26    0.002547              4300157                4300157\n",
-      "240                                  Bronchial finding          301229001               26    0.002547              4116777                4116777\n",
-      "241                                   Acute bronchitis           10509002               26    0.002547               260139                 260139\n",
-      "242                                    Sprain of joint          105611005               26    0.002547              4023316                4023316\n",
-      "243                                    Ligament injury          263126002               26    0.002547              4136694                4136694\n",
-      "244                                 Sprain of ligament          398878007               26    0.002547              4160875                4160875\n",
-      "245                                         Bronchitis           32398004               26    0.002547               256451                 256451\n",
-      "246                               Disorder of bronchus           41427001               26    0.002547               260131                 260131\n",
-      "247                   Streptococcal infectious disease           85769006               24    0.002351               437779                 437779\n",
-      "248         Bacterial infection of the digestive tract          312129004               24    0.002351              4193874                4193874\n",
-      "249              Disease due to Gram-positive bacteria          371582002               24    0.002351              4161193                4161193\n",
-      "250                          Streptococcal sore throat           43878008               24    0.002351                28060                  28060\n",
-      "251                            Finding of ankle region          116315007               24    0.002351              4023577                4023577\n",
-      "252                                    Injury of ankle          125603006               24    0.002351                77162                  77162\n",
-      "253                                  Disorder of ankle          128138008               24    0.002351                78831                  78831\n",
-      "254                Disease due to Gram-positive coccus          408637006               24    0.002351              4248801                4248801\n",
-      "255                       Finding related to pregnancy          118185001               23    0.002253               444094                 444094\n",
-      "256                                           Pregnant           77386006               23    0.002253              4299535                4299535\n",
-      "257                                   Normal pregnancy           72892002               23    0.002253              4217975                4217975\n",
-      "258       Pregnancy, childbirth and puerperium finding          248982007               23    0.002253              4088927                4088927\n",
-      "259                                Ankle joint finding          299413005               21    0.002057               443357                 443357\n",
-      "260                            Disorder of ankle joint          428776005               21    0.002057               443583                 443583\n",
-      "261     Traumatic arthropathy of the ankle and/or foot          201938008               21    0.002057                75620                  75620\n",
-      "262                        Traumatic arthropathy-ankle          201954006               21    0.002057              4114605                4114605\n",
-      "263            Lesion of ligaments of the ankle region          240019006               21    0.002057              4344271                4344271\n",
-      "264                        Sprain of ankle and/or foot          209529003               21    0.002057              4016673                4016673\n",
-      "265             Disorder of joint of ankle and/or foot          442246002               21    0.002057             40482662               40482662\n",
-      "266           Traumatic arthropathy of lower extremity          373575008               21    0.002057              4189458                4189458\n",
-      "267                                    Sprain of ankle           44465007               21    0.002057                81151                  81151\n",
-      "268                   Sprain of ligament of lower limb          281599007               21    0.002057              4105866                4105866\n",
-      "269                                   Fracture of bone          125605004               20    0.001959                75053                  75053\n",
-      "270               Cardiovascular measurement - finding          366157005               20    0.001959              4277352                4277352\n",
-      "271                                        Bone injury          284003005               20    0.001959              4154739                4154739\n",
-      "272                                   Disorder of bone           76069003               20    0.001959                75909                  75909\n",
-      "273                              Finding of upper limb          116307009               20    0.001959              4020346                4020346\n",
-      "274                                       Bone finding          118953000               20    0.001959              4042505                4042505\n",
-      "275                        Disorder of upper extremity          118947000               20    0.001959              4042503                4042503\n",
-      "276                              Hypertensive disorder           38341003               20    0.001959               316866                 316866\n",
-      "277                          Injury of upper extremity          127278005               19    0.001861              4130851                4130851\n",
-      "278                             Essential hypertension           59621000               18    0.001763               320128                 320128\n",
-      "279                         Hypersensitivity condition          473010000               16    0.001567             43021226               43021226\n",
-      "280                                 Allergic condition          473011001               14    0.001371             43021227               43021227\n",
-      "281                         Disorder of nervous system          118940003               12    0.001176               376337                 376337\n",
-      "282                     Central nervous system finding          246556002               12    0.001176              4086181                4086181\n",
-      "283             Disorder of the central nervous system           23853001               12    0.001176               376106                 376106\n",
-      "284                             Fracture of upper limb           23406007               12    0.001176              4050747                4050747\n",
-      "285                      Finding of bone of upper limb          298756009               12    0.001176              4186164                4186164\n",
-      "286                                       Complication          116223007               12    0.001176               433128                 433128\n",
-      "287                                         Open wound          125643001               11    0.001078               444187                 444187\n",
-      "288                                              Wound          416462003               11    0.001078              4168335                4168335\n",
-      "289                                   Finding of brain          299718000               11    0.001078              4101796                4101796\n",
-      "290                                  Disorder of brain           81308009               11    0.001078               372887                 372887\n",
-      "291                                      Wound finding          225552003               11    0.001078              4021667                4021667\n",
-      "292                                Laceration - injury          312608009               11    0.001078               443419                 443419\n",
-      "293                               Head and neck injury          282749008               11    0.001078              4154162                4154162\n",
-      "294                            Finding of wrist region          116310002                9    0.000882              4020347                4020347\n",
-      "295           Traumatic arthropathy of upper extremity          373574007                9    0.000882              4162433                4162433\n",
-      "296                        Traumatic arthropathy-wrist          201946009                9    0.000882              4116594                4116594\n",
-      "297                                 Open wound of limb          105616000                9    0.000882              4023317                4023317\n",
-      "298                                  Disorder of wrist          128130001                9    0.000882              4028074                4028074\n",
-      "299                                       Chronic pain           82423001                8    0.000784               436096                 436096\n",
-      "300                          Hypersensitivity reaction          421961002                8    0.000784              4223616                4223616\n",
-      "301                                   Adverse reaction          281647001                8    0.000784              4105886                4105886\n",
-      "302                                    Chronic disease           27624003                8    0.000784               443783                 443783\n",
-      "303                                   Disorder of face          118930001                8    0.000784              4042835                4042835\n",
-      "304                                     Injury of head           82271004                8    0.000784               375415                 375415\n",
-      "305                            Acute allergic reaction          241929008                8    0.000784              4084167                4084167\n",
-      "306                                  Allergic reaction          419076005                8    0.000784             40589905               40589905\n",
-      "307                                Neurological lesion          299735001                7    0.000686              4103662                4103662\n",
-      "308                           Open wound of lower limb           26947005                7    0.000686              4097962                4097962\n",
-      "309                    Disorder of soft tissue of limb          280134004                7    0.000686              4090615                4090615\n",
-      "310                           Laceration of lower limb          283357002                7    0.000686              4152960                4152960\n",
-      "311                                       Skin finding          106076001                6    0.000588               141960                 141960\n",
-      "312                                Intracranial injury          127296001                6    0.000588               437409                 437409\n",
-      "313                   Injury of central nervous system          128126004                6    0.000588              4134439                4134439\n",
-      "314                           Injury of nervous system          128239009                6    0.000588              4134134                4134134\n",
-      "315                               Disorder of the nose           89488007                6    0.000588              4229909                4229909\n",
-      "316            Inflammatory disease of mucous membrane           95361005                6    0.000588               432661                 432661\n",
-      "317                     IgE-mediated allergic disorder          422076005                6    0.000588              4223759                4223759\n",
-      "318                    Disorder of soft tissue of head          280131007                6    0.000588              4090614                4090614\n",
-      "319                                  Allergic disorder          781474001                6    0.000588             36683564               36683564\n",
-      "320                       Integumentary system finding          106077005                6    0.000588               444112                 444112\n",
-      "321        Disorder of skin and/or subcutaneous tissue           80659006                6    0.000588               200174                 200174\n",
-      "322  Traumatic brain injury with no loss of conscio...          127302008                6    0.000588              4133715                4133715\n",
-      "323                             Disorder of integument          128598002                6    0.000588              4028387                4028387\n",
-      "324              Disorder of soft tissue of upper limb          280135003                6    0.000588              4090616                4090616\n",
-      "325                                    Cardiac finding          301095005                6    0.000588              4103183                4103183\n",
-      "326                                    Lesion of brain          301766008                6    0.000588              4200516                4200516\n",
-      "327                             Traumatic brain injury          127295002                6    0.000588              4132546                4132546\n",
-      "328                Concussion injury of body structure          708540005                6    0.000588             45769811               45769811\n",
-      "329                         Concussion injury of brain          110030002                6    0.000588              4001336                4001336\n",
-      "330                                 Evaluation finding          441742003                6    0.000588             40480457               40480457\n",
-      "331           Concussion with no loss of consciousness           62106007                6    0.000588               378001                 378001\n",
-      "332                                           Rhinitis           70076002                6    0.000588              4320791                4320791\n",
-      "333                        Disorder of mucous membrane           95351003                6    0.000588              4318379                4318379\n",
-      "334        Traumatic AND/OR non-traumatic brain injury          127294003                6    0.000588              4133611                4133611\n",
-      "335                                   Disorder of skin           95320005                6    0.000588              4317258                4317258\n",
-      "336                        Perennial allergic rhinitis          446096008                6    0.000588             40486433               40486433\n",
-      "337                                  Allergic rhinitis           61582004                6    0.000588               257007                 257007\n",
-      "338                   Disorder of nose and nasopharynx          232339008                6    0.000588              4049222                4049222\n",
-      "339                               Nasal mucosa finding          249353005                6    0.000588               442983                 442983\n",
-      "340                             Fracture of lower limb           46866001                6    0.000588              4187096                4187096\n",
-      "341      Immune hypersensitivity disorder by mechanism          427439005                6    0.000588              4141833                4141833\n",
-      "342                            Disorder of mediastinum           49483002                6    0.000588               440142                 440142\n",
-      "343                                      Heart disease           56265001                6    0.000588               321588                 321588\n",
-      "344                                Mediastinal finding          301296002                6    0.000588              4115390                4115390\n",
-      "345              Atopic IgE-mediated allergic disorder          421871004                6    0.000588              4223595                4223595\n",
-      "346                             Developmental disorder            5294002                6    0.000588               435244                 435244\n",
-      "347                      Acquired coagulation disorder          234466008                5    0.000490              4120613                4120613\n",
-      "348                          Sprain of upper extremity          123536004                5    0.000490              4048512                4048512\n",
-      "349                               Fracture of clavicle           58150001                5    0.000490              4237458                4237458\n",
-      "350                        Sprain of wrist and/or hand          209436000                5    0.000490              4018956                4018956\n",
-      "351                         Blood coagulation disorder           64779008                5    0.000490               432585                 432585\n",
-      "352                      Disorder of hemostatic system          362970003                5    0.000490              4179872                4179872\n",
-      "353                                    Injury of wrist          125598003                5    0.000490               444129                 444129\n",
-      "354                            Disorder of wrist joint          428107009                5    0.000490              4323193                4323193\n",
-      "355                                    Sprain of wrist           70704007                5    0.000490                78272                  78272\n",
-      "356                             Finding of wrist joint          298940007                5    0.000490              4181251                4181251\n",
-      "357                                    Clavicle injury          282760004                5    0.000490              4151199                4151199\n",
-      "358                                 Lesion of clavicle          298766001                5    0.000490              4186167                4186167\n",
-      "359                       Disorder of cardiac function          105981003                5    0.000490              4024552                4024552\n",
-      "360                               Fracture of shoulder  16250001000004107                5    0.000490             46270317               46270317\n",
-      "361                                      Heart failure           84114007                5    0.000490               316139                 316139\n",
-      "362                      Finding of clavicle structure          298761006                5    0.000490              4185643                4185643\n",
-      "363                               Dislocation of wrist          833335001                4    0.000392              3654438                3654438\n",
-      "364                         Finding of substance level          785671009                4    0.000392             37203927               37203927\n",
-      "365                               Subluxation of joint          263031003                4    0.000392              4134174                4134174\n",
-      "366                      Fracture dislocation of joint          263063009                4    0.000392              4134184                4134184\n",
-      "367        Fracture subluxation of joint of upper limb          281519006                4    0.000392              4085546                4085546\n",
-      "368                Fracture at wrist and/or hand level          208388003                4    0.000392              4015350                4015350\n",
-      "369                          Abnormal blood cell count          762656009                4    0.000392             42538830               42538830\n",
-      "370                       Hematopoietic system finding          106200001                4    0.000392              4021915                4021915\n",
-      "371                                  Injury of forearm          125597008                4    0.000392               134222                 134222\n",
-      "372                                               Burn          125666000                4    0.000392               442013                 442013\n",
-      "373                                Disorder of forearm          128132009                4    0.000392               136779                 136779\n",
-      "374                     Traumatic dislocation of joint          129156001                4    0.000392              4043679                4043679\n",
-      "375                                    Injury of thigh            7523003                4    0.000392               442564                 442564\n",
-      "376                    Chronic nervous system disorder          128283000                4    0.000392              4134145                4134145\n",
-      "377        Measurement finding outside reference range          442096005                4    0.000392             40481841               40481841\n",
-      "378          Measurement finding below reference range          442686002                4    0.000392             40484533               40484533\n",
-      "379                              Skin or mucosa lesion          247440002                4    0.000392              4083787                4083787\n",
-      "380  Lesion of skin and/or skin-associated mucous m...          714974000                4    0.000392             37018424               37018424\n",
-      "381                                             Anemia          271737000                4    0.000392               439777                 439777\n",
-      "382        Fracture dislocation of joint of upper limb          263073006                4    0.000392              4135097                4135097\n",
-      "383                            Protein level - finding          365799007                4    0.000392              4276572                4276572\n",
-      "384            Disorder of cellular component of blood          414022008                4    0.000392               443723                 443723\n",
-      "385                                   Finding of thigh          419003001                4    0.000392              4169466                4169466\n",
-      "386                                          Cytopenia           50820005                4    0.000392              4179922                4179922\n",
-      "387                                       Burn of skin          284196006                4    0.000392              4108467                4108467\n",
-      "388                                 RBC count abnormal          165427000                4    0.000392              4013518                4013518\n",
-      "389                               Subluxation of wrist          833334002                4    0.000392              3654437                3654437\n",
-      "390                                  Disorder of thigh          128135006                4    0.000392               444211                 444211\n",
-      "391                                        Skin lesion           95324001                4    0.000392              4316083                4316083\n",
-      "392                 Subluxation of joint of upper limb          263047001                4    0.000392              4135090                4135090\n",
-      "393                      Fracture subluxation of wrist          263102004                4    0.000392              4134304                4134304\n",
-      "394                                Laceration of thigh          283385000                4    0.000392              4152936                4152936\n",
-      "395                                      RBC count low          165423001                4    0.000392              4013842                4013842\n",
-      "396                                       Erythropenia           62574001                4    0.000392              4267432                4267432\n",
-      "397                                Measurement finding          118245000                4    0.000392              4041436                4041436\n",
-      "398                      Chronic inflammatory disorder          128294001                4    0.000392               444208                 444208\n",
-      "399                      Fracture subluxation of joint          263094009                4    0.000392              4136573                4136573\n",
-      "400  Complication of pregnancy, childbirth and/or t...          198609003                4    0.000392               435875                 435875\n",
-      "401                               Dislocation of joint          108367008                4    0.000392                74726                  74726\n",
-      "402                               Injury of integument          125592002                4    0.000392              4053826                4053826\n",
-      "403            Traumatic dislocation of joint of wrist          125618007                4    0.000392              4054058                4054058\n",
-      "404                 Dislocation of joint of upper limb          263017003                4    0.000392                75047                  75047\n",
-      "405                                  Chronic sinusitis           40055000                4    0.000392               257012                 257012\n",
-      "406                                     Hemoglobin low          165397008                4    0.000392              4013074                4013074\n",
-      "407              Chronic disease of respiratory system           17097001                4    0.000392              4063381                4063381\n",
-      "408                                Open wound of thigh          125659001                4    0.000392              4053602                4053602\n",
-      "409           Hemoglobin level outside reference range          441793007                4    0.000392             40480513               40480513\n",
-      "410                            Injury of pelvic girdle          700009007                3    0.000294             44782619               44782619\n",
-      "411                          Disorder of pelvic girdle          700011003                3    0.000294             44784106               44784106\n",
-      "412                             Finding of foot region          116316008                3    0.000294              4022924                4022924\n",
-      "413                                   Disease of mouth          118938008                3    0.000294              4042502                4042502\n",
-      "414                               Injury of hip region          125600009                3    0.000294               193666                 193666\n",
-      "415                                  Headache disorder          230461009                3    0.000294               375527                 375527\n",
-      "416                               Transformed migraine          427419006                3    0.000294              4141827                4141827\n",
-      "417                                    Impacted molars          196416002                3    0.000294              4055754                4055754\n",
-      "418                                     Injury of foot          125604000                3    0.000294               444130                 444130\n",
-      "419                                Bacterial sinusitis          703470001                3    0.000294             45766333               45766333\n",
-      "420                                     Injury of neck           90460009                3    0.000294                24818                  24818\n",
-      "421          Chronic intractable migraine without aura    124171000119105                3    0.000294             43530652               43530652\n",
-      "422                                Fracture of forearm           65966004                3    0.000294              4278672                4278672\n",
-      "423                          Anomaly of tooth position           81256000                3    0.000294               433243                 433243\n",
-      "424                                     Impacted tooth          235104008                3    0.000294              4123726                4123726\n",
-      "425                                           Migraine           37796009                3    0.000294               318736                 318736\n",
-      "426                              Migraine without aura           56097005                3    0.000294               378735                 378735\n",
-      "427                              Finding of hip region          116313000                3    0.000294               444220                 444220\n",
-      "428                     Disorder characterized by pain          373673007                3    0.000294              4160062                4160062\n",
-      "429                                 Laceration of foot          284551006                3    0.000294              4109685                4109685\n",
-      "430                                   Disorder of foot          118932009                3    0.000294               444090                 444090\n",
-      "431                                 Open wound of foot          125663008                3    0.000294              4054067                4054067\n",
-      "432           Chronic disease of cardiovascular system          128292002                3    0.000294              4028244                4028244\n",
-      "433                             Chronic brain syndrome           78689005                3    0.000294              4301371                4301371\n",
-      "434                                   Childhood asthma          233678006                3    0.000294              4051466                4051466\n",
-      "435                      Closed fracture of lower limb           52603002                3    0.000294              4199590                4199590\n",
-      "436                                      Tooth finding          278544002                3    0.000294              4132462                4132462\n",
-      "437     Disorder of teeth AND/OR supporting structures          105995000                3    0.000294               201603                 201603\n",
-      "438                                  Vascular headache          128187005                3    0.000294              4134454                4134454\n",
-      "439                          Acute bacterial sinusitis           75498004                3    0.000294              4294548                4294548\n",
-      "440                                             Asthma          195967001                3    0.000294               317009                 317009\n",
-      "441                             Closed fracture of hip          359817006                3    0.000294              4230399                4230399\n",
-      "442                      Disorder of tooth development          371136004                3    0.000294              4159157                4159157\n",
-      "443                   Refractory migraine without aura          423279000                3    0.000294               443616                 443616\n",
-      "444                   Pain of cardiovascular structure          301358001                3    0.000294              4115408                4115408\n",
-      "445                             Epidermal burn of skin          403190006                3    0.000294              4296204                4296204\n",
-      "446                                Refractory migraine          423894005                3    0.000294               443615                 443615\n",
-      "447                                    Disorder of hip          118935006                3    0.000294              4042501                4042501\n",
-      "448                                   Disorder of neck          118939000                3    0.000294              4042837                4042837\n",
-      "449                     Fracture of bone of hip region          700097003                3    0.000294             45763653               45763653\n",
-      "450  Disease of circulatory system complicating pre...          724497009                3    0.000294             37110290               37110290\n",
-      "451                            Whiplash injury to neck           39848009                3    0.000294              4218389                4218389\n",
-      "452                                     Tooth disorder          234947003                3    0.000294              4122115                4122115\n",
-      "453                                    Disorder of jaw           37156001                3    0.000294               435569                 435569\n",
-      "454                                    Closed fracture          423125000                3    0.000294              4307254                4307254\n",
-      "455                          Chronic headache disorder          431237007                3    0.000294               374639                 374639\n",
-      "456                                     Lesion of neck          298397000                3    0.000294              4185207                4185207\n",
-      "457                                    Acute sinusitis           15805002                3    0.000294               260123                 260123\n",
-      "458                                  Fracture of ankle           16114001                3    0.000294              4059173                4059173\n",
-      "459                          Arthropathy of knee joint          428724006                2    0.000196              4324765                4324765\n",
-      "460    Inflammation of skin and/or subcutaneous tissue          363168001                2    0.000196              4181062                4181062\n",
-      "461                                     Lesion of face          767811005                2    0.000196             35624868               35624868\n",
-      "462                                              Atopy          115665000                2    0.000196              4019380                4019380\n",
-      "463                                   Disorder of knee          128136007                2    0.000196              4134443                4134443\n",
-      "464                                 Laceration of head          428088000                2    0.000196              4179823                4179823\n",
-      "465                                 Disorder of tendon           68172002                2    0.000196               442264                 442264\n",
-      "466                        Neurodevelopmental disorder          700364009                2    0.000196             45771096               45771096\n",
-      "467  Perennial allergic rhinitis with seasonal vari...          232353008                2    0.000196              4048171                4048171\n",
-      "468                              Disorder of pregnancy          173300003                2    0.000196               439658                 439658\n",
-      "469                            Inflammatory dermatosis          703938007                2    0.000196             45766714               45766714\n",
-      "470                             Finding of knee region          116314006                2    0.000196              4022923                4022923\n",
-      "471           Disorders of attention and motor control          229712006                2    0.000196              4047120                4047120\n",
-      "472                                  Atopic dermatitis           24079001                2    0.000196               133834                 133834\n",
-      "473                     Open wound of head AND/OR neck          397180001                2    0.000196              4246695                4246695\n",
-      "474                                      Pre-eclampsia          398254007                2    0.000196               439393                 439393\n",
-      "475                                             Eczema           43116000                2    0.000196               133835                 133835\n",
-      "476                             Genetic predisposition           47708004                2    0.000196              4166231                4166231\n",
-      "477                                   Seizure disorder          128613002                2    0.000196              4029498                4029498\n",
-      "478                                       Chest injury          262525000                2    0.000196              4094683                4094683\n",
-      "479                     Propensity to adverse reaction          420134006                2    0.000196              4172024                4172024\n",
-      "480                                    Injury of trunk           48125009                2    0.000196               194526                 194526\n",
-      "481                            Seizure related finding          313287004                2    0.000196              4196708                4196708\n",
-      "482                                    Genetic finding          106221001                2    0.000196              4025367                4025367\n",
-      "483     Finding of functional performance and activity          248536006                2    0.000196              4089214                4089214\n",
-      "484           Developmental disorder of motor function          268674003                2    0.000196              4148091                4148091\n",
-      "485           Attention deficit hyperactivity disorder          406506008                2    0.000196               438409                 438409\n",
-      "486                           Laceration of upper limb          283366003                2    0.000196              4152932                4152932\n",
-      "487                                 Knee joint finding          299321000                2    0.000196              4100932                4100932\n",
-      "488                                     Injury of face          125593007                2    0.000196               444191                 444191\n",
-      "489                    Pregnancy with abortive outcome          363681007                2    0.000196             40539858               40539858\n",
-      "490                                 Open wound of head           38354005                2    0.000196              4243161                4243161\n",
-      "491                   Child attention deficit disorder          192127007                2    0.000196               440086                 440086\n",
-      "492                                 Open wound of face          210339009                2    0.000196              4049957                4049957\n",
-      "493                       Hypersensitivity disposition          609433001                2    0.000196             43530897               43530897\n",
-      "494                      Developmental mental disorder          129104009                2    0.000196              4043545                4043545\n",
-      "495                           Open wound of upper limb           81405006                2    0.000196              4216185                4216185\n",
-      "496                                            Seizure           91175000                2    0.000196               377091                 377091\n",
-      "497                         Cutaneous hypersensitivity           21626009                2    0.000196              4070025                4070025\n",
-      "498                                     Tendon finding          250133000                2    0.000196              4095203                4095203\n",
-      "499                                  Facial laceration          370247008                2    0.000196              4156265                4156265\n",
-      "500                     Pregnancy-induced hypertension           48194001                2    0.000196              4167493                4167493\n",
-      "501                        Laceration of head and neck          283358007                2    0.000196              4155030                4155030\n",
-      "502                                        Miscarriage           17369002                2    0.000196              4067106                4067106\n",
-      "503  Hypertension AND/OR vomiting complicating preg...          106005003                2    0.000196              4024560                4024560\n",
-      "504                                 Functional finding          118228005                2    0.000196              4041284                4041284\n",
-      "505                                    Mental disorder           74732009                2    0.000196               432586                 432586\n",
-      "506                     Miscarriage in first trimester           19169002                2    0.000196              4078393                4078393\n",
-      "507                                              Edema          267038008                1    0.000098               433595                 433595\n",
-      "508                  Finding of pelvic region of trunk          609625009                1    0.000098             43531059               43531059\n",
-      "509                                 Urogenital finding          118238000                1    0.000098              4041285                4041285\n",
-      "510                                  Cartilage finding          118954006                1    0.000098              4043349                4043349\n",
-      "511                                      Blighted ovum           35999006                1    0.000098              4262136                4262136\n",
-      "512                       Fracture of vertebral column           50448004                1    0.000098              4174520                4174520\n",
-      "513                                 Finding of abdomen          609624008                1    0.000098             43531058               43531058\n",
-      "514                                  Finding of pelvis          609626005                1    0.000098             43531060               43531060\n",
-      "515                             Injury of rotator cuff          718539004                1    0.000098             36713625               36713625\n",
-      "516       Traumatic injury of vertebral region of back          737566006                1    0.000098             42537893               42537893\n",
-      "517                         Finding of shoulder region          116308004                1    0.000098              4022449                4022449\n",
-      "518                               Disorder of shoulder          118944007                1    0.000098                77630                  77630\n",
-      "519                         Fracture of bones of trunk           65354004                1    0.000098              4279139                4279139\n",
-      "520                                    Finding of back          414252009                1    0.000098              4213101                4213101\n",
-      "521                                      Drug overdose           55680006                1    0.000098              4208104                4208104\n",
-      "522  Fracture of vertebral column with spinal cord ...            1734006                1    0.000098              4066995                4066995\n",
-      "523                 Disorder of pelvic region of trunk          609619005                1    0.000098             43531053               43531053\n",
-      "524                      Injury of intrathoracic organ          733217006                1    0.000098             37116489               37116489\n",
-      "525                           Injury of internal organ          105612003                1    0.000098               193631                 193631\n",
-      "526                        Lower urinary tract finding          106100005                1    0.000098              4021780                4021780\n",
-      "527                             Finding of hand region          116311003                1    0.000098                77358                  77358\n",
-      "528                                   Disorder of hand          118933004                1    0.000098                77635                  77635\n",
-      "529                       Structural disorder of heart          128599005                1    0.000098              4027255                4027255\n",
-      "530                Acute respiratory distress syndrome           67782005                1    0.000098              4195694                4195694\n",
-      "531         Finding of abdominopelvic segment of trunk          822987005                1    0.000098             37311678               37311678\n",
-      "532                  Soft tissue lesion of knee region          239999004                1    0.000098              4344027                4344027\n",
-      "533                          Disorder of urinary tract           41368006                1    0.000098               197331                 197331\n",
-      "534                             Urinary system finding          106098005                1    0.000098              4024000                4024000\n",
-      "535                           Vertebral column finding          119414006                1    0.000098              4002898                4002898\n",
-      "536                  Disorder of product of conception          128604004                1    0.000098              4029496                4029496\n",
-      "537        Disorder of abdominopelvic segment of trunk          822988000                1    0.000098             37311677               37311677\n",
-      "538                Central nervous system complication           87536007                1    0.000098               373087                 373087\n",
-      "539                          Interstitial lung disease          233703007                1    0.000098              4119786                4119786\n",
-      "540                                    Bladder finding          249585009                1    0.000098              4092881                4092881\n",
-      "541                             Partial thickness burn          403191005                1    0.000098              4296205                4296205\n",
-      "542                                Disorder of bladder           42643001                1    0.000098               201337                 201337\n",
-      "543                                 Laceration of hand          284549007                1    0.000098              4113008                4113008\n",
-      "544                                 Paralytic syndrome           29426003                1    0.000098               374377                 374377\n",
-      "545                           Finding of spinal region          298379008                1    0.000098              4182165                4182165\n",
-      "546            Complication occurring during pregnancy          609496007                1    0.000098             43530950               43530950\n",
-      "547                                Disorder of abdomen          118948005                1    0.000098               444089                 444089\n",
-      "548                                     Injury of hand          125599006                1    0.000098                80004                  80004\n",
-      "549                  Rupture of ligament of knee joint          263139003                1    0.000098              4134312                4134312\n",
-      "550                                     Edema of trunk          301867009                1    0.000098              4199409                4199409\n",
-      "551            Connective tissue disorder by body site          363044007                1    0.000098              4180645                4180645\n",
-      "552       Traumatic or non-traumatic rupture of tendon          415746003                1    0.000098              4215217                4215217\n",
-      "553                                    Pulmonary edema           19242006                1    0.000098              4078925                4078925\n",
-      "554                                    Injury of heart           86175003                1    0.000098              4311280                4311280\n",
-      "555                     Disorder of the urinary system          128606002                1    0.000098                75865                  75865\n",
-      "556                               Injury of chest wall           65978000                1    0.000098                75128                  75128\n",
-      "557                       Rupture of quadriceps tendon            6849006                1    0.000098               195632                 195632\n",
-      "558                           Tear of meniscus of knee          239720000                1    0.000098              4035415                4035415\n",
-      "559                            Abdominal organ finding          249561001                1    0.000098              4096864                4096864\n",
-      "560                                      Spinal injury          262521009                1    0.000098              4095850                4095850\n",
-      "561                    Abnormal products of conception           39804004                1    0.000098               436477                 436477\n",
-      "562                              Disorder of body wall          399986003                1    0.000098              4266188                4266188\n",
-      "563               Disorder of the genitourinary system           42030000                1    0.000098              4171379                4171379\n",
-      "564              Disorder of soft tissue of lower limb          280136002                1    0.000098              4093228                4093228\n",
-      "565                    Rupture of tendon of lower limb          281549008                1    0.000098              4084434                4084434\n",
-      "566                              Laceration of forearm          283371005                1    0.000098              4155034                4155034\n",
-      "567                                 Disorder of pelvis          609620004                1    0.000098             43531054               43531054\n",
-      "568                       Disorder of vertebral column          699699005                1    0.000098             44782549               44782549\n",
-      "569      Traumatic and/or non-traumatic injury of back          712893003                1    0.000098             37016775               37016775\n",
-      "570              Disorder of tendon of shoulder region           76318008                1    0.000098                79116                  79116\n",
-      "571                              Drug-related disorder           87858002                1    0.000098               444363                 444363\n",
-      "572                                   Ligament rupture          263134008                1    0.000098              4138286                4138286\n",
-      "573                             Finding of spinal cord          299733008                1    0.000098              4103661                4103661\n",
-      "574                             Eclampsia in pregnancy          198992004                1    0.000098               137613                 137613\n",
-      "575                           Disorder of rotator cuff          414033006                1    0.000098              4212887                4212887\n",
-      "576                                Spinal cord disease           48522003                1    0.000098               135526                 135526\n",
-      "577                Finding of structures of conception          289262005                1    0.000098              4128846                4128846\n",
-      "578                                     Injury of ribs          282770002                1    0.000098              4151202                4151202\n",
-      "579                                Finding of vertebra          298385001                1    0.000098              4185206                4185206\n",
-      "580                         Rupture of patellar tendon           30832001                1    0.000098              4149245                4149245\n",
-      "581                                          Eclampsia           15938005                1    0.000098               443700                 443700\n",
-      "582     Chronic paralysis due to lesion of spinal cord          698754002                1    0.000098             44782520               44782520\n",
-      "583                               Pelvic organ finding          700006000                1    0.000098             44784102               44784102\n",
-      "584                    Disorder characterized by edema          118654009                1    0.000098              4040388                4040388\n",
-      "585                                 Open wound of hand          125652005                1    0.000098              4129405                4129405\n",
-      "586                              Open wound of forearm          125649002                1    0.000098              4053599                4053599\n",
-      "587                Disorder of the lower urinary tract            7793005                1    0.000098              4301471                4301471\n",
-      "588                                 Spinal cord injury           90584004                1    0.000098              4235863                4235863\n",
-      "589                    Finding of urinary tract proper          249273002                1    0.000098              4091213                4091213\n",
-      "590             Paralysis due to lesion of spinal cord          372310001                1    0.000098              4157607                4157607\n",
-      "591      Inflammatory disorder of genitourinary system          373406006                1    0.000098              4159963                4159963\n",
-      "592                                           Cystitis           38822007                1    0.000098               195588                 195588\n",
-      "593                          Disorder of spinal region          410730009                1    0.000098              4260918                4260918\n",
-      "594                                 Cartilage disorder           50927007                1    0.000098              4178431                4178431\n",
-      "595                           Internal injury of chest           27817002                1    0.000098                74786                  74786\n",
-      "596                              Cardiovascular injury          282728007                1    0.000098              4152156                4152156\n",
-      "597   Injury of tendon of the rotator cuff of shoulder          307731004                1    0.000098              4146173                4146173\n",
-      "598                                   Disorder of back           33308003                1    0.000098               140190                 140190\n",
-      "599                                    Fracture of rib           33737001                1    0.000098              4142905                4142905\n",
-      "600                               Complete miscarriage          156073000                1    0.000098             40318618               40318618\n",
-      "the time taken to get cohort concept stats for condition_occurrence is 143.1435580253601s\n"
+      "                                 concept_name concept_code  count_in_cohort  prevalence  ancestor_concept_id  descendant_concept_id\n",
+      "0                   Disorder due to infection     40733004            10208    1.000000               432250                 432250\n",
+      "1                                     Disease     64572001            10208    1.000000               441840                4274025\n",
+      "2                                    COVID-19    840539006            10208    1.000000             37311061               37311061\n",
+      "3                               Viral disease     34014006            10208    1.000000               432250                 440029\n",
+      "4                       Coronavirus infection    186747009            10208    1.000000              4100065                 439676\n",
+      "5                Disease due to Coronaviridae     27619001            10208    1.000000              4100065                4100065\n",
+      "6                Disease due to Coronaviridae     27619001            10208    1.000000               440029                4100065\n",
+      "7                               Viral disease     34014006            10208    1.000000               440029                 440029\n",
+      "8                   Disorder due to infection     40733004            10208    1.000000              4274025                 432250\n",
+      "9                                    COVID-19    840539006            10208    1.000000               439676               37311061\n",
+      "10                      Coronavirus infection    186747009            10208    1.000000               439676                 439676\n",
+      "11                           Clinical finding    404684003            10208    1.000000               441840                 441840\n",
+      "12                                    Disease     64572001            10208    1.000000              4274025                4274025\n",
+      "13  Clinical history and observation findings    250171008             9150    0.896356               441840                4094294\n",
+      "14  Clinical history and observation findings    250171008             9150    0.896356              4094294                4094294\n",
+      "15  General finding of observation of patient    118222006             9149    0.896258              4041283                4041283\n",
+      "16  General finding of observation of patient    118222006             9149    0.896258              4094294                4041283\n",
+      "17                            Finding by site    118234003             9120    0.893417              4042140                4042140\n",
+      "18                            Finding by site    118234003             9120    0.893417               441840                4042140\n",
+      "19                 General body state finding     82832008             9080    0.889498              4221108                4221108\n",
+      "20                 General body state finding     82832008             9080    0.889498              4041283                4221108\n",
+      "21             Temperature-associated finding    301343009             8769    0.859032              4041283                4103474\n",
+      "22             Temperature-associated finding    301343009             8769    0.859032              4103474                4103474\n",
+      "23                   Body temperature finding    105723007             8650    0.847375              4103474                4022230\n",
+      "24     Body temperature above reference range     50177009             8650    0.847375              4047791                4178904\n",
+      "25                                      Fever    386661006             8650    0.847375               437663                 437663\n",
+      "26     Body temperature above reference range     50177009             8650    0.847375              4178904                4178904\n",
+      "27                  Abnormal body temperature    123979008             8650    0.847375              4022230                4047791\n",
+      "28                   Body temperature finding    105723007             8650    0.847375              4022230                4022230\n",
+      "29                        Vital signs finding    118227000             8650    0.847375              4042138                4042138\n",
+      "30                  Abnormal body temperature    123979008             8650    0.847375              4047791                4047791\n",
+      "31                                      Fever    386661006             8650    0.847375              4178904                 437663\n",
+      "32                        Vital signs finding    118227000             8650    0.847375              4221108                4042138\n",
+      "33                   Body temperature finding    105723007             8650    0.847375              4042138                4022230\n",
+      "34                        Respiratory finding    106048009             7755    0.759698              4024567                4024567\n",
+      "35                        Respiratory finding    106048009             7755    0.759698              4042140                4024567\n",
+      "36                     Finding of body region    301857004             7115    0.697002              4199402                4199402\n",
+      "37                     Finding of body region    301857004             7115    0.697002              4042140                4199402\n",
+      "38                       Neurological finding    102957003             6659    0.652332              4011630                4011630\n",
+      "39             Sensory nervous system finding    106147001             6659    0.652332              4024013                4024013\n",
+      "40             Sensory nervous system finding    106147001             6659    0.652332              4011630                4024013\n",
+      "41                       Neurological finding    102957003             6659    0.652332               441840                4011630\n",
+      "42               Finding of sensation by site    699697007             6657    0.652136             44783587               44783587\n",
+      "43               Finding of sensation by site    699697007             6657    0.652136              4042140               44783587\n",
+      "44               Finding of sensation by site    699697007             6657    0.652136              4024013               44783587\n",
+      "45                                      Cough     49727002             6596    0.646160               254761                 254761\n",
+      "46               Respiratory function finding    365852007             6596    0.646160              4024567                4267789\n",
+      "47                                      Cough     49727002             6596    0.646160              4267789                 254761\n",
+      "48               Respiratory function finding    365852007             6596    0.646160              4267789                4267789\n",
+      "49            Finding of head and neck region    118254002             6423    0.629212               255919                 255919\n",
+      "50            Finding of head and neck region    118254002             6423    0.629212              4199402                 255919\n",
+      "51                               Head finding    406122000             6391    0.626078              4247371                4247371\n",
+      "52                               Head finding    406122000             6391    0.626078               255919                4247371\n",
+      "53                   Digestive system finding    386617003             5953    0.583170              4302537                4302537\n",
+      "54                   Digestive system finding    386617003             5953    0.583170              4042140                4302537\n",
+      "55               Mouth and/or pharynx finding    249376008             5619    0.550451              4091363                4091363\n",
+      "56               Mouth and/or pharynx finding    249376008             5619    0.550451              4302537                4091363\n",
+      "57               Mouth and/or pharynx finding    249376008             5619    0.550451               255919                4091363\n",
+      "58                     Finding of head region    298364001             5194    0.508817              4182161                4182161\n",
+      "59                     Finding of head region    298364001             5194    0.508817              4247371                4182161\n",
+      "returned cohort_concept_hierarchy object converted to dict: {'hierarchy': [{'concept_id': 441840, 'concept_name': 'Clinical finding', 'concept_code': '404684003', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [], 'children': [{'concept_id': 4274025, 'concept_name': 'Disease', 'concept_code': '64572001', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [441840], 'children': [{'concept_id': 432250, 'concept_name': 'Disorder due to infection', 'concept_code': '40733004', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [4274025], 'children': [{'concept_id': 440029, 'concept_name': 'Viral disease', 'concept_code': '34014006', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [432250], 'children': [{'concept_id': 4100065, 'concept_name': 'Disease due to Coronaviridae', 'concept_code': '27619001', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [440029], 'children': [{'concept_id': 439676, 'concept_name': 'Coronavirus infection', 'concept_code': '186747009', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [4100065], 'children': [{'concept_id': 37311061, 'concept_name': 'COVID-19', 'concept_code': '840539006', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [439676], 'children': []}]}]}]}]}]}, {'concept_id': 4094294, 'concept_name': 'Clinical history and observation findings', 'concept_code': '250171008', 'metrics': {'union': {'count': 9150, 'prevalence': 0.8963557993730408}, 'cohorts': {'1': {'count': 9150, 
'prevalence': 0.8963557993730408}}}, 'parent_ids': [441840], 'children': [{'concept_id': 4041283, 'concept_name': 'General finding of observation of patient', 'concept_code': '118222006', 'metrics': {'union': {'count': 9149, 'prevalence': 0.8962578369905956}, 'cohorts': {'1': {'count': 9149, 'prevalence': 0.8962578369905956}}}, 'parent_ids': [4094294], 'children': [{'concept_id': 4221108, 'concept_name': 'General body state finding', 'concept_code': '82832008', 'metrics': {'union': {'count': 9080, 'prevalence': 0.8894984326018809}, 'cohorts': {'1': {'count': 9080, 'prevalence': 0.8894984326018809}}}, 'parent_ids': [4041283], 'children': [{'concept_id': 4042138, 'concept_name': 'Vital signs finding', 'concept_code': '118227000', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4221108], 'children': [{'concept_id': 4022230, 'concept_name': 'Body temperature finding', 'concept_code': '105723007', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4103474, 4042138], 'children': [{'concept_id': 4047791, 'concept_name': 'Abnormal body temperature', 'concept_code': '123979008', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4022230], 'children': [{'concept_id': 4178904, 'concept_name': 'Body temperature above reference range', 'concept_code': '50177009', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4047791], 'children': [{'concept_id': 437663, 'concept_name': 'Fever', 'concept_code': '386661006', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 
'parent_ids': [4178904], 'children': []}]}]}]}]}]}, {'concept_id': 4103474, 'concept_name': 'Temperature-associated finding', 'concept_code': '301343009', 'metrics': {'union': {'count': 8769, 'prevalence': 0.859032131661442}, 'cohorts': {'1': {'count': 8769, 'prevalence': 0.859032131661442}}}, 'parent_ids': [4041283], 'children': [{'concept_id': 4022230, 'concept_name': 'Body temperature finding', 'concept_code': '105723007', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4103474, 4042138], 'children': [{'concept_id': 4047791, 'concept_name': 'Abnormal body temperature', 'concept_code': '123979008', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4022230], 'children': [{'concept_id': 4178904, 'concept_name': 'Body temperature above reference range', 'concept_code': '50177009', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4047791], 'children': [{'concept_id': 437663, 'concept_name': 'Fever', 'concept_code': '386661006', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4178904], 'children': []}]}]}]}]}]}]}, {'concept_id': 4042140, 'concept_name': 'Finding by site', 'concept_code': '118234003', 'metrics': {'union': {'count': 9120, 'prevalence': 0.8934169278996865}, 'cohorts': {'1': {'count': 9120, 'prevalence': 0.8934169278996865}}}, 'parent_ids': [441840], 'children': [{'concept_id': 4024567, 'concept_name': 'Respiratory finding', 'concept_code': '106048009', 'metrics': {'union': {'count': 7755, 'prevalence': 0.759698275862069}, 'cohorts': {'1': {'count': 7755, 'prevalence': 0.759698275862069}}}, 'parent_ids': [4042140], 'children': 
[{'concept_id': 4267789, 'concept_name': 'Respiratory function finding', 'concept_code': '365852007', 'metrics': {'union': {'count': 6596, 'prevalence': 0.6461598746081505}, 'cohorts': {'1': {'count': 6596, 'prevalence': 0.6461598746081505}}}, 'parent_ids': [4024567], 'children': [{'concept_id': 254761, 'concept_name': 'Cough', 'concept_code': '49727002', 'metrics': {'union': {'count': 6596, 'prevalence': 0.6461598746081505}, 'cohorts': {'1': {'count': 6596, 'prevalence': 0.6461598746081505}}}, 'parent_ids': [4267789], 'children': []}]}]}, {'concept_id': 4199402, 'concept_name': 'Finding of body region', 'concept_code': '301857004', 'metrics': {'union': {'count': 7115, 'prevalence': 0.6970023510971787}, 'cohorts': {'1': {'count': 7115, 'prevalence': 0.6970023510971787}}}, 'parent_ids': [4042140], 'children': [{'concept_id': 255919, 'concept_name': 'Finding of head and neck region', 'concept_code': '118254002', 'metrics': {'union': {'count': 6423, 'prevalence': 0.6292123824451411}, 'cohorts': {'1': {'count': 6423, 'prevalence': 0.6292123824451411}}}, 'parent_ids': [4199402], 'children': [{'concept_id': 4247371, 'concept_name': 'Head finding', 'concept_code': '406122000', 'metrics': {'union': {'count': 6391, 'prevalence': 0.6260775862068966}, 'cohorts': {'1': {'count': 6391, 'prevalence': 0.6260775862068966}}}, 'parent_ids': [255919], 'children': [{'concept_id': 4182161, 'concept_name': 'Finding of head region', 'concept_code': '298364001', 'metrics': {'union': {'count': 5194, 'prevalence': 0.5088166144200627}, 'cohorts': {'1': {'count': 5194, 'prevalence': 0.5088166144200627}}}, 'parent_ids': [4247371], 'children': []}]}, {'concept_id': 4091363, 'concept_name': 'Mouth and/or pharynx finding', 'concept_code': '249376008', 'metrics': {'union': {'count': 5619, 'prevalence': 0.5504506269592476}, 'cohorts': {'1': {'count': 5619, 'prevalence': 0.5504506269592476}}}, 'parent_ids': [4302537, 255919], 'children': []}]}]}, {'concept_id': 44783587, 'concept_name': 'Finding of 
sensation by site', 'concept_code': '699697007', 'metrics': {'union': {'count': 6657, 'prevalence': 0.6521355799373041}, 'cohorts': {'1': {'count': 6657, 'prevalence': 0.6521355799373041}}}, 'parent_ids': [4042140, 4024013], 'children': []}, {'concept_id': 4302537, 'concept_name': 'Digestive system finding', 'concept_code': '386617003', 'metrics': {'union': {'count': 5953, 'prevalence': 0.5831700626959248}, 'cohorts': {'1': {'count': 5953, 'prevalence': 0.5831700626959248}}}, 'parent_ids': [4042140], 'children': [{'concept_id': 4091363, 'concept_name': 'Mouth and/or pharynx finding', 'concept_code': '249376008', 'metrics': {'union': {'count': 5619, 'prevalence': 0.5504506269592476}, 'cohorts': {'1': {'count': 5619, 'prevalence': 0.5504506269592476}}}, 'parent_ids': [4302537, 255919], 'children': []}]}]}, {'concept_id': 4011630, 'concept_name': 'Neurological finding', 'concept_code': '102957003', 'metrics': {'union': {'count': 6659, 'prevalence': 0.6523315047021944}, 'cohorts': {'1': {'count': 6659, 'prevalence': 0.6523315047021944}}}, 'parent_ids': [441840], 'children': [{'concept_id': 4024013, 'concept_name': 'Sensory nervous system finding', 'concept_code': '106147001', 'metrics': {'union': {'count': 6659, 'prevalence': 0.6523315047021944}, 'cohorts': {'1': {'count': 6659, 'prevalence': 0.6523315047021944}}}, 'parent_ids': [4011630], 'children': [{'concept_id': 44783587, 'concept_name': 'Finding of sensation by site', 'concept_code': '699697007', 'metrics': {'union': {'count': 6657, 'prevalence': 0.6521355799373041}, 'cohorts': {'1': {'count': 6657, 'prevalence': 0.6521355799373041}}}, 'parent_ids': [4042140, 4024013], 'children': []}]}]}]}]}\n",
+      "the time taken to get cohort concept stats for condition_occurrence is 138.0793159008026s\n"
      ]
     }
    ],
@@ -840,24 +303,42 @@
     "\n",
     "# get cohort concept prevalance\n",
     "t1 = time.time()\n",
-    "cohort_concepts = cohort_data.get_concept_stats()\n",
+    "cohort_concepts, cohort_concept_hierarchy = cohort_data.get_concept_stats(filter_count=5000)\n",
     "print(pd.DataFrame(cohort_concepts[\"condition_occurrence\"]))\n",
+    "print(f\"returned cohort_concept_hierarchy object converted to dict: {cohort_concept_hierarchy.to_dict()}\")\n",
     "print(f'the time taken to get cohort concept stats for condition_occurrence is {time.time() - t1}s')"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "10305fac-8ae3-49ca-8542-47d0a0636f97",
+   "id": "77af0451-f2e6-47ad-814e-3cb052e07aa5",
    "metadata": {},
    "source": [
     "———————————————\n",
     "\n",
-    "**Cohort drug exposure concept prevalence**: \n",
-    "The code block below demonstrates how to use `get_concept_stats(concept_type='drug_exposure', filter_count=500)` method to retrieve concept prevalence for the `drug_exposure` domain. By default, this uses the `RxNorm` vocabulary. Concepts with fewer than 500 patients are excluded, and hierarchical relationships are included in the results. The method returns a dictionary where the **key** is the `concept_type` (in this case, `drug_exposure`) and the **value** is a list of concept dictionaries. Each concept dictionary in the list contains the following fields: `concept_name`, `concept_code`, `count_in_cohort`, `prevalence`, `ancestor_concept_id`, and `descendant_concept_id`. These values allow you to explore which clinical concepts are most prevalent in your cohort and suppoert deeper investigations into potential sources of selection bias.\n",
+    "**Navigating cohort concept hierarchy**\n",
+    "The following methods in the `ConceptHierarchy` class enable concept hierarchy navigation:\n",
+    "- `get_root_nodes(serialization=False)`: If the input parameter `serialization` is False, it returns a list of root nodes of the `ConceptHierarchy` object where each root node is a `ConceptNode` object. If the input parameter `serialization` is True, it returns a list of dictionaries with each dict item representing a serialized root `ConceptNode` object ready to be converted into a JSON object for downstream apps.\n",
+    "- `get_node(concept_id, serialization=False)`: returns the `ConceptNode` object (when `serialization` is set to False) or a dict item representing a serialized `ConceptNode` object (when `serialization` is set to True) corresponding to the input `concept_id`.\n",
+    "- `get_leaf_nodes(serialization=False)`: If the input parameter `serialization` is False, it returns a list of leaf nodes of the `ConceptHierarchy` object where each leaf node is a `ConceptNode` object. If the input parameter `serialization` is True, it returns a list of dictionaries with each dict item representing a serialized leaf `ConceptNode` object ready to be converted into a JSON object for downstream apps.\n",
+    "- `iter_nodes(root_id, order='bfs', serialization=False)`: allows downstream apps to iterate the concept hierarchy in breadth-first search (`bfs`) or depth-first search (`dfs`) order and yield an ordered list of ConceptNode objects (`serialization` is set to False) or an ordered list of dict items with each item representing a serialized ConceptNode object (`serialization` is set to True). \n",
+    "- `union(other)`: merges the current `ConceptHierarchy` object with another `ConceptHierarchy` object (specified by the `other` input parameter) to aggregate metrics across the two hierarchies. It returns a new unioned `ConceptHierarchy` object with aggregated metrics computed.\n",
+    "- `to_dict(root_id=None)`: converts the entire `ConceptHierarchy` object (if `root_id` input parameter is None) or a sub-hierarchy rooted at the `root_id` input concept id parameter in the concept hierarchy to a serialized and nested dict structure ready to be loaded into a JSON object by downstream apps. The returned dict is a key-value pair with `hierarchy` as the key and a list of serialized ConceptNode objects as the value.\n",
+    "\n",
+    "The code block below demonstrates how to navigate the cohort condition occurrence concept hierarchy."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "926bc4ad-1618-4078-8c5e-75d6473218d5",
+   "metadata": {},
+   "source": [
+    "———————————————\n",
     "\n",
-    "**Note**: Prevalence computation may take some time, especially for large cohorts or when hierarchical relationships are included. A progress bar will appear to indicate the progress of the computation. \n",
+    "**Cohort drug exposure concept prevalence**: \n",
+    "The code block below demonstrates how to use the `get_concept_stats(concept_type='drug_exposure', filter_count=500)` method to retrieve concept prevalence for the `drug_exposure` domain with hierarchical relationships included in the results. By default, this uses the `RxNorm` vocabulary. Concepts with fewer than 500 patients are excluded as specified in the `filter_count` input parameter. The method returns a dictionary and a `ConceptHierarchy` object. Refer to the detailed description of the returned dictionary and `ConceptHierarchy` object in the `condition_occurrence` concept domain example above. These returned results allow you to explore which clinical concepts are most prevalent in your cohort and support deeper investigations into potential sources of selection bias.\n",
     "\n",
-    "The output also includes a text-based, indented representation of the concept hierarchy. Each concept is displayed along with its **concept code**, **patient count**, and **prevalence** in parentheses, providing a quick summary of both the structure and frequency of clinical concepts in the cohort."
+    "**Note**: Prevalence computation may take some time, especially for large cohorts or when hierarchical relationships are included. A progress bar will appear to indicate the progress of the computation. \n"
    ]
   },
   {
@@ -869,7 +350,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5087188325984ffb8977c95fed3c8acd",
+       "model_id": "2d690d2989fd454f8bd42d7311a1b934",
        "version_major": 2,
        "version_minor": 0
       },
@@ -884,107 +365,210 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "cohort concept hierarchy for drug_exposure with root concept ids [36217216, 1301025, 1125315, 36217210, 36217214]:\n",
-      "Pill (Code: 1151133, Count: 931, Prevalence: 9.120%)\n",
-      "  acetaminophen Pill (Code: 1152843, Count: 638, Prevalence: 6.250%)\n",
-      "    acetaminophen Oral Tablet (Code: 369097, Count: 609, Prevalence: 5.966%)\n",
-      "      acetaminophen 500 MG Oral Tablet (Code: 198440, Count: 582, Prevalence: 5.701%)\n",
-      "    acetaminophen 500 MG Oral Tablet (Code: 198440, Count: 582, Prevalence: 5.701%)\n",
-      "enoxaparin (Code: 67108, Count: 582, Prevalence: 5.701%)\n",
-      "  enoxaparin Prefilled Syringe (Code: 727722, Count: 582, Prevalence: 5.701%)\n",
-      "    enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 1360019, Count: 582, Prevalence: 5.701%)\n",
-      "      0.4 ML enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 854235, Count: 582, Prevalence: 5.701%)\n",
-      "    0.4 ML enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 854235, Count: 582, Prevalence: 5.701%)\n",
-      "  enoxaparin sodium 100 MG/ML (Code: 854227, Count: 582, Prevalence: 5.701%)\n",
-      "    enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 1360019, Count: 582, Prevalence: 5.701%)\n",
-      "      0.4 ML enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 854235, Count: 582, Prevalence: 5.701%)\n",
-      "    0.4 ML enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 854235, Count: 582, Prevalence: 5.701%)\n",
-      "  enoxaparin Injectable Product (Code: 1162664, Count: 582, Prevalence: 5.701%)\n",
-      "    enoxaparin Prefilled Syringe (Code: 727722, Count: 582, Prevalence: 5.701%)\n",
-      "      enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 1360019, Count: 582, Prevalence: 5.701%)\n",
-      "        0.4 ML enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 854235, Count: 582, Prevalence: 5.701%)\n",
-      "      0.4 ML enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 854235, Count: 582, Prevalence: 5.701%)\n",
-      "    0.4 ML enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 854235, Count: 582, Prevalence: 5.701%)\n",
-      "    enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 1360019, Count: 582, Prevalence: 5.701%)\n",
-      "      0.4 ML enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 854235, Count: 582, Prevalence: 5.701%)\n",
-      "acetaminophen (Code: 161, Count: 676, Prevalence: 6.622%)\n",
-      "  acetaminophen Pill (Code: 1152843, Count: 638, Prevalence: 6.250%)\n",
-      "    acetaminophen Oral Tablet (Code: 369097, Count: 609, Prevalence: 5.966%)\n",
-      "      acetaminophen 500 MG Oral Tablet (Code: 198440, Count: 582, Prevalence: 5.701%)\n",
-      "    acetaminophen 500 MG Oral Tablet (Code: 198440, Count: 582, Prevalence: 5.701%)\n",
-      "  acetaminophen Oral Product (Code: 1152842, Count: 638, Prevalence: 6.250%)\n",
-      "    acetaminophen Oral Tablet (Code: 369097, Count: 609, Prevalence: 5.966%)\n",
-      "      acetaminophen 500 MG Oral Tablet (Code: 198440, Count: 582, Prevalence: 5.701%)\n",
-      "    acetaminophen 500 MG Oral Tablet (Code: 198440, Count: 582, Prevalence: 5.701%)\n",
-      "  acetaminophen Oral Tablet (Code: 369097, Count: 609, Prevalence: 5.966%)\n",
-      "    acetaminophen 500 MG Oral Tablet (Code: 198440, Count: 582, Prevalence: 5.701%)\n",
-      "  acetaminophen 500 MG (Code: 315266, Count: 582, Prevalence: 5.701%)\n",
-      "    acetaminophen 500 MG Oral Tablet (Code: 198440, Count: 582, Prevalence: 5.701%)\n",
-      "Injectable Product (Code: 1151126, Count: 606, Prevalence: 5.937%)\n",
-      "  enoxaparin Injectable Product (Code: 1162664, Count: 582, Prevalence: 5.701%)\n",
-      "    enoxaparin Prefilled Syringe (Code: 727722, Count: 582, Prevalence: 5.701%)\n",
-      "      enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 1360019, Count: 582, Prevalence: 5.701%)\n",
-      "        0.4 ML enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 854235, Count: 582, Prevalence: 5.701%)\n",
-      "      0.4 ML enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 854235, Count: 582, Prevalence: 5.701%)\n",
-      "    0.4 ML enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 854235, Count: 582, Prevalence: 5.701%)\n",
-      "    enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 1360019, Count: 582, Prevalence: 5.701%)\n",
-      "      0.4 ML enoxaparin sodium 100 MG/ML Prefilled Syringe (Code: 854235, Count: 582, Prevalence: 5.701%)\n",
-      "Oral Product (Code: 1151131, Count: 937, Prevalence: 9.179%)\n",
-      "  acetaminophen Oral Product (Code: 1152842, Count: 638, Prevalence: 6.250%)\n",
-      "    acetaminophen Oral Tablet (Code: 369097, Count: 609, Prevalence: 5.966%)\n",
-      "      acetaminophen 500 MG Oral Tablet (Code: 198440, Count: 582, Prevalence: 5.701%)\n",
-      "    acetaminophen 500 MG Oral Tablet (Code: 198440, Count: 582, Prevalence: 5.701%)\n",
       "                                         concept_name concept_code  count_in_cohort  prevalence  ancestor_concept_id  descendant_concept_id\n",
       "0                                        Oral Product      1151131              937    0.091791             36217214               36217214\n",
       "1                                                Pill      1151133              931    0.091203             36217216               36217216\n",
       "2                                       acetaminophen          161              676    0.066223              1125315                1125315\n",
-      "3                                  acetaminophen Pill      1152843              638    0.062500              1125315               36216999\n",
-      "4                                  acetaminophen Pill      1152843              638    0.062500             36217216               36216999\n",
-      "5                          acetaminophen Oral Product      1152842              638    0.062500              1125315               36216998\n",
-      "6                                  acetaminophen Pill      1152843              638    0.062500             36216999               36216999\n",
-      "7                          acetaminophen Oral Product      1152842              638    0.062500             36216998               36216998\n",
-      "8                          acetaminophen Oral Product      1152842              638    0.062500             36217214               36216998\n",
-      "9                           acetaminophen Oral Tablet       369097              609    0.059659             36216998               40005746\n",
-      "10                          acetaminophen Oral Tablet       369097              609    0.059659             36216999               40005746\n",
-      "11                          acetaminophen Oral Tablet       369097              609    0.059659             40005746               40005746\n",
-      "12                          acetaminophen Oral Tablet       369097              609    0.059659              1125315               40005746\n",
+      "3                                  acetaminophen Pill      1152843              638    0.062500             36216999               36216999\n",
+      "4                          acetaminophen Oral Product      1152842              638    0.062500              1125315               36216998\n",
+      "5                                  acetaminophen Pill      1152843              638    0.062500             36217216               36216999\n",
+      "6                          acetaminophen Oral Product      1152842              638    0.062500             36217214               36216998\n",
+      "7                                  acetaminophen Pill      1152843              638    0.062500              1125315               36216999\n",
+      "8                          acetaminophen Oral Product      1152842              638    0.062500             36216998               36216998\n",
+      "9                           acetaminophen Oral Tablet       369097              609    0.059659              1125315               40005746\n",
+      "10                          acetaminophen Oral Tablet       369097              609    0.059659             40005746               40005746\n",
+      "11                          acetaminophen Oral Tablet       369097              609    0.059659             36216998               40005746\n",
+      "12                          acetaminophen Oral Tablet       369097              609    0.059659             36216999               40005746\n",
       "13                                 Injectable Product      1151126              606    0.059365             36217210               36217210\n",
-      "14                                         enoxaparin        67108              582    0.057014              1301025                1301025\n",
-      "15                       enoxaparin Prefilled Syringe       727722              582    0.057014              1301025               40141787\n",
-      "16                       enoxaparin Prefilled Syringe       727722              582    0.057014             36224590               40141787\n",
-      "17  0.4 ML enoxaparin sodium 100 MG/ML Prefilled S...       854235              582    0.057014             36224590               40160973\n",
-      "18                      enoxaparin Injectable Product      1162664              582    0.057014             36217210               36224590\n",
-      "19      enoxaparin sodium 100 MG/ML Prefilled Syringe      1360019              582    0.057014             40141787               42902906\n",
-      "20                       enoxaparin Prefilled Syringe       727722              582    0.057014             40141787               40141787\n",
-      "21                        enoxaparin sodium 100 MG/ML       854227              582    0.057014              1301025               40160947\n",
-      "22      enoxaparin sodium 100 MG/ML Prefilled Syringe      1360019              582    0.057014             40160947               42902906\n",
-      "23      enoxaparin sodium 100 MG/ML Prefilled Syringe      1360019              582    0.057014             42902906               42902906\n",
-      "24                   acetaminophen 500 MG Oral Tablet       198440              582    0.057014             19020053               19020053\n",
-      "25                               acetaminophen 500 MG       315266              582    0.057014              1127527                1127527\n",
-      "26  0.4 ML enoxaparin sodium 100 MG/ML Prefilled S...       854235              582    0.057014             40160973               40160973\n",
-      "27      enoxaparin sodium 100 MG/ML Prefilled Syringe      1360019              582    0.057014             36224590               42902906\n",
-      "28  0.4 ML enoxaparin sodium 100 MG/ML Prefilled S...       854235              582    0.057014             40141787               40160973\n",
-      "29                      enoxaparin Injectable Product      1162664              582    0.057014              1301025               36224590\n",
-      "30  0.4 ML enoxaparin sodium 100 MG/ML Prefilled S...       854235              582    0.057014             42902906               40160973\n",
-      "31                   acetaminophen 500 MG Oral Tablet       198440              582    0.057014             36216998               19020053\n",
-      "32                   acetaminophen 500 MG Oral Tablet       198440              582    0.057014              1127527               19020053\n",
-      "33                               acetaminophen 500 MG       315266              582    0.057014              1125315                1127527\n",
-      "34  0.4 ML enoxaparin sodium 100 MG/ML Prefilled S...       854235              582    0.057014             40160947               40160973\n",
-      "35                   acetaminophen 500 MG Oral Tablet       198440              582    0.057014             40005746               19020053\n",
-      "36                   acetaminophen 500 MG Oral Tablet       198440              582    0.057014             36216999               19020053\n",
-      "37                        enoxaparin sodium 100 MG/ML       854227              582    0.057014             40160947               40160947\n",
-      "38                      enoxaparin Injectable Product      1162664              582    0.057014             36224590               36224590\n",
-      "the time taken to get cohort concept stats for drug_exposure is 30.243131399154663s\n"
+      "14                       enoxaparin Prefilled Syringe       727722              582    0.057014              1301025               40141787\n",
+      "15                       enoxaparin Prefilled Syringe       727722              582    0.057014             40141787               40141787\n",
+      "16  0.4 ML enoxaparin sodium 100 MG/ML Prefilled S...       854235              582    0.057014             42902906               40160973\n",
+      "17                   acetaminophen 500 MG Oral Tablet       198440              582    0.057014              1127527               19020053\n",
+      "18                       enoxaparin Prefilled Syringe       727722              582    0.057014             36224590               40141787\n",
+      "19  0.4 ML enoxaparin sodium 100 MG/ML Prefilled S...       854235              582    0.057014             40160947               40160973\n",
+      "20                        enoxaparin sodium 100 MG/ML       854227              582    0.057014              1301025               40160947\n",
+      "21                                         enoxaparin        67108              582    0.057014              1301025                1301025\n",
+      "22                      enoxaparin Injectable Product      1162664              582    0.057014             36217210               36224590\n",
+      "23      enoxaparin sodium 100 MG/ML Prefilled Syringe      1360019              582    0.057014             40141787               42902906\n",
+      "24                   acetaminophen 500 MG Oral Tablet       198440              582    0.057014             40005746               19020053\n",
+      "25                   acetaminophen 500 MG Oral Tablet       198440              582    0.057014             19020053               19020053\n",
+      "26                               acetaminophen 500 MG       315266              582    0.057014              1127527                1127527\n",
+      "27  0.4 ML enoxaparin sodium 100 MG/ML Prefilled S...       854235              582    0.057014             40160973               40160973\n",
+      "28  0.4 ML enoxaparin sodium 100 MG/ML Prefilled S...       854235              582    0.057014             36224590               40160973\n",
+      "29      enoxaparin sodium 100 MG/ML Prefilled Syringe      1360019              582    0.057014             36224590               42902906\n",
+      "30      enoxaparin sodium 100 MG/ML Prefilled Syringe      1360019              582    0.057014             42902906               42902906\n",
+      "31      enoxaparin sodium 100 MG/ML Prefilled Syringe      1360019              582    0.057014             40160947               42902906\n",
+      "32                   acetaminophen 500 MG Oral Tablet       198440              582    0.057014             36216999               19020053\n",
+      "33  0.4 ML enoxaparin sodium 100 MG/ML Prefilled S...       854235              582    0.057014             40141787               40160973\n",
+      "34                      enoxaparin Injectable Product      1162664              582    0.057014              1301025               36224590\n",
+      "35                        enoxaparin sodium 100 MG/ML       854227              582    0.057014             40160947               40160947\n",
+      "36                      enoxaparin Injectable Product      1162664              582    0.057014             36224590               36224590\n",
+      "37                   acetaminophen 500 MG Oral Tablet       198440              582    0.057014             36216998               19020053\n",
+      "38                               acetaminophen 500 MG       315266              582    0.057014              1125315                1127527\n",
+      "returned cohort_de_concept_hierarchy object converted to dict: {'hierarchy': [{'concept_id': 441840, 'concept_name': 'Clinical finding', 'concept_code': '404684003', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [], 'children': [{'concept_id': 4274025, 'concept_name': 'Disease', 'concept_code': '64572001', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [441840], 'children': [{'concept_id': 432250, 'concept_name': 'Disorder due to infection', 'concept_code': '40733004', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [4274025], 'children': [{'concept_id': 440029, 'concept_name': 'Viral disease', 'concept_code': '34014006', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [432250], 'children': [{'concept_id': 4100065, 'concept_name': 'Disease due to Coronaviridae', 'concept_code': '27619001', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [440029], 'children': [{'concept_id': 439676, 'concept_name': 'Coronavirus infection', 'concept_code': '186747009', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [4100065], 'children': [{'concept_id': 37311061, 'concept_name': 'COVID-19', 'concept_code': '840539006', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [439676], 'children': []}]}]}]}]}]}, {'concept_id': 4094294, 'concept_name': 'Clinical history and observation findings', 'concept_code': '250171008', 'metrics': {'union': {'count': 9150, 'prevalence': 0.8963557993730408}, 'cohorts': {'1': {'count': 9150, 
'prevalence': 0.8963557993730408}}}, 'parent_ids': [441840], 'children': [{'concept_id': 4041283, 'concept_name': 'General finding of observation of patient', 'concept_code': '118222006', 'metrics': {'union': {'count': 9149, 'prevalence': 0.8962578369905956}, 'cohorts': {'1': {'count': 9149, 'prevalence': 0.8962578369905956}}}, 'parent_ids': [4094294], 'children': [{'concept_id': 4221108, 'concept_name': 'General body state finding', 'concept_code': '82832008', 'metrics': {'union': {'count': 9080, 'prevalence': 0.8894984326018809}, 'cohorts': {'1': {'count': 9080, 'prevalence': 0.8894984326018809}}}, 'parent_ids': [4041283], 'children': [{'concept_id': 4042138, 'concept_name': 'Vital signs finding', 'concept_code': '118227000', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4221108], 'children': [{'concept_id': 4022230, 'concept_name': 'Body temperature finding', 'concept_code': '105723007', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4103474, 4042138], 'children': [{'concept_id': 4047791, 'concept_name': 'Abnormal body temperature', 'concept_code': '123979008', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4022230], 'children': [{'concept_id': 4178904, 'concept_name': 'Body temperature above reference range', 'concept_code': '50177009', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4047791], 'children': [{'concept_id': 437663, 'concept_name': 'Fever', 'concept_code': '386661006', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 
'parent_ids': [4178904], 'children': []}]}]}]}]}]}, {'concept_id': 4103474, 'concept_name': 'Temperature-associated finding', 'concept_code': '301343009', 'metrics': {'union': {'count': 8769, 'prevalence': 0.859032131661442}, 'cohorts': {'1': {'count': 8769, 'prevalence': 0.859032131661442}}}, 'parent_ids': [4041283], 'children': [{'concept_id': 4022230, 'concept_name': 'Body temperature finding', 'concept_code': '105723007', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4103474, 4042138], 'children': [{'concept_id': 4047791, 'concept_name': 'Abnormal body temperature', 'concept_code': '123979008', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4022230], 'children': [{'concept_id': 4178904, 'concept_name': 'Body temperature above reference range', 'concept_code': '50177009', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4047791], 'children': [{'concept_id': 437663, 'concept_name': 'Fever', 'concept_code': '386661006', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4178904], 'children': []}]}]}]}]}]}]}, {'concept_id': 4042140, 'concept_name': 'Finding by site', 'concept_code': '118234003', 'metrics': {'union': {'count': 9120, 'prevalence': 0.8934169278996865}, 'cohorts': {'1': {'count': 9120, 'prevalence': 0.8934169278996865}}}, 'parent_ids': [441840], 'children': [{'concept_id': 4024567, 'concept_name': 'Respiratory finding', 'concept_code': '106048009', 'metrics': {'union': {'count': 7755, 'prevalence': 0.759698275862069}, 'cohorts': {'1': {'count': 7755, 'prevalence': 0.759698275862069}}}, 'parent_ids': [4042140], 'children': 
[{'concept_id': 4267789, 'concept_name': 'Respiratory function finding', 'concept_code': '365852007', 'metrics': {'union': {'count': 6596, 'prevalence': 0.6461598746081505}, 'cohorts': {'1': {'count': 6596, 'prevalence': 0.6461598746081505}}}, 'parent_ids': [4024567], 'children': [{'concept_id': 254761, 'concept_name': 'Cough', 'concept_code': '49727002', 'metrics': {'union': {'count': 6596, 'prevalence': 0.6461598746081505}, 'cohorts': {'1': {'count': 6596, 'prevalence': 0.6461598746081505}}}, 'parent_ids': [4267789], 'children': []}]}]}, {'concept_id': 4199402, 'concept_name': 'Finding of body region', 'concept_code': '301857004', 'metrics': {'union': {'count': 7115, 'prevalence': 0.6970023510971787}, 'cohorts': {'1': {'count': 7115, 'prevalence': 0.6970023510971787}}}, 'parent_ids': [4042140], 'children': [{'concept_id': 255919, 'concept_name': 'Finding of head and neck region', 'concept_code': '118254002', 'metrics': {'union': {'count': 6423, 'prevalence': 0.6292123824451411}, 'cohorts': {'1': {'count': 6423, 'prevalence': 0.6292123824451411}}}, 'parent_ids': [4199402], 'children': [{'concept_id': 4247371, 'concept_name': 'Head finding', 'concept_code': '406122000', 'metrics': {'union': {'count': 6391, 'prevalence': 0.6260775862068966}, 'cohorts': {'1': {'count': 6391, 'prevalence': 0.6260775862068966}}}, 'parent_ids': [255919], 'children': [{'concept_id': 4182161, 'concept_name': 'Finding of head region', 'concept_code': '298364001', 'metrics': {'union': {'count': 5194, 'prevalence': 0.5088166144200627}, 'cohorts': {'1': {'count': 5194, 'prevalence': 0.5088166144200627}}}, 'parent_ids': [4247371], 'children': []}]}, {'concept_id': 4091363, 'concept_name': 'Mouth and/or pharynx finding', 'concept_code': '249376008', 'metrics': {'union': {'count': 5619, 'prevalence': 0.5504506269592476}, 'cohorts': {'1': {'count': 5619, 'prevalence': 0.5504506269592476}}}, 'parent_ids': [4302537, 255919], 'children': []}]}]}, {'concept_id': 44783587, 'concept_name': 'Finding of 
sensation by site', 'concept_code': '699697007', 'metrics': {'union': {'count': 6657, 'prevalence': 0.6521355799373041}, 'cohorts': {'1': {'count': 6657, 'prevalence': 0.6521355799373041}}}, 'parent_ids': [4042140, 4024013], 'children': []}, {'concept_id': 4302537, 'concept_name': 'Digestive system finding', 'concept_code': '386617003', 'metrics': {'union': {'count': 5953, 'prevalence': 0.5831700626959248}, 'cohorts': {'1': {'count': 5953, 'prevalence': 0.5831700626959248}}}, 'parent_ids': [4042140], 'children': [{'concept_id': 4091363, 'concept_name': 'Mouth and/or pharynx finding', 'concept_code': '249376008', 'metrics': {'union': {'count': 5619, 'prevalence': 0.5504506269592476}, 'cohorts': {'1': {'count': 5619, 'prevalence': 0.5504506269592476}}}, 'parent_ids': [4302537, 255919], 'children': []}]}]}, {'concept_id': 4011630, 'concept_name': 'Neurological finding', 'concept_code': '102957003', 'metrics': {'union': {'count': 6659, 'prevalence': 0.6523315047021944}, 'cohorts': {'1': {'count': 6659, 'prevalence': 0.6523315047021944}}}, 'parent_ids': [441840], 'children': [{'concept_id': 4024013, 'concept_name': 'Sensory nervous system finding', 'concept_code': '106147001', 'metrics': {'union': {'count': 6659, 'prevalence': 0.6523315047021944}, 'cohorts': {'1': {'count': 6659, 'prevalence': 0.6523315047021944}}}, 'parent_ids': [4011630], 'children': [{'concept_id': 44783587, 'concept_name': 'Finding of sensation by site', 'concept_code': '699697007', 'metrics': {'union': {'count': 6657, 'prevalence': 0.6521355799373041}, 'cohorts': {'1': {'count': 6657, 'prevalence': 0.6521355799373041}}}, 'parent_ids': [4042140, 4024013], 'children': []}]}]}]}]}\n",
+      "the time taken to get cohort concept stats for drug_exposure is 28.4456946849823s\n"
      ]
     }
    ],
    "source": [
     "t1 = time.time()\n",
-    "cohort_de_concepts = cohort_data.get_concept_stats(concept_type='drug_exposure', filter_count=500)\n",
+    "cohort_de_concepts, cohort_de_concept_hierarchy = cohort_data.get_concept_stats(concept_type='drug_exposure', filter_count=500)\n",
     "print(pd.DataFrame(cohort_de_concepts[\"drug_exposure\"]))\n",
+    "print(f\"returned cohort_de_concept_hierarchy object converted to dict: {cohort_de_concept_hierarchy.to_dict()}\")\n",
     "print(f'the time taken to get cohort concept stats for drug_exposure is {time.time() - t1}s')"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "b6c6a8a1-d07d-4a1a-93f7-59efeb1084dc",
+   "metadata": {},
+   "source": [
+    "———————————————\n",
+    "\n",
+    "**Navigating cohort concept hierarchy**\n",
+    "The following methods in the `ConceptHierarchy` class enable navigation of the concept hierarchy:\n",
+    "- `get_root_nodes(serialization=False)`: If the input parameter `serialization` is False, it returns a list of root nodes of the `ConceptHierarchy` object where each root node is a `ConceptNode` object. If the input parameter `serialization` is True, it returns a list of dictionaries with each dict item representing a serialized root `ConceptNode` object ready to be converted into a JSON object for downstream apps.\n",
+    "- `get_node(concept_id, serialization=False)`: returns the `ConceptNode` object (when `serialization` is set to False) or a dict item representing a serialized `ConceptNode` object (when `serialization` is set to True) corresponding to the input `concept_id`.\n",
+    "- `get_leaf_nodes(serialization=False)`: If the input parameter `serialization` is False, it returns a list of leaf nodes of the `ConceptHierarchy` object where each leaf node is a `ConceptNode` object. If the input parameter `serialization` is True, it returns a list of dictionaries with each dict item representing a serialized leaf `ConceptNode` object ready to be converted into a JSON object for downstream apps.\n",
+    "- `iter_nodes(root_id, order='bfs', serialization=False)`: allows downstream apps to iterate the concept hierarchy in breadth-first search (`bfs`) or depth-first search (`dfs`) order and yield an ordered list of ConceptNode objects (`serialization` is set to False) or an ordered list of dict items with each item representing a serialized ConceptNode object (`serialization` is set to True). \n",
+    "- `union(other)`: merges the current `ConceptHierarchy` object with another `ConceptHierarchy` object (specified by the `other` input parameter) to get aggregated metrics across the two hierarchies. It returns a new unioned `ConceptHierarchy` object with the aggregated metrics computed.\n",
+    "- `to_dict(root_id=None)`: converts the entire `ConceptHierarchy` object (if `root_id` input parameter is None) or a sub-hierarchy rooted at the `root_id` input concept id parameter in the concept hierarchy to a serialized and nested dict structure ready to be loaded into a JSON object by downstream apps. The returned dict is a key-value pair with `hierarchy` as the key and a list of serialized ConceptNode objects as the value.\n",
+    "\n",
+    "Since `ConceptHierarchy` methods return a `ConceptNode` object or a list of `ConceptNode` objects if `serialization` is set to False, downstream apps can access `ConceptNode` properties such as `name`, `code`, `parents`, and `children` to get the concept's name, code, parents, and children, and call `ConceptNode` methods, such as `get_metrics(cohort_id)` to get the concept's count and prevalence metrics computed by the linked `ConceptHierarchy` object for the cohort identified by the input `cohort_id` parameter, and `get_union_metrics()` to get the concept's unioned or aggregated count and prevalence metrics computed by the linked `ConceptHierarchy` object across multiple cohorts. There is also a `ConceptNode` `to_dict(include_children=True)` method that returns the serialized dict item representing the `ConceptNode` object either with nested children included (`include_children` input parameter is set to True) or not (`include_children` is set to False).   \n",
+    "\n",
+    "The code block below demonstrates how to navigate the cohort condition occurrence concept hierarchy using `ConceptHierarchy` and `ConceptNode` objects."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "0e581285-6df1-4ab2-a402-99a6b42a2da5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "root node 0 info - name: Clinical finding, code: 404684003\n",
+      "root metric info - {'count': 10208, 'prevalence': 1.0}\n",
+      "leaf node 0 info - name: COVID-19, code: 840539006\n",
+      "leaf metric info - {'count': 10208, 'prevalence': 1.0}\n",
+      "leaf node 1 info - name: Fever, code: 386661006\n",
+      "leaf metric info - {'count': 8650, 'prevalence': 0.8473746081504702}\n",
+      "leaf node 2 info - name: Finding of sensation by site, code: 699697007\n",
+      "leaf metric info - {'count': 6657, 'prevalence': 0.6521355799373041}\n",
+      "leaf node 3 info - name: Cough, code: 49727002\n",
+      "leaf metric info - {'count': 6596, 'prevalence': 0.6461598746081505}\n",
+      "leaf node 4 info - name: Mouth and/or pharynx finding, code: 249376008\n",
+      "leaf metric info - {'count': 5619, 'prevalence': 0.5504506269592476}\n",
+      "leaf node 5 info - name: Finding of head region, code: 298364001\n",
+      "leaf metric info - {'count': 5194, 'prevalence': 0.5088166144200627}\n",
+      "infection_node info - name: Disorder due to infection, code: 40733004\n",
+      "infection_node metric info - {'count': 10208, 'prevalence': 1.0}\n",
+      "infection_node union metric info - {'count': 10208, 'prevalence': 1.0}\n",
+      "serialized infection_node dict info - {'concept_id': 432250, 'concept_name': 'Disorder due to infection', 'concept_code': '40733004', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [4274025], 'children': [{'concept_id': 440029, 'concept_name': 'Viral disease', 'concept_code': '34014006', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [432250], 'children': [{'concept_id': 4100065, 'concept_name': 'Disease due to Coronaviridae', 'concept_code': '27619001', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [440029], 'children': [{'concept_id': 439676, 'concept_name': 'Coronavirus infection', 'concept_code': '186747009', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [4100065], 'children': [{'concept_id': 37311061, 'concept_name': 'COVID-19', 'concept_code': '840539006', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [439676], 'children': []}]}]}]}]}\n",
+      "print all concept nodes being iterated in the concept hierarchy starting from the root node in breadth-first order:\n",
+      "(441840, Clinical finding)\n",
+      "(4274025, Disease)\n",
+      "(4094294, Clinical history and observation findings)\n",
+      "(4042140, Finding by site)\n",
+      "(4011630, Neurological finding)\n",
+      "(432250, Disorder due to infection)\n",
+      "(4041283, General finding of observation of patient)\n",
+      "(4024567, Respiratory finding)\n",
+      "(4199402, Finding of body region)\n",
+      "(44783587, Finding of sensation by site)\n",
+      "(4302537, Digestive system finding)\n",
+      "(4024013, Sensory nervous system finding)\n",
+      "(440029, Viral disease)\n",
+      "(4221108, General body state finding)\n",
+      "(4103474, Temperature-associated finding)\n",
+      "(4267789, Respiratory function finding)\n",
+      "(255919, Finding of head and neck region)\n",
+      "(4091363, Mouth and/or pharynx finding)\n",
+      "(44783587, Finding of sensation by site)\n",
+      "(4100065, Disease due to Coronaviridae)\n",
+      "(4042138, Vital signs finding)\n",
+      "(4022230, Body temperature finding)\n",
+      "(254761, Cough)\n",
+      "(4247371, Head finding)\n",
+      "(4091363, Mouth and/or pharynx finding)\n",
+      "(439676, Coronavirus infection)\n",
+      "(4022230, Body temperature finding)\n",
+      "(4047791, Abnormal body temperature)\n",
+      "(4182161, Finding of head region)\n",
+      "(37311061, COVID-19)\n",
+      "(4047791, Abnormal body temperature)\n",
+      "(4178904, Body temperature above reference range)\n",
+      "(4178904, Body temperature above reference range)\n",
+      "(437663, Fever)\n",
+      "(437663, Fever)\n",
+      "print all concept nodes being iterated in the concept hierarchy starting from the root node in depth-first order:\n",
+      "(441840, Clinical finding)\n",
+      "(4011630, Neurological finding)\n",
+      "(4024013, Sensory nervous system finding)\n",
+      "(44783587, Finding of sensation by site)\n",
+      "(4042140, Finding by site)\n",
+      "(4302537, Digestive system finding)\n",
+      "(4091363, Mouth and/or pharynx finding)\n",
+      "(44783587, Finding of sensation by site)\n",
+      "(4199402, Finding of body region)\n",
+      "(255919, Finding of head and neck region)\n",
+      "(4091363, Mouth and/or pharynx finding)\n",
+      "(4247371, Head finding)\n",
+      "(4182161, Finding of head region)\n",
+      "(4024567, Respiratory finding)\n",
+      "(4267789, Respiratory function finding)\n",
+      "(254761, Cough)\n",
+      "(4094294, Clinical history and observation findings)\n",
+      "(4041283, General finding of observation of patient)\n",
+      "(4103474, Temperature-associated finding)\n",
+      "(4022230, Body temperature finding)\n",
+      "(4047791, Abnormal body temperature)\n",
+      "(4178904, Body temperature above reference range)\n",
+      "(437663, Fever)\n",
+      "(4221108, General body state finding)\n",
+      "(4042138, Vital signs finding)\n",
+      "(4022230, Body temperature finding)\n",
+      "(4047791, Abnormal body temperature)\n",
+      "(4178904, Body temperature above reference range)\n",
+      "(437663, Fever)\n",
+      "(4274025, Disease)\n",
+      "(432250, Disorder due to infection)\n",
+      "(440029, Viral disease)\n",
+      "(4100065, Disease due to Coronaviridae)\n",
+      "(439676, Coronavirus infection)\n",
+      "(37311061, COVID-19)\n",
+      "print serialized dict of all concept nodes being iterated in the concept hierarchy starting from the root node in depth-first order:\n",
+      "[{'concept_id': 441840, 'concept_name': 'Clinical finding', 'concept_code': '404684003', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': []}, {'concept_id': 4011630, 'concept_name': 'Neurological finding', 'concept_code': '102957003', 'metrics': {'union': {'count': 6659, 'prevalence': 0.6523315047021944}, 'cohorts': {'1': {'count': 6659, 'prevalence': 0.6523315047021944}}}, 'parent_ids': [441840]}, {'concept_id': 4024013, 'concept_name': 'Sensory nervous system finding', 'concept_code': '106147001', 'metrics': {'union': {'count': 6659, 'prevalence': 0.6523315047021944}, 'cohorts': {'1': {'count': 6659, 'prevalence': 0.6523315047021944}}}, 'parent_ids': [4011630]}, {'concept_id': 44783587, 'concept_name': 'Finding of sensation by site', 'concept_code': '699697007', 'metrics': {'union': {'count': 6657, 'prevalence': 0.6521355799373041}, 'cohorts': {'1': {'count': 6657, 'prevalence': 0.6521355799373041}}}, 'parent_ids': [4042140, 4024013]}, {'concept_id': 4042140, 'concept_name': 'Finding by site', 'concept_code': '118234003', 'metrics': {'union': {'count': 9120, 'prevalence': 0.8934169278996865}, 'cohorts': {'1': {'count': 9120, 'prevalence': 0.8934169278996865}}}, 'parent_ids': [441840]}, {'concept_id': 4302537, 'concept_name': 'Digestive system finding', 'concept_code': '386617003', 'metrics': {'union': {'count': 5953, 'prevalence': 0.5831700626959248}, 'cohorts': {'1': {'count': 5953, 'prevalence': 0.5831700626959248}}}, 'parent_ids': [4042140]}, {'concept_id': 4091363, 'concept_name': 'Mouth and/or pharynx finding', 'concept_code': '249376008', 'metrics': {'union': {'count': 5619, 'prevalence': 0.5504506269592476}, 'cohorts': {'1': {'count': 5619, 'prevalence': 0.5504506269592476}}}, 'parent_ids': [4302537, 255919]}, {'concept_id': 44783587, 'concept_name': 'Finding of sensation by site', 'concept_code': '699697007', 'metrics': {'union': {'count': 6657, 'prevalence': 
0.6521355799373041}, 'cohorts': {'1': {'count': 6657, 'prevalence': 0.6521355799373041}}}, 'parent_ids': [4042140, 4024013]}, {'concept_id': 4199402, 'concept_name': 'Finding of body region', 'concept_code': '301857004', 'metrics': {'union': {'count': 7115, 'prevalence': 0.6970023510971787}, 'cohorts': {'1': {'count': 7115, 'prevalence': 0.6970023510971787}}}, 'parent_ids': [4042140]}, {'concept_id': 255919, 'concept_name': 'Finding of head and neck region', 'concept_code': '118254002', 'metrics': {'union': {'count': 6423, 'prevalence': 0.6292123824451411}, 'cohorts': {'1': {'count': 6423, 'prevalence': 0.6292123824451411}}}, 'parent_ids': [4199402]}, {'concept_id': 4091363, 'concept_name': 'Mouth and/or pharynx finding', 'concept_code': '249376008', 'metrics': {'union': {'count': 5619, 'prevalence': 0.5504506269592476}, 'cohorts': {'1': {'count': 5619, 'prevalence': 0.5504506269592476}}}, 'parent_ids': [4302537, 255919]}, {'concept_id': 4247371, 'concept_name': 'Head finding', 'concept_code': '406122000', 'metrics': {'union': {'count': 6391, 'prevalence': 0.6260775862068966}, 'cohorts': {'1': {'count': 6391, 'prevalence': 0.6260775862068966}}}, 'parent_ids': [255919]}, {'concept_id': 4182161, 'concept_name': 'Finding of head region', 'concept_code': '298364001', 'metrics': {'union': {'count': 5194, 'prevalence': 0.5088166144200627}, 'cohorts': {'1': {'count': 5194, 'prevalence': 0.5088166144200627}}}, 'parent_ids': [4247371]}, {'concept_id': 4024567, 'concept_name': 'Respiratory finding', 'concept_code': '106048009', 'metrics': {'union': {'count': 7755, 'prevalence': 0.759698275862069}, 'cohorts': {'1': {'count': 7755, 'prevalence': 0.759698275862069}}}, 'parent_ids': [4042140]}, {'concept_id': 4267789, 'concept_name': 'Respiratory function finding', 'concept_code': '365852007', 'metrics': {'union': {'count': 6596, 'prevalence': 0.6461598746081505}, 'cohorts': {'1': {'count': 6596, 'prevalence': 0.6461598746081505}}}, 'parent_ids': [4024567]}, {'concept_id': 
254761, 'concept_name': 'Cough', 'concept_code': '49727002', 'metrics': {'union': {'count': 6596, 'prevalence': 0.6461598746081505}, 'cohorts': {'1': {'count': 6596, 'prevalence': 0.6461598746081505}}}, 'parent_ids': [4267789]}, {'concept_id': 4094294, 'concept_name': 'Clinical history and observation findings', 'concept_code': '250171008', 'metrics': {'union': {'count': 9150, 'prevalence': 0.8963557993730408}, 'cohorts': {'1': {'count': 9150, 'prevalence': 0.8963557993730408}}}, 'parent_ids': [441840]}, {'concept_id': 4041283, 'concept_name': 'General finding of observation of patient', 'concept_code': '118222006', 'metrics': {'union': {'count': 9149, 'prevalence': 0.8962578369905956}, 'cohorts': {'1': {'count': 9149, 'prevalence': 0.8962578369905956}}}, 'parent_ids': [4094294]}, {'concept_id': 4103474, 'concept_name': 'Temperature-associated finding', 'concept_code': '301343009', 'metrics': {'union': {'count': 8769, 'prevalence': 0.859032131661442}, 'cohorts': {'1': {'count': 8769, 'prevalence': 0.859032131661442}}}, 'parent_ids': [4041283]}, {'concept_id': 4022230, 'concept_name': 'Body temperature finding', 'concept_code': '105723007', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4103474, 4042138]}, {'concept_id': 4047791, 'concept_name': 'Abnormal body temperature', 'concept_code': '123979008', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4022230]}, {'concept_id': 4178904, 'concept_name': 'Body temperature above reference range', 'concept_code': '50177009', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4047791]}, {'concept_id': 437663, 'concept_name': 'Fever', 'concept_code': '386661006', 'metrics': {'union': {'count': 8650, 
'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4178904]}, {'concept_id': 4221108, 'concept_name': 'General body state finding', 'concept_code': '82832008', 'metrics': {'union': {'count': 9080, 'prevalence': 0.8894984326018809}, 'cohorts': {'1': {'count': 9080, 'prevalence': 0.8894984326018809}}}, 'parent_ids': [4041283]}, {'concept_id': 4042138, 'concept_name': 'Vital signs finding', 'concept_code': '118227000', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4221108]}, {'concept_id': 4022230, 'concept_name': 'Body temperature finding', 'concept_code': '105723007', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4103474, 4042138]}, {'concept_id': 4047791, 'concept_name': 'Abnormal body temperature', 'concept_code': '123979008', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4022230]}, {'concept_id': 4178904, 'concept_name': 'Body temperature above reference range', 'concept_code': '50177009', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4047791]}, {'concept_id': 437663, 'concept_name': 'Fever', 'concept_code': '386661006', 'metrics': {'union': {'count': 8650, 'prevalence': 0.8473746081504702}, 'cohorts': {'1': {'count': 8650, 'prevalence': 0.8473746081504702}}}, 'parent_ids': [4178904]}, {'concept_id': 4274025, 'concept_name': 'Disease', 'concept_code': '64572001', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [441840]}, {'concept_id': 432250, 'concept_name': 'Disorder due to 
infection', 'concept_code': '40733004', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [4274025]}, {'concept_id': 440029, 'concept_name': 'Viral disease', 'concept_code': '34014006', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [432250]}, {'concept_id': 4100065, 'concept_name': 'Disease due to Coronaviridae', 'concept_code': '27619001', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [440029]}, {'concept_id': 439676, 'concept_name': 'Coronavirus infection', 'concept_code': '186747009', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [4100065]}, {'concept_id': 37311061, 'concept_name': 'COVID-19', 'concept_code': '840539006', 'metrics': {'union': {'count': 10208, 'prevalence': 1.0}, 'cohorts': {'1': {'count': 10208, 'prevalence': 1.0}}}, 'parent_ids': [439676]}]\n"
+     ]
+    }
+   ],
+   "source": [
+    "root_nodes = cohort_concept_hierarchy.get_root_nodes()\n",
+    "leaf_nodes = cohort_concept_hierarchy.get_leaf_nodes()\n",
+    "infection_node = cohort_concept_hierarchy.get_node(432250) # disorder due to infection\n",
+    "for index, root in enumerate(root_nodes):\n",
+    "    print(f'root node {index} info - name: {root.name}, code: {root.code}')\n",
+    "    print(f'root metric info - {root.get_metrics(1)}')\n",
+    "for index, leaf in enumerate(leaf_nodes):    \n",
+    "    print(f'leaf node {index} info - name: {leaf.name}, code: {leaf.code}')\n",
+    "    print(f'leaf metric info - {leaf.get_metrics(1)}')\n",
+    "print(f'infection_node info - name: {infection_node.name}, code: {infection_node.code}')\n",
+    "print(f'infection_node metric info - {infection_node.get_metrics(1)}')\n",
+    "print(f'infection_node union metric info - {infection_node.get_union_metrics()}')\n",
+    "print(f'serialized infection_node dict info - {infection_node.to_dict()}')\n",
+    "\n",
+    "print('print all concept nodes being iterated in the concept hierarchy starting from the root node in breadth-first order:')\n",
+    "for n in cohort_concept_hierarchy.iter_nodes(441840):\n",
+    "    print(f'({n.id}, {n.name})')\n",
+    "print('print all concept nodes being iterated in the concept hierarchy starting from the root node in depth-first order:')\n",
+    "for n in cohort_concept_hierarchy.iter_nodes(441840, order=\"dfs\"):\n",
+    "    print(f'({n.id}, {n.name})')\n",
+    "print('print serialized dict of all concept nodes being iterated in the concept hierarchy starting from the root node in depth-first order:')\n",
+    "print(list(cohort_concept_hierarchy.iter_nodes(441840, order=\"dfs\", serialization=True)))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "d8e53808-cac2-41c7-9d60-f7a3b661ff6f",
@@ -997,7 +581,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "128c7b02-faef-4a35-97a6-c92baa5a37dd",
    "metadata": {},
    "outputs": [