From 73ad8c20f034c809b180df8c85ab2806ae34f330 Mon Sep 17 00:00:00 2001 From: hyi Date: Sun, 15 Jun 2025 16:29:24 -0400 Subject: [PATCH 01/10] update pyproject.toml to have it pypi-release ready --- pyproject.toml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c3e1c21..5f63a93 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,6 +4,8 @@ version = "0.1.0" description = "A python package for health data bias quantification to support visual analytics techniques for tracking and communicating bias in cohort selection" authors = ["Hong Yi "] readme = "README.md" +license="MIT" +keywords=["bias", "healthcare", "cohort", "OMOP", "analytics", "observational research"] include = [ {path = "biasanalyzer/sql_templates/*.sql", format=["sdist", "wheel"]} ] @@ -29,9 +31,6 @@ pytest = "^8.3.3" [tool.poetry.group.dev.dependencies] pytest-cov = "5.0.0" -[tool.setuptools.package-data] -biasanalyzer = ["biasanalyzer/sql_templates/*.sql"] - [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" From 2fe47a7b24da35a560740db4ae1f7c54f85f7272 Mon Sep 17 00:00:00 2001 From: hyi Date: Mon, 16 Jun 2025 18:21:26 -0400 Subject: [PATCH 02/10] updated cohorts notebook from developer testing based to user tutorial --- notebooks/BiasAnalyzerCohortsTutorial.ipynb | 548 +++++++++++++++++++ notebooks/BiasAnalyzerTestingCohorts.ipynb | 577 -------------------- 2 files changed, 548 insertions(+), 577 deletions(-) create mode 100644 notebooks/BiasAnalyzerCohortsTutorial.ipynb delete mode 100644 notebooks/BiasAnalyzerTestingCohorts.ipynb diff --git a/notebooks/BiasAnalyzerCohortsTutorial.ipynb b/notebooks/BiasAnalyzerCohortsTutorial.ipynb new file mode 100644 index 0000000..8957503 --- /dev/null +++ b/notebooks/BiasAnalyzerCohortsTutorial.ipynb @@ -0,0 +1,548 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3bf748e7", + "metadata": {}, + "source": [ + "# BiasAnalyzer Tutorial: Exploring 
Cohorts\n", + "\n", + "This tutorial demonstrates how to use the `BiasAnalyzer` package to create and analyze cohorts by connecting to an [OMOP (Observational Medical Outcomes Partnership) CDM (Common Data Model)](https://www.ohdsi.org/data-standardization/) database. The currently supported database types include postgreSQL and duckDB. \n", + "\n", + "---\n", + "\n", + "### Overview\n", + "\n", + "**Objective**: \n", + "Guide users through the creation, exploration, and comparison of a baseline and a study cohort using `BiasAnalyzer`, illustrating how to define, explore, and compare them.\n", + "\n", + "**Before You Begin**:\n", + "The `BiasAnalyzer` package is currently in active development and has not yet been officially released on PyPI.\n", + "You can install it in one of the two ways:\n", + "\n", + "- **Install from GitHub (recommended during development)**:\n", + "```bash\n", + "pip install git+https://github.com/vaclab/BiasAnalyzer.git\n", + "```\n", + "- **Install from PyPI (once the package is officially released)**:\n", + "```bash\n", + "pip install biasanalyzer\n", + "```\n", + "For full setup and usage instructions, refer to the [README](https://github.com/VACLab/BiasAnalyzer/blob/main/README.md).\n", + "\n", + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "c5618746", + "metadata": {}, + "source": [ + "### Preparation for cohort creation\n", + "**Preparation step 1**: Import the `BIAS` class from the `api` module of the `BiasAnalyzer` package" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4de3a621", + "metadata": {}, + "outputs": [], + "source": [ + "from biasanalyzer.api import BIAS" + ] + }, + { + "cell_type": "markdown", + "id": "46559918", + "metadata": {}, + "source": [ + "**Preparation step 2**: Create an object of the `BIAS` class" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "86862060", + "metadata": {}, + "outputs": [], + "source": [ + "bias = BIAS()" + ] + }, + { + "cell_type": 
"markdown", + "id": "ff3bdfd8", + "metadata": {}, + "source": [ + "**Preparation step 3**: Specify OMOP Common Data Model (CDM) database configurations on the `bias` object to allow connection to the OMOP CDM database for cohort creation and selection bias analysis. A configuration file must include the root_omop_cdm_database key. An example of the configuration file is shown below:\n", + "```\n", + "root_omop_cdm_database:\n", + " database_type: duckdb # set it to one of the two supported types: postgresql or duckdb\n", + " username: test_username\n", + " password: test_password\n", + " hostname: test_db_hostname\n", + " database: \"shared_test_db.duckdb\" # use a shared name for an in-memory duckdb or database name for postgresql\n", + " port: 5432\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "83e992d1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "configuration specified in ../config.yaml loaded successfully\n" + ] + } + ], + "source": [ + "bias.set_config('../config.yaml')" + ] + }, + { + "cell_type": "markdown", + "id": "6d9c7881-0029-470c-ae84-6eb420c10ae9", + "metadata": {}, + "source": [ + "**Preparation step 4**: Set OMOP CDM database as specified in the configuration on the `bias` object to connect to the OMOP CDM database and create Cohort Definition metadata table and Cohort data table." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "283156f8-63da-42a5-bbd7-ee2b7719652c", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connected to the OMOP CDM database (read-only).\n", + "Cohort Definition table created.\n", + "Cohort table created.\n" + ] + } + ], + "source": [ + "bias.set_root_omop()" + ] + }, + { + "cell_type": "markdown", + "id": "c7219629-1a30-44af-9ec5-5eb9b4a52c5a", + "metadata": {}, + "source": [ + "---\n", + "\n", + "**Now that you have connected to your OMOP CDM database, you can start to use the APIs to explore your data. The rest of this notebook illustrates how to create and explore a baseline and a study cohort, and then compare them using the BiasAnalyzer APIs.**\n", + "\n", + "### Baseline cohort creation and exploration\n", + "**Baseline cohort creation**: Create a baseline cohort of young female patients on the `bias` object by calling the `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` function and passing the name of the cohort (first argument), the description of the cohort (second argument), a yaml file that specifies cohort inclusion and exclusion criteria or a cohort selection SQL query (third argument), and the cohort owner's name indicating who owns or creates this cohort (fourth argument). The function will show a progress bar to indicate cohort creation progress over three stages." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a68f3eaf-92fd-49a2-9768-d685d826fd57", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "template_path: /home/hongyi/BiasAnalyzer/biasanalyzer/sql_templates\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d51f3cdd95894de3ae541cae8ec581da", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Cohort creation: 0%| | 0/3 [00:00= 2000 AND p.year_of_birth <= 2020 GROUP BY c.person_id ) SELECT * FROM filtered_cohort f', 'created_by': 'system'}\n", + "The total number of patients in the baseline cohort: 12360\n", + "The first five patients in the baseline cohort: [{'subject_id': 42583, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2020, 4, 26), 'cohort_end_date': datetime.date(2020, 5, 12)}, {'subject_id': 33685, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2017, 12, 8), 'cohort_end_date': datetime.date(2020, 5, 10)}, {'subject_id': 74383, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2019, 1, 31), 'cohort_end_date': datetime.date(2020, 3, 25)}, {'subject_id': 23986, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2019, 6, 15), 'cohort_end_date': datetime.date(2020, 3, 28)}, {'subject_id': 93962, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2019, 7, 1), 'cohort_end_date': datetime.date(2020, 5, 15)}]\n" + ] + } + ], + "source": [ + "baseline_cohort_def = baseline_cohort.metadata\n", + "print(f'Baseline cohort definition metadata: {baseline_cohort_def}')\n", + "baseline_cohort_data = baseline_cohort.data\n", + "print(f'The total number of patients in the baseline cohort: {len(baseline_cohort_data)}')\n", + "print(f'The first five patients in the baseline cohort: {baseline_cohort_data[:5]}')" + ] + }, + { + "cell_type": "markdown", + "id": "e25fea43-d14e-42cc-8072-063455336fae", + "metadata": {}, + 
"source": [ + "———————————————\n", + "\n", + "**Baseline cohort deeper exploration**: you can get statistics on age, gender, race, and ethnicity of the baseline cohort by calling `get_stats()` method on the created baseline cohort object. You can also get cohort distributions on age and gender by calling `get_distributions()` method on the created baseline cohort object." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b1415805-b065-40b8-b2cd-6db6f5f9ca41", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the baseline cohort stats: [{'total_count': 12360, 'earliest_start_date': datetime.date(2000, 2, 19), 'latest_start_date': datetime.date(2020, 5, 26), 'earliest_end_date': datetime.date(2002, 7, 20), 'latest_end_date': datetime.date(2020, 5, 27), 'min_duration_days': 0, 'max_duration_days': 7379, 'avg_duration_days': 1192.32, 'median_duration': 296, 'stddev_duration': 1779.19}]\n", + "the baseline cohort age stats: [{'total_count': 12360, 'min_age': 0, 'max_age': 25, 'avg_age': 7.24, 'median_age': 6, 'stddev_age': 6.01}]\n", + "the baseline cohort gender stats: [{'gender': 'female', 'gender_count': 12360, 'probability': 1.0}]\n", + "the baseline cohort race stats: [{'race': 'Other', 'race_count': 66, 'probability': 0.01}, {'race': 'Asian', 'race_count': 878, 'probability': 0.07}, {'race': 'Black or African American', 'race_count': 1056, 'probability': 0.09}, {'race': 'White', 'race_count': 10360, 'probability': 0.84}]\n", + "the baseline cohort ethnicity stats: [{'ethnicity': 'other', 'ethnicity_count': 12360, 'probability': 1.0}]\n" + ] + } + ], + "source": [ + "# get stats of the baseline cohort\n", + "cohort_stats = baseline_cohort.get_stats()\n", + "print(f'the baseline cohort stats: {cohort_stats}')\n", + "cohort_age_stats = baseline_cohort.get_stats(\"age\")\n", + "print(f'the baseline cohort age stats: {cohort_age_stats}')\n", + "cohort_gender_stats = 
baseline_cohort.get_stats(\"gender\")\n", + "print(f'the baseline cohort gender stats: {cohort_gender_stats}')\n", + "cohort_race_stats = baseline_cohort.get_stats(\"race\")\n", + "print(f'the baseline cohort race stats: {cohort_race_stats}')\n", + "cohort_ethnicity_stats = baseline_cohort.get_stats(\"ethnicity\")\n", + "print(f'the baseline cohort ethnicity stats: {cohort_ethnicity_stats}')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d54e39da-6f78-4dc1-91ae-a8c26852582a", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the baseline cohort age discrete probability distribution: [{'age_bin': '0-10', 'bin_count': 8230, 'probability': 0.6659}, {'age_bin': '11-20', 'bin_count': 4129, 'probability': 0.3341}, {'age_bin': '21-30', 'bin_count': 1, 'probability': 0.0001}, {'age_bin': '31-40', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '41-50', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '51-60', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '61-70', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '71-80', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '81-90', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '91+', 'bin_count': 0, 'probability': 0.0}]\n" + ] + } + ], + "source": [ + "# get discrete probability distribution of the age variable in the baseline cohort\n", + "cohort_age_distr = baseline_cohort.get_distributions('age')\n", + "print(f'the baseline cohort age discrete probability distribution: {cohort_age_distr}')" + ] + }, + { + "cell_type": "markdown", + "id": "5d92f81a-99f8-4534-bcb1-29369262c17e", + "metadata": {}, + "source": [ + "---\n", + "\n", + "### Study cohort creation and exploration\n", + "**Study cohort creation**: Create a study cohort of young female COVID patients on the bias object by calling the create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by) function and passing the name of the cohort (first argument), the 
description of the cohort (second argument), a yaml file that specifies cohort inclusion and exclusion criteria or a cohort selection SQL query (third argument), and the cohort owner's name indicating who owns or creates this cohort (fourth argument). The function will show a progress bar to indicate cohort creation progress over three stages." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e3f5ace2-6cc4-4940-a067-e1a3fc14e1ce", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8afad29563224f62b1d76a5f9f201490", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Cohort creation: 0%| | 0/3 [00:00= 2000 AND p.year_of_birth <= 2020 GROUP BY c.person_id ) SELECT * FROM filtered_cohort f', 'created_by': 'system'}\n", + "The total number of patients in the study cohort: 10208\n", + "The first five patients in the young female COVID-19 patient cohort: [{'subject_id': 22344, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 3, 11), 'cohort_end_date': datetime.date(2020, 4, 13)}, {'subject_id': 53949, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 2, 28), 'cohort_end_date': datetime.date(2020, 3, 11)}, {'subject_id': 80198, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 3, 5), 'cohort_end_date': datetime.date(2020, 4, 9)}, {'subject_id': 30052, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 3, 6), 'cohort_end_date': datetime.date(2020, 4, 8)}, {'subject_id': 88837, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 2, 24), 'cohort_end_date': datetime.date(2020, 3, 12)}]\n" + ] + } + ], + "source": [ + "study_cohort_def = study_cohort.metadata\n", + "print(f'Young female COVID-19 patient cohort definition: {study_cohort_def}')\n", + "study_cohort_data = study_cohort.data\n", + "print(f'The total number of patients in the study cohort: 
{len(study_cohort_data)}')\n", + "print(f'The first five patients in the young female COVID-19 patient cohort: {study_cohort_data[:5]}')" + ] + }, + { + "cell_type": "markdown", + "id": "0cac81eb-4006-494e-956c-5b4f5015ab20", + "metadata": {}, + "source": [ + "———————————————\n", + "\n", + "**Study cohort deeper exploration**: you can get statistics on age, gender, race, and ethnicity of the study cohort by \n", + "calling `get_stats()` method on the created study cohort object. You can also get cohort distributions on age and gender by \n", + "calling `get_distributions()` method on the created study cohort object." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "8be5061b-cfdf-4dc0-9ef8-f18277ab9fbe", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the user study cohort stats: [{'total_count': 10208, 'earliest_start_date': datetime.date(2020, 1, 18), 'latest_start_date': datetime.date(2020, 3, 30), 'earliest_end_date': datetime.date(2020, 2, 7), 'latest_end_date': datetime.date(2020, 5, 3), 'min_duration_days': 8, 'max_duration_days': 37, 'avg_duration_days': 24.25, 'median_duration': 24, 'stddev_duration': 7.2}]\n", + "the user study cohort age stats: [{'total_count': 10208, 'min_age': 0, 'max_age': 20, 'avg_age': 10.94, 'median_age': 11, 'stddev_age': 5.92}]\n", + "the user study gender stats: [{'gender': 'female', 'gender_count': 10208, 'probability': 1.0}]\n", + "the user study cohort race stats: [{'race': 'Other', 'race_count': 53, 'probability': 0.01}, {'race': 'Asian', 'race_count': 723, 'probability': 0.07}, {'race': 'Black or African American', 'race_count': 866, 'probability': 0.08}, {'race': 'White', 'race_count': 8566, 'probability': 0.84}]\n", + "the user study ethnicity stats: [{'ethnicity': 'other', 'ethnicity_count': 10208, 'probability': 1.0}]\n" + ] + } + ], + "source": [ + "# get stats and distributions of the user study cohort\n", + "study_cohort_stats = 
study_cohort.get_stats()\n", + "print(f'the user study cohort stats: {study_cohort_stats}')\n", + "study_cohort_age_stats = study_cohort.get_stats(\"age\")\n", + "print(f'the user study cohort age stats: {study_cohort_age_stats}')\n", + "study_cohort_gender_stats = study_cohort.get_stats(\"gender\")\n", + "print(f'the user study gender stats: {study_cohort_gender_stats}')\n", + "study_cohort_race_stats = study_cohort.get_stats(\"race\")\n", + "print(f'the user study cohort race stats: {study_cohort_race_stats}')\n", + "study_cohort_ethnicity_stats = study_cohort.get_stats(\"ethnicity\")\n", + "print(f'the user study ethnicity stats: {study_cohort_ethnicity_stats}')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c7ad0b7b-21dc-4572-af21-fe1580361999", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the user study cohort age discrete probability distribution: [{'age_bin': '0-10', 'bin_count': 4744, 'probability': 0.4647}, {'age_bin': '11-20', 'bin_count': 5464, 'probability': 0.5353}, {'age_bin': '21-30', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '31-40', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '41-50', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '51-60', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '61-70', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '71-80', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '81-90', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '91+', 'bin_count': 0, 'probability': 0.0}]\n" + ] + } + ], + "source": [ + "# get discrete probability distribution of the age variable in the baseline cohort\n", + "study_cohort_age_distr = study_cohort.get_distributions('age')\n", + "print(f'the user study cohort age discrete probability distribution: {study_cohort_age_distr}')" + ] + }, + { + "cell_type": "markdown", + "id": "d300e804-69da-4d30-80ad-a5239acba562", + "metadata": {}, + "source": [ + "---\n", + "\n", + "### Baseline and 
study cohort comparison\n", + "You can compare the baseline and study cohorts by calling the method `compare_cohorts(id1, id2)` on the `bias` object. Note that currently only Hellinger distances between age and gender distributions of two cohorts are computed as comparison metrics. More comparative metrics will be added in the future." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "0d03cf95-3c68-4eee-be41-5482dea68b84", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'age_hellinger_distance': 0.14447523081257604}, {'gender_hellinger_distance': 0.0}]\n" + ] + } + ], + "source": [ + "# compare the baseline and user study cohorts\n", + "result = bias.compare_cohorts(baseline_cohort_def['id'], study_cohort_def['id'])\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "id": "22984b7e-0001-4add-aacb-ecf1252f2b7a", + "metadata": {}, + "source": [ + "---\n", + "\n", + "### Final cleanup to ensure database connections are closed" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "128c7b02-faef-4a35-97a6-c92baa5a37dd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connection to BiasDatabase closed.\n", + "Connection to the OMOP CDM database closed.\n" + ] + } + ], + "source": [ + "bias.cleanup()" + ] + }, + { + "cell_type": "markdown", + "id": "1eddbdd7", + "metadata": {}, + "source": [ + "### ✅ Summary\n", + "\n", + "In this tutorial, you learned how to connect to an OMOP CDM database, create a baseline and a study cohort, explore each created cohort, and compare two created cohorts using the BiasAnalyzer python package.\n", + "\n", + "For more information, refer to the [BiasAnalyzer GitHub repo](https://github.com/VACLab/BiasAnalyzer) and the [README file](https://github.com/VACLab/BiasAnalyzer/blob/main/README.md).\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 
(biasanalyzer)", + "language": "python", + "name": "biasanalyzer-py3.8" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/BiasAnalyzerTestingCohorts.ipynb b/notebooks/BiasAnalyzerTestingCohorts.ipynb deleted file mode 100644 index 7810c49..0000000 --- a/notebooks/BiasAnalyzerTestingCohorts.ipynb +++ /dev/null @@ -1,577 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "a25ba48a-9e2c-4e1d-9e93-80f7ea3ff3e3", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting git+https://github.com/vaclab/BiasAnalyzer.git\n", - " Cloning https://github.com/vaclab/BiasAnalyzer.git to ./temp/pip-req-build-sqm_zvhy\n", - " Running command git clone --filter=blob:none --quiet https://github.com/vaclab/BiasAnalyzer.git /home/hyi/temp/pip-req-build-sqm_zvhy\n", - " Resolved https://github.com/vaclab/BiasAnalyzer.git to commit 8d821839e93b1d9a208c5c66352ee66db60d1e53\n", - " Installing build dependencies ... \u001B[?25ldone\n", - "\u001B[?25h Getting requirements to build wheel ... \u001B[?25ldone\n", - "\u001B[?25h Preparing metadata (pyproject.toml) ... 
\u001B[?25ldone\n", - "\u001B[?25hCollecting duckdb<2.0.0,>=1.1.1 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for duckdb<2.0.0,>=1.1.1 from https://files.pythonhosted.org/packages/50/52/6e6f5b5b07841cec334ca6b98f2e02b7bb54ab3b99c49aa3a161cc0b4b37/duckdb-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached duckdb-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (966 bytes)\n", - "Collecting duckdb-engine<0.14.0,>=0.13.2 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for duckdb-engine<0.14.0,>=0.13.2 from https://files.pythonhosted.org/packages/ef/5d/81a0d67483d0767e4fbf7444b079b3f21574a184b0888782ced1c2172777/duckdb_engine-0.13.6-py3-none-any.whl.metadata\n", - " Using cached duckdb_engine-0.13.6-py3-none-any.whl.metadata (8.0 kB)\n", - "Collecting ipytree<0.3.0,>=0.2.2 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for ipytree<0.3.0,>=0.2.2 from https://files.pythonhosted.org/packages/e4/03/35cf1742598d784e96153175233318a2332f71863e55ad1007c9264c1a7a/ipytree-0.2.2-py2.py3-none-any.whl.metadata\n", - " Using cached ipytree-0.2.2-py2.py3-none-any.whl.metadata (849 bytes)\n", - "Collecting ipywidgets<9.0.0,>=8.1.5 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for ipywidgets<9.0.0,>=8.1.5 from https://files.pythonhosted.org/packages/22/2d/9c0b76f2f9cc0ebede1b9371b6f317243028ed60b90705863d493bae622e/ipywidgets-8.1.5-py3-none-any.whl.metadata\n", - " Using cached ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)\n", - "Collecting jinja2==3.1.5 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for jinja2==3.1.5 from https://files.pythonhosted.org/packages/bd/0f/2ba5fbcd631e3e88689309dbe978c5769e883e4b84ebfe7da30b43275c5a/jinja2-3.1.5-py3-none-any.whl.metadata\n", - " Using cached jinja2-3.1.5-py3-none-any.whl.metadata (2.6 kB)\n", - "Collecting numpy==1.24.4 (from biasanalyzer==0.1.0)\n", - " 
Obtaining dependency information for numpy==1.24.4 from https://files.pythonhosted.org/packages/22/97/dfb1a31bb46686f09e68ea6ac5c63fdee0d22d7b23b8f3f7ea07712869ef/numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)\n", - "Collecting pandas==2.0.3 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pandas==2.0.3 from https://files.pythonhosted.org/packages/d0/28/88b81881c056376254618fad622a5e94b5126db8c61157ea1910cd1c040a/pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)\n", - "Collecting psycopg2<3.0.0,>=2.9.1 (from biasanalyzer==0.1.0)\n", - " Using cached psycopg2-2.9.10-cp311-cp311-linux_x86_64.whl\n", - "Collecting pydantic<3.0.0,>=2.9.2 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pydantic<3.0.0,>=2.9.2 from https://files.pythonhosted.org/packages/f4/3c/8cc1cc84deffa6e25d2d0c688ebb80635dfdbf1dbea3e30c541c8cf4d860/pydantic-2.10.6-py3-none-any.whl.metadata\n", - " Using cached pydantic-2.10.6-py3-none-any.whl.metadata (30 kB)\n", - "Collecting pyyaml<7.0.0,>=6.0.2 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pyyaml<7.0.0,>=6.0.2 from https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)\n", - "Collecting scipy==1.10.1 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for scipy==1.10.1 from 
https://files.pythonhosted.org/packages/21/cd/fe2d4af234b80dc08c911ce63fdaee5badcdde3e9bcd9a68884580652ef0/scipy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached scipy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (58 kB)\n", - "Collecting sqlalchemy<3.0.0,>=2.0.35 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for sqlalchemy<3.0.0,>=2.0.35 from https://files.pythonhosted.org/packages/ff/0a/46f3171f564a19a1daf6e7e0e6c8afc6ecd792f947c6de435519d4d16af3/sqlalchemy-2.0.39-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached sqlalchemy-2.0.39-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)\n", - "Collecting MarkupSafe>=2.0 (from jinja2==3.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for MarkupSafe>=2.0 from https://files.pythonhosted.org/packages/f1/a4/aefb044a2cd8d7334c8a47d3fb2c9f328ac48cb349468cc31c20b539305f/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)\n", - "Collecting python-dateutil>=2.8.2 (from pandas==2.0.3->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for python-dateutil>=2.8.2 from https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata\n", - " Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)\n", - "Collecting pytz>=2020.1 (from pandas==2.0.3->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pytz>=2020.1 from https://files.pythonhosted.org/packages/eb/38/ac33370d784287baa1c3d538978b5e2ea064d4c1b93ffbd12826c190dd10/pytz-2025.1-py2.py3-none-any.whl.metadata\n", - " Using cached pytz-2025.1-py2.py3-none-any.whl.metadata (22 kB)\n", - "Collecting 
tzdata>=2022.1 (from pandas==2.0.3->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for tzdata>=2022.1 from https://files.pythonhosted.org/packages/0f/dd/84f10e23edd882c6f968c21c2434fe67bd4a528967067515feca9e611e5e/tzdata-2025.1-py2.py3-none-any.whl.metadata\n", - " Using cached tzdata-2025.1-py2.py3-none-any.whl.metadata (1.4 kB)\n", - "Collecting packaging>=21 (from duckdb-engine<0.14.0,>=0.13.2->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for packaging>=21 from https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl.metadata\n", - " Using cached packaging-24.2-py3-none-any.whl.metadata (3.2 kB)\n", - "Collecting comm>=0.1.3 (from ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for comm>=0.1.3 from https://files.pythonhosted.org/packages/e6/75/49e5bfe642f71f272236b5b2d2691cf915a7283cc0ceda56357b61daa538/comm-0.2.2-py3-none-any.whl.metadata\n", - " Using cached comm-0.2.2-py3-none-any.whl.metadata (3.7 kB)\n", - "Collecting ipython>=6.1.0 (from ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for ipython>=6.1.0 from https://files.pythonhosted.org/packages/20/3a/917cb9e72f4e1a4ea13c862533205ae1319bd664119189ee5cc9e4e95ebf/ipython-9.0.2-py3-none-any.whl.metadata\n", - " Using cached ipython-9.0.2-py3-none-any.whl.metadata (4.3 kB)\n", - "Collecting traitlets>=4.3.1 (from ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for traitlets>=4.3.1 from https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl.metadata\n", - " Using cached traitlets-5.14.3-py3-none-any.whl.metadata (10 kB)\n", - "Collecting widgetsnbextension~=4.0.12 (from ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for widgetsnbextension~=4.0.12 from 
https://files.pythonhosted.org/packages/21/02/88b65cc394961a60c43c70517066b6b679738caf78506a5da7b88ffcb643/widgetsnbextension-4.0.13-py3-none-any.whl.metadata\n", - " Using cached widgetsnbextension-4.0.13-py3-none-any.whl.metadata (1.6 kB)\n", - "Collecting jupyterlab-widgets~=3.0.12 (from ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for jupyterlab-widgets~=3.0.12 from https://files.pythonhosted.org/packages/a9/93/858e87edc634d628e5d752ba944c2833133a28fa87bb093e6832ced36a3e/jupyterlab_widgets-3.0.13-py3-none-any.whl.metadata\n", - " Using cached jupyterlab_widgets-3.0.13-py3-none-any.whl.metadata (4.1 kB)\n", - "Collecting annotated-types>=0.6.0 (from pydantic<3.0.0,>=2.9.2->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for annotated-types>=0.6.0 from https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl.metadata\n", - " Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n", - "Collecting pydantic-core==2.27.2 (from pydantic<3.0.0,>=2.9.2->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pydantic-core==2.27.2 from https://files.pythonhosted.org/packages/a8/7c/b860618c25678bbd6d1d99dbdfdf0510ccb50790099b963ff78a124b754f/pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", - "Collecting typing-extensions>=4.12.2 (from pydantic<3.0.0,>=2.9.2->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for typing-extensions>=4.12.2 from https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl.metadata\n", - " Using cached typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)\n", - "Collecting greenlet!=0.4.17 (from 
sqlalchemy<3.0.0,>=2.0.35->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for greenlet!=0.4.17 from https://files.pythonhosted.org/packages/f7/4b/1c9695aa24f808e156c8f4813f685d975ca73c000c2a5056c514c64980f6/greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata\n", - " Using cached greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (3.8 kB)\n", - "Collecting decorator (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for decorator from https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl.metadata\n", - " Using cached decorator-5.2.1-py3-none-any.whl.metadata (3.9 kB)\n", - "Collecting ipython-pygments-lexers (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for ipython-pygments-lexers from https://files.pythonhosted.org/packages/d9/33/1f075bf72b0b747cb3288d011319aaf64083cf2efef8354174e3ed4540e2/ipython_pygments_lexers-1.1.1-py3-none-any.whl.metadata\n", - " Using cached ipython_pygments_lexers-1.1.1-py3-none-any.whl.metadata (1.1 kB)\n", - "Collecting jedi>=0.16 (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for jedi>=0.16 from https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl.metadata\n", - " Using cached jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)\n", - "Collecting matplotlib-inline (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for matplotlib-inline from https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl.metadata\n", - " Using cached 
matplotlib_inline-0.1.7-py3-none-any.whl.metadata (3.9 kB)\n", - "Collecting pexpect>4.3 (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pexpect>4.3 from https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl.metadata\n", - " Using cached pexpect-4.9.0-py2.py3-none-any.whl.metadata (2.5 kB)\n", - "Collecting prompt_toolkit<3.1.0,>=3.0.41 (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for prompt_toolkit<3.1.0,>=3.0.41 from https://files.pythonhosted.org/packages/e4/ea/d836f008d33151c7a1f62caf3d8dd782e4d15f6a43897f64480c2b8de2ad/prompt_toolkit-3.0.50-py3-none-any.whl.metadata\n", - " Using cached prompt_toolkit-3.0.50-py3-none-any.whl.metadata (6.6 kB)\n", - "Collecting pygments>=2.4.0 (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pygments>=2.4.0 from https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl.metadata\n", - " Using cached pygments-2.19.1-py3-none-any.whl.metadata (2.5 kB)\n", - "Collecting stack_data (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for stack_data from https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl.metadata\n", - " Using cached stack_data-0.6.3-py3-none-any.whl.metadata (18 kB)\n", - "Collecting six>=1.5 (from python-dateutil>=2.8.2->pandas==2.0.3->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for six>=1.5 from https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl.metadata\n", - " Using cached 
six-1.17.0-py2.py3-none-any.whl.metadata (1.7 kB)\n", - "Collecting parso<0.9.0,>=0.8.4 (from jedi>=0.16->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for parso<0.9.0,>=0.8.4 from https://files.pythonhosted.org/packages/c6/ac/dac4a63f978e4dcb3c6d3a78c4d8e0192a113d288502a1216950c41b1027/parso-0.8.4-py2.py3-none-any.whl.metadata\n", - " Using cached parso-0.8.4-py2.py3-none-any.whl.metadata (7.7 kB)\n", - "Collecting ptyprocess>=0.5 (from pexpect>4.3->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for ptyprocess>=0.5 from https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl.metadata\n", - " Using cached ptyprocess-0.7.0-py2.py3-none-any.whl.metadata (1.3 kB)\n", - "Collecting wcwidth (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for wcwidth from https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl.metadata\n", - " Using cached wcwidth-0.2.13-py2.py3-none-any.whl.metadata (14 kB)\n", - "Collecting executing>=1.2.0 (from stack_data->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for executing>=1.2.0 from https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl.metadata\n", - " Using cached executing-2.2.0-py2.py3-none-any.whl.metadata (8.9 kB)\n", - "Collecting asttokens>=2.1.0 (from stack_data->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for asttokens>=2.1.0 from 
https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl.metadata\n", - " Using cached asttokens-3.0.0-py3-none-any.whl.metadata (4.7 kB)\n", - "Collecting pure-eval (from stack_data->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pure-eval from https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl.metadata\n", - " Using cached pure_eval-0.2.3-py3-none-any.whl.metadata (6.3 kB)\n", - "Using cached jinja2-3.1.5-py3-none-any.whl (134 kB)\n", - "Using cached numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n", - "Using cached pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.2 MB)\n", - "Using cached scipy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.1 MB)\n", - "Using cached duckdb-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (20.2 MB)\n", - "Using cached duckdb_engine-0.13.6-py3-none-any.whl (48 kB)\n", - "Using cached ipytree-0.2.2-py2.py3-none-any.whl (1.3 MB)\n", - "Using cached ipywidgets-8.1.5-py3-none-any.whl (139 kB)\n", - "Using cached pydantic-2.10.6-py3-none-any.whl (431 kB)\n", - "Using cached pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n", - "Using cached PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (762 kB)\n", - "Using cached sqlalchemy-2.0.39-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)\n", - "Using cached annotated_types-0.7.0-py3-none-any.whl (13 kB)\n", - "Using cached comm-0.2.2-py3-none-any.whl (7.2 kB)\n", - "Using cached greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (602 kB)\n", - "Using cached ipython-9.0.2-py3-none-any.whl (600 kB)\n", - "Using cached jupyterlab_widgets-3.0.13-py3-none-any.whl 
(214 kB)\n", - "Using cached MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (23 kB)\n", - "Using cached packaging-24.2-py3-none-any.whl (65 kB)\n", - "Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl (229 kB)\n", - "Using cached pytz-2025.1-py2.py3-none-any.whl (507 kB)\n", - "Using cached traitlets-5.14.3-py3-none-any.whl (85 kB)\n", - "Using cached typing_extensions-4.12.2-py3-none-any.whl (37 kB)\n", - "Using cached tzdata-2025.1-py2.py3-none-any.whl (346 kB)\n", - "Using cached widgetsnbextension-4.0.13-py3-none-any.whl (2.3 MB)\n", - "Using cached jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)\n", - "Using cached pexpect-4.9.0-py2.py3-none-any.whl (63 kB)\n", - "Using cached prompt_toolkit-3.0.50-py3-none-any.whl (387 kB)\n", - "Using cached pygments-2.19.1-py3-none-any.whl (1.2 MB)\n", - "Using cached six-1.17.0-py2.py3-none-any.whl (11 kB)\n", - "Using cached decorator-5.2.1-py3-none-any.whl (9.2 kB)\n", - "Using cached ipython_pygments_lexers-1.1.1-py3-none-any.whl (8.1 kB)\n", - "Using cached matplotlib_inline-0.1.7-py3-none-any.whl (9.9 kB)\n", - "Using cached stack_data-0.6.3-py3-none-any.whl (24 kB)\n", - "Using cached asttokens-3.0.0-py3-none-any.whl (26 kB)\n", - "Using cached executing-2.2.0-py2.py3-none-any.whl (26 kB)\n", - "Using cached parso-0.8.4-py2.py3-none-any.whl (103 kB)\n", - "Using cached ptyprocess-0.7.0-py2.py3-none-any.whl (13 kB)\n", - "Using cached pure_eval-0.2.3-py3-none-any.whl (11 kB)\n", - "Using cached wcwidth-0.2.13-py2.py3-none-any.whl (34 kB)\n", - "Building wheels for collected packages: biasanalyzer\n", - " Building wheel for biasanalyzer (pyproject.toml) ... 
\u001B[?25ldone\n", - "\u001B[?25h Created wheel for biasanalyzer: filename=biasanalyzer-0.1.0-py3-none-any.whl size=25475 sha256=1982c82749337f81db1a730b8cc25c049d0c0788cd6b782f69ce8be1d92a397c\n", - " Stored in directory: /home/hyi/temp/pip-ephem-wheel-cache-7pwouolk/wheels/25/75/4e/079d96d69cc58148ce31d3d44f858e4db5f689604112dcb7c3\n", - "Successfully built biasanalyzer\n", - "Installing collected packages: wcwidth, pytz, pure-eval, ptyprocess, widgetsnbextension, tzdata, typing-extensions, traitlets, six, pyyaml, pygments, psycopg2, prompt_toolkit, pexpect, parso, packaging, numpy, MarkupSafe, jupyterlab-widgets, greenlet, executing, duckdb, decorator, asttokens, annotated-types, stack_data, sqlalchemy, scipy, python-dateutil, pydantic-core, matplotlib-inline, jinja2, jedi, ipython-pygments-lexers, comm, pydantic, pandas, ipython, duckdb-engine, ipywidgets, ipytree, biasanalyzer\n", - "\u001B[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "ipympl 0.9.3 requires ipython<9, but you have ipython 9.0.2 which is incompatible.\u001B[0m\u001B[31m\n", - "\u001B[0mSuccessfully installed MarkupSafe-3.0.2 annotated-types-0.7.0 asttokens-3.0.0 biasanalyzer-0.1.0 comm-0.2.2 decorator-5.2.1 duckdb-1.2.1 duckdb-engine-0.13.6 executing-2.2.0 greenlet-3.1.1 ipython-9.0.2 ipython-pygments-lexers-1.1.1 ipytree-0.2.2 ipywidgets-8.1.5 jedi-0.19.2 jinja2-3.1.5 jupyterlab-widgets-3.0.13 matplotlib-inline-0.1.7 numpy-1.24.4 packaging-24.2 pandas-2.0.3 parso-0.8.4 pexpect-4.9.0 prompt_toolkit-3.0.50 psycopg2-2.9.10 ptyprocess-0.7.0 pure-eval-0.2.3 pydantic-2.10.6 pydantic-core-2.27.2 pygments-2.19.1 python-dateutil-2.9.0.post0 pytz-2025.1 pyyaml-6.0.2 scipy-1.10.1 six-1.17.0 sqlalchemy-2.0.39 stack_data-0.6.3 traitlets-5.14.3 typing-extensions-4.12.2 tzdata-2025.1 wcwidth-0.2.13 widgetsnbextension-4.0.13\n", - "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.11/site-packages (4.12.2)\n" - ] - } - ], - "source": [ - "# Have to specify TMPDIR and target in pip install command to work around the kernel crash issue due to \n", - "# the small ephemeral local storage quota allocated to /tmp which is used by default by pip install\n", - "!TMPDIR=/home/hyi/temp pip install git+https://github.com/vaclab/BiasAnalyzer.git --target /home/hyi/target --upgrade\n", - "!pip install --upgrade typing-extensions" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "9ce3b87c-0754-4eae-9f85-8210104e2b0b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# append the target folder where HealthDataBias module was installed to PYTHONPATH\n", - "import sys\n", - "sys.path.append('/home/hyi/target')" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "729e8803-74f8-4180-aa8b-0e44567f8aeb", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from biasanalyzer.api import BIAS" - ] - }, 
- { - "cell_type": "code", - "execution_count": 4, - "id": "548223ed-8948-461e-b9d6-40a0ec7fc89f", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "no configuration file specified. Call set_config(config_file_path) next to specify configurations\n" - ] - } - ], - "source": [ - "# create an object of BIAS class\n", - "bias = BIAS()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "7d440d9f-c7fa-4ef1-ad66-31274ebef4ea", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "configuration specified in /home/hyi/bias/config/config.yaml loaded successfully\n" - ] - } - ], - "source": [ - "bias.set_config('/home/hyi/bias/config/config.yaml')" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "283156f8-63da-42a5-bbd7-ee2b7719652c", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Connected to the OMOP CDM database (read-only).\n", - "Cohort Definition table created.\n", - "Cohort table created.\n" - ] - } - ], - "source": [ - "# the configuration file includes root_omop_cdm_database configuration info with an example shown \n", - "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/config/test_config.yaml\n", - "bias.set_root_omop()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "a68f3eaf-92fd-49a2-9768-d685d826fd57", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "template_path: /home/hyi/target/biasanalyzer/sql_templates\n", - "configuration specified in /home/hyi/bias/config/test_cohort_creation_condition_occurrence_config_baseline.yaml loaded successfully\n", - "Cohort definition inserted successfully.\n", - "Cohort Young female patients successfully created.\n", - "cohort created successfully\n", - "young female patient cohort definition: 
{'id': 1, 'name': 'Young female patients', 'description': 'Young female patients', 'created_date': datetime.date(2025, 3, 12), 'creation_info': 'WITH ranked_events AS ( SELECT person_id, condition_concept_id, condition_start_date AS event_start_date, condition_end_date AS event_end_date, ROW_NUMBER() OVER ( PARTITION BY person_id, condition_concept_id ORDER BY condition_start_date ASC ) AS event_instance FROM condition_occurrence ), ranked_visits AS ( SELECT person_id, visit_concept_id, visit_start_date AS event_start_date, visit_end_date AS event_end_date, ROW_NUMBER() OVER ( PARTITION BY person_id, visit_concept_id ORDER BY visit_start_date ASC ) AS event_instance FROM visit_occurrence ), condition_qualifying_events AS ( SELECT person_id, condition_start_date as event_start_date, condition_end_date as event_end_date FROM condition_occurrence ), filtered_cohort AS ( SELECT c.person_id, MIN(c.event_start_date) AS cohort_start_date, MAX(c.event_end_date) AS cohort_end_date FROM condition_qualifying_events c JOIN person p ON c.person_id = p.person_id WHERE 1=1 AND p.gender_concept_id = 8532 AND p.year_of_birth >= 2000 AND p.year_of_birth <= 2020 GROUP BY c.person_id ) SELECT * FROM filtered_cohort f', 'created_by': 'system'}\n", - "The first five patients in the young female patient cohort: [{'subject_id': 8, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2020, 3, 11), 'cohort_end_date': None}, {'subject_id': 13, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2011, 11, 13), 'cohort_end_date': datetime.date(2020, 3, 22)}, {'subject_id': 14, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2015, 4, 20), 'cohort_end_date': datetime.date(2020, 3, 19)}, {'subject_id': 21, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2017, 8, 25), 'cohort_end_date': None}, {'subject_id': 25, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2007, 4, 3), 'cohort_end_date': None}]\n" - ] - } - ], - "source": [ - 
"baseline_cohort = bias.create_cohort('Young female patients', 'Young female patients', '/home/hyi/bias/config/test_cohort_creation_condition_occurrence_config_baseline.yaml', 'system')\n", - "baseline_cohort_def = baseline_cohort.metadata\n", - "print(f'young female patient cohort definition: {baseline_cohort_def}')\n", - "baseline_cohort_data = baseline_cohort.data\n", - "print(f'The first five patients in the young female patient cohort: {baseline_cohort_data[:5]}')" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "9a52ab5f-57a8-4942-8a03-ec86651e919e", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cohort definition inserted successfully.\n", - "Cohort COVID-19 patients successfully created.\n", - "cohort created successfully\n", - "all COVID-19 patient cohort definition: {'id': 2, 'name': 'COVID-19 patients', 'description': 'Patients with COVID-19 condition', 'created_date': datetime.date(2025, 3, 12), 'creation_info': 'SELECT person_id, condition_start_date as cohort_start_date, condition_end_date as cohort_end_date FROM condition_occurrence WHERE condition_concept_id = 37311061', 'created_by': 'system'}\n", - "The first five patients in the COVID-19 patient cohort: [{'subject_id': 20342, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 3, 11), 'cohort_end_date': datetime.date(2020, 4, 3)}, {'subject_id': 20343, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 3, 9), 'cohort_end_date': datetime.date(2020, 4, 7)}, {'subject_id': 20344, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 3, 11), 'cohort_end_date': datetime.date(2020, 4, 8)}, {'subject_id': 20345, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 3, 2), 'cohort_end_date': datetime.date(2020, 3, 19)}, {'subject_id': 20347, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 3, 5), 'cohort_end_date': datetime.date(2020, 3, 
25)}]\n" - ] - } - ], - "source": [ - "# create a baseline cohort with all COVID-19 patients\n", - "baseline_cohort_query = ('SELECT person_id, condition_start_date as cohort_start_date, '\n", - " 'condition_end_date as cohort_end_date '\n", - " 'FROM condition_occurrence '\n", - " 'WHERE condition_concept_id = 37311061')\n", - "\n", - "baseline_cohort = bias.create_cohort('COVID-19 patients', 'Patients with COVID-19 condition', baseline_cohort_query, 'system')\n", - "baseline_cohort_def = baseline_cohort.metadata\n", - "print(f'all COVID-19 patient cohort definition: {baseline_cohort_def}')\n", - "baseline_cohort_data = baseline_cohort.data\n", - "print(f'The first five patients in the COVID-19 patient cohort: {baseline_cohort_data[:5]}')" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "b1415805-b065-40b8-b2cd-6db6f5f9ca41", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "the baseline cohort stats: [{'total_count': 88166, 'earliest_start_date': datetime.date(2020, 1, 14), 'latest_start_date': datetime.date(2020, 3, 31), 'earliest_end_date': datetime.date(2020, 1, 30), 'latest_end_date': datetime.date(2020, 5, 3), 'min_duration_days': 8, 'max_duration_days': 37, 'avg_duration_days': 22.63, 'median_duration': 23, 'stddev_duration': 8.06}]\n", - "the baseline cohort age stats: [{'total_count': 88166, 'min_age': 0, 'max_age': 111, 'avg_age': 41.6, 'median_age': 41, 'stddev_age': 23.71}]\n", - "the baseline cohort gender stats: [{'gender': 'male', 'gender_count': 42961, 'probability': 0.49}, {'gender': 'female', 'gender_count': 45205, 'probability': 0.51}]\n", - "the baseline cohort race stats: [{'race': 'Asian', 'race_count': 6165, 'probability': 0.07}, {'race': 'Other', 'race_count': 511, 'probability': 0.01}, {'race': 'White', 'race_count': 74065, 'probability': 0.84}, {'race': 'Black or African American', 'race_count': 7425, 'probability': 0.08}]\n", - "the baseline cohort 
ethnicity stats: [{'ethnicity': 'other', 'ethnicity_count': 88166, 'probability': 1.0}]\n" - ] - } - ], - "source": [ - "# get stats of the baseline cohort\n", - "cohort_stats = baseline_cohort.get_stats()\n", - "print(f'the baseline cohort stats: {cohort_stats}')\n", - "cohort_age_stats = baseline_cohort.get_stats(\"age\")\n", - "print(f'the baseline cohort age stats: {cohort_age_stats}')\n", - "cohort_gender_stats = baseline_cohort.get_stats(\"gender\")\n", - "print(f'the baseline cohort gender stats: {cohort_gender_stats}')\n", - "cohort_race_stats = baseline_cohort.get_stats(\"race\")\n", - "print(f'the baseline cohort race stats: {cohort_race_stats}')\n", - "cohort_ethnicity_stats = baseline_cohort.get_stats(\"ethnicity\")\n", - "print(f'the baseline cohort ethnicity stats: {cohort_ethnicity_stats}')" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "d54e39da-6f78-4dc1-91ae-a8c26852582a", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "the baseline cohort age discrete probability distribution: [{'age_bin': '0-10', 'bin_count': 9231, 'probability': 0.1047}, {'age_bin': '11-20', 'bin_count': 10746, 'probability': 0.1219}, {'age_bin': '21-30', 'bin_count': 12377, 'probability': 0.1404}, {'age_bin': '31-40', 'bin_count': 10896, 'probability': 0.1236}, {'age_bin': '41-50', 'bin_count': 11450, 'probability': 0.1299}, {'age_bin': '51-60', 'bin_count': 13081, 'probability': 0.1484}, {'age_bin': '61-70', 'bin_count': 9985, 'probability': 0.1133}, {'age_bin': '71-80', 'bin_count': 5865, 'probability': 0.0665}, {'age_bin': '81-90', 'bin_count': 2810, 'probability': 0.0319}, {'age_bin': '91+', 'bin_count': 1725, 'probability': 0.0196}]\n" - ] - } - ], - "source": [ - "# get discrete probability distribution of the age variable in the baseline cohort\n", - "cohort_age_distr = baseline_cohort.get_distributions('age')\n", - "print(f'the baseline cohort age discrete probability 
distribution: {cohort_age_distr}')" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "e3f5ace2-6cc4-4940-a067-e1a3fc14e1ce", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cohort definition inserted successfully.\n", - "Cohort Older COVID-19 patients successfully created.\n", - "cohort created successfully\n", - "Older COVID-19 patient cohort definition: {'id': 3, 'name': 'Older COVID-19 patients', 'description': 'Patients with COVID-19 condition who are older than 65', 'created_date': datetime.date(2025, 3, 12), 'creation_info': 'SELECT c.person_id, c.condition_start_date as cohort_start_date, c.condition_end_date as cohort_end_date FROM condition_occurrence c JOIN person p ON c.person_id = p.person_id WHERE c.condition_concept_id = 37311061 AND p.year_of_birth < 1955', 'created_by': 'system'}\n", - "The first five patients in the older COVID-19 patient cohort: [{'subject_id': 20344, 'cohort_definition_id': 3, 'cohort_start_date': datetime.date(2020, 3, 11), 'cohort_end_date': datetime.date(2020, 4, 8)}, {'subject_id': 20352, 'cohort_definition_id': 3, 'cohort_start_date': datetime.date(2020, 3, 5), 'cohort_end_date': datetime.date(2020, 3, 31)}, {'subject_id': 20361, 'cohort_definition_id': 3, 'cohort_start_date': datetime.date(2020, 3, 9), 'cohort_end_date': datetime.date(2020, 4, 2)}, {'subject_id': 20378, 'cohort_definition_id': 3, 'cohort_start_date': datetime.date(2020, 2, 28), 'cohort_end_date': datetime.date(2020, 3, 11)}, {'subject_id': 20381, 'cohort_definition_id': 3, 'cohort_start_date': datetime.date(2020, 3, 12), 'cohort_end_date': datetime.date(2020, 4, 15)}]\n" - ] - } - ], - "source": [ - "# create a user study cohort with all COVID patients above the age of 65\n", - "study_cohort_query = ('SELECT c.person_id, c.condition_start_date as cohort_start_date, '\n", - " 'c.condition_end_date as cohort_end_date '\n", - " 'FROM condition_occurrence c JOIN '\n", - " 
'person p ON c.person_id = p.person_id '\n", - " 'WHERE c.condition_concept_id = 37311061 AND p.year_of_birth < 1955')\n", - "\n", - "study_cohort = bias.create_cohort('Older COVID-19 patients', 'Patients with COVID-19 condition who are older than 65', study_cohort_query, 'system')\n", - "study_cohort_def = study_cohort.metadata\n", - "print(f'Older COVID-19 patient cohort definition: {study_cohort_def}')\n", - "study_cohort_data = study_cohort.data\n", - "print(f'The first five patients in the older COVID-19 patient cohort: {study_cohort_data[:5]}')" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "8be5061b-cfdf-4dc0-9ef8-f18277ab9fbe", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "the user study cohort stats: [{'total_count': 14786, 'earliest_start_date': datetime.date(2020, 1, 20), 'latest_start_date': datetime.date(2020, 3, 29), 'earliest_end_date': datetime.date(2020, 2, 5), 'latest_end_date': datetime.date(2020, 4, 28), 'min_duration_days': 8, 'max_duration_days': 37, 'avg_duration_days': 22.05, 'median_duration': 22, 'stddev_duration': 8.36}]\n", - "the user study cohort age stats: [{'total_count': 14786, 'min_age': 66, 'max_age': 111, 'avg_age': 77.64, 'median_age': 75, 'stddev_age': 10.4}]\n", - "the user study gender stats: [{'gender': 'male', 'gender_count': 7321, 'probability': 0.5}, {'gender': 'female', 'gender_count': 7465, 'probability': 0.5}]\n", - "the user study cohort race stats: [{'race': 'Other', 'race_count': 115, 'probability': 0.01}, {'race': 'Asian', 'race_count': 992, 'probability': 0.07}, {'race': 'White', 'race_count': 12474, 'probability': 0.84}, {'race': 'Black or African American', 'race_count': 1205, 'probability': 0.08}]\n", - "the user study ethnicity stats: [{'ethnicity': 'other', 'ethnicity_count': 14786, 'probability': 1.0}]\n" - ] - } - ], - "source": [ - "# get stats and distributions of the user study cohort\n", - "study_cohort_stats = 
study_cohort.get_stats()\n", - "print(f'the user study cohort stats: {study_cohort_stats}')\n", - "study_cohort_age_stats = study_cohort.get_stats(\"age\")\n", - "print(f'the user study cohort age stats: {study_cohort_age_stats}')\n", - "study_cohort_gender_stats = study_cohort.get_stats(\"gender\")\n", - "print(f'the user study gender stats: {study_cohort_gender_stats}')\n", - "study_cohort_race_stats = study_cohort.get_stats(\"race\")\n", - "print(f'the user study cohort race stats: {study_cohort_race_stats}')\n", - "study_cohort_ethnicity_stats = study_cohort.get_stats(\"ethnicity\")\n", - "print(f'the user study ethnicity stats: {study_cohort_ethnicity_stats}')" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "c7ad0b7b-21dc-4572-af21-fe1580361999", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "the user study cohort age discrete probability distribution: [{'age_bin': '0-10', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '11-20', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '21-30', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '31-40', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '41-50', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '51-60', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '61-70', 'bin_count': 4386, 'probability': 0.2966}, {'age_bin': '71-80', 'bin_count': 5865, 'probability': 0.3967}, {'age_bin': '81-90', 'bin_count': 2810, 'probability': 0.19}, {'age_bin': '91+', 'bin_count': 1725, 'probability': 0.1167}]\n" - ] - } - ], - "source": [ - "# get discrete probability distribution of the age variable in the baseline cohort\n", - "study_cohort_age_distr = study_cohort.get_distributions('age')\n", - "print(f'the user study cohort age discrete probability distribution: {study_cohort_age_distr}')" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "0d03cf95-3c68-4eee-be41-5482dea68b84", - "metadata": { - "tags": [] - }, - 
"outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'age_hellinger_distance': 0.728150848822386}, {'gender_hellinger_distance': 0.5328876752208462}]\n" - ] - } - ], - "source": [ - "# compare the baseline and user study cohorts\n", - "result = bias.compare_cohorts(1, 2)\n", - "print(result)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "128c7b02-faef-4a35-97a6-c92baa5a37dd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Connection to BiasDatabase closed.\n", - "Connection to the OMOP CDM database closed.\n" - ] - } - ], - "source": [ - "bias.cleanup()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e2bf375-b4fb-4c50-aab9-fff4c1a02a95", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 8b692e3625b628b20e75ed5d966258ff86d8701e Mon Sep 17 00:00:00 2001 From: hyi Date: Tue, 17 Jun 2025 18:22:39 -0400 Subject: [PATCH 03/10] updated async cohort creation developer-testing jupyter notebook to user-friendly tutorial --- .../BiasAnalyzerAsyncCohortsTutorial.ipynb | 431 ++++++++++++++++++ notebooks/BiasAnalyzerCohortsTutorial.ipynb | 9 +- ...asAnalyzerTestingAsyncCohortCreation.ipynb | 352 -------------- 3 files changed, 436 insertions(+), 356 deletions(-) create mode 100644 notebooks/BiasAnalyzerAsyncCohortsTutorial.ipynb delete mode 100644 notebooks/BiasAnalyzerTestingAsyncCohortCreation.ipynb diff --git a/notebooks/BiasAnalyzerAsyncCohortsTutorial.ipynb b/notebooks/BiasAnalyzerAsyncCohortsTutorial.ipynb new 
file mode 100644 index 0000000..7e25e9b --- /dev/null +++ b/notebooks/BiasAnalyzerAsyncCohortsTutorial.ipynb @@ -0,0 +1,431 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fdc0d263", + "metadata": {}, + "source": [ + "# Using BiasAnalyzer for Asynchronous Cohort Creation and Exploration\n", + "\n", + "This tutorial demonstrates how to use the `BiasAnalyzer` package to create multiple cohorts asynchronously for exploration, which can improve performance and responsiveness when working with large datasets or complex cohort definitions. It complements the [Cohort Exploration Tutorial](./BiasAnalyzerCohortsTutorial.ipynb), following a similar workflow but optimized for performance by introducing asynchronous processing.\n", + "\n", + "---\n", + "\n", + "### Overview\n", + "\n", + "**Objective**: \n", + "Show how to define and create multiple cohorts using asynchronous execution to improve responsiveness and performance when working with large or complex datasets.\n", + "\n", + "**Before You Begin**: \n", + "The `BiasAnalyzer` package is currently in active development and has not yet been officially released on PyPI.\n", + "You can install it in one of the two ways:\n", + "\n", + "- **Install from GitHub (recommended during development)**:\n", + "```bash\n", + "pip install git+https://github.com/vaclab/BiasAnalyzer.git\n", + "```\n", + "- **Install from PyPI (once the package is officially released)**:\n", + "```bash\n", + "pip install biasanalyzer\n", + "```\n", + "\n", + "For full setup and usage instructions, refer to the [README](https://github.com/VACLab/BiasAnalyzer/blob/main/README.md).\n", + "\n", + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "bb028875", + "metadata": {}, + "source": [ + "### Preparation for asynchronous cohort creation\n", + "**Preparation step 1**: Import the `BIAS` class from the `api` module of the `BiasAnalyzer` package, create an object `bias` of the `BIAS` class, specify OMOP CDM database configurations on the
`bias` object, and set OMOP CDM database to enable connection to the database. Refer to the [Cohort Exploration Tutorial](./BiasAnalyzerCohortsTutorial.ipynb) for more details." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6dc76f46", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "configuration specified in ../config.yaml loaded successfully\n", + "Connected to the OMOP CDM database (read-only).\n", + "Cohort Definition table created.\n", + "Cohort table created.\n" + ] + } + ], + "source": [ + "from biasanalyzer.api import BIAS\n", + "\n", + "bias = BIAS()\n", + "\n", + "bias.set_config('../config.yaml')\n", + "\n", + "bias.set_root_omop()" + ] + }, + { + "cell_type": "markdown", + "id": "8731e481", + "metadata": {}, + "source": [ + "**Preparation step 2**: Import `BackgroundResult` class and the `run_in_background` function from the `background.threading_utils` module of the `BiasAnalyzer` package to support asynchronous cohort creation." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "31cac333", + "metadata": {}, + "outputs": [], + "source": [ + "from biasanalyzer.background.threading_utils import BackgroundResult, run_in_background" + ] + }, + { + "cell_type": "markdown", + "id": "22edda35", + "metadata": {}, + "source": [ + "**Now that you have connected to your OMOP CDM database and imported the necessary utilities for asynchronous processing, you are ready to create cohorts asynchronously using the `BiasAnalyzer` APIs.** This rest of this notebook illustrates how to create both a baseline and a study cohort asynchronously, and explore and compare them once they are ready. 
With asynchronous execution, you don't need to wait for cohort creation to finish - you can continue running the subsequent cells and explore the data as it becomes available.\n", + "\n", + "---\n", + "\n", + "### Asynchronous cohort creation\n", + "**Baseline cohort creation**: To create a baseline cohort of young female patients asynchronously, use the `run_in_background()` function on the `bias` object to run `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` function in a background thread. You'll pass the target function as the first argument, the cohort creation target function input arguments as the next four arguments, a `BackgroundResult` object via the `result_holder` optional parameter to store the created baseline cohort result, and a `delay` value (e.g., 120 seconds) to simulate asynchronous execution of long-running process for testing purposes. The created baseline cohort will be identical to the one created in the [Cohort Exploration Tutorial](./BiasAnalyzerCohortsTutorial.ipynb), except that the cohort creation now runs asychronously in a background thread." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c9c9c7c5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[*] Background task started...\n", + "Baseline cohort creation running in background...\n", + "template_path: /home/hongyi/BiasAnalyzer/biasanalyzer/sql_templates\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9975fb06d4994afa80e7bc7aef956450", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Cohort creation: 0%| | 0/3 [00:00= 2000 AND p.year_of_birth <= 2020 GROUP BY c.person_id ) SELECT * FROM filtered_cohort f', 'created_by': 'system'}\n", + "Baseline cohort created with stats: [{'total_count': 12360, 'earliest_start_date': datetime.date(2000, 2, 19), 'latest_start_date': datetime.date(2020, 5, 26), 'earliest_end_date': datetime.date(2002, 7, 20), 'latest_end_date': datetime.date(2020, 5, 27), 'min_duration_days': 0, 'max_duration_days': 7379, 'avg_duration_days': 1192.32, 'median_duration': 296, 'stddev_duration': 1779.19}]\n" + ] + } + ], + "source": [ + "if baseline_result.ready:\n", + " if baseline_result.error:\n", + " print(f\"Baseline cohort creation failed: {baseline_result.error}\")\n", + " else:\n", + " baseline_cohort = baseline_result.value\n", + " baseline_cohort_def = baseline_cohort.metadata\n", + " print(f\"Baseline cohort created with metadata: {baseline_cohort_def}\")\n", + " baseline_cohort_data = baseline_cohort.data\n", + " baseline_cohort_stats = baseline_cohort.get_stats()\n", + " print(f\"Baseline cohort created with stats: {baseline_cohort_stats}\")\n", + "else:\n", + " print(\"Still creating baseline cohort...\")" + ] + }, + { + "cell_type": "markdown", + "id": "e06df5e3-6cb9-4bbb-842c-c8e987657edb", + "metadata": {}, + "source": [ + "———————————————\n", + "\n", + "**Exploring the study cohort**: To explore the study cohort once it's available, check the `ready` property of the `study_result` - the 
`BackgroundResult` object provided as the `result_holder` during asynchronous cohort creation. If the result is ready, verify whether the background process completed successfully by checking the `error` property of the `study_result`. If no error occurred, you can retrieve the created study cohort object and explore it, just as demonstrated in the [Cohort Exploration Tutorial](./BiasAnalyzerCohortsTutorial.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b1415805-b065-40b8-b2cd-6db6f5f9ca41", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Study cohort created with metadata: {'id': 2, 'name': 'Young COVID female patients', 'description': 'Young COVID female patients', 'created_date': datetime.date(2025, 6, 17), 'creation_info': 'WITH ranked_events_condition_occurrence AS ( SELECT person_id, condition_concept_id AS concept_id, condition_start_date AS event_start_date, condition_end_date AS event_end_date, ROW_NUMBER() OVER ( PARTITION BY person_id, condition_concept_id ORDER BY condition_start_date ASC ) AS event_instance FROM condition_occurrence ), domain_qualifying_events AS ( (SELECT person_id, event_start_date, event_end_date FROM ranked_events_condition_occurrence WHERE concept_id = 37311061) ), filtered_cohort AS ( SELECT c.person_id, MIN(c.event_start_date) AS cohort_start_date, MAX(c.event_end_date) AS cohort_end_date FROM domain_qualifying_events c JOIN person p ON c.person_id = p.person_id WHERE 1=1 AND p.gender_concept_id = 8532 AND p.year_of_birth >= 2000 AND p.year_of_birth <= 2020 GROUP BY c.person_id ) SELECT * FROM filtered_cohort f', 'created_by': 'system'}\n", + "Study cohort created with stats: [{'total_count': 10208, 'earliest_start_date': datetime.date(2020, 1, 18), 'latest_start_date': datetime.date(2020, 3, 30), 'earliest_end_date': datetime.date(2020, 2, 7), 'latest_end_date': datetime.date(2020, 5, 3), 'min_duration_days': 8, 
'max_duration_days': 37, 'avg_duration_days': 24.25, 'median_duration': 24, 'stddev_duration': 7.2}]\n" + ] + } + ], + "source": [ + "if study_result.ready:\n", + " if study_result.error:\n", + " print(f\"Study cohort creation failed: {study_result.error}\")\n", + " else:\n", + " study_cohort = study_result.value\n", + " study_cohort_def = study_cohort.metadata\n", + " print(f\"Study cohort created with metadata: {study_cohort_def}\")\n", + " study_cohort_data = study_cohort.data\n", + " study_cohort_stats = study_cohort.get_stats()\n", + " print(f\"Study cohort created with stats: {study_cohort_stats}\")\n", + "else:\n", + " print(\"Still creating study cohort...\")" + ] + }, + { + "cell_type": "markdown", + "id": "56de0456-104f-4d4b-9f8e-1a65a07a6a2e", + "metadata": {}, + "source": [ + "---\n", + "\n", + "### Cohort comparison when available\n", + "To compare the baseline and study cohorts once they are available, check the `ready` property of both `baseline_result` and `study_result` - the `BackgroundResult` objects passed as `result_holder` during asynchronous cohort creation. If both results are ready, you can retrieve and compare the cohorts using the same approach demonstrated in the [Cohort Exploration Tutorial](./BiasAnalyzerCohortsTutorial.ipynb)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "0d03cf95-3c68-4eee-be41-5482dea68b84", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "first 5 patient in baseline cohort data: [{'subject_id': 42583, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2020, 4, 26), 'cohort_end_date': datetime.date(2020, 5, 12)}, {'subject_id': 33685, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2017, 12, 8), 'cohort_end_date': datetime.date(2020, 5, 10)}, {'subject_id': 74383, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2019, 1, 31), 'cohort_end_date': datetime.date(2020, 3, 25)}, {'subject_id': 23986, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2019, 6, 15), 'cohort_end_date': datetime.date(2020, 3, 28)}, {'subject_id': 93962, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2019, 7, 1), 'cohort_end_date': datetime.date(2020, 5, 15)}]\n", + "first 5 patient in study cohort data: [{'subject_id': 22344, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 3, 11), 'cohort_end_date': datetime.date(2020, 4, 13)}, {'subject_id': 53949, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 2, 28), 'cohort_end_date': datetime.date(2020, 3, 11)}, {'subject_id': 80198, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 3, 5), 'cohort_end_date': datetime.date(2020, 4, 9)}, {'subject_id': 30052, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 3, 6), 'cohort_end_date': datetime.date(2020, 4, 8)}, {'subject_id': 88837, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 2, 24), 'cohort_end_date': datetime.date(2020, 3, 12)}]\n", + "the baseline cohort age stats: [{'total_count': 12360, 'min_age': 0, 'max_age': 25, 'avg_age': 7.24, 'median_age': 6, 'stddev_age': 6.01}]\n", + "the baseline cohort gender stats: [{'gender': 'female', 'gender_count': 12360, 
'probability': 1.0}]\n", + "the study cohort age stats: [{'total_count': 10208, 'min_age': 0, 'max_age': 20, 'avg_age': 10.94, 'median_age': 11, 'stddev_age': 5.92}]\n", + "the study cohort gender stats: [{'gender': 'female', 'gender_count': 10208, 'probability': 1.0}]\n", + "[{'age_hellinger_distance': 0.14447523081257604}, {'gender_hellinger_distance': 0.0}]\n" + ] + } + ], + "source": [ + "# compare the baseline and user study cohorts\n", + "if baseline_result.ready and study_result.ready:\n", + " print(f\"first 5 patient in baseline cohort data: {baseline_cohort_data[:5]}\")\n", + " print(f\"first 5 patient in study cohort data: {study_cohort_data[:5]}\")\n", + " baseline_cohort_age_stats = baseline_cohort.get_stats(\"age\")\n", + " print(f'the baseline cohort age stats: {baseline_cohort_age_stats}')\n", + " baseline_cohort_gender_stats = baseline_cohort.get_stats(\"gender\")\n", + " print(f'the baseline cohort gender stats: {baseline_cohort_gender_stats}')\n", + " study_cohort_age_stats = study_cohort.get_stats(\"age\")\n", + " print(f'the study cohort age stats: {study_cohort_age_stats}')\n", + " study_cohort_gender_stats = study_cohort.get_stats(\"gender\")\n", + " print(f'the study cohort gender stats: {study_cohort_gender_stats}')\n", + " result = bias.compare_cohorts(baseline_cohort_def['id'], study_cohort_def['id'])\n", + " print(result)" + ] + }, + { + "cell_type": "markdown", + "id": "d8e53808-cac2-41c7-9d60-f7a3b661ff6f", + "metadata": {}, + "source": [ + "---\n", + "\n", + "### Final cleanup to ensure database connections are closed" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "128c7b02-faef-4a35-97a6-c92baa5a37dd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connection to BiasDatabase closed.\n", + "Connection to the OMOP CDM database closed.\n" + ] + } + ], + "source": [ + "bias.cleanup()" + ] + }, + { + "cell_type": "markdown", + "id": "e3ea28f8", + "metadata": {}, + 
"source": [ + "### ✅ Summary\n", + "\n", + "In this tutorial, you learned how to use the BiasAnalyzer package to create a baseline and a study cohort asynchronously for improved performance and responsiveness when working with large datasets or complex cohort definitions. For testing purposes, a `delay` optional parameter is introduced in the `run_in_background()` function to simulate asynchronous execution of long-running process. This tutorial complements the [Cohort Exploration Tutorial](./BiasAnalyzerCohortsTutorial.ipynb), following a similar workflow but optimized for performance by introducing asynchronous processing.\n", + " \n", + "For more information, refer to the [BiasAnalyzer GitHub repo](https://github.com/VACLab/BiasAnalyzer) and the [README file](https://github.com/VACLab/BiasAnalyzer/blob/main/README.md).\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (biasanalyzer)", + "language": "python", + "name": "biasanalyzer-py3.8" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/BiasAnalyzerCohortsTutorial.ipynb b/notebooks/BiasAnalyzerCohortsTutorial.ipynb index 8957503..d196ccf 100644 --- a/notebooks/BiasAnalyzerCohortsTutorial.ipynb +++ b/notebooks/BiasAnalyzerCohortsTutorial.ipynb @@ -28,6 +28,7 @@ "```bash\n", "pip install biasanalyzer\n", "```\n", + "\n", "For full setup and usage instructions, refer to the [README](https://github.com/VACLab/BiasAnalyzer/blob/main/README.md).\n", "\n", "---\n" @@ -140,12 +141,12 @@ "id": "c7219629-1a30-44af-9ec5-5eb9b4a52c5a", "metadata": {}, "source": [ - "---\n", + "**Now that you have connected to your OMOP CDM database, you can start to use the APIs to explore your data.** The rest of this notebook illustrates 
how to create and explore a baseline and a study cohort, and then compare them using the BiasAnalyzer APIs.\n", "\n", - "**Now that you have connected to your OMOP CDM database, you can start to use the APIs to explore your data. The rest of this notebook illustrates how to create and explore a baseline and a study cohort, and then compare them using the BiasAnalyzer APIs.**\n", + "---\n", "\n", "### Baseline cohort creation and exploration\n", - "**Baseline cohort creation**: Create a baseline cohort of young female patients on the `bias` object by calling the `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` function and passing the name of the cohort (first argument), the description of the cohort (second argument), a yaml file that specifies cohort inclusion and exclusion criteria or a cohort selection SQL query (third argument), and the cohort owner's name indicating who owns or creates this cohort (fourth argument). The function will show a progress bar to indicate cohort creation progress over three stages." + "**Baseline cohort creation**: To create a baseline cohort of young female patients, use the `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` function on the `bias` object. You'll pass the name of the cohort as the first argument, the description of the cohort as the second argument, a yaml file that specifies cohort inclusion and exclusion criteria or a cohort selection SQL query as the third argument, and the cohort owner's name indicating who owns or creates this cohort as the fourth argument. The function will show a progress bar to indicate cohort creation progress over three stages." 
] }, { @@ -303,7 +304,7 @@ "---\n", "\n", "### Study cohort creation and exploration\n", - "**Study cohort creation**: Create a study cohort of young female COVID patients on the bias object by calling the create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by) function and passing the name of the cohort (first argument), the description of the cohort (second argument), a yaml file that specifies cohort inclusion and exclusion criteria or a cohort selection SQL query (third argument), and the cohort owner's name indicating who owns or creates this cohort (fourth argument). The function will show a progress bar to indicate cohort creation progress over three stages." + "**Study cohort creation**: To create a study cohort of young female COVID patients, use the `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` function on the `bias` object. You'll pass the name of the cohort as the first argument, the description of the cohort as the second argument, a yaml file that specifies cohort inclusion and exclusion criteria or a cohort selection SQL query as the third argument, and the cohort owner's name indicating who owns or creates this cohort as the fourth argument. 
The function will show a progress bar to indicate cohort creation progress over three stages.\n" ] }, { diff --git a/notebooks/BiasAnalyzerTestingAsyncCohortCreation.ipynb b/notebooks/BiasAnalyzerTestingAsyncCohortCreation.ipynb deleted file mode 100644 index 763bbc1..0000000 --- a/notebooks/BiasAnalyzerTestingAsyncCohortCreation.ipynb +++ /dev/null @@ -1,352 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "10a97b84-7514-4bba-aaf2-0a46a44cc5fd", - "metadata": {}, - "outputs": [], - "source": [ - "from biasanalyzer.api import BIAS\n", - "from biasanalyzer.background.threading_utils import BackgroundResult, run_in_background" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "548223ed-8948-461e-b9d6-40a0ec7fc89f", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "no configuration file specified. Call set_config(config_file_path) next to specify configurations\n" - ] - } - ], - "source": [ - "# create an object of BIAS class\n", - "bias = BIAS()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "7d440d9f-c7fa-4ef1-ad66-31274ebef4ea", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "configuration specified in /home/hongyi/BiasAnalyzer/config.yaml loaded successfully\n" - ] - } - ], - "source": [ - "bias.set_config('/home/hongyi/BiasAnalyzer/config.yaml')" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "283156f8-63da-42a5-bbd7-ee2b7719652c", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Connected to the OMOP CDM database (read-only).\n", - "Cohort Definition table created.\n", - "Cohort table created.\n" - ] - } - ], - "source": [ - "# the configuration file includes root_omop_cdm_database configuration info with an example shown \n", - "# in 
https://github.com/hyi/HealthDataBias/blob/main/tests/assets/config/test_config.yaml\n", - "bias.set_root_omop()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "a68f3eaf-92fd-49a2-9768-d685d826fd57", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[*] Background task started...\n", - "Baseline cohort creation running in background...\n", - "template_path: /home/hongyi/BiasAnalyzer/biasanalyzer/sql_templates\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b6db954d48fe41ab9e53ff6b6e358fcd", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Cohort creation: 0%| | 0/3 [00:00= 2000 AND p.year_of_birth <= 2020 GROUP BY c.person_id ) SELECT * FROM filtered_cohort f', 'created_by': 'system'}\n", - "Baseline cohort created with stats: [{'total_count': 12360, 'earliest_start_date': datetime.date(2000, 2, 19), 'latest_start_date': datetime.date(2020, 5, 26), 'earliest_end_date': datetime.date(2002, 7, 20), 'latest_end_date': datetime.date(2020, 5, 27), 'min_duration_days': 0, 'max_duration_days': 7379, 'avg_duration_days': 1192.32, 'median_duration': 296, 'stddev_duration': 1779.19}]\n" - ] - } - ], - "source": [ - "if baseline_result.ready:\n", - " if baseline_result.error:\n", - " print(f\"Baseline cohort creation failed: {baseline_result.error}\")\n", - " else:\n", - " baseline_cohort = baseline_result.value\n", - " baseline_cohort_def = baseline_cohort.metadata\n", - " print(f\"Baseline cohort created with metadata: {baseline_cohort_def}\")\n", - " baseline_cohort_data = baseline_cohort.data\n", - " baseline_cohort_stats = baseline_cohort.get_stats()\n", - " print(f\"Baseline cohort created with stats: {baseline_cohort_stats}\")\n", - "else:\n", - " print(\"Still creating baseline cohort...\")" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "b1415805-b065-40b8-b2cd-6db6f5f9ca41", - "metadata": { - 
"tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Study cohort created with metadata: {'id': 1, 'name': 'Young COVID female patients', 'description': 'Young COVID female patients', 'created_date': datetime.date(2025, 6, 4), 'creation_info': 'WITH ranked_events_condition_occurrence AS ( SELECT person_id, condition_concept_id AS concept_id, condition_start_date AS event_start_date, condition_end_date AS event_end_date, ROW_NUMBER() OVER ( PARTITION BY person_id, condition_concept_id ORDER BY condition_start_date ASC ) AS event_instance FROM condition_occurrence ), domain_qualifying_events AS ( (SELECT person_id, event_start_date, event_end_date FROM ranked_events_condition_occurrence WHERE concept_id = 37311061) ), filtered_cohort AS ( SELECT c.person_id, MIN(c.event_start_date) AS cohort_start_date, MAX(c.event_end_date) AS cohort_end_date FROM domain_qualifying_events c JOIN person p ON c.person_id = p.person_id WHERE 1=1 AND p.gender_concept_id = 8532 AND p.year_of_birth >= 2000 AND p.year_of_birth <= 2020 GROUP BY c.person_id ) SELECT * FROM filtered_cohort f', 'created_by': 'system'}\n", - "Study cohort created with stats: [{'total_count': 10208, 'earliest_start_date': datetime.date(2020, 1, 18), 'latest_start_date': datetime.date(2020, 3, 30), 'earliest_end_date': datetime.date(2020, 2, 7), 'latest_end_date': datetime.date(2020, 5, 3), 'min_duration_days': 8, 'max_duration_days': 37, 'avg_duration_days': 24.25, 'median_duration': 24, 'stddev_duration': 7.2}]\n" - ] - } - ], - "source": [ - "if study_result.ready:\n", - " if study_result.error:\n", - " print(f\"Study cohort creation failed: {study_result.error}\")\n", - " else:\n", - " study_cohort = study_result.value\n", - " study_cohort_def = study_cohort.metadata\n", - " print(f\"Study cohort created with metadata: {study_cohort_def}\")\n", - " study_cohort_data = study_cohort.data\n", - " study_cohort_stats = study_cohort.get_stats()\n", - " print(f\"Study 
cohort created with stats: {study_cohort_stats}\")\n", - "else:\n", - " print(\"Still creating study cohort...\")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "0d03cf95-3c68-4eee-be41-5482dea68b84", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "first 5 patient in baseline cohort data: [{'subject_id': 42583, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2020, 4, 26), 'cohort_end_date': datetime.date(2020, 5, 12)}, {'subject_id': 33685, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2017, 12, 8), 'cohort_end_date': datetime.date(2020, 5, 10)}, {'subject_id': 74383, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2019, 1, 31), 'cohort_end_date': datetime.date(2020, 3, 25)}, {'subject_id': 23986, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2019, 6, 15), 'cohort_end_date': datetime.date(2020, 3, 28)}, {'subject_id': 93962, 'cohort_definition_id': 2, 'cohort_start_date': datetime.date(2019, 7, 1), 'cohort_end_date': datetime.date(2020, 5, 15)}]\n", - "first 5 patient in study cohort data: [{'subject_id': 53949, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2020, 2, 28), 'cohort_end_date': datetime.date(2020, 3, 11)}, {'subject_id': 22344, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2020, 3, 11), 'cohort_end_date': datetime.date(2020, 4, 13)}, {'subject_id': 80198, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2020, 3, 5), 'cohort_end_date': datetime.date(2020, 4, 9)}, {'subject_id': 30052, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2020, 3, 6), 'cohort_end_date': datetime.date(2020, 4, 8)}, {'subject_id': 94887, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2020, 2, 29), 'cohort_end_date': datetime.date(2020, 3, 24)}]\n", - "the baseline cohort age stats: [{'total_count': 12360, 'min_age': 0, 'max_age': 25, 'avg_age': 10.71, 
'median_age': 11, 'stddev_age': 5.98}]\n", - "the baseline cohort gender stats: [{'gender': 'female', 'gender_count': 12360, 'probability': 1.0}]\n", - "the study cohort age stats: [{'total_count': 10208, 'min_age': 0, 'max_age': 24, 'avg_age': 10.94, 'median_age': 11, 'stddev_age': 5.93}]\n", - "the study cohort gender stats: [{'gender': 'female', 'gender_count': 10208, 'probability': 1.0}]\n", - "[{'age_hellinger_distance': 0.010623813022853212}, {'gender_hellinger_distance': 0.0}]\n" - ] - } - ], - "source": [ - "# compare the baseline and user study cohorts\n", - "if baseline_result.ready and study_result.ready:\n", - " print(f\"first 5 patient in baseline cohort data: {baseline_cohort_data[:5]}\")\n", - " print(f\"first 5 patient in study cohort data: {study_cohort_data[:5]}\")\n", - " baseline_cohort_age_stats = baseline_cohort.get_stats(\"age\")\n", - " print(f'the baseline cohort age stats: {baseline_cohort_age_stats}')\n", - " baseline_cohort_gender_stats = baseline_cohort.get_stats(\"gender\")\n", - " print(f'the baseline cohort gender stats: {baseline_cohort_gender_stats}')\n", - " study_cohort_age_stats = study_cohort.get_stats(\"age\")\n", - " print(f'the study cohort age stats: {study_cohort_age_stats}')\n", - " study_cohort_gender_stats = study_cohort.get_stats(\"gender\")\n", - " print(f'the study cohort gender stats: {study_cohort_gender_stats}')\n", - " result = bias.compare_cohorts(1, 2)\n", - " print(result)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "128c7b02-faef-4a35-97a6-c92baa5a37dd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Connection to BiasDatabase closed.\n", - "Connection to the OMOP CDM database closed.\n" - ] - } - ], - "source": [ - "bias.cleanup()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2a6c2d12-91b6-4074-8565-6ff2f61f2f00", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { 
- "display_name": "Python (biasanalyzer)", - "language": "python", - "name": "biasanalyzer-py3.8" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 440372f7f0a3579e442433b151fe6fde967b8822 Mon Sep 17 00:00:00 2001 From: hyi Date: Thu, 19 Jun 2025 17:24:36 -0400 Subject: [PATCH 04/10] updated code to compute cohort concept prevalence for any domains --- biasanalyzer/cohort.py | 4 +- biasanalyzer/cohort_query_builder.py | 57 ++++++-- biasanalyzer/database.py | 38 +++--- biasanalyzer/models.py | 7 + biasanalyzer/sql.py | 124 ------------------ .../test_hierarchical_prevalence.py | 24 +++- tests/test_database.py | 8 +- 7 files changed, 94 insertions(+), 168 deletions(-) diff --git a/biasanalyzer/cohort.py b/biasanalyzer/cohort.py index 6ca1d75..83aa2e8 100644 --- a/biasanalyzer/cohort.py +++ b/biasanalyzer/cohort.py @@ -18,6 +18,7 @@ def __init__(self, cohort_id: int, bias_db: BiasDatabase, omop_db: OMOPCDMDataba self.omop_db = omop_db self._cohort_data = None # cache the cohort data self._metadata = None + self.query_builder = CohortQueryBuilder(cohort_creation=False) @property def data(self): @@ -55,6 +56,7 @@ def get_concept_stats(self, concept_type='condition_occurrence', filter_count=0, Get cohort concept statistics such as concept prevalence """ cohort_stats = self.bias_db.get_cohort_concept_stats(self.cohort_id, + self.query_builder, concept_type=concept_type, filter_count=filter_count, vocab=vocab, @@ -106,7 +108,7 @@ def create_cohort(self, cohort_name: str, description: str, query_or_yaml_file: notify_users(f'cohort creation configuration yaml file is not valid with validation error: {ex}') return None - query = self._query_builder.build_query(cohort_config) + query = 
self._query_builder.build_query_cohort_creation(cohort_config) else: query = clean_string(query_or_yaml_file) progress.update(1) diff --git a/biasanalyzer/cohort_query_builder.py b/biasanalyzer/cohort_query_builder.py index 860970a..79fda66 100644 --- a/biasanalyzer/cohort_query_builder.py +++ b/biasanalyzer/cohort_query_builder.py @@ -6,7 +6,7 @@ class CohortQueryBuilder: - def __init__(self): + def __init__(self, cohort_creation=True): """Get the path to SQL templates, whether running from source or installed.""" try: if sys.version_info >= (3, 9): # pragma: no cover @@ -19,12 +19,13 @@ def __init__(self): except ModuleNotFoundError: # pragma: no cover template_path = os.path.join(os.path.dirname(__file__), "sql_templates") - print(f'template_path: {template_path}') + print(f'template_path: {template_path}, cohort_creation: {cohort_creation}') self.env = Environment(loader=FileSystemLoader(template_path), extensions=['jinja2.ext.do']) - self.env.globals.update( - demographics_filter=self._load_macro('demographics_filter'), - temporal_event_filter=self.temporal_event_filter - ) + if cohort_creation: + self.env.globals.update( + demographics_filter=self._load_macro('demographics_filter'), + temporal_event_filter=self.temporal_event_filter + ) def _extract_domains(self, events): domains = set() @@ -42,16 +43,11 @@ def _load_macro(self, macro_name): macros_template = self.env.get_template('macros.sql.j2') return macros_template.module.__dict__[macro_name] - - def build_query(self, cohort_config: dict) -> str: + def build_query_cohort_creation(self, cohort_config: dict) -> str: """ Build a SQL query from the CohortCreationConfig object. - - Args: - cohort_config: dict object loaded from yaml file for building sql query. - - Returns: - str: The rendered SQL query. + :param cohort_config: dict object loaded from yaml file for building sql query. + :return: The rendered SQL query. 
""" inclusion_criteria = cohort_config.get('inclusion_criteria') exclusion_criteria = cohort_config.get('exclusion_criteria', {}) @@ -75,6 +71,39 @@ def build_query(self, cohort_config: dict) -> str: temporal_events=temporal_events ) + def build_concept_prevalence_query(self, concept_type: str, cid: int, filter_count: int, vocab: str, + include_hierarchy: bool) -> str: + """ + Build a SQL query for concept prevalence statistics for a given domain and cohort. + :param concept_type: Domain from DOMAIN_MAPPING (e.g., 'condition_occurrence'). + :param cid: Cohort definition ID. + :param filter_count: Minimum count threshold for concepts with 0 meaning no filtering + :param vocab: Vocabulary ID. Defaults to domain-specific vocabulary as defined in DOMAIN_MAPPING if set to None + :param include_hierarchy: Include concept hierarchy in results or not + :return: The rendered SQL query + :raises ValueError if concept_type is not invalid + """ + + # Validate concept_type + if concept_type not in DOMAIN_MAPPING or DOMAIN_MAPPING[concept_type]["table"] is None: + valid_domains = [k for k in DOMAIN_MAPPING.keys() if DOMAIN_MAPPING[k]["table"] is not None] + raise ValueError(f"Invalid concept_type: {concept_type}. Must be one of {valid_domains}") + + # The provided vocab is assumed to be already validated if it is not set to None. 
Otherwise, + # if set to None, use domain-specific default vocabulary + effective_vocab = vocab if vocab is not None else DOMAIN_MAPPING[concept_type]["default_vocab"] + # Load and render the template + template = self.env.get_template("cohort_concept_prevalence_query.sql.j2") + return template.render( + table_name=DOMAIN_MAPPING[concept_type]["table"], + concept_id_column=DOMAIN_MAPPING[concept_type]["concept_id"], + start_date_column=DOMAIN_MAPPING[concept_type]["start_date"], + cid=cid, + filter_count=filter_count, + vocab=effective_vocab, + include_hierarchy=include_hierarchy + ) + @staticmethod def render_event(event): """ diff --git a/biasanalyzer/database.py b/biasanalyzer/database.py index af951bb..0c9d21f 100644 --- a/biasanalyzer/database.py +++ b/biasanalyzer/database.py @@ -6,7 +6,7 @@ from sqlalchemy.orm import sessionmaker from sqlalchemy.exc import SQLAlchemyError from sqlalchemy import create_engine, text -from biasanalyzer.models import Cohort, CohortDefinition +from biasanalyzer.models import CohortDefinition from biasanalyzer.sql import * from biasanalyzer.utils import build_concept_hierarchy, print_hierarchy, find_roots, notify_users @@ -22,16 +22,6 @@ class BiasDatabase: "race": RACE_STATS_QUERY, "ethnicity": ETHNICITY_STATS_QUERY } - cohort_concept_queries = { - 'condition_occurrence': { - 'query': COHORT_CONCEPT_CONDITION_PREVALENCE_QUERY, - 'default_vocab': 'SNOMED' - }, - 'drug_exposure': { - 'query': COHORT_CONCEPT_DRUG_PREVALENCE_QUERY, - 'default_vocab': 'RxNorm' - } - } _instance = None # indicating a singleton with only one instance of the class ever created def __new__(cls, *args, **kwargs): if cls._instance is None: @@ -142,7 +132,7 @@ def get_cohort(self, cohort_definition_id): return [dict(zip(headers, row)) for row in rows] def _create_omop_table(self, table_name): - if self.omop_cdm_db_url is not None and not self.omop_cdm_db_url.endswith('.duckdb'): + if self.omop_cdm_db_url is not None and not 
self.omop_cdm_db_url.endswith('duckdb'): # need to create person table from OMOP CDM postgreSQL database self.conn.execute(f""" CREATE TABLE IF NOT EXISTS {table_name} AS @@ -237,25 +227,29 @@ def get_cohort_distributions(self, cohort_definition_id: int, variable: str): notify_users(f"Error computing cohort {variable} distributions: {e}", level='error') return None - def get_cohort_concept_stats(self, cohort_definition_id: int, + def get_cohort_concept_stats(self, cohort_definition_id: int, qry_builder, concept_type='condition_occurrence', filter_count=0, vocab=None, include_hierarchy=False): """ Get concept statistics for a cohort from the cohort table. """ concept_stats = {} - if concept_type not in self.__class__.cohort_concept_queries: - notify_users(f"input {concept_type} is not a valid concept type. " - f"Supported concept types are: {self.__class__.cohort_concept_queries.keys()}", level='error') - return concept_stats + try: if (self._create_omop_table('concept') and self._create_omop_table('concept_ancestor') and self._create_omop_table(concept_type)): - query_str = self.__class__.cohort_concept_queries[concept_type]['query'] - if not vocab: - vocab = self.__class__.cohort_concept_queries[concept_type]['default_vocab'] - query = query_str.format(cid=cohort_definition_id, filter_count=filter_count, - vocab=vocab, include_hierarchy=include_hierarchy) + # validate input vocab if it is not None + if vocab is not None: + valid_vocabs = self._execute_query("SELECT distinct vocabulary_id FROM concept") + valid_vocab_ids = [row['vocabulary_id'] for row in valid_vocabs] + if vocab not in valid_vocab_ids: + notify_users(f"input {vocab} is not a valid vocabulary in OMOP. 
" + f"Supported vocabulary ids are: {valid_vocab_ids}", + level='error') + return concept_stats + + query = qry_builder.build_concept_prevalence_query(concept_type, cohort_definition_id, + filter_count, vocab, include_hierarchy) concept_stats[concept_type] = self._execute_query(query) cs_df = pd.DataFrame(concept_stats[concept_type]) # Combine concept_name and prevalence into a "details" column diff --git a/biasanalyzer/models.py b/biasanalyzer/models.py index 2ec6140..72790fa 100644 --- a/biasanalyzer/models.py +++ b/biasanalyzer/models.py @@ -9,42 +9,49 @@ "concept_id": "condition_concept_id", "start_date": "condition_start_date", "end_date": "condition_end_date", + "default_vocab": "SNOMED" # for use by concept prevalence query }, "drug_exposure": { "table": "drug_exposure", "concept_id": "drug_concept_id", "start_date": "drug_exposure_start_date", "end_date": "drug_exposure_end_date", + "default_vocab": "RxNorm" # for use by concept prevalence query }, "procedure_occurrence": { "table": "procedure_occurrence", "concept_id": "procedure_concept_id", "start_date": "procedure_date", "end_date": "procedure_date", + "default_vocab": "SNOMED" # for use by concept prevalence query }, "visit_occurrence": { "table": "visit_occurrence", "concept_id": "visit_concept_id", "start_date": "visit_start_date", "end_date": "visit_end_date", + "default_vocab": "SNOMED" # for use by concept prevalence query }, "measurement": { "table": "measurement", "concept_id": "measurement_concept_id", "start_date": "measurement_date", "end_date": "measurement_date", + "default_vocab": "LOINC" # for use by concept prevalence query }, "observation": { "table": "observation", "concept_id": "observation_concept_id", "start_date": "observation_date", "end_date": "observation_date", + "default_vocab": "SNOMED" # for use by concept prevalence query }, "date": { # Special case for static timestamps "table": None, "concept_id": None, "start_date": "timestamp", "end_date": "timestamp", + 
"default_vocab": None } } diff --git a/biasanalyzer/sql.py b/biasanalyzer/sql.py index a87c665..10bbef7 100644 --- a/biasanalyzer/sql.py +++ b/biasanalyzer/sql.py @@ -146,127 +146,3 @@ WHERE c.cohort_definition_id = {} GROUP BY p.ethnicity_concept_id ''' - -COHORT_CONCEPT_CONDITION_PREVALENCE_QUERY = ''' - WITH cohort_conditions AS ( - -- Compute the counts for each condition node - SELECT - co.condition_concept_id AS concept_id, - ct.subject_id - FROM - cohort ct - JOIN - condition_occurrence co ON ct.subject_id = co.person_id - AND co.condition_start_date >= ct.cohort_start_date - AND (co.condition_end_date IS NULL OR co.condition_start_date <= ct.cohort_end_date) - WHERE ct.cohort_definition_id = {cid} - ), - aggregated_counts AS ( - -- Aggregate counts for parent nodes using the concept_ancestor table - SELECT - ca.ancestor_concept_id AS concept_id, - COUNT(DISTINCT cc.subject_id) AS count_in_cohort - FROM - cohort_conditions cc - JOIN - concept_ancestor ca - ON cc.concept_id = ca.descendant_concept_id - WHERE - ca.min_levels_of_separation >= 0 - GROUP BY - ca.ancestor_concept_id - ), - concept_hierarchy AS ( - -- Retrieve the direct parent-child hierarchy for all concepts involved - SELECT - ca.ancestor_concept_id, - ca.descendant_concept_id, - FROM - concept_ancestor ca - WHERE - ca.min_levels_of_separation <= 1 - AND ca.descendant_concept_id IN (SELECT concept_id FROM aggregated_counts where count_in_cohort > {filter_count}) - AND ca.ancestor_concept_id IN (SELECT concept_id FROM aggregated_counts where count_in_cohort > {filter_count}) - ) - -- Combine counts and hierarchy with concept details - SELECT DISTINCT - c.concept_name, - c.concept_code, - ac.count_in_cohort, - (ac.count_in_cohort * 1.0 / (SELECT COUNT(DISTINCT subject_id) FROM cohort WHERE cohort_definition_id = {cid})) AS prevalence, - ch.ancestor_concept_id, - ch.descendant_concept_id - FROM - aggregated_counts ac - JOIN - concept_hierarchy ch ON ac.concept_id = ch.descendant_concept_id - JOIN - 
concept c ON ac.concept_id = c.concept_id - WHERE ac.count_in_cohort > {filter_count} - AND ({include_hierarchy} = True OR ch.ancestor_concept_id = ch.descendant_concept_id) - ORDER BY - prevalence DESC; -''' -COHORT_CONCEPT_DRUG_PREVALENCE_QUERY = ''' - WITH cohort_drugs AS ( - -- Compute the counts for each drug node - SELECT - de.drug_concept_id AS concept_id, - ct.subject_id - FROM - cohort ct - JOIN - drug_exposure de ON ct.subject_id = de.person_id - AND de.drug_exposure_start_date >= ct.cohort_start_date - AND (de.drug_exposure_start_date IS NULL OR de.drug_exposure_start_date <= ct.cohort_end_date) - WHERE ct.cohort_definition_id = {cid} - ), - aggregated_counts AS ( - -- Aggregate counts for parent nodes using the concept_ancestor table - SELECT - ca.ancestor_concept_id AS concept_id, - COUNT(DISTINCT cd.subject_id) AS count_in_cohort - FROM - cohort_drugs cd - JOIN - concept_ancestor ca - ON cd.concept_id = ca.descendant_concept_id - JOIN - concept anc ON ca.ancestor_concept_id = anc.concept_id - WHERE - anc.vocabulary_id = '{vocab}' AND - ca.min_levels_of_separation >= 0 -- Ensure valid ancestor relationships - GROUP BY - ca.ancestor_concept_id - ), - concept_hierarchy AS ( - -- Retrieve the hierarchy for all concepts involved - SELECT - ca.ancestor_concept_id, - ca.descendant_concept_id - FROM - concept_ancestor ca - WHERE - ca.min_levels_of_separation = 1 - AND ca.descendant_concept_id IN (SELECT concept_id FROM aggregated_counts where count_in_cohort > {filter_count}) - AND ca.ancestor_concept_id IN (SELECT concept_id FROM aggregated_counts where count_in_cohort > {filter_count}) - ) - -- Combine counts and hierarchy with concept details - SELECT DISTINCT - c.concept_name, - c.concept_code, - ac.count_in_cohort, - (ac.count_in_cohort * 1.0 / (SELECT COUNT(DISTINCT subject_id) FROM cohort WHERE cohort_definition_id = {cid})) AS prevalence, - ch.ancestor_concept_id, - ch.descendant_concept_id - FROM - aggregated_counts ac - JOIN - concept_hierarchy ch 
ON ac.concept_id = ch.descendant_concept_id - JOIN - concept c ON ac.concept_id = c.concept_id - WHERE ac.count_in_cohort > {filter_count} - AND ({include_hierarchy} = True OR ch.ancestor_concept_id = ch.descendant_concept_id) - ORDER BY - prevalence DESC; -''' \ No newline at end of file diff --git a/tests/query_based/test_hierarchical_prevalence.py b/tests/query_based/test_hierarchical_prevalence.py index ba849fa..e0ad55c 100644 --- a/tests/query_based/test_hierarchical_prevalence.py +++ b/tests/query_based/test_hierarchical_prevalence.py @@ -1,4 +1,6 @@ -def test_cohort_concept_hierarchical_prevalence(test_db): +import logging + +def test_cohort_concept_hierarchical_prevalence(test_db, caplog): bias = test_db cohort_query = """ SELECT person_id, condition_concept_id, @@ -15,13 +17,27 @@ def test_cohort_concept_hierarchical_prevalence(test_db): ) # Test cohort object and methods assert cohort is not None, "Cohort creation failed" - # test cohort.get_concept_stats only supports concept stats for condition_occurrence and drug_exposures currently - concept_stats = cohort.get_concept_stats(concept_type='procedure_occurrence') + # test concept_type must be one of the supported OMOP domain name + caplog.clear() + with caplog.at_level(logging.ERROR): + concept_stats = cohort.get_concept_stats(concept_type='dummy_invalid') + assert 'Invalid concept_type' in caplog.text + assert concept_stats == {} + + # test vocab must be None to use the default vocab or one of the supported OMOP vocabulary id + caplog.clear() + with caplog.at_level(logging.ERROR): + concept_stats = cohort.get_concept_stats(vocab='dummy_invalid_vocab') + assert 'is not a valid vocabulary' in caplog.text assert concept_stats == {} + # test the cohort does not have procedure_occurrence related concepts + concept_stats = cohort.get_concept_stats(concept_type='procedure_occurrence') + assert concept_stats == {'procedure_occurrence': []} + include_hierarchy_flags = [True, False] for flag in 
include_hierarchy_flags: - concept_stats = cohort.get_concept_stats(include_hierarchy=flag) + concept_stats = cohort.get_concept_stats(vocab='ICD10CM', include_hierarchy=flag) assert concept_stats is not None, "Failed to fetch concept stats" assert len(concept_stats) > 0, "No concept stats returned" # check returned data with different include_hierarchy flag diff --git a/tests/test_database.py b/tests/test_database.py index 89652ed..d2cb1ff 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -1,6 +1,7 @@ import duckdb import pytest import logging +from biasanalyzer.cohort_query_builder import CohortQueryBuilder from biasanalyzer.database import BiasDatabase @@ -154,21 +155,22 @@ def test_get_cohort_concept_stats_handles_exception(caplog): BiasDatabase._instance = None db = BiasDatabase(":memory:") db.omop_cdm_db_url = 'duckdb' + qry_builder = CohortQueryBuilder(cohort_creation=False) caplog.clear() with caplog.at_level(logging.ERROR): - result = db.get_cohort_concept_stats(123) + result = db.get_cohort_concept_stats(123, qry_builder) assert 'Error computing cohort concept stats' in caplog.text assert result == {} def test_get_cohort_attributes_handles_exception(): BiasDatabase._instance = None db = BiasDatabase(":memory:") - + qry_builder = CohortQueryBuilder(cohort_creation=False) db.omop_cdm_db_url = None result_stats = db.get_cohort_basic_stats(123, variable='age') assert result_stats is None result = db.get_cohort_distributions(123, 'age') assert result is None - result = db.get_cohort_concept_stats(123) + result = db.get_cohort_concept_stats(123, qry_builder) assert result == {} From 17e406c44aab70e8d3096081d0f76f6881019924 Mon Sep 17 00:00:00 2001 From: hyi Date: Thu, 19 Jun 2025 17:30:41 -0400 Subject: [PATCH 05/10] add the jinja2 SQL template to support cohort concept prevalence across domains --- .../cohort_concept_prevalence_query.sql.j2 | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 
biasanalyzer/sql_templates/cohort_concept_prevalence_query.sql.j2 diff --git a/biasanalyzer/sql_templates/cohort_concept_prevalence_query.sql.j2 b/biasanalyzer/sql_templates/cohort_concept_prevalence_query.sql.j2 new file mode 100644 index 0000000..ff3be54 --- /dev/null +++ b/biasanalyzer/sql_templates/cohort_concept_prevalence_query.sql.j2 @@ -0,0 +1,61 @@ +WITH cohort_events AS ( + -- Compute the counts for each concept node + SELECT + e.{{ concept_id_column }} AS concept_id, + ct.subject_id + FROM + cohort ct + JOIN + {{ table_name }} e ON ct.subject_id = e.person_id + AND e.{{ start_date_column }} >= ct.cohort_start_date + AND (ct.cohort_end_date IS NULL OR e.{{ start_date_column }} <= ct.cohort_end_date) + WHERE ct.cohort_definition_id = {{ cid }} +), +aggregated_counts AS ( + -- Aggregate counts for parent nodes using the concept_ancestor table + SELECT + ca.ancestor_concept_id AS concept_id, + COUNT(DISTINCT ce.subject_id) AS count_in_cohort + FROM + cohort_events ce + JOIN + concept_ancestor ca ON ce.concept_id = ca.descendant_concept_id + JOIN + concept anc ON ca.ancestor_concept_id = anc.concept_id + WHERE + anc.vocabulary_id = '{{ vocab }}' + AND ca.min_levels_of_separation >= 0 + GROUP BY + ca.ancestor_concept_id +), +concept_hierarchy AS ( + -- Retrieve the direct parent-child hierarchy for all concepts involved + SELECT + ca.ancestor_concept_id, + ca.descendant_concept_id + FROM + concept_ancestor ca + WHERE + ca.min_levels_of_separation <= 1 + AND ca.descendant_concept_id IN (SELECT concept_id FROM aggregated_counts WHERE count_in_cohort > {{ filter_count }}) + AND ca.ancestor_concept_id IN (SELECT concept_id FROM aggregated_counts WHERE count_in_cohort > {{ filter_count }}) +) +-- Combine counts and hierarchy with concept details +SELECT DISTINCT + c.concept_name, + c.concept_code, + ac.count_in_cohort, + (ac.count_in_cohort * 1.0 / (SELECT COUNT(DISTINCT subject_id) FROM cohort WHERE cohort_definition_id = {{ cid }})) AS prevalence, + 
ch.ancestor_concept_id, + ch.descendant_concept_id +FROM + aggregated_counts ac +JOIN + concept_hierarchy ch ON ac.concept_id = ch.descendant_concept_id +JOIN + concept c ON ac.concept_id = c.concept_id +WHERE + ac.count_in_cohort > {{ filter_count }} + AND ({{ include_hierarchy }} = True OR ch.ancestor_concept_id = ch.descendant_concept_id) +ORDER BY + prevalence DESC; \ No newline at end of file From 991aa887647d3a3e6114dd0f161eccc823bb4860 Mon Sep 17 00:00:00 2001 From: hyi Date: Thu, 19 Jun 2025 17:33:41 -0400 Subject: [PATCH 06/10] corrected a comment in the cohorts totorial notebook --- notebooks/BiasAnalyzerCohortsTutorial.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/BiasAnalyzerCohortsTutorial.ipynb b/notebooks/BiasAnalyzerCohortsTutorial.ipynb index d196ccf..4ac4bfc 100644 --- a/notebooks/BiasAnalyzerCohortsTutorial.ipynb +++ b/notebooks/BiasAnalyzerCohortsTutorial.ipynb @@ -341,7 +341,7 @@ } ], "source": [ - "# create a user study cohort with all COVID patients above the age of 65\n", + "# create a user study cohort with young female COVID patients\n", "study_cohort = bias.create_cohort('Young female COVID patients', \n", " 'A cohort of female COVID patients born between 2000 and 2020', \n", " '../tests/assets/cohort_creation/test_cohort_creation_condition_occurrence_config_study.yaml', \n", From 50e786f3c92e7e672ff42e6e5c5e483b61aef4e5 Mon Sep 17 00:00:00 2001 From: hyi Date: Thu, 19 Jun 2025 22:34:04 -0400 Subject: [PATCH 07/10] replaced developer version of cohort concept prevalence notebook with user-friendly tutorial notebook --- .../BiasAnalyzerCohortConceptTutorial.ipynb | 1052 ++++++++++++++ ...iasAnalyzerTestingCohortConceptStats.ipynb | 1205 ----------------- 2 files changed, 1052 insertions(+), 1205 deletions(-) create mode 100644 notebooks/BiasAnalyzerCohortConceptTutorial.ipynb delete mode 100644 notebooks/BiasAnalyzerTestingCohortConceptStats.ipynb diff --git 
a/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb b/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb new file mode 100644 index 0000000..a35de32 --- /dev/null +++ b/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb @@ -0,0 +1,1052 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fdc0d263", + "metadata": {}, + "source": [ + "# Using BiasAnalyzer for Cohort Concept Prevalence Exploration\n", + "\n", + "This tutorial demonstrates how to use the `BiasAnalyzer` package to explore **concept prevalence** within a cohort - a key step in identifying potential biases during cohort selection. It complements the [Cohort Exploration Tutorial](./BiasAnalyzerCohortsTutorial.ipynb) by focusing specifically on analyzing which clincial concepts (e.g., diagnoses, procedures, medications) are most common in a selected cohort. In the OMOP (Observational Medical Outcomes Partnership) CDM (Common Data Model), a **concept** refers to a coded term from a standardized medical vocabulary, uniquely identified by a **concept ID**. 
All clinical events in OMOP, such as conditions, drug exposures, procedures, measurements, and events, are represented as concepts.\n", + "\n", + "---\n", + "\n", + "### Overview\n", + "\n", + "**Objective**: \n", + "Learn how to retrieve and analyze concept prevalence within a cohort using `BiasAnalyzer`.\n", + "\n", + "**Before You Begin**: \n", + "The `BiasAnalyzer` package is currently in active development and has not yet been officially released on PyPI.\n", + "You can install it in one of the two ways:\n", + "\n", + "- **Install from GitHub (recommended during development)**:\n", + "```bash\n", + "pip install git+https://github.com/vaclab/BiasAnalyzer.git\n", + "```\n", + "- **Install from PyPI (once the pacakge is officially released)**:\n", + "```bash\n", + "pip install biasanalyzer\n", + "```\n", + "\n", + "For full setup and usage instructions, refer to the [README](https://github.com/VACLab/BiasAnalyzer/blob/main/README.md).\n", + "\n", + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "bb028875", + "metadata": {}, + "source": [ + "### Preparation for cohort concept prevalence exploration\n", + "**Preparation step 1**: Import the `BIAS` class from the `api` module of the `BiasAnalyzer` package, create an object `bias` of the `BIAS` class, specify OMOP CDM database configurations on the `bias` object, and set OMOP CDM database to enable connection to the database. Refer to the [Cohort Exploration Tutorial](./BiasAnalyzerCohortsTutorial.ipynb) for more details." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6dc76f46", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "configuration specified in ../config.yaml loaded successfully\n", + "Connected to the OMOP CDM database (read-only).\n", + "Cohort Definition table created.\n", + "Cohort table created.\n" + ] + } + ], + "source": [ + "from biasanalyzer.api import BIAS\n", + "\n", + "bias = BIAS()\n", + "\n", + "bias.set_config('../config.yaml')\n", + "\n", + "bias.set_root_omop()" + ] + }, + { + "cell_type": "markdown", + "id": "8731e481", + "metadata": {}, + "source": [ + "———————————————\n", + "\n", + "**Preparation step 2**: Create a cohort of young female COVID patients using the `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` function on the `bias` object for cohort concept prevalence exploration. You'll pass the name of the cohort as the first argument, the description of the cohort as the second argument, a yaml file that specifies cohort inclusion and exclusion criteria or a cohort selection SQL query as the third argument, and the cohort owner's name indicating who owns or creates this cohort as the fourth argument. After the cohort is created, you can call `get_stats()` and `get_distributions()` functions on the returned `cohort_data` object to explore cohort statistics and distributions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "51969248-f348-4f0d-914f-bb908183e3f1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "template_path: /home/hongyi/BiasAnalyzer/biasanalyzer/sql_templates, cohort_creation: True\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b99aedde4936451e9c0b8e75f2bcc620", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Cohort creation: 0%| | 0/3 [00:00=1.1.1 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for duckdb<2.0.0,>=1.1.1 from https://files.pythonhosted.org/packages/50/52/6e6f5b5b07841cec334ca6b98f2e02b7bb54ab3b99c49aa3a161cc0b4b37/duckdb-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached duckdb-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (966 bytes)\n", - "Collecting duckdb-engine<0.14.0,>=0.13.2 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for duckdb-engine<0.14.0,>=0.13.2 from https://files.pythonhosted.org/packages/ef/5d/81a0d67483d0767e4fbf7444b079b3f21574a184b0888782ced1c2172777/duckdb_engine-0.13.6-py3-none-any.whl.metadata\n", - " Using cached duckdb_engine-0.13.6-py3-none-any.whl.metadata (8.0 kB)\n", - "Collecting ipytree<0.3.0,>=0.2.2 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for ipytree<0.3.0,>=0.2.2 from https://files.pythonhosted.org/packages/e4/03/35cf1742598d784e96153175233318a2332f71863e55ad1007c9264c1a7a/ipytree-0.2.2-py2.py3-none-any.whl.metadata\n", - " Using cached ipytree-0.2.2-py2.py3-none-any.whl.metadata (849 bytes)\n", - "Collecting ipywidgets<9.0.0,>=8.1.5 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for ipywidgets<9.0.0,>=8.1.5 from https://files.pythonhosted.org/packages/22/2d/9c0b76f2f9cc0ebede1b9371b6f317243028ed60b90705863d493bae622e/ipywidgets-8.1.5-py3-none-any.whl.metadata\n", - " Using 
cached ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)\n", - "Collecting jinja2==3.1.5 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for jinja2==3.1.5 from https://files.pythonhosted.org/packages/bd/0f/2ba5fbcd631e3e88689309dbe978c5769e883e4b84ebfe7da30b43275c5a/jinja2-3.1.5-py3-none-any.whl.metadata\n", - " Using cached jinja2-3.1.5-py3-none-any.whl.metadata (2.6 kB)\n", - "Collecting numpy==1.24.4 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for numpy==1.24.4 from https://files.pythonhosted.org/packages/22/97/dfb1a31bb46686f09e68ea6ac5c63fdee0d22d7b23b8f3f7ea07712869ef/numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)\n", - "Collecting pandas==2.0.3 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pandas==2.0.3 from https://files.pythonhosted.org/packages/d0/28/88b81881c056376254618fad622a5e94b5126db8c61157ea1910cd1c040a/pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)\n", - "Collecting psycopg2<3.0.0,>=2.9.1 (from biasanalyzer==0.1.0)\n", - " Using cached psycopg2-2.9.10-cp311-cp311-linux_x86_64.whl\n", - "Collecting pydantic<3.0.0,>=2.9.2 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pydantic<3.0.0,>=2.9.2 from https://files.pythonhosted.org/packages/f4/3c/8cc1cc84deffa6e25d2d0c688ebb80635dfdbf1dbea3e30c541c8cf4d860/pydantic-2.10.6-py3-none-any.whl.metadata\n", - " Using cached pydantic-2.10.6-py3-none-any.whl.metadata (30 kB)\n", - "Collecting pyyaml<7.0.0,>=6.0.2 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pyyaml<7.0.0,>=6.0.2 from 
https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)\n", - "Collecting scipy==1.10.1 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for scipy==1.10.1 from https://files.pythonhosted.org/packages/21/cd/fe2d4af234b80dc08c911ce63fdaee5badcdde3e9bcd9a68884580652ef0/scipy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached scipy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (58 kB)\n", - "Collecting sqlalchemy<3.0.0,>=2.0.35 (from biasanalyzer==0.1.0)\n", - " Obtaining dependency information for sqlalchemy<3.0.0,>=2.0.35 from https://files.pythonhosted.org/packages/ff/0a/46f3171f564a19a1daf6e7e0e6c8afc6ecd792f947c6de435519d4d16af3/sqlalchemy-2.0.39-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached sqlalchemy-2.0.39-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)\n", - "Collecting MarkupSafe>=2.0 (from jinja2==3.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for MarkupSafe>=2.0 from https://files.pythonhosted.org/packages/f1/a4/aefb044a2cd8d7334c8a47d3fb2c9f328ac48cb349468cc31c20b539305f/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)\n", - "Collecting python-dateutil>=2.8.2 (from pandas==2.0.3->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for python-dateutil>=2.8.2 from https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata\n", - " Using cached 
python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)\n", - "Collecting pytz>=2020.1 (from pandas==2.0.3->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pytz>=2020.1 from https://files.pythonhosted.org/packages/eb/38/ac33370d784287baa1c3d538978b5e2ea064d4c1b93ffbd12826c190dd10/pytz-2025.1-py2.py3-none-any.whl.metadata\n", - " Using cached pytz-2025.1-py2.py3-none-any.whl.metadata (22 kB)\n", - "Collecting tzdata>=2022.1 (from pandas==2.0.3->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for tzdata>=2022.1 from https://files.pythonhosted.org/packages/0f/dd/84f10e23edd882c6f968c21c2434fe67bd4a528967067515feca9e611e5e/tzdata-2025.1-py2.py3-none-any.whl.metadata\n", - " Using cached tzdata-2025.1-py2.py3-none-any.whl.metadata (1.4 kB)\n", - "Collecting packaging>=21 (from duckdb-engine<0.14.0,>=0.13.2->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for packaging>=21 from https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl.metadata\n", - " Using cached packaging-24.2-py3-none-any.whl.metadata (3.2 kB)\n", - "Collecting comm>=0.1.3 (from ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for comm>=0.1.3 from https://files.pythonhosted.org/packages/e6/75/49e5bfe642f71f272236b5b2d2691cf915a7283cc0ceda56357b61daa538/comm-0.2.2-py3-none-any.whl.metadata\n", - " Using cached comm-0.2.2-py3-none-any.whl.metadata (3.7 kB)\n", - "Collecting ipython>=6.1.0 (from ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for ipython>=6.1.0 from https://files.pythonhosted.org/packages/20/3a/917cb9e72f4e1a4ea13c862533205ae1319bd664119189ee5cc9e4e95ebf/ipython-9.0.2-py3-none-any.whl.metadata\n", - " Using cached ipython-9.0.2-py3-none-any.whl.metadata (4.3 kB)\n", - "Collecting traitlets>=4.3.1 (from ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining 
dependency information for traitlets>=4.3.1 from https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl.metadata\n", - " Using cached traitlets-5.14.3-py3-none-any.whl.metadata (10 kB)\n", - "Collecting widgetsnbextension~=4.0.12 (from ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for widgetsnbextension~=4.0.12 from https://files.pythonhosted.org/packages/21/02/88b65cc394961a60c43c70517066b6b679738caf78506a5da7b88ffcb643/widgetsnbextension-4.0.13-py3-none-any.whl.metadata\n", - " Using cached widgetsnbextension-4.0.13-py3-none-any.whl.metadata (1.6 kB)\n", - "Collecting jupyterlab-widgets~=3.0.12 (from ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for jupyterlab-widgets~=3.0.12 from https://files.pythonhosted.org/packages/a9/93/858e87edc634d628e5d752ba944c2833133a28fa87bb093e6832ced36a3e/jupyterlab_widgets-3.0.13-py3-none-any.whl.metadata\n", - " Using cached jupyterlab_widgets-3.0.13-py3-none-any.whl.metadata (4.1 kB)\n", - "Collecting annotated-types>=0.6.0 (from pydantic<3.0.0,>=2.9.2->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for annotated-types>=0.6.0 from https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl.metadata\n", - " Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n", - "Collecting pydantic-core==2.27.2 (from pydantic<3.0.0,>=2.9.2->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pydantic-core==2.27.2 from https://files.pythonhosted.org/packages/a8/7c/b860618c25678bbd6d1d99dbdfdf0510ccb50790099b963ff78a124b754f/pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", - "Collecting 
typing-extensions>=4.12.2 (from pydantic<3.0.0,>=2.9.2->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for typing-extensions>=4.12.2 from https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl.metadata\n", - " Using cached typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)\n", - "Collecting greenlet!=0.4.17 (from sqlalchemy<3.0.0,>=2.0.35->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for greenlet!=0.4.17 from https://files.pythonhosted.org/packages/f7/4b/1c9695aa24f808e156c8f4813f685d975ca73c000c2a5056c514c64980f6/greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata\n", - " Using cached greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (3.8 kB)\n", - "Collecting decorator (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for decorator from https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl.metadata\n", - " Using cached decorator-5.2.1-py3-none-any.whl.metadata (3.9 kB)\n", - "Collecting ipython-pygments-lexers (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for ipython-pygments-lexers from https://files.pythonhosted.org/packages/d9/33/1f075bf72b0b747cb3288d011319aaf64083cf2efef8354174e3ed4540e2/ipython_pygments_lexers-1.1.1-py3-none-any.whl.metadata\n", - " Using cached ipython_pygments_lexers-1.1.1-py3-none-any.whl.metadata (1.1 kB)\n", - "Collecting jedi>=0.16 (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for jedi>=0.16 from https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl.metadata\n", - " Using cached 
jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)\n", - "Collecting matplotlib-inline (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for matplotlib-inline from https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl.metadata\n", - " Using cached matplotlib_inline-0.1.7-py3-none-any.whl.metadata (3.9 kB)\n", - "Collecting pexpect>4.3 (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pexpect>4.3 from https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl.metadata\n", - " Using cached pexpect-4.9.0-py2.py3-none-any.whl.metadata (2.5 kB)\n", - "Collecting prompt_toolkit<3.1.0,>=3.0.41 (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for prompt_toolkit<3.1.0,>=3.0.41 from https://files.pythonhosted.org/packages/e4/ea/d836f008d33151c7a1f62caf3d8dd782e4d15f6a43897f64480c2b8de2ad/prompt_toolkit-3.0.50-py3-none-any.whl.metadata\n", - " Using cached prompt_toolkit-3.0.50-py3-none-any.whl.metadata (6.6 kB)\n", - "Collecting pygments>=2.4.0 (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pygments>=2.4.0 from https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl.metadata\n", - " Using cached pygments-2.19.1-py3-none-any.whl.metadata (2.5 kB)\n", - "Collecting stack_data (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for stack_data from https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl.metadata\n", - " Using cached 
stack_data-0.6.3-py3-none-any.whl.metadata (18 kB)\n", - "Collecting six>=1.5 (from python-dateutil>=2.8.2->pandas==2.0.3->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for six>=1.5 from https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl.metadata\n", - " Using cached six-1.17.0-py2.py3-none-any.whl.metadata (1.7 kB)\n", - "Collecting parso<0.9.0,>=0.8.4 (from jedi>=0.16->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for parso<0.9.0,>=0.8.4 from https://files.pythonhosted.org/packages/c6/ac/dac4a63f978e4dcb3c6d3a78c4d8e0192a113d288502a1216950c41b1027/parso-0.8.4-py2.py3-none-any.whl.metadata\n", - " Using cached parso-0.8.4-py2.py3-none-any.whl.metadata (7.7 kB)\n", - "Collecting ptyprocess>=0.5 (from pexpect>4.3->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for ptyprocess>=0.5 from https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl.metadata\n", - " Using cached ptyprocess-0.7.0-py2.py3-none-any.whl.metadata (1.3 kB)\n", - "Collecting wcwidth (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for wcwidth from https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl.metadata\n", - " Using cached wcwidth-0.2.13-py2.py3-none-any.whl.metadata (14 kB)\n", - "Collecting executing>=1.2.0 (from stack_data->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for executing>=1.2.0 from https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl.metadata\n", - " 
Using cached executing-2.2.0-py2.py3-none-any.whl.metadata (8.9 kB)\n", - "Collecting asttokens>=2.1.0 (from stack_data->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for asttokens>=2.1.0 from https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl.metadata\n", - " Using cached asttokens-3.0.0-py3-none-any.whl.metadata (4.7 kB)\n", - "Collecting pure-eval (from stack_data->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->biasanalyzer==0.1.0)\n", - " Obtaining dependency information for pure-eval from https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl.metadata\n", - " Using cached pure_eval-0.2.3-py3-none-any.whl.metadata (6.3 kB)\n", - "Using cached jinja2-3.1.5-py3-none-any.whl (134 kB)\n", - "Using cached numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n", - "Using cached pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.2 MB)\n", - "Using cached scipy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.1 MB)\n", - "Using cached duckdb-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (20.2 MB)\n", - "Using cached duckdb_engine-0.13.6-py3-none-any.whl (48 kB)\n", - "Using cached ipytree-0.2.2-py2.py3-none-any.whl (1.3 MB)\n", - "Using cached ipywidgets-8.1.5-py3-none-any.whl (139 kB)\n", - "Using cached pydantic-2.10.6-py3-none-any.whl (431 kB)\n", - "Using cached pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n", - "Using cached PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (762 kB)\n", - "Using cached sqlalchemy-2.0.39-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)\n", - "Using cached annotated_types-0.7.0-py3-none-any.whl (13 kB)\n", - "Using cached 
comm-0.2.2-py3-none-any.whl (7.2 kB)\n", - "Using cached greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (602 kB)\n", - "Using cached ipython-9.0.2-py3-none-any.whl (600 kB)\n", - "Using cached jupyterlab_widgets-3.0.13-py3-none-any.whl (214 kB)\n", - "Using cached MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (23 kB)\n", - "Using cached packaging-24.2-py3-none-any.whl (65 kB)\n", - "Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl (229 kB)\n", - "Using cached pytz-2025.1-py2.py3-none-any.whl (507 kB)\n", - "Using cached traitlets-5.14.3-py3-none-any.whl (85 kB)\n", - "Using cached typing_extensions-4.12.2-py3-none-any.whl (37 kB)\n", - "Using cached tzdata-2025.1-py2.py3-none-any.whl (346 kB)\n", - "Using cached widgetsnbextension-4.0.13-py3-none-any.whl (2.3 MB)\n", - "Using cached jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)\n", - "Using cached pexpect-4.9.0-py2.py3-none-any.whl (63 kB)\n", - "Using cached prompt_toolkit-3.0.50-py3-none-any.whl (387 kB)\n", - "Using cached pygments-2.19.1-py3-none-any.whl (1.2 MB)\n", - "Using cached six-1.17.0-py2.py3-none-any.whl (11 kB)\n", - "Using cached decorator-5.2.1-py3-none-any.whl (9.2 kB)\n", - "Using cached ipython_pygments_lexers-1.1.1-py3-none-any.whl (8.1 kB)\n", - "Using cached matplotlib_inline-0.1.7-py3-none-any.whl (9.9 kB)\n", - "Using cached stack_data-0.6.3-py3-none-any.whl (24 kB)\n", - "Using cached asttokens-3.0.0-py3-none-any.whl (26 kB)\n", - "Using cached executing-2.2.0-py2.py3-none-any.whl (26 kB)\n", - "Using cached parso-0.8.4-py2.py3-none-any.whl (103 kB)\n", - "Using cached ptyprocess-0.7.0-py2.py3-none-any.whl (13 kB)\n", - "Using cached pure_eval-0.2.3-py3-none-any.whl (11 kB)\n", - "Using cached wcwidth-0.2.13-py2.py3-none-any.whl (34 kB)\n", - "Building wheels for collected packages: biasanalyzer\n", - " Building wheel for biasanalyzer (pyproject.toml) ... 
\u001B[?25ldone\n", - "\u001B[?25h Created wheel for biasanalyzer: filename=biasanalyzer-0.1.0-py3-none-any.whl size=25475 sha256=1982c82749337f81db1a730b8cc25c049d0c0788cd6b782f69ce8be1d92a397c\n", - " Stored in directory: /home/hyi/temp/pip-ephem-wheel-cache-f_9rcqkk/wheels/25/75/4e/079d96d69cc58148ce31d3d44f858e4db5f689604112dcb7c3\n", - "Successfully built biasanalyzer\n", - "Installing collected packages: wcwidth, pytz, pure-eval, ptyprocess, widgetsnbextension, tzdata, typing-extensions, traitlets, six, pyyaml, pygments, psycopg2, prompt_toolkit, pexpect, parso, packaging, numpy, MarkupSafe, jupyterlab-widgets, greenlet, executing, duckdb, decorator, asttokens, annotated-types, stack_data, sqlalchemy, scipy, python-dateutil, pydantic-core, matplotlib-inline, jinja2, jedi, ipython-pygments-lexers, comm, pydantic, pandas, ipython, duckdb-engine, ipywidgets, ipytree, biasanalyzer\n", - "\u001B[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "ipympl 0.9.3 requires ipython<9, but you have ipython 9.0.2 which is incompatible.\u001B[0m\u001B[31m\n", - "\u001B[0mSuccessfully installed MarkupSafe-3.0.2 annotated-types-0.7.0 asttokens-3.0.0 biasanalyzer-0.1.0 comm-0.2.2 decorator-5.2.1 duckdb-1.2.1 duckdb-engine-0.13.6 executing-2.2.0 greenlet-3.1.1 ipython-9.0.2 ipython-pygments-lexers-1.1.1 ipytree-0.2.2 ipywidgets-8.1.5 jedi-0.19.2 jinja2-3.1.5 jupyterlab-widgets-3.0.13 matplotlib-inline-0.1.7 numpy-1.24.4 packaging-24.2 pandas-2.0.3 parso-0.8.4 pexpect-4.9.0 prompt_toolkit-3.0.50 psycopg2-2.9.10 ptyprocess-0.7.0 pure-eval-0.2.3 pydantic-2.10.6 pydantic-core-2.27.2 pygments-2.19.1 python-dateutil-2.9.0.post0 pytz-2025.1 pyyaml-6.0.2 scipy-1.10.1 six-1.17.0 sqlalchemy-2.0.39 stack_data-0.6.3 traitlets-5.14.3 typing-extensions-4.12.2 tzdata-2025.1 wcwidth-0.2.13 widgetsnbextension-4.0.13\n" - ] - } - ], - "source": [ - "# Have to specify TMPDIR and target in pip install command to work around the kernel crash issue due to \n", - "# the small ephemeral local storage quota allocated to /tmp which is used by default by pip install\n", - "!TMPDIR=/home/hyi/temp pip install git+https://github.com/vaclab/BiasAnalyzer.git --target /home/hyi/temp --upgrade" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "9ce3b87c-0754-4eae-9f85-8210104e2b0b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# append the target folder where HealthDataBias module was installed to PYTHONPATH\n", - "import sys\n", - "sys.path.append('/home/hyi/temp')\n", - "import pandas as pd\n", - "pd.set_option('display.max_rows', None)\n", - "pd.set_option('display.max_columns', None)\n", - "pd.set_option('display.width', 1000)\n", - "import time" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "729e8803-74f8-4180-aa8b-0e44567f8aeb", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from 
biasanalyzer.api import BIAS" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "548223ed-8948-461e-b9d6-40a0ec7fc89f", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "no configuration file specified. Call set_config(config_file_path) next to specify configurations\n" - ] - } - ], - "source": [ - "# create an object of BIAS class\n", - "bias = BIAS()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "7d440d9f-c7fa-4ef1-ad66-31274ebef4ea", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "configuration specified in /home/hyi/bias/config/config.yaml loaded successfully\n" - ] - } - ], - "source": [ - "bias.set_config('/home/hyi/bias/config/config.yaml')" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "283156f8-63da-42a5-bbd7-ee2b7719652c", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Connected to the OMOP CDM database (read-only).\n", - "Cohort Definition table created.\n", - "Cohort table created.\n" - ] - } - ], - "source": [ - "# the configuration file includes root_omop_cdm_database configuration info with an example shown \n", - "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/config/test_config.yaml\n", - "bias.set_root_omop()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "7192ab6d-0845-4bcd-acda-f00157d4215d", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "either domain or vocabulary must be set to constrain the number of returned concepts\n", - "concepts for COVID-19 in Condition domain with SNOMED vocabulary:\n", - " concept_id concept_name valid_start_date valid_end_date domain_id vocabulary_id\n", - "0 703440 COVID-19 confirmed using clinical diagnostic c... 
2020-04-01 2099-12-31 Condition SNOMED\n", - "1 703441 COVID-19 confirmed by laboratory test 2020-04-01 2099-12-31 Condition SNOMED\n", - "2 703445 Low risk category for developing complication ... 2020-04-01 2099-12-31 Condition SNOMED\n", - "3 703446 Moderate risk category for developing complica... 2020-04-01 2099-12-31 Condition SNOMED\n", - "4 703447 High risk category for developing complication... 2020-04-01 2099-12-31 Condition SNOMED\n", - "5 37310269 COVID-19 2020-02-04 2020-10-28 Condition SNOMED\n", - "6 37311061 COVID-19 2020-01-31 2099-12-31 Condition SNOMED\n", - "concepts for COVID-19 in Condition domain:\n", - " concept_id concept_name valid_start_date valid_end_date domain_id vocabulary_id\n", - "0 702953 Emergency use of U07.1 | COVID-19 2020-04-01 2099-12-31 Condition ICD10CM\n", - "1 703440 COVID-19 confirmed using clinical diagnostic c... 2020-04-01 2099-12-31 Condition SNOMED\n", - "2 703441 COVID-19 confirmed by laboratory test 2020-04-01 2099-12-31 Condition SNOMED\n", - "3 703445 Low risk category for developing complication ... 2020-04-01 2099-12-31 Condition SNOMED\n", - "4 703446 Moderate risk category for developing complica... 2020-04-01 2099-12-31 Condition SNOMED\n", - "5 703447 High risk category for developing complication... 2020-04-01 2099-12-31 Condition SNOMED\n", - "6 756023 Acute bronchitis caused by COVID-19 2020-03-18 2021-01-29 Condition OMOP Extension\n", - "7 756031 Bronchitis caused by COVID-19 2020-03-18 2099-12-31 Condition OMOP Extension\n", - "8 756039 Respiratory infection caused by COVID-19 2020-03-18 2099-12-31 Condition OMOP Extension\n", - "9 756044 Acute respiratory distress syndrome (ARDS) cau... 2020-03-18 2021-01-29 Condition OMOP Extension\n", - "10 756061 Asymptomatic COVID-19 2020-03-18 2021-01-29 Condition OMOP Extension\n", - "11 756081 Infection of lower respiratory tract caused by... 
2020-03-18 2021-01-29 Condition OMOP Extension\n", - "12 37310269 COVID-19 2020-02-04 2020-10-28 Condition SNOMED\n", - "13 37311061 COVID-19 2020-01-31 2099-12-31 Condition SNOMED\n", - "concepts for COVID-19 in SNOMED vocabulary:\n", - " concept_id concept_name valid_start_date valid_end_date domain_id vocabulary_id\n", - "0 703420 COVID-19 presenting complaints simple referenc... 2020-04-01 2099-12-31 Metadata SNOMED\n", - "1 703421 COVID-19 health issues simple reference set 2020-04-01 2099-12-31 Metadata SNOMED\n", - "2 703422 COVID-19 procedures simple reference set 2020-04-01 2099-12-31 Metadata SNOMED\n", - "3 703423 COVID-19 record extraction simple reference set 2020-04-01 2099-12-31 Metadata SNOMED\n", - "4 703424 Provision of advice, assessment or treatment l... 2020-04-01 2099-12-31 Observation SNOMED\n", - "5 703429 COVID-19 excluded using clinical diagnostic cr... 2020-04-01 2099-12-31 Observation SNOMED\n", - "6 703430 COVID-19 excluded by laboratory test 2020-04-01 2099-12-31 Observation SNOMED\n", - "7 703431 COVID-19 excluded 2020-04-01 2020-10-28 Observation SNOMED\n", - "8 703440 COVID-19 confirmed using clinical diagnostic c... 2020-04-01 2099-12-31 Condition SNOMED\n", - "9 703441 COVID-19 confirmed by laboratory test 2020-04-01 2099-12-31 Condition SNOMED\n", - "10 703442 Assessment using COVID-19 severity scale 2020-04-01 2099-12-31 Procedure SNOMED\n", - "11 703443 COVID-19 severity scale 2020-04-01 2099-12-31 Measurement SNOMED\n", - "12 703444 COVID-19 severity score 2020-04-01 2099-12-31 Measurement SNOMED\n", - "13 703445 Low risk category for developing complication ... 2020-04-01 2099-12-31 Condition SNOMED\n", - "14 703446 Moderate risk category for developing complica... 2020-04-01 2099-12-31 Condition SNOMED\n", - "15 703447 High risk category for developing complication... 2020-04-01 2099-12-31 Condition SNOMED\n", - "16 3657496 Provision of advice, assessment or treatment d... 
2020-05-13 2099-12-31 Observation SNOMED\n", - "17 3657558 COVID-19 test result communication to general ... 2020-06-10 2099-12-31 Metadata SNOMED\n", - "18 3657559 COVID-19 test result communication to general ... 2020-06-10 2099-12-31 Metadata SNOMED\n", - "19 37310268 Suspected COVID-19 2020-02-04 2020-10-28 Observation SNOMED\n", - "20 37310269 COVID-19 2020-02-04 2020-10-28 Condition SNOMED\n", - "21 37311060 Suspected COVID-19 2020-01-31 2099-12-31 Observation SNOMED\n", - "22 37311061 COVID-19 2020-01-31 2099-12-31 Condition SNOMED\n" - ] - } - ], - "source": [ - "bias.get_concepts(\"COVID-19\")\n", - "concepts = bias.get_concepts(\"COVID-19\", \"Condition\", \"SNOMED\")\n", - "print(f'concepts for COVID-19 in Condition domain with SNOMED vocabulary:\\n {pd.DataFrame(concepts)}')\n", - "concepts = bias.get_concepts(\"COVID-19\", domain=\"Condition\")\n", - "print(f'concepts for COVID-19 in Condition domain:\\n {pd.DataFrame(concepts)}')\n", - "concepts = bias.get_concepts(\"COVID-19\", vocabulary=\"SNOMED\")\n", - "print(f'concepts for COVID-19 in SNOMED vocabulary:\\n {pd.DataFrame(concepts)}')" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "9a52ab5f-57a8-4942-8a03-ec86651e919e", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "template_name: \"cohort_creation_condition_occurrence_query\"\n", - "\n", - "inclusion_criteria:\n", - " demographics: # Optional\n", - " gender: 'female' # accepted values: female or male, optional field\n", - " min_birth_year: 2000 # Born at the year of 2000 or after, optional field\n", - " temporal_events:\n", - " - operator: 'AND'\n", - " events:\n", - " - event_type: 'condition_occurrence'\n", - " event_concept_id: 37311061 # COVID condition\n" - ] - } - ], - "source": [ - "# create a cohort with all COVID-19 female patients under 24 years old\n", - "# cohort_query = ('SELECT c.person_id, c.condition_start_date as cohort_start_date, '\n", - 
"# 'c.condition_end_date as cohort_end_date '\n", - "# 'FROM condition_occurrence c JOIN '\n", - "# 'person p ON c.person_id = p.person_id '\n", - "# 'WHERE c.condition_concept_id = 37311061 '\n", - "# 'AND p.gender_concept_id = 8532 AND p.year_of_birth > 2000')\n", - "!cat /home/hyi/bias/config/covid_female_born_after_2000_cohort.yaml\n", - "\n", - "cohort_query = '/home/hyi/bias/config/covid_female_born_after_2000_cohort.yaml'\n" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "70745d05-1f45-4e7d-b3ad-b6e0e45334e1", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "template_path: /home/hyi/temp/biasanalyzer/sql_templates\n", - "configuration specified in /home/hyi/bias/config/covid_female_born_after_2000_cohort.yaml loaded successfully\n", - "Cohort definition inserted successfully.\n", - "Cohort Young female COVID-19 patients successfully created.\n", - "cohort created successfully\n", - "Young female COVID-19 patient cohort definition: {'id': 1, 'name': 'Young female COVID-19 patients', 'description': 'Female patients with COVID-19 condition under 24 years old', 'created_date': datetime.date(2025, 3, 13), 'creation_info': 'WITH ranked_events AS ( SELECT person_id, condition_concept_id, condition_start_date AS event_start_date, condition_end_date AS event_end_date, ROW_NUMBER() OVER ( PARTITION BY person_id, condition_concept_id ORDER BY condition_start_date ASC ) AS event_instance FROM condition_occurrence ), ranked_visits AS ( SELECT person_id, visit_concept_id, visit_start_date AS event_start_date, visit_end_date AS event_end_date, ROW_NUMBER() OVER ( PARTITION BY person_id, visit_concept_id ORDER BY visit_start_date ASC ) AS event_instance FROM visit_occurrence ), condition_qualifying_events AS ( (SELECT person_id, event_start_date, event_end_date FROM ranked_events WHERE condition_concept_id = 37311061) ), filtered_cohort AS ( SELECT c.person_id, MIN(c.event_start_date) AS 
cohort_start_date, MAX(c.event_end_date) AS cohort_end_date FROM condition_qualifying_events c JOIN person p ON c.person_id = p.person_id WHERE 1=1 AND p.gender_concept_id = 8532 AND p.year_of_birth >= 2000 GROUP BY c.person_id ) SELECT * FROM filtered_cohort f', 'created_by': 'system'}\n", - "The first five patients in the cohort: [{'subject_id': 53949, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2020, 2, 28), 'cohort_end_date': datetime.date(2020, 3, 11)}, {'subject_id': 22344, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2020, 3, 11), 'cohort_end_date': datetime.date(2020, 4, 13)}, {'subject_id': 80198, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2020, 3, 5), 'cohort_end_date': datetime.date(2020, 4, 9)}, {'subject_id': 30052, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2020, 3, 6), 'cohort_end_date': datetime.date(2020, 4, 8)}, {'subject_id': 94887, 'cohort_definition_id': 1, 'cohort_start_date': datetime.date(2020, 2, 29), 'cohort_end_date': datetime.date(2020, 3, 24)}]\n" - ] - } - ], - "source": [ - "cohort_data = bias.create_cohort('Young female COVID-19 patients', \n", - " 'Female patients with COVID-19 condition under 24 years old', \n", - " cohort_query, 'system')\n", - "md = cohort_data.metadata\n", - "print(f'Young female COVID-19 patient cohort definition: {md}')\n", - "print(f'The first five patients in the cohort: {cohort_data.data[:5]}')" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "b1415805-b065-40b8-b2cd-6db6f5f9ca41", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "the cohort stats: [{'total_count': 10208, 'earliest_start_date': datetime.date(2020, 1, 18), 'latest_start_date': datetime.date(2020, 3, 30), 'earliest_end_date': datetime.date(2020, 2, 7), 'latest_end_date': datetime.date(2020, 5, 3), 'min_duration_days': 8, 'max_duration_days': 37, 'avg_duration_days': 24.25, 
'median_duration': 24, 'stddev_duration': 7.2}]\n", - "the cohort age stats: [{'total_count': 10208, 'min_age': 0, 'max_age': 20, 'avg_age': 10.94, 'median_age': 11, 'stddev_age': 5.92}]\n", - "the cohort gender stats: [{'gender': 'female', 'gender_count': 10208, 'probability': 1.0}]\n", - "the cohort race stats: [{'race': 'Asian', 'race_count': 723, 'probability': 0.07}, {'race': 'Other', 'race_count': 53, 'probability': 0.01}, {'race': 'Black or African American', 'race_count': 866, 'probability': 0.08}, {'race': 'White', 'race_count': 8566, 'probability': 0.84}]\n", - "the cohort ethnicity stats: [{'ethnicity': 'other', 'ethnicity_count': 10208, 'probability': 1.0}]\n" - ] - } - ], - "source": [ - "# get stats of the cohocohort\n", - "cohort_stats = cohort_data.get_stats()\n", - "print(f'the cohort stats: {cohort_stats}')\n", - "cohort_age_stats = cohort_data.get_stats(\"age\")\n", - "print(f'the cohort age stats: {cohort_age_stats}')\n", - "cohort_gender_stats = cohort_data.get_stats(\"gender\")\n", - "print(f'the cohort gender stats: {cohort_gender_stats}')\n", - "cohort_race_stats = cohort_data.get_stats(\"race\")\n", - "print(f'the cohort race stats: {cohort_race_stats}')\n", - "cohort_ethnicity_stats = cohort_data.get_stats(\"ethnicity\")\n", - "print(f'the cohort ethnicity stats: {cohort_ethnicity_stats}')" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "d54e39da-6f78-4dc1-91ae-a8c26852582a", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "the cohort age discrete probability distribution: [{'age_bin': '0-10', 'bin_count': 4744, 'probability': 0.4647}, {'age_bin': '11-20', 'bin_count': 5464, 'probability': 0.5353}, {'age_bin': '21-30', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '31-40', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '41-50', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '51-60', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '61-70', 
'bin_count': 0, 'probability': 0.0}, {'age_bin': '71-80', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '81-90', 'bin_count': 0, 'probability': 0.0}, {'age_bin': '91+', 'bin_count': 0, 'probability': 0.0}]\n" - ] - } - ], - "source": [ - "# get discrete probability distribution of the age variable in the cohort\n", - "cohort_age_distr = cohort_data.get_distributions('age')\n", - "print(f'the cohort age discrete probability distribution: {cohort_age_distr}')" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "c7ad0b7b-21dc-4572-af21-fe1580361999", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cohort concept hierarchy for condition_occurrence with root concept ids []:\n", - " concept_name concept_code count_in_cohort prevalence ancestor_concept_id descendant_concept_id\n", - "0 Disease due to Coronaviridae 27619001 10208 1.000000 4100065 4100065\n", - "1 COVID-19 840539006 10208 1.000000 37311061 37311061\n", - "2 Coronavirus infection 186747009 10208 1.000000 439676 439676\n", - "3 Viral disease 34014006 10208 1.000000 440029 440029\n", - "4 Clinical finding 404684003 10208 1.000000 441840 441840\n", - "5 Disorder due to infection 40733004 10208 1.000000 432250 432250\n", - "6 Disease 64572001 10208 1.000000 4274025 4274025\n", - "7 Finding by site 118234003 9153 0.896650 4042140 4042140\n", - "8 Clinical history and observation findings 250171008 9153 0.896650 4094294 4094294\n", - "9 General finding of observation of patient 118222006 9149 0.896258 4041283 4041283\n", - "10 General body state finding 82832008 9080 0.889498 4221108 4221108\n", - "11 Temperature-associated finding 301343009 8769 0.859032 4103474 4103474\n", - "12 Body temperature finding 105723007 8650 0.847375 4022230 4022230\n", - "13 Vital signs finding 118227000 8650 0.847375 4042138 4042138\n", - "14 Abnormal body temperature 123979008 8650 0.847375 4047791 4047791\n", - "15 Body temperature above 
reference range 50177009 8650 0.847375 4178904 4178904\n", - "16 Fever 386661006 8650 0.847375 437663 437663\n", - "17 Respiratory finding 106048009 7785 0.762637 4024567 4024567\n", - "18 Finding of body region 301857004 7200 0.705329 4199402 4199402\n", - "19 Neurological finding 102957003 6664 0.652821 4011630 4011630\n", - "20 Sensory nervous system finding 106147001 6662 0.652625 4024013 4024013\n", - "21 Finding of sensation by site 699697007 6659 0.652332 44783587 44783587\n", - "22 Respiratory function finding 365852007 6596 0.646160 4267789 4267789\n", - "23 Cough 49727002 6596 0.646160 254761 254761\n", - "24 Finding of head and neck region 118254002 6500 0.636755 255919 255919\n", - "25 Head finding 406122000 6460 0.632837 4247371 4247371\n", - "26 Digestive system finding 386617003 5963 0.584150 4302537 4302537\n", - "27 Mouth and/or pharynx finding 249376008 5629 0.551430 4091363 4091363\n", - "28 Finding of head region 298364001 5268 0.516066 4182161 4182161\n", - "29 Finding of mouth region 423066003 4898 0.479820 4307122 4307122\n", - "30 Oral cavity finding 116337000 4898 0.479820 4022570 4022570\n", - "31 Finding of sense of taste 76489005 4893 0.479330 4296465 4296465\n", - "32 Loss of taste 36955009 4893 0.479330 4289517 4289517\n", - "33 General well-being finding 365275006 3776 0.369906 4272867 4272867\n", - "34 Fatigue 84229001 3776 0.369906 4223659 4223659\n", - "35 Energy and stamina finding 359752005 3776 0.369906 4230389 4230389\n", - "36 General problem AND/OR complaint 105721009 3776 0.369906 4022830 4022830\n", - "37 Metabolic finding 106089007 3776 0.369906 432455 432455\n", - "38 Pain 22253000 3565 0.349236 4329041 4329041\n", - "39 Pain / sensation finding 276435006 3565 0.349236 4170962 4170962\n", - "40 Pain finding at anatomical site 279001004 3560 0.348746 4132926 4132926\n", - "41 Sputum finding 248595008 3233 0.316712 4089228 4089228\n", - "42 Ear, nose and throat finding 297268004 2132 0.208856 4178545 4178545\n", - "43 Upper 
respiratory tract finding 301186004 2047 0.200529 4103320 4103320\n", - "44 Finding reported by subject or history provider 418799008 2040 0.199843 4303401 4303401\n", - "45 Difficulty breathing 230145002 1990 0.194945 4041664 4041664\n", - "46 Ease of respiration - finding 366139009 1990 0.194945 4271505 4271505\n", - "47 Finding of respiration 301282008 1990 0.194945 4115386 4115386\n", - "48 General finding of soft tissue 248402002 1837 0.179957 4093991 4093991\n", - "49 Finding related to respiratory sounds 106051002 1624 0.159091 4021770 4021770\n", - "50 Wheezing 56018004 1624 0.159091 314754 314754\n", - "51 Dyspnea 267036007 1624 0.159091 312437 312437\n", - "52 Finding of sound of breathing 301285005 1624 0.159091 4115387 4115387\n", - "53 Musculoskeletal finding 106028002 1534 0.150274 135930 135930\n", - "54 Joint finding 118952005 1495 0.146454 77960 77960\n", - "55 Pharyngeal finding 116338005 1466 0.143613 4022571 4022571\n", - "56 Muscle finding 106030000 1445 0.141556 4024566 4024566\n", - "57 Finding of sensation of joint 298249004 1445 0.141556 4179167 4179167\n", - "58 Joint pain 57676002 1445 0.141556 77074 77074\n", - "59 Musculoskeletal pain 279069000 1445 0.141556 4150129 4150129\n", - "60 Finding of sensation of skeletal muscle 298287007 1445 0.141556 4184117 4184117\n", - "61 Muscle pain 68962001 1445 0.141556 442752 442752\n", - "62 Finding of neck region 298378000 1382 0.135384 4184252 4184252\n", - "63 Sore throat symptom 267102003 1376 0.134796 4147326 4147326\n", - "64 Finding of sensation of pharynx 300275004 1376 0.134796 4114487 4114487\n", - "65 Pain of digestive structure 301362007 1376 0.134796 4116809 4116809\n", - "66 Pain of head and neck region 301365009 1376 0.134796 4116810 4116810\n", - "67 Pain in throat 162397003 1376 0.134796 259153 259153\n", - "68 Pain of respiratory structure 301355003 1376 0.134796 4115406 4115406\n", - "69 Neck pain 81680005 1376 0.134796 24134 24134\n", - "70 Headache 25064002 1304 0.127743 378253 
378253\n", - "71 Disorder by body site 123946008 1195 0.117065 4047779 4047779\n", - "72 Disorder of body system 362965005 1173 0.114910 4180628 4180628\n", - "73 Chill 43724002 1079 0.105701 434490 434490\n", - "74 Shivering or rigors 248456009 1079 0.105701 4087630 4087630\n", - "75 Inflammatory disorder 128139000 987 0.096689 4027384 4027384\n", - "76 Inflammation of specific body structures or ti... 363170005 987 0.096689 4180169 4180169\n", - "77 Inflammation of specific body systems 363171009 987 0.096689 4178818 4178818\n", - "78 Disorder of respiratory system 50043002 891 0.087284 320136 320136\n", - "79 Inflammatory disorder of the respiratory tract 363180009 885 0.086697 4180170 4180170\n", - "80 Inflammatory disorder of the respiratory system 373405005 885 0.086697 4162282 4162282\n", - "81 Inflammation of specific body organs 363169009 853 0.083562 4181063 4181063\n", - "82 Gastrointestinal tract finding 386618008 768 0.075235 4304916 4304916\n", - "83 Functional finding of gastrointestinal tract 300358007 766 0.075039 4101343 4101343\n", - "84 Disorder of trunk 128121009 638 0.062500 4028071 4028071\n", - "85 Finding of trunk structure 302292003 638 0.062500 4117930 4117930\n", - "86 Disorder of thoracic segment of trunk 609622007 635 0.062206 43531056 43531056\n", - "87 Finding of upper trunk 609623002 635 0.062206 43531057 43531057\n", - "88 Disorder of thorax 118946009 635 0.062206 4043346 4043346\n", - "89 Finding of region of thorax 298705000 635 0.062206 4185503 4185503\n", - "90 Inflammatory disorder of lower respiratory tract 128997002 628 0.061520 4028876 4028876\n", - "91 Disorder of lower respiratory system 128272009 628 0.061520 4027553 4027553\n", - "92 Lower respiratory tract finding 301226008 628 0.061520 4115259 4115259\n", - "93 Metabolic disease 75934005 586 0.057406 436670 436670\n", - "94 Viscus structure finding 406123005 585 0.057308 4227253 4227253\n", - "95 Disorder of lung 19829001 582 0.057014 257907 257907\n", - "96 
Pneumonitis 205237003 582 0.057014 253506 253506\n", - "97 Pneumonia 233604007 582 0.057014 255848 255848\n", - "98 General clinical state finding 365860008 582 0.057014 432453 432453\n", - "99 Hypoxemia 389087006 582 0.057014 437390 437390\n", - "100 Lung finding 301230006 582 0.057014 4115260 4115260\n", - "101 Lung consolidation 95436008 582 0.057014 4318404 4318404\n", - "102 Disorder of blood gas 238157005 582 0.057014 4080012 4080012\n", - "103 Respiratory distress 271825005 582 0.057014 4158346 4158346\n", - "104 Distress 69328002 582 0.057014 4239819 4239819\n", - "105 Acute disease 2704003 555 0.054369 443883 443883\n", - "106 Finding of face 301310005 535 0.052410 4103352 4103352\n", - "107 Nasal airway finding 249342004 531 0.052018 4096565 4096565\n", - "108 Nose finding 118237005 531 0.052018 4042142 4042142\n", - "109 Nasal congestion 68235000 513 0.050255 4195085 4195085\n", - "110 Vomiting symptom 249497008 443 0.043397 4096715 4096715\n", - "111 Finding of vomiting 300359004 443 0.043397 4101344 4101344\n", - "112 Nausea 422587007 443 0.043397 31967 31967\n", - "113 Disorder of soft tissue 19660004 417 0.040850 376208 376208\n", - "114 Disorder of cardiovascular system 49601007 384 0.037618 134057 134057\n", - "115 Cardiovascular finding 106063007 384 0.037618 4023995 4023995\n", - "116 Soft tissue lesion 239953001 382 0.037422 4344497 4344497\n", - "117 Ear, nose and throat disorder 232208008 370 0.036246 4339468 4339468\n", - "118 Finding of defecation 300373008 350 0.034287 4113563 4113563\n", - "119 Altered bowel function 88111009 350 0.034287 4338120 4338120\n", - "120 Digestive symptom 308925008 350 0.034287 192731 192731\n", - "121 Finding of bowel action 366256008 350 0.034287 4182633 4182633\n", - "122 Diarrhea symptom 267060006 350 0.034287 4145808 4145808\n", - "123 Diarrhea 62315008 350 0.034287 196523 196523\n", - "124 Blood vessel finding 21829004 341 0.033405 4071689 4071689\n", - "125 Vascular disorder 27550009 341 0.033405 443784 
443784\n", - "126 Acute respiratory disease 111273006 332 0.032524 4006969 4006969\n", - "127 Acute disease of cardiovascular system 128487001 331 0.032426 4028367 4028367\n", - "128 Disorder of head 118934005 314 0.030760 4042836 4042836\n", - "129 Disorder of upper respiratory system 201060008 268 0.026254 254068 254068\n", - "130 Inflammatory disorder of head 363176004 268 0.026254 4181187 4181187\n", - "131 Inflammatory disorder of upper respiratory tract 129134004 268 0.026254 4043671 4043671\n", - "132 Respiratory tract infection 275498002 236 0.023119 4170143 4170143\n", - "133 Infection by site 301810000 236 0.023119 4200532 4200532\n", - "134 Upper respiratory infection 54150009 236 0.023119 4181583 4181583\n", - "135 Respiratory failure 409622000 205 0.020082 4256228 4256228\n", - "136 Acute respiratory failure 65710008 205 0.020082 319049 319049\n", - "137 Respiratory insufficiency 409623005 205 0.020082 318459 318459\n", - "138 Viral upper respiratory tract infection 281794004 203 0.019886 4085100 4085100\n", - "139 Viral respiratory infection 312133006 203 0.019886 4193169 4193169\n", - "140 Viral infection by site 312130009 203 0.019886 4207186 4207186\n", - "141 Thrombosis of blood vessel 439129009 169 0.016556 4208466 4208466\n", - "142 Thrombosis 439127006 169 0.016556 4231363 4231363\n", - "143 Deep venous thrombosis 128053003 169 0.016556 4133004 4133004\n", - "144 Venous finding 248727005 169 0.016556 4095634 4095634\n", - "145 Disorder of vein 90507008 169 0.016556 4234997 4234997\n", - "146 Venous thrombosis 111293003 169 0.016556 444247 444247\n", - "147 Acute deep venous thrombosis 132281000119108 169 0.016556 44782746 44782746\n", - "148 Trunk arterial embolus 312593004 162 0.015870 4194610 4194610\n", - "149 Disorder of blood vessels of thorax 373434004 162 0.015870 4190192 4190192\n", - "150 Arterial finding 248718009 162 0.015870 4095631 4095631\n", - "151 Pulmonary artery finding 251039005 162 0.015870 4108173 4108173\n", - "152 
Embolism 414086009 162 0.015870 4185607 4185607\n", - "153 Acute pulmonary embolism 706870000 162 0.015870 45768439 45768439\n", - "154 Disorder of artery 359557001 162 0.015870 321887 321887\n", - "155 Disorder of pulmonary circulation 39785005 162 0.015870 433208 433208\n", - "156 Arterial embolism 54687002 162 0.015870 312339 312339\n", - "157 Pulmonary embolism 59282003 162 0.015870 440417 440417\n", - "158 Disorder of immune function 414029004 158 0.015478 440371 440371\n", - "159 Disorder of nasal sinus 7393007 146 0.014303 256440 256440\n", - "160 Sinusitis 36971009 146 0.014303 4283893 4283893\n", - "161 Facial sinus finding 271745005 146 0.014303 4158326 4158326\n", - "162 Traumatic and/or non-traumatic injury of anato... 609411003 145 0.014205 43530877 43530877\n", - "163 Traumatic AND/OR non-traumatic injury 417163006 145 0.014205 432795 432795\n", - "164 Traumatic injury by site 609336008 141 0.013813 43530815 43530815\n", - "165 Traumatic injury 417746004 141 0.013813 440921 440921\n", - "166 Infective disorder of head 363166002 133 0.013029 4176944 4176944\n", - "167 Acute inflammatory disease 128482007 132 0.012931 4134294 4134294\n", - "168 Sepsis 91302008 131 0.012833 132797 132797\n", - "169 Sepsis caused by virus 770349000 131 0.012833 36674642 36674642\n", - "170 Organ dysfunction syndrome 238147009 131 0.012833 4080011 4080011\n", - "171 Viral sinusitis 444814009 126 0.012343 40481087 40481087\n", - "172 Disorder of digestive system 53619000 115 0.011266 4201745 4201745\n", - "173 Disorder of digestive tract 84410009 115 0.011266 4309188 4309188\n", - "174 Disorder of digestive organ 76712006 115 0.011266 4297887 4297887\n", - "175 Disorder of upper digestive tract 50410009 113 0.011070 4198525 4198525\n", - "176 Inflammatory disorder of digestive system 373407002 106 0.010384 4190185 4190185\n", - "177 Inflammatory disorder of digestive tract 128999004 106 0.010384 4043371 4043371\n", - "178 Disorder of ear 25906001 105 0.010286 378161 
378161\n", - "179 Middle ear finding 300162007 105 0.010286 4101079 4101079\n", - "180 Finding of limb structure 302293008 105 0.010286 138239 138239\n", - "181 Otitis media 65363002 105 0.010286 372328 372328\n", - "182 Disorder of auditory system 362966006 105 0.010286 4176644 4176644\n", - "183 Ear and auditory finding 118236001 105 0.010286 4042141 4042141\n", - "184 Disorder of extremity 128605003 105 0.010286 133468 133468\n", - "185 Ear finding 247234006 105 0.010286 4082416 4082416\n", - "186 Otitis 43275000 105 0.010286 4183452 4183452\n", - "187 Disorder of middle ear 68996008 105 0.010286 374364 374364\n", - "188 Infective pharyngitis 312422001 104 0.010188 4193318 4193318\n", - "189 Disorder of pharynx 75860007 104 0.010188 31057 31057\n", - "190 Infection of digestive system 312158001 104 0.010188 4193990 4193990\n", - "191 Infectious disease of digestive tract 128398001 104 0.010188 4134887 4134887\n", - "192 Pharyngitis 405737000 104 0.010188 4226263 4226263\n", - "193 Disorder of musculoskeletal system 928000 102 0.009992 4244662 4244662\n", - "194 Disorder of skeletal system 88230002 101 0.009894 4339410 4339410\n", - "195 Injury of musculoskeletal system 105606008 99 0.009698 4022201 4022201\n", - "196 Acute respiratory infections 195647007 83 0.008131 4112341 4112341\n", - "197 Acute upper respiratory infection 54398005 83 0.008131 257011 257011\n", - "198 Acute infectious disease 63171007 83 0.008131 4271450 4271450\n", - "199 Skin AND/OR mucosa finding 415531008 82 0.008033 4212577 4212577\n", - "200 Finding related to pregnancy 118185001 78 0.007641 444094 444094\n", - "201 Pregnancy, childbirth and puerperium finding 248982007 78 0.007641 4088927 4088927\n", - "202 Acute viral pharyngitis 195662009 77 0.007543 4112343 4112343\n", - "203 Acute pharyngitis 363746003 77 0.007543 25297 25297\n", - "204 Acute viral disease 409631000 77 0.007543 4252853 4252853\n", - "205 Viral infection of the digestive tract 312131008 77 0.007543 4193875 
4193875\n", - "206 Acute digestive system disorder 127321000 77 0.007543 4132552 4132552\n", - "207 Viral pharyngitis 1532007 77 0.007543 4035987 4035987\n", - "208 Normal pregnancy 72892002 76 0.007445 4217975 4217975\n", - "209 Pregnant 77386006 76 0.007445 4299535 4299535\n", - "210 Disorder of joint region 785875003 73 0.007151 37206233 37206233\n", - "211 Mucosal finding 128145008 72 0.007053 4028076 4028076\n", - "212 Traumatic injury due to event 419945001 69 0.006759 439215 439215\n", - "213 Injury by mechanism 282745002 64 0.006270 4154161 4154161\n", - "214 Bleeding 131148009 62 0.006074 437312 437312\n", - "215 Hemoptysis 66857006 62 0.006074 261687 261687\n", - "216 Arthropathy 399269003 58 0.005682 73553 73553\n", - "217 Disorder of lower extremity 118937003 57 0.005584 193460 193460\n", - "218 Finding of lower limb 116312005 57 0.005584 4022922 4022922\n", - "219 Injury of lower extremity 127279002 55 0.005388 4130852 4130852\n", - "220 Traumatic arthropathy 58188004 54 0.005290 74124 74124\n", - "221 Disorder of free lower limb 700012005 53 0.005192 44782620 44782620\n", - "222 Eye / vision finding 118235002 53 0.005192 4038502 4038502\n", - "223 Ocular surface finding 246869006 53 0.005192 4087936 4087936\n", - "224 Conjunctival finding 246875002 53 0.005192 4080857 4080857\n", - "225 Anterior segment finding 418727003 53 0.005192 4303380 4303380\n", - "226 Passive conjunctival congestion 246677007 53 0.005192 4080695 4080695\n", - "227 Orbit finding 246912006 53 0.005192 4087949 4087949\n", - "228 Globe finding 246915008 53 0.005192 4080992 4080992\n", - "229 Bone finding 118953000 52 0.005094 4042505 4042505\n", - "230 Fracture of bone 125605004 52 0.005094 75053 75053\n", - "231 Soft tissue injury 282026002 52 0.005094 4083964 4083964\n", - "232 Bone injury 284003005 52 0.005094 4154739 4154739\n", - "233 Disorder of bone 76069003 52 0.005094 75909 75909\n", - "234 Disorder of connective tissue 105969002 51 0.004996 253549 253549\n", - "235 
Injury of free lower limb 700010002 51 0.004996 44784105 44784105\n", - "236 Musculoskeletal and connective tissue disorder 312225001 50 0.004898 4208786 4208786\n", - "237 Tracheobronchial disorder 233776003 49 0.004800 252662 252662\n", - "238 Bronchitis 32398004 49 0.004800 256451 256451\n", - "239 Acute bronchitis 10509002 49 0.004800 260139 260139\n", - "240 Lesion of joint 298149009 49 0.004800 4179141 4179141\n", - "241 Bronchial finding 301229001 49 0.004800 4116777 4116777\n", - "242 Disorder of bronchus 41427001 49 0.004800 260131 260131\n", - "243 Finding of upper limb 116307009 48 0.004702 4020346 4020346\n", - "244 Disorder of upper extremity 118947000 48 0.004702 4042503 4042503\n", - "245 Disorder of ligament 60492000 48 0.004702 442628 442628\n", - "246 Ligament finding 250132005 48 0.004702 4094284 4094284\n", - "247 Injury of connective tissue 385424001 47 0.004604 4300157 4300157\n", - "248 Joint injury 125610000 47 0.004604 4054054 4054054\n", - "249 Injury of upper extremity 127278005 47 0.004604 4130851 4130851\n", - "250 Ligament injury 263126002 47 0.004604 4136694 4136694\n", - "251 Cardiovascular measurement - finding 366157005 45 0.004408 4277352 4277352\n", - "252 Hypertensive disorder 38341003 45 0.004408 316866 316866\n", - "253 Sprain of ligament 398878007 45 0.004408 4160875 4160875\n", - "254 Sprain of joint 105611005 45 0.004408 4023316 4023316\n", - "255 Finding of ankle or foot 419518009 44 0.004310 4305027 4305027\n", - "256 Essential hypertension 59621000 42 0.004114 320128 320128\n", - "257 Bacterial infectious disease 87628006 42 0.004114 432545 432545\n", - "258 Finding of ankle region 116315007 41 0.004016 4023577 4023577\n", - "259 Injury of ankle 125603006 41 0.004016 77162 77162\n", - "260 Disorder of ankle 128138008 41 0.004016 78831 78831\n", - "261 Bacterial infection by site 301811001 34 0.003331 4200533 4200533\n", - "262 Disorder of the central nervous system 23853001 34 0.003331 376106 376106\n", - "263 Bacterial 
respiratory infection 312117008 34 0.003331 4207184 4207184\n", - "264 Bacterial upper respiratory infection 312118003 34 0.003331 4207185 4207185\n", - "265 Disorder of nervous system 118940003 34 0.003331 376337 376337\n", - "266 Central nervous system finding 246556002 34 0.003331 4086181 4086181\n", - "267 Finding of brain 299718000 33 0.003233 4101796 4101796\n", - "268 Disorder of brain 81308009 33 0.003233 372887 372887\n", - "269 Hypersensitivity condition 473010000 32 0.003135 43021226 43021226\n", - "270 Disorder of ankle joint 428776005 31 0.003037 443583 443583\n", - "271 Disorder of joint of ankle and/or foot 442246002 31 0.003037 40482662 40482662\n", - "272 Traumatic arthropathy of the ankle and/or foot 201938008 31 0.003037 75620 75620\n", - "273 Sprain of ankle and/or foot 209529003 31 0.003037 4016673 4016673\n", - "274 Traumatic arthropathy of lower extremity 373575008 31 0.003037 4189458 4189458\n", - "275 Sprain of ligament of lower limb 281599007 31 0.003037 4105866 4105866\n", - "276 Traumatic arthropathy-ankle 201954006 31 0.003037 4114605 4114605\n", - "277 Lesion of ligaments of the ankle region 240019006 31 0.003037 4344271 4344271\n", - "278 Ankle joint finding 299413005 31 0.003037 443357 443357\n", - "279 Sprain of ankle 44465007 31 0.003037 81151 81151\n", - "280 Fracture of upper limb 23406007 30 0.002939 4050747 4050747\n", - "281 Finding of bone of upper limb 298756009 30 0.002939 4186164 4186164\n", - "282 Bacterial infection of the digestive tract 312129004 27 0.002645 4193874 4193874\n", - "283 Allergic condition 473011001 27 0.002645 43021227 43021227\n", - "284 Streptococcal infectious disease 85769006 27 0.002645 437779 437779\n", - "285 Disease due to Gram-positive bacteria 371582002 27 0.002645 4161193 4161193\n", - "286 Chronic disease 27624003 27 0.002645 443783 443783\n", - "287 Disease due to Gram-positive coccus 408637006 27 0.002645 4248801 4248801\n", - "288 Streptococcal sore throat 43878008 27 0.002645 28060 
28060\n", - "289 Head and neck injury 282749008 25 0.002449 4154162 4154162\n", - "290 Disorder of face 118930001 24 0.002351 4042835 4042835\n", - "291 Traumatic arthropathy-wrist 201946009 23 0.002253 4116594 4116594\n", - "292 Traumatic arthropathy of upper extremity 373574007 23 0.002253 4162433 4162433\n", - "293 Disorder of wrist 128130001 23 0.002253 4028074 4028074\n", - "294 Finding of wrist region 116310002 23 0.002253 4020347 4020347\n", - "295 Allergic disorder 781474001 19 0.001861 36683564 36683564\n", - "296 Disorder of soft tissue of head 280131007 19 0.001861 4090614 4090614\n", - "297 Allergic rhinitis 61582004 19 0.001861 257007 257007\n", - "298 IgE-mediated allergic disorder 422076005 19 0.001861 4223759 4223759\n", - "299 Disorder of mucous membrane 95351003 19 0.001861 4318379 4318379\n", - "300 Inflammatory disease of mucous membrane 95361005 19 0.001861 432661 432661\n", - "301 Disorder of nose and nasopharynx 232339008 19 0.001861 4049222 4049222\n", - "302 Nasal mucosa finding 249353005 19 0.001861 442983 442983\n", - "303 Immune hypersensitivity disorder by mechanism 427439005 19 0.001861 4141833 4141833\n", - "304 Atopic IgE-mediated allergic disorder 421871004 19 0.001861 4223595 4223595\n", - "305 Rhinitis 70076002 19 0.001861 4320791 4320791\n", - "306 Injury of head 82271004 19 0.001861 375415 375415\n", - "307 Disorder of the nose 89488007 19 0.001861 4229909 4229909\n", - "308 Evaluation finding 441742003 18 0.001763 40480457 40480457\n", - "309 Chronic disease of respiratory system 17097001 16 0.001567 4063381 4063381\n", - "310 Wound finding 225552003 16 0.001567 4021667 4021667\n", - "311 Laceration - injury 312608009 16 0.001567 443419 443419\n", - "312 Wound 416462003 16 0.001567 4168335 4168335\n", - "313 Perennial allergic rhinitis 446096008 16 0.001567 40486433 40486433\n", - "314 Open wound 125643001 16 0.001567 444187 444187\n", - "315 Disorder of soft tissue of limb 280134004 16 0.001567 4090615 4090615\n", - "316 
Chronic pain 82423001 16 0.001567 436096 436096\n", - "317 Disorder of soft tissue of upper limb 280135003 15 0.001469 4090616 4090616\n", - "318 Neurological lesion 299735001 15 0.001469 4103662 4103662\n", - "319 Concussion injury of body structure 708540005 14 0.001371 45769811 45769811\n", - "320 Sprain of wrist and/or hand 209436000 14 0.001371 4018956 4018956\n", - "321 Complication 116223007 14 0.001371 433128 433128\n", - "322 Sprain of upper extremity 123536004 14 0.001371 4048512 4048512\n", - "323 Injury of wrist 125598003 14 0.001371 444129 444129\n", - "324 Traumatic AND/OR non-traumatic brain injury 127294003 14 0.001371 4133611 4133611\n", - "325 Traumatic brain injury 127295002 14 0.001371 4132546 4132546\n", - "326 Intracranial injury 127296001 14 0.001371 437409 437409\n", - "327 Injury of central nervous system 128126004 14 0.001371 4134439 4134439\n", - "328 Finding of wrist joint 298940007 14 0.001371 4181251 4181251\n", - "329 Lesion of brain 301766008 14 0.001371 4200516 4200516\n", - "330 Fracture of lower limb 46866001 14 0.001371 4187096 4187096\n", - "331 Sprain of wrist 70704007 14 0.001371 78272 78272\n", - "332 Disorder of wrist joint 428107009 14 0.001371 4323193 4323193\n", - "333 Concussion injury of brain 110030002 14 0.001371 4001336 4001336\n", - "334 Injury of nervous system 128239009 14 0.001371 4134134 4134134\n", - "335 Abnormal blood cell count 762656009 13 0.001274 42538830 42538830\n", - "336 RBC count low 165423001 13 0.001274 4013842 4013842\n", - "337 RBC count abnormal 165427000 13 0.001274 4013518 4013518\n", - "338 Hematopoietic system finding 106200001 13 0.001274 4021915 4021915\n", - "339 Measurement finding 118245000 13 0.001274 4041436 4041436\n", - "340 Chronic inflammatory disorder 128294001 13 0.001274 444208 444208\n", - "341 Chronic sinusitis 40055000 13 0.001274 257012 257012\n", - "342 Disorder of cellular component of blood 414022008 13 0.001274 443723 443723\n", - "343 Anemia 271737000 13 0.001274 
439777 439777\n", - "344 Hemoglobin level outside reference range 441793007 13 0.001274 40480513 40480513\n", - "345 Measurement finding outside reference range 442096005 13 0.001274 40481841 40481841\n", - "346 Measurement finding below reference range 442686002 13 0.001274 40484533 40484533\n", - "347 Hemoglobin low 165397008 13 0.001274 4013074 4013074\n", - "348 Protein level - finding 365799007 13 0.001274 4276572 4276572\n", - "349 Finding of substance level 785671009 13 0.001274 37203927 37203927\n", - "350 Cytopenia 50820005 13 0.001274 4179922 4179922\n", - "351 Developmental disorder 5294002 13 0.001274 435244 435244\n", - "352 Erythropenia 62574001 13 0.001274 4267432 4267432\n", - "353 Clavicle injury 282760004 12 0.001176 4151199 4151199\n", - "354 Finding of clavicle structure 298761006 12 0.001176 4185643 4185643\n", - "355 Lesion of clavicle 298766001 12 0.001176 4186167 4186167\n", - "356 Fracture of clavicle 58150001 12 0.001176 4237458 4237458\n", - "357 Fracture of shoulder 16250001000004107 12 0.001176 46270317 46270317\n", - "358 Traumatic brain injury with no loss of conscio... 
127302008 12 0.001176 4133715 4133715\n", - "359 Concussion with no loss of consciousness 62106007 12 0.001176 378001 378001\n", - "360 Open wound of limb 105616000 11 0.001078 4023317 4023317\n", - "361 Chronic nervous system disorder 128283000 11 0.001078 4134145 4134145\n", - "362 Skin finding 106076001 10 0.000980 141960 141960\n", - "363 Vascular headache 128187005 10 0.000980 4134454 4134454\n", - "364 Disorder of integument 128598002 10 0.000980 4028387 4028387\n", - "365 Fracture of ankle 16114001 10 0.000980 4059173 4059173\n", - "366 Pain of cardiovascular structure 301358001 10 0.000980 4115408 4115408\n", - "367 Chronic headache disorder 431237007 10 0.000980 374639 374639\n", - "368 Chronic brain syndrome 78689005 10 0.000980 4301371 4301371\n", - "369 Disorder of skin 95320005 10 0.000980 4317258 4317258\n", - "370 Chronic intractable migraine without aura 124171000119105 10 0.000980 43530652 43530652\n", - "371 Headache disorder 230461009 10 0.000980 375527 375527\n", - "372 Disorder characterized by pain 373673007 10 0.000980 4160062 4160062\n", - "373 Migraine 37796009 10 0.000980 318736 318736\n", - "374 Integumentary system finding 106077005 10 0.000980 444112 444112\n", - "375 Injury of forearm 125597008 10 0.000980 134222 134222\n", - "376 Disorder of forearm 128132009 10 0.000980 136779 136779\n", - "377 Chronic disease of cardiovascular system 128292002 10 0.000980 4028244 4028244\n", - "378 Refractory migraine without aura 423279000 10 0.000980 443616 443616\n", - "379 Refractory migraine 423894005 10 0.000980 443615 443615\n", - "380 Transformed migraine 427419006 10 0.000980 4141827 4141827\n", - "381 Migraine without aura 56097005 10 0.000980 378735 378735\n", - "382 Disorder of skin and/or subcutaneous tissue 80659006 10 0.000980 200174 200174\n", - "383 Impacted molars 196416002 9 0.000882 4055754 4055754\n", - "384 Fracture at wrist and/or hand level 208388003 9 0.000882 4015350 4015350\n", - "385 Impacted tooth 235104008 9 0.000882 
4123726 4123726\n", - "386 Disorder of tooth development 371136004 9 0.000882 4159157 4159157\n", - "387 Disorder of jaw 37156001 9 0.000882 435569 435569\n", - "388 Disorder of teeth AND/OR supporting structures 105995000 9 0.000882 201603 201603\n", - "389 Dislocation of joint 108367008 9 0.000882 74726 74726\n", - "390 Disease of mouth 118938008 9 0.000882 4042502 4042502\n", - "391 Traumatic dislocation of joint of wrist 125618007 9 0.000882 4054058 4054058\n", - "392 Seizure disorder 128613002 9 0.000882 4029498 4029498\n", - "393 Traumatic dislocation of joint 129156001 9 0.000882 4043679 4043679\n", - "394 Subluxation of joint of upper limb 263047001 9 0.000882 4135090 4135090\n", - "395 Fracture dislocation of joint 263063009 9 0.000882 4134184 4134184\n", - "396 Fracture subluxation of joint 263094009 9 0.000882 4136573 4136573\n", - "397 Anomaly of tooth position 81256000 9 0.000882 433243 433243\n", - "398 Seizure 91175000 9 0.000882 377091 377091\n", - "399 Asthma 195967001 9 0.000882 317009 317009\n", - "400 Childhood asthma 233678006 9 0.000882 4051466 4051466\n", - "401 Tooth disorder 234947003 9 0.000882 4122115 4122115\n", - "402 Seizure related finding 313287004 9 0.000882 4196708 4196708\n", - "403 Subluxation of wrist 833334002 9 0.000882 3654437 3654437\n", - "404 Dislocation of wrist 833335001 9 0.000882 3654438 3654438\n", - "405 Dislocation of joint of upper limb 263017003 9 0.000882 75047 75047\n", - "406 Subluxation of joint 263031003 9 0.000882 4134174 4134174\n", - "407 Fracture dislocation of joint of upper limb 263073006 9 0.000882 4135097 4135097\n", - "408 Fracture subluxation of wrist 263102004 9 0.000882 4134304 4134304\n", - "409 Tooth finding 278544002 9 0.000882 4132462 4132462\n", - "410 Fracture subluxation of joint of upper limb 281519006 9 0.000882 4085546 4085546\n", - "411 Fracture of forearm 65966004 9 0.000882 4278672 4278672\n", - "412 Acute allergic reaction 241929008 8 0.000784 4084167 4084167\n", - "413 Open wound of 
lower limb 26947005 8 0.000784 4097962 4097962\n", - "414 Laceration of lower limb 283357002 8 0.000784 4152960 4152960\n", - "415 Chest injury 262525000 8 0.000784 4094683 4094683\n", - "416 Adverse reaction 281647001 8 0.000784 4105886 4105886\n", - "417 Allergic reaction 419076005 8 0.000784 40589905 40589905\n", - "418 Injury of trunk 48125009 8 0.000784 194526 194526\n", - "419 Hypersensitivity reaction 421961002 8 0.000784 4223616 4223616\n", - "420 Fracture of rib 33737001 7 0.000686 4142905 4142905\n", - "421 Acute sinusitis 15805002 7 0.000686 260123 260123\n", - "422 Injury of ribs 282770002 7 0.000686 4151202 4151202\n", - "423 Bacterial sinusitis 703470001 7 0.000686 45766333 45766333\n", - "424 Disorder of body wall 399986003 7 0.000686 4266188 4266188\n", - "425 Fracture of bones of trunk 65354004 7 0.000686 4279139 4279139\n", - "426 Injury of chest wall 65978000 7 0.000686 75128 75128\n", - "427 Acute bacterial sinusitis 75498004 7 0.000686 4294548 4294548\n", - "428 Complication of pregnancy, childbirth and/or t... 198609003 6 0.000588 435875 435875\n", - "429 Perennial allergic rhinitis with seasonal vari... 232353008 6 0.000588 4048171 4048171\n", - "430 Disorder of neck 118939000 6 0.000588 4042837 4042837\n", - "431 Cardiac finding 301095005 6 0.000588 4103183 4103183\n", - "432 Mediastinal finding 301296002 6 0.000588 4115390 4115390\n", - "433 Whiplash injury to neck 39848009 6 0.000588 4218389 4218389\n", - "434 Disorder of mediastinum 49483002 6 0.000588 440142 440142\n", - "435 Heart disease 56265001 6 0.000588 321588 321588\n", - "436 Injury of neck 90460009 6 0.000588 24818 24818\n", - "437 Lesion of neck 298397000 6 0.000588 4185207 4185207\n", - "438 Inflammatory dermatosis 703938007 5 0.000490 45766714 45766714\n", - "439 Lesion of skin and/or skin-associated mucous m... 714974000 5 0.000490 37018424 37018424\n", - "440 Disease of circulatory system complicating pre... 
724497009 5 0.000490 37110290 37110290\n", - "441 Open wound of face 210339009 5 0.000490 4049957 4049957\n", - "442 Atopic dermatitis 24079001 5 0.000490 133834 133834\n", - "443 Disorder of hemostatic system 362970003 5 0.000490 4179872 4179872\n", - "444 Facial laceration 370247008 5 0.000490 4156265 4156265\n", - "445 Open wound of head AND/OR neck 397180001 5 0.000490 4246695 4246695\n", - "446 Disorder of cardiac function 105981003 5 0.000490 4024552 4024552\n", - "447 Genetic finding 106221001 5 0.000490 4025367 4025367\n", - "448 Atopy 115665000 5 0.000490 4019380 4019380\n", - "449 Injury of integument 125592002 5 0.000490 4053826 4053826\n", - "450 Open wound of thigh 125659001 5 0.000490 4053602 4053602\n", - "451 Burn 125666000 5 0.000490 442013 442013\n", - "452 Injury of face 125593007 5 0.000490 444191 444191\n", - "453 Burn of skin 284196006 5 0.000490 4108467 4108467\n", - "454 Laceration of thigh 283385000 5 0.000490 4152936 4152936\n", - "455 Finding of thigh 419003001 5 0.000490 4169466 4169466\n", - "456 Genetic predisposition 47708004 5 0.000490 4166231 4166231\n", - "457 Injury of thigh 7523003 5 0.000490 442564 442564\n", - "458 Heart failure 84114007 5 0.000490 316139 316139\n", - "459 Laceration of head 428088000 5 0.000490 4179823 4179823\n", - "460 Hypersensitivity disposition 609433001 5 0.000490 43530897 43530897\n", - "461 Lesion of face 767811005 5 0.000490 35624868 35624868\n", - "462 Cutaneous hypersensitivity 21626009 5 0.000490 4070025 4070025\n", - "463 Acquired coagulation disorder 234466008 5 0.000490 4120613 4120613\n", - "464 Inflammation of skin and/or subcutaneous tissue 363168001 5 0.000490 4181062 4181062\n", - "465 Open wound of head 38354005 5 0.000490 4243161 4243161\n", - "466 Disorder of thigh 128135006 5 0.000490 444211 444211\n", - "467 Skin or mucosa lesion 247440002 5 0.000490 4083787 4083787\n", - "468 Laceration of head and neck 283358007 5 0.000490 4155030 4155030\n", - "469 Propensity to adverse reaction 
420134006 5 0.000490 4172024 4172024\n", - "470 Eczema 43116000 5 0.000490 133835 133835\n", - "471 Blood coagulation disorder 64779008 5 0.000490 432585 432585\n", - "472 Skin lesion 95324001 5 0.000490 4316083 4316083\n", - "473 Finding of abdomen 609624008 4 0.000392 43531058 43531058\n", - "474 Disorder of pelvic girdle 700011003 4 0.000392 44784106 44784106\n", - "475 Disorder of glucose regulation 237597000 4 0.000392 4130161 4130161\n", - "476 Disorder of abdomen 118948005 4 0.000392 444089 444089\n", - "477 Disorder of hip 118935006 4 0.000392 4042501 4042501\n", - "478 Disorder of knee 128136007 4 0.000392 4134443 4134443\n", - "479 Closed fracture of lower limb 52603002 4 0.000392 4199590 4199590\n", - "480 Finding of abdominopelvic segment of trunk 822987005 4 0.000392 37311678 37311678\n", - "481 Disorder of abdominopelvic segment of trunk 822988000 4 0.000392 37311677 37311677\n", - "482 Impaired glucose tolerance 9414007 4 0.000392 4311629 4311629\n", - "483 Arthropathy of knee joint 428724006 4 0.000392 4324765 4324765\n", - "484 Injury of pelvic girdle 700009007 4 0.000392 44782619 44782619\n", - "485 Disorder of pregnancy 173300003 4 0.000392 439658 439658\n", - "486 Miscarriage 17369002 4 0.000392 4067106 4067106\n", - "487 Miscarriage in first trimester 19169002 4 0.000392 4078393 4078393\n", - "488 Disorder of carbohydrate metabolism 20957000 4 0.000392 437515 437515\n", - "489 Closed fracture of hip 359817006 4 0.000392 4230399 4230399\n", - "490 Disorder of endocrine system 362969004 4 0.000392 31821 31821\n", - "491 Pregnancy with abortive outcome 363681007 4 0.000392 40539858 40539858\n", - "492 Finding of hip region 116313000 4 0.000392 444220 444220\n", - "493 Finding of knee region 116314006 4 0.000392 4022923 4022923\n", - "494 Injury of hip region 125600009 4 0.000392 193666 193666\n", - "495 Abdominal organ finding 249561001 4 0.000392 4096864 4096864\n", - "496 Knee joint finding 299321000 4 0.000392 4100932 4100932\n", - "497 
Epidermal burn of skin 403190006 4 0.000392 4296204 4296204\n", - "498 Closed fracture 423125000 4 0.000392 4307254 4307254\n", - "499 Fracture of bone of hip region 700097003 4 0.000392 45763653 45763653\n", - "500 Child attention deficit disorder 192127007 3 0.000294 440086 440086\n", - "501 Disorders of attention and motor control 229712006 3 0.000294 4047120 4047120\n", - "502 Finding of foot region 116316008 3 0.000294 4022924 4022924\n", - "503 Disorder of foot 118932009 3 0.000294 444090 444090\n", - "504 Injury of foot 125604000 3 0.000294 444130 444130\n", - "505 Finding of functional performance and activity 248536006 3 0.000294 4089214 4089214\n", - "506 Attention deficit hyperactivity disorder 406506008 3 0.000294 438409 438409\n", - "507 Chronic disease of immune function 413834006 3 0.000294 4188970 4188970\n", - "508 Recurrent disease 58184002 3 0.000294 440059 440059\n", - "509 Mental disorder 74732009 3 0.000294 432586 432586\n", - "510 Seasonal allergic rhinitis 367498001 3 0.000294 4280726 4280726\n", - "511 Hypertension AND/OR vomiting complicating preg... 
106005003 3 0.000294 4024560 4024560\n", - "512 Functional finding 118228005 3 0.000294 4041284 4041284\n", - "513 Open wound of foot 125663008 3 0.000294 4054067 4054067\n", - "514 Developmental mental disorder 129104009 3 0.000294 4043545 4043545\n", - "515 Developmental disorder of motor function 268674003 3 0.000294 4148091 4148091\n", - "516 Laceration of upper limb 283366003 3 0.000294 4152932 4152932\n", - "517 Laceration of foot 284551006 3 0.000294 4109685 4109685\n", - "518 Pre-eclampsia 398254007 3 0.000294 439393 439393\n", - "519 Pregnancy-induced hypertension 48194001 3 0.000294 4167493 4167493\n", - "520 Neurodevelopmental disorder 700364009 3 0.000294 45771096 45771096\n", - "521 Open wound of upper limb 81405006 3 0.000294 4216185 4216185\n", - "522 Disorder of pelvic region of trunk 609619005 2 0.000196 43531053 43531053\n", - "523 Disorder of pelvis 609620004 2 0.000196 43531054 43531054\n", - "524 Finding of pelvic region of trunk 609625009 2 0.000196 43531059 43531059\n", - "525 Rupture of intestine 235799001 2 0.000196 4340361 4340361\n", - "526 Cystitis 38822007 2 0.000196 195588 195588\n", - "527 Urinary system finding 106098005 2 0.000196 4024000 4024000\n", - "528 Lower urinary tract finding 106100005 2 0.000196 4021780 4021780\n", - "529 Finding of hand region 116311003 2 0.000196 77358 77358\n", - "530 Disorder of gastrointestinal tract 119292006 2 0.000196 4000610 4000610\n", - "531 Disorder of large intestine 119523007 2 0.000196 4002905 4002905\n", - "532 Injury of hand 125599006 2 0.000196 80004 80004\n", - "533 Finding of urinary tract proper 249273002 2 0.000196 4091213 4091213\n", - "534 Bladder finding 249585009 2 0.000196 4092881 4092881\n", - "535 Tendon finding 250133000 2 0.000196 4095203 4095203\n", - "536 Laceration of hand 284549007 2 0.000196 4113008 4113008\n", - "537 Disorder of urinary tract 41368006 2 0.000196 197331 197331\n", - "538 Inflammation of large intestine 302168000 2 0.000196 4201402 4201402\n", - "539 
Disorder of bladder 42643001 2 0.000196 201337 201337\n", - "540 Disorder of tendon 68172002 2 0.000196 442264 442264\n", - "541 Appendicitis 74400008 2 0.000196 440448 440448\n", - "542 Epilepsy 84757009 2 0.000196 380378 380378\n", - "543 Disorder of intestine 85919009 2 0.000196 201618 201618\n", - "544 Drug-related disorder 87858002 2 0.000196 444363 444363\n", - "545 Injury of ligament of knee 438479005 2 0.000196 4231941 4231941\n", - "546 Complication occurring during pregnancy 609496007 2 0.000196 43530950 43530950\n", - "547 Finding of pelvis 609626005 2 0.000196 43531060 43531060\n", - "548 Pelvic organ finding 700006000 2 0.000196 44784102 44784102\n", - "549 Disorder of appendix 18526009 2 0.000196 433524 433524\n", - "550 Eclampsia in pregnancy 198992004 2 0.000196 137613 137613\n", - "551 Inflammatory disorder of genitourinary system 373406006 2 0.000196 4159963 4159963\n", - "552 Urogenital finding 118238000 2 0.000196 4041285 4041285\n", - "553 Finding of large intestine 118436003 2 0.000196 4038678 4038678\n", - "554 Disorder of hand 118933004 2 0.000196 77635 77635\n", - "555 Injury of knee 125601008 2 0.000196 444132 444132\n", - "556 Open wound of hand 125652005 2 0.000196 4129405 4129405\n", - "557 Disorder of the urinary system 128606002 2 0.000196 75865 75865\n", - "558 Eclampsia 15938005 2 0.000196 443700 443700\n", - "559 Bowel finding 249562008 2 0.000196 4091532 4091532\n", - "560 Finding of appendix 300307005 2 0.000196 4113552 4113552\n", - "561 Disorder of the genitourinary system 42030000 2 0.000196 4171379 4171379\n", - "562 Rupture of appendix 47693006 2 0.000196 4166224 4166224\n", - "563 Drug overdose 55680006 2 0.000196 4208104 4208104\n", - "564 Disorder of the lower urinary tract 7793005 2 0.000196 4301471 4301471\n", - "565 Disorder of lower gastrointestinal tract 79787007 2 0.000196 4197094 4197094\n", - "566 Central nervous system complication 87536007 2 0.000196 373087 373087\n", - "567 Edema 267038008 1 0.000098 433595 
433595\n", - "568 Injury of cruciate ligament of knee 444158007 1 0.000098 40485073 40485073\n", - "569 Injury of anterior cruciate ligament 444470001 1 0.000098 40479768 40479768\n", - "570 Disorder of vertebral column 699699005 1 0.000098 44782549 44782549\n", - "571 Traumatic and/or non-traumatic injury of back 712893003 1 0.000098 37016775 37016775\n", - "572 Injury of intrathoracic organ 733217006 1 0.000098 37116489 37116489\n", - "573 Interstitial lung disease 233703007 1 0.000098 4119786 4119786\n", - "574 Injury of tendon of the rotator cuff of shoulder 307731004 1 0.000098 4146173 4146173\n", - "575 Rupture of patellar tendon 30832001 1 0.000098 4149245 4149245\n", - "576 Disorder of back 33308003 1 0.000098 140190 140190\n", - "577 Blighted ovum 35999006 1 0.000098 4262136 4262136\n", - "578 Paralysis due to lesion of spinal cord 372310001 1 0.000098 4157607 4157607\n", - "579 Disorder of shoulder 118944007 1 0.000098 77630 77630\n", - "580 Vertebral column finding 119414006 1 0.000098 4002898 4002898\n", - "581 Disorder characterized by edema 118654009 1 0.000098 4040388 4040388\n", - "582 Open wound of forearm 125649002 1 0.000098 4053599 4053599\n", - "583 Traumatic brain injury with loss of consciousness 127298000 1 0.000098 4132082 4132082\n", - "584 Structural disorder of heart 128599005 1 0.000098 4027255 4027255\n", - "585 Ligament rupture 263134008 1 0.000098 4138286 4138286\n", - "586 Internal injury of chest 27817002 1 0.000098 74786 74786\n", - "587 Finding of structures of conception 289262005 1 0.000098 4128846 4128846\n", - "588 Finding of vertebra 298385001 1 0.000098 4185206 4185206\n", - "589 Finding of spinal cord 299733008 1 0.000098 4103661 4103661\n", - "590 Edema of trunk 301867009 1 0.000098 4199409 4199409\n", - "591 Abnormal products of conception 39804004 1 0.000098 436477 436477\n", - "592 Partial thickness burn 403191005 1 0.000098 4296205 4296205\n", - "593 Disorder of spinal region 410730009 1 0.000098 4260918 4260918\n", - 
"594 Traumatic or non-traumatic rupture of tendon 415746003 1 0.000098 4215217 4215217\n", - "595 Fracture of vertebral column 50448004 1 0.000098 4174520 4174520\n", - "596 Cartilage disorder 50927007 1 0.000098 4178431 4178431\n", - "597 Rupture of quadriceps tendon 6849006 1 0.000098 195632 195632\n", - "598 Disorder of tendon of shoulder region 76318008 1 0.000098 79116 79116\n", - "599 Injury of medial collateral ligament of knee 444448004 1 0.000098 40479422 40479422\n", - "600 Injury of collateral ligament of knee 444159004 1 0.000098 40485074 40485074\n", - "601 Chronic paralysis due to lesion of spinal cord 698754002 1 0.000098 44782520 44782520\n", - "602 Injury of rotator cuff 718539004 1 0.000098 36713625 36713625\n", - "603 Traumatic injury of vertebral region of back 737566006 1 0.000098 42537893 42537893\n", - "604 Fracture of vertebral column with spinal cord ... 1734006 1 0.000098 4066995 4066995\n", - "605 Pulmonary edema 19242006 1 0.000098 4078925 4078925\n", - "606 Tear of meniscus of knee 239720000 1 0.000098 4035415 4035415\n", - "607 Soft tissue lesion of knee region 239999004 1 0.000098 4344027 4344027\n", - "608 Connective tissue disorder by body site 363044007 1 0.000098 4180645 4180645\n", - "609 Injury of internal organ 105612003 1 0.000098 193631 193631\n", - "610 Finding of shoulder region 116308004 1 0.000098 4022449 4022449\n", - "611 Cartilage finding 118954006 1 0.000098 4043349 4043349\n", - "612 Intracranial injury with loss of consciousness 127297005 1 0.000098 437385 437385\n", - "613 Disorder of product of conception 128604004 1 0.000098 4029496 4029496\n", - "614 Complete miscarriage 156073000 1 0.000098 40318618 40318618\n", - "615 Spinal injury 262521009 1 0.000098 4095850 4095850\n", - "616 Rupture of ligament of knee joint 263139003 1 0.000098 4134312 4134312\n", - "617 Disorder of soft tissue of lower limb 280136002 1 0.000098 4093228 4093228\n", - "618 Rupture of tendon of lower limb 281549008 1 0.000098 4084434 
4084434\n", - "619 Cardiovascular injury 282728007 1 0.000098 4152156 4152156\n", - "620 Laceration of forearm 283371005 1 0.000098 4155034 4155034\n", - "621 Paralytic syndrome 29426003 1 0.000098 374377 374377\n", - "622 Finding of spinal region 298379008 1 0.000098 4182165 4182165\n", - "623 Disorder of rotator cuff 414033006 1 0.000098 4212887 4212887\n", - "624 Finding of back 414252009 1 0.000098 4213101 4213101\n", - "625 Spinal cord disease 48522003 1 0.000098 135526 135526\n", - "626 Acute respiratory distress syndrome 67782005 1 0.000098 4195694 4195694\n", - "627 Injury of heart 86175003 1 0.000098 4311280 4311280\n", - "628 Spinal cord injury 90584004 1 0.000098 4235863 4235863\n", - "629 Concussion with loss of consciousness 62564004 1 0.000098 375671 375671\n", - "the time taken to get cohort concept stats for condition_occurrence is 1.1196579933166504s\n" - ] - } - ], - "source": [ - "# get cohort concept prevalance\n", - "t1 = time.time()\n", - "cohort_concepts = cohort_data.get_concept_stats()\n", - "print(pd.DataFrame(cohort_concepts[\"condition_occurrence\"]))\n", - "print(f'the time taken to get cohort concept stats for condition_occurrence is {time.time() - t1}s')" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "128c7b02-faef-4a35-97a6-c92baa5a37dd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Connection to BiasDatabase closed.\n", - "Connection to the OMOP CDM database closed.\n" - ] - } - ], - "source": [ - "bias.cleanup()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e2bf375-b4fb-4c50-aab9-fff4c1a02a95", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": 
"python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 9947ddcb67ff56b7f1d7c152c73c2e7191d6b8a5 Mon Sep 17 00:00:00 2001 From: hyi Date: Fri, 20 Jun 2025 15:46:22 -0400 Subject: [PATCH 08/10] converted all jupyter notebooks into tutorials --- biasanalyzer/api.py | 20 +- biasanalyzer/database.py | 1 + .../BiasAnalyzerAsyncCohortsTutorial.ipynb | 6 +- .../BiasAnalyzerCohortConceptTutorial.ipynb | 10 +- notebooks/BiasAnalyzerCohortsTutorial.ipynb | 4 +- .../BiasAnalyzerConceptBrowsingTutorial.ipynb | 519 ++++++++++++++ .../BiasAnalyzerTestingConceptBrowsing.ipynb | 660 ------------------ 7 files changed, 539 insertions(+), 681 deletions(-) create mode 100644 notebooks/BiasAnalyzerConceptBrowsingTutorial.ipynb delete mode 100644 notebooks/BiasAnalyzerTestingConceptBrowsing.ipynb diff --git a/biasanalyzer/api.py b/biasanalyzer/api.py index 50fca55..d09163f 100644 --- a/biasanalyzer/api.py +++ b/biasanalyzer/api.py @@ -72,7 +72,6 @@ def _set_cohort_action(self): return self.cohort_action def get_domains_and_vocabularies(self): - print(f'self.omop_cdm_db: {self.omop_cdm_db}') if self.omop_cdm_db is None: notify_users('A valid OMOP CDM must be set before getting domains. ' 'Call set_root_omop first to set a valid root OMOP CDM') @@ -96,19 +95,18 @@ def get_concept_hierarchy(self, concept_id): return None return self.omop_cdm_db.get_concept_hierarchy(concept_id) - def display_concept_tree(self, concept_tree: dict, level: int = 0, show_in_text_format=True, tree_type=None): + def display_concept_tree(self, concept_tree: dict, level: int = 0, show_in_text_format=True): """ Recursively prints the concept hierarchy tree in an indented format for display. 
""" details = concept_tree.get("details", {}) - if tree_type is None or tree_type not in ['parents', 'children']: - if 'parents' in concept_tree: - tree_type = 'parents' - elif 'children' in concept_tree: - tree_type = 'children' - else: - notify_users('The input concept tree must contain parents or children key as the type of the tree.') - return '' + if 'parents' in concept_tree: + tree_type = 'parents' + elif 'children' in concept_tree: + tree_type = 'children' + else: + notify_users('The input concept tree must contain parents or children key as the type of the tree.') + return '' if show_in_text_format: if details: @@ -119,7 +117,7 @@ def display_concept_tree(self, concept_tree: dict, level: int = 0, show_in_text_ for child in concept_tree.get(tree_type, []): if child: - self.display_concept_tree(child, level + 1, tree_type=tree_type, show_in_text_format=True) + self.display_concept_tree(child, level + 1, show_in_text_format=True) # return empty string to print None being printed at the end of printout return "" else: diff --git a/biasanalyzer/database.py b/biasanalyzer/database.py index 0c9d21f..03efa6e 100644 --- a/biasanalyzer/database.py +++ b/biasanalyzer/database.py @@ -504,6 +504,7 @@ def get_concept_hierarchy(self, concept_id: int): ancestor_id, {"details": concept_details[ancestor_id], "parents": []}) desc_entry_rev["parents"].append(ancestor_entry_rev) progress.update(1) + progress.close() # Return the parent hierarchy and children hierarchy of the specified concept return reverse_hierarchy[concept_id], hierarchy[concept_id] diff --git a/notebooks/BiasAnalyzerAsyncCohortsTutorial.ipynb b/notebooks/BiasAnalyzerAsyncCohortsTutorial.ipynb index 7e25e9b..9ee1bca 100644 --- a/notebooks/BiasAnalyzerAsyncCohortsTutorial.ipynb +++ b/notebooks/BiasAnalyzerAsyncCohortsTutorial.ipynb @@ -98,7 +98,7 @@ "---\n", "\n", "### Asynchronous cohort creation\n", - "**Baseline cohort creation**: To create a baseline cohort of young female patients asynchronously, use the 
`run_in_background()` function on the `bias` object to run `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` function in a background thread. You'll pass the target function as the first argument, the cohort creation target function input arguments as the next four arguments, a `BackgroundResult` object via the `result_holder` optional parameter to store the created baseline cohort result, and a `delay` value (e.g., 120 seconds) to simulate asynchronous execution of long-running process for testing purposes. The created baseline cohort will be identical to the one created in the [Cohort Exploration Tutorial](./BiasAnalyzerCohortsTutorial.ipynb), except that the cohort creation now runs asychronously in a background thread." + "**Baseline cohort creation**: To create a baseline cohort of young female patients asynchronously, use the `run_in_background()` function to run `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` method in a background thread. You'll pass the target function as the first argument, the cohort creation target function input arguments as the next four arguments, a `BackgroundResult` object via the `result_holder` optional parameter to store the created baseline cohort result, and a `delay` value (e.g., 120 seconds) to simulate asynchronous execution of long-running process for testing purposes. The created baseline cohort will be identical to the one created in the [Cohort Exploration Tutorial](./BiasAnalyzerCohortsTutorial.ipynb), except that the cohort creation now runs asychronously in a background thread." 
] }, { @@ -147,7 +147,7 @@ "# Create baseline cohort result holder\n", "baseline_result = BackgroundResult()\n", "\n", - "# Start background task to run create_cohort() function for a baseline cohort in a background thread\n", + "# Start background task to run create_cohort() method for a baseline cohort in a background thread\n", "baseline_thread = run_in_background(\n", " bias.create_cohort,\n", " \"Young female patients\",\n", @@ -168,7 +168,7 @@ "source": [ "———————————————\n", "\n", - "**Study cohort creation**: To create a study cohort of young female COVID patients asynchronously, use the `run_in_background()` function on the `bias` object to run `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` function in a background thread. You'll pass the target function as the first argument, the cohort creation target function input arguments as the next four arguments, a `BackgroundResult` object via the `result_holder` optional parameter to store the created baseline cohort result, and a `delay` value (e.g., 120 seconds) to simulate asynchronous execution of long-running process for testing purposes. The created study cohort will be identical to the one created in the [Cohort Exploration Tutorial](./BiasAnalyzerCohortsTutorial.ipynb), except that the cohort creation now runs asychronously in a background thread." + "**Study cohort creation**: To create a study cohort of young female COVID patients asynchronously, use the `run_in_background()` function to run `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` function in a background thread. You'll pass the target function as the first argument, the cohort creation target function input arguments as the next four arguments, a `BackgroundResult` object via the `result_holder` optional parameter to store the created baseline cohort result, and a `delay` value (e.g., 120 seconds) to simulate asynchronous execution of long-running process for testing purposes. 
The created study cohort will be identical to the one created in the [Cohort Exploration Tutorial](./BiasAnalyzerCohortsTutorial.ipynb), except that the cohort creation now runs asychronously in a background thread." ] }, { diff --git a/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb b/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb index a35de32..8968c21 100644 --- a/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb +++ b/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb @@ -77,7 +77,7 @@ "source": [ "———————————————\n", "\n", - "**Preparation step 2**: Create a cohort of young female COVID patients using the `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` function on the `bias` object for cohort concept prevalence exploration. You'll pass the name of the cohort as the first argument, the description of the cohort as the second argument, a yaml file that specifies cohort inclusion and exclusion criteria or a cohort selection SQL query as the third argument, and the cohort owner's name indicating who owns or creates this cohort as the fourth argument. After the cohort is created, you can call `get_stats()` and `get_distributions()` functions on the returned `cohort_data` object to explore cohort statistics and distributions." + "**Preparation step 2**: Create a cohort of young female COVID patients using the `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` method on the `bias` object for cohort concept prevalence exploration. You'll pass the name of the cohort as the first argument, the description of the cohort as the second argument, a yaml file that specifies cohort inclusion and exclusion criteria or a cohort selection SQL query as the third argument, and the cohort owner's name indicating who owns or creates this cohort as the fourth argument. 
After the cohort is created, you can call `get_stats()` and `get_distributions()` methods on the returned `cohort_data` object to explore cohort statistics and distributions." ] }, { @@ -157,15 +157,15 @@ "---\n", "\n", "### Exploring cohort concept prevalence\n", - "You can retrieve concept prevalence statistics for a cohort using the `get_concept_stats(concept_type='condition_occurrence', filter_count=0, vocab=None, include_hierarchy=False)` function on the `cohort_data` object. Each input argument to this function has a default value, so you can call the function without specifying all parameters. \n", + "You can retrieve concept prevalence statistics for a cohort using the `get_concept_stats(concept_type='condition_occurrence', filter_count=0, vocab=None, include_hierarchy=False)` method on the `cohort_data` object. Each input argument to this method has a default value, so you can call the method without specifying all parameters.\n", "- The `concept_type` input argument specifies the OMOP domain to analyze. It must be one of the OMOP domain names: `condition_occurrence`, `drug_exposure`, `procedure_occurrence`, `visit_occurrence`, `measurement`, or `observation`.\n", "- The `vocab` input argument specifies the OMOP vocabulary ID to filter concepts by. If set to `None`, a default vocabulary is used based on the domain: `RxNorm` for `drug_exposure`, `LOINC` for `measurement`, and `SNOMED` for all other domains.\n", "- The `filter_count` input argument filters out concepts with fewer than this number of patients in the cohort. Set it to `0` to include all without filtering.\n", "- The `include_hierarchy` input argument specifies whether to include concept hierarchical relationship. 
If set to `True`, ancestor concepts using the OMOP concept hierarchy are included when calculating prevalence.\n", - "This function helps identify the most prevalent clinical concepts in your cohort, which can reveal patterns or potential sources of selection bias in the cohort data.\n", + "This method helps identify the most prevalent clinical concepts in your cohort, which can reveal patterns or potential sources of selection bias in the cohort data.\n", "\n", "**Cohort condition occurrence concept prevalence**: \n", - "The code block below demonstrates how to use the default parameters of the `get_concept_stats()` function to retrieve concept prevalence for the `condition occurrence` domain. By default, it uses the `SNOMED` vocabulary, excludes hierarchical relationships, and applies no filtering. The function returns a dictionary where the **key** is the `concept_type` (e.g., `condition_occurrence`) and the **value** is a list of concept dictionaries. Each concept dictionary in the list contains `concept_name`, `concept_code`, `count_in_cohort`, `prevalence`, `ancestor_concept_id`, and `descendant_concept_id`. These values allow you to explore which clinical concepts are most prevalent in your cohort and suppoert deeper investigations into potential sources of selection bias.\n", + "The code block below demonstrates how to use the default parameters of the `get_concept_stats()` method to retrieve concept prevalence for the `condition occurrence` domain. By default, it uses the `SNOMED` vocabulary, excludes hierarchical relationships, and applies no filtering. The method returns a dictionary where the **key** is the `concept_type` (e.g., `condition_occurrence`) and the **value** is a list of concept dictionaries. Each concept dictionary in the list contains `concept_name`, `concept_code`, `count_in_cohort`, `prevalence`, `ancestor_concept_id`, and `descendant_concept_id`. 
These values allow you to explore which clinical concepts are most prevalent in your cohort and suppoert deeper investigations into potential sources of selection bias.\n", "\n", "**Note** that this prevalence computation may take some time, especially for large cohorts. A progress bar will appear to indicate the progress of the prevalence calculation." ] @@ -854,7 +854,7 @@ "———————————————\n", "\n", "**Cohort drug exposure concept prevalence**: \n", - "The code block below demonstrates how to use `get_concept_stats(concept_type='drug_exposure', filter_count=500, include_hierarchy=True)` function to retrieve concept prevalence for the `drug_exposure` domain. By default, this uses the `RxNorm` vocabulary. Concepts with fewer than 500 patients are excluded, and hierarchical relationships are included in the results. The function returns a dictionary where the **key** is the `concept_type` (in this case, `drug_exposure`) and the **value** is a list of concept dictionaries. Each concept dictionary in the list contains the following fields: `concept_name`, `concept_code`, `count_in_cohort`, `prevalence`, `ancestor_concept_id`, and `descendant_concept_id`. These values allow you to explore which clinical concepts are most prevalent in your cohort and suppoert deeper investigations into potential sources of selection bias.\n", + "The code block below demonstrates how to use `get_concept_stats(concept_type='drug_exposure', filter_count=500, include_hierarchy=True)` method to retrieve concept prevalence for the `drug_exposure` domain. By default, this uses the `RxNorm` vocabulary. Concepts with fewer than 500 patients are excluded, and hierarchical relationships are included in the results. The method returns a dictionary where the **key** is the `concept_type` (in this case, `drug_exposure`) and the **value** is a list of concept dictionaries. 
Each concept dictionary in the list contains the following fields: `concept_name`, `concept_code`, `count_in_cohort`, `prevalence`, `ancestor_concept_id`, and `descendant_concept_id`. These values allow you to explore which clinical concepts are most prevalent in your cohort and suppoert deeper investigations into potential sources of selection bias.\n", "\n", "**Note**: Prevalence computation may take some time, especially for large cohorts or when hierarchical relationships are included. A progress bar will appear to indicate the progress of the computation. \n", "\n", diff --git a/notebooks/BiasAnalyzerCohortsTutorial.ipynb b/notebooks/BiasAnalyzerCohortsTutorial.ipynb index 4ac4bfc..028745e 100644 --- a/notebooks/BiasAnalyzerCohortsTutorial.ipynb +++ b/notebooks/BiasAnalyzerCohortsTutorial.ipynb @@ -146,7 +146,7 @@ "---\n", "\n", "### Baseline cohort creation and exploration\n", - "**Baseline cohort creation**: To create a baseline cohort of young female patients, use the `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` function on the `bias` object. You'll pass the name of the cohort as the first argument, the description of the cohort as the second argument, a yaml file that specifies cohort inclusion and exclusion criteria or a cohort selection SQL query as the third argument, and the cohort owner's name indicating who owns or creates this cohort as the fourth argument. The function will show a progress bar to indicate cohort creation progress over three stages." + "**Baseline cohort creation**: To create a baseline cohort of young female patients, use the `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` method on the `bias` object. 
You'll pass the name of the cohort as the first argument, the description of the cohort as the second argument, a yaml file that specifies cohort inclusion and exclusion criteria or a cohort selection SQL query as the third argument, and the cohort owner's name indicating who owns or creates this cohort as the fourth argument. The method will show a progress bar to indicate cohort creation progress over three stages." ] }, { @@ -304,7 +304,7 @@ "---\n", "\n", "### Study cohort creation and exploration\n", - "**Study cohort creation**: To create a study cohort of young female COVID patients, use the `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` function on the `bias` object. You'll pass the name of the cohort as the first argument, the description of the cohort as the second argument, a yaml file that specifies cohort inclusion and exclusion criteria or a cohort selection SQL query as the third argument, and the cohort owner's name indicating who owns or creates this cohort as the fourth argument. The function will show a progress bar to indicate cohort creation progress over three stages.\n" + "**Study cohort creation**: To create a study cohort of young female COVID patients, use the `create_cohort(cohort_name, cohort_description, query_or_yaml_file, created_by)` method on the `bias` object. You'll pass the name of the cohort as the first argument, the description of the cohort as the second argument, a yaml file that specifies cohort inclusion and exclusion criteria or a cohort selection SQL query as the third argument, and the cohort owner's name indicating who owns or creates this cohort as the fourth argument. 
The method will show a progress bar to indicate cohort creation progress over three stages.\n" ] }, { diff --git a/notebooks/BiasAnalyzerConceptBrowsingTutorial.ipynb b/notebooks/BiasAnalyzerConceptBrowsingTutorial.ipynb new file mode 100644 index 0000000..753555c --- /dev/null +++ b/notebooks/BiasAnalyzerConceptBrowsingTutorial.ipynb @@ -0,0 +1,519 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "d719bac9-2b20-4792-8a1f-3272f3e42a8b", + "metadata": {}, + "source": [ + "# Using BiasAnalyzer for Cohort Concept Prevalence Exploration\n", + "\n", + "This tutorial demonstrates how to use the `BiasAnalyzer` package to browse and explore OMOP concepts. In the OMOP (Observational Medical Outcomes Partnership) CDM (Common Data Model), a **concept** refers to a coded term from a standardized medical vocabulary, uniquely identified by a **concept ID**. All clinical events in OMOP, such as conditions, drug exposures, procedures, measurements, and events, are represented as concepts.\n", + "\n", + "---\n", + "\n", + "### Overview\n", + "\n", + "**Objective**: \n", + "Learn how to browse and explore OMOP concepts using `BiasAnalyzer`.\n", + "\n", + "**Before You Begin**: \n", + "The `BiasAnalyzer` package is currently in active development and has not yet been officially released on PyPI.\n", + "You can install it in one of the two ways:\n", + "\n", + "- **Install from GitHub (recommended during development)**:\n", + "```bash\n", + "pip install git+https://github.com/vaclab/BiasAnalyzer.git\n", + "```\n", + "- **Install from PyPI (once the pacakge is officially released)**:\n", + "```bash\n", + "pip install biasanalyzer\n", + "```\n", + "\n", + "For full setup and usage instructions, refer to the [README](https://github.com/VACLab/BiasAnalyzer/blob/main/README.md).\n", + "\n", + "---\n", + "\n", + "\n", + "### Preparation for OMOP concept exploration\n", + "Import the `BIAS` class from the `api` module of the `BiasAnalyzer` package, create an 
object `bias` of the `BIAS` class, specify OMOP CDM database configurations on the `bias` object, and set OMOP CDM database to enable connection to the database. Refer to the [Cohort Exploration Tutorial](./BiasAnalyzerCohortsTutorial.ipynb) for more details." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6dc76f46", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "configuration specified in ../config.yaml loaded successfully\n", + "Connected to the OMOP CDM database (read-only).\n", + "Cohort Definition table created.\n", + "Cohort table created.\n" + ] + } + ], + "source": [ + "from biasanalyzer.api import BIAS\n", + "\n", + "bias = BIAS()\n", + "\n", + "bias.set_config('../config.yaml')\n", + "\n", + "bias.set_root_omop()" + ] + }, + { + "cell_type": "markdown", + "id": "8731e481", + "metadata": {}, + "source": [ + "**Now that you have connected to your OMOP CDM database, you are ready to browse and explore OMOP concepts.** \n", + "\n", + "---\n", + "\n", + "### Explore OMOP domains and vocabularies\n", + "Since each OMOP concept is linked to a domain and vocabulary, it is helpful to first understand which domains and vocabularies are available before exploring concepts. You can retrieve available OMOP domains and their associated vocabularies using the `get_domains_and_vocabularies()` method on the `bias` object. This function returns a list of dictionaries, where each dictionary contains a `domain_id` and a `vocabulary_id`. The list is sorted alphabetically by `domain_id` and then by `vocabulary_id`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "51969248-f348-4f0d-914f-bb908183e3f1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " domain_id vocabulary_id\n", + "0 Condition HCPCS\n", + "1 Condition ICD10\n", + "2 Condition ICD10CM\n", + "3 Condition ICD9CM\n", + "4 Condition ICDO3\n", + "5 Condition OMOP Extension\n", + "6 Condition SNOMED\n", + "7 Condition/Device ICD10CM\n", + "8 Condition/Meas ICD10CM\n", + "9 Condition Status Condition Status\n", + "10 Cost Cost\n", + "11 Currency Currency\n", + "12 Device HCPCS\n", + "13 Device ICD10PCS\n", + "14 Device NDC\n", + "15 Device SNOMED\n", + "16 Device SPL\n", + "17 Drug ATC\n", + "18 Drug HCPCS\n", + "19 Drug ICD10PCS\n", + "20 Drug NDC\n", + "21 Drug RxNorm\n", + "22 Drug RxNorm Extension\n", + "23 Drug SNOMED\n", + "24 Drug SPL\n", + "25 Episode Episode\n", + "26 Ethnicity Ethnicity\n", + "27 Gender Gender\n", + "28 Gender SNOMED\n", + "29 Geography OSM\n", + "30 Geography US Census\n", + "31 Measurement HCPCS\n", + "32 Measurement ICD10\n", + "33 Measurement ICD10CM\n", + "34 Measurement ICD9CM\n", + "35 Measurement LOINC\n", + "36 Measurement OMOP Extension\n", + "37 Measurement SNOMED\n", + "38 Meas Value LOINC\n", + "39 Meas Value SNOMED\n", + "40 Meas Value Operator SNOMED\n", + "41 Metadata CDM\n", + "42 Metadata Concept Class\n", + "43 Metadata Domain\n", + "44 Metadata Metadata\n", + "45 Metadata None\n", + "46 Metadata Relationship\n", + "47 Metadata SNOMED\n", + "48 Metadata Vocabulary\n", + "49 Observation HCPCS\n", + "50 Observation ICD10\n", + "51 Observation ICD10CM\n", + "52 Observation ICD9CM\n", + "53 Observation ICDO3\n", + "54 Observation LOINC\n", + "55 Observation NUCC\n", + "56 Observation OMOP Extension\n", + "57 Observation SNOMED\n", + "58 Observation SPL\n", + "59 Observation UB04 Pri Typ of Adm\n", + "60 Observation UB04 Typ bill\n", + "61 Payer PHDSC\n", + "62 Plan Plan\n", + "63 Plan Stop Reason Plan Stop 
Reason\n", + "64 Procedure HCPCS\n", + "65 Procedure ICD10\n", + "66 Procedure ICD10CM\n", + "67 Procedure ICD10PCS\n", + "68 Procedure ICD9CM\n", + "69 Procedure ICD9Proc\n", + "70 Procedure SNOMED\n", + "71 Provider ABMS\n", + "72 Provider Medicare Specialty\n", + "73 Provider NUCC\n", + "74 Provider SNOMED\n", + "75 Race Race\n", + "76 Race SNOMED\n", + "77 Relationship SNOMED\n", + "78 Revenue Code Korean Revenue Code\n", + "79 Revenue Code Revenue Code\n", + "80 Route SNOMED\n", + "81 Spec Anatomic Site SNOMED\n", + "82 Spec Disease Status SNOMED\n", + "83 Specimen SNOMED\n", + "84 Sponsor Sponsor\n", + "85 Type Concept Condition Type\n", + "86 Type Concept Cost Type\n", + "87 Type Concept Death Type\n", + "88 Type Concept Device Type\n", + "89 Type Concept Drug Type\n", + "90 Type Concept Meas Type\n", + "91 Type Concept Note Type\n", + "92 Type Concept Observation Type\n", + "93 Type Concept Obs Period Type\n", + "94 Type Concept Procedure Type\n", + "95 Type Concept SNOMED\n", + "96 Type Concept Type Concept\n", + "97 Type Concept Visit Type\n", + "98 Unit SNOMED\n", + "99 Unit UCUM\n", + "100 Visit CMS Place of Service\n", + "101 Visit Medicare Specialty\n", + "102 Visit NUCC\n", + "103 Visit SNOMED\n", + "104 Visit UB04 Point of Origin\n", + "105 Visit UB04 Pt dis status\n", + "106 Visit UB04 Typ bill\n", + "107 Visit Visit\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "pd.set_option('display.max_rows', None)\n", + "\n", + "domains_and_vocabs = bias.get_domains_and_vocabularies()\n", + "print(pd.DataFrame(domains_and_vocabs))" + ] + }, + { + "cell_type": "markdown", + "id": "22edda35", + "metadata": {}, + "source": [ + "---\n", + "\n", + "### Exploring OMOP concepts\n", + "\n", + "You can explore OMOP concepts using the `get_concepts(search_term, domain=None, vocabulary=None)` method on the `bias` object. To narrow down your search, you should provide a search term along with a domain, a vocabulary, or both. 
Since the OMOP vocabulary contains a vast number of concepts, filtering by domain and/or vocabulary helps constrain the search space and keeps the number of results manageable. " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b1415805-b065-40b8-b2cd-6db6f5f9ca41", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " concept_id concept_name \\\n", + "0 703440 COVID-19 confirmed using clinical diagnostic c... \n", + "1 703441 COVID-19 confirmed by laboratory test \n", + "2 703445 Low risk category for developing complication ... \n", + "3 703446 Moderate risk category for developing complica... \n", + "4 703447 High risk category for developing complication... \n", + "5 37310269 COVID-19 \n", + "6 37311061 COVID-19 \n", + "\n", + " valid_start_date valid_end_date domain_id vocabulary_id \n", + "0 2020-04-01 2099-12-31 Condition SNOMED \n", + "1 2020-04-01 2099-12-31 Condition SNOMED \n", + "2 2020-04-01 2099-12-31 Condition SNOMED \n", + "3 2020-04-01 2099-12-31 Condition SNOMED \n", + "4 2020-04-01 2099-12-31 Condition SNOMED \n", + "5 2020-02-04 2020-10-28 Condition SNOMED \n", + "6 2020-01-31 2099-12-31 Condition SNOMED \n" + ] + } + ], + "source": [ + "concepts = bias.get_concepts(\"COVID-19\", \"Condition\", \"SNOMED\")\n", + "print(pd.DataFrame(concepts))" + ] + }, + { + "cell_type": "markdown", + "id": "10305fac-8ae3-49ca-8542-47d0a0636f97", + "metadata": {}, + "source": [ + "---\n", + "\n", + "### Exploring concept hierarchy\n", + "\n", + "**Retrieve concept hierarchy**: You can retrieve the concept hierarchy for a specific OMOP concept using the `get_concept_hierarchy(concept_id)` method on the `bias` object. The method returns two dictionaries: the **ancestor hierarchy** representing the concept's lineage upward, and the **descendant hierarchy** representing the concept's children and their branches. 
Each dictionary has a nested structure with two main keys: \n", + "- `details`: a dictionary containing metadata about the current concept node, including `concept_id`, `concept_name`, `vocabulary_id`, and `concept_code`\n", + "- `parents` (for the ancestor hierarchy) or `children` (for the descendant hierarchy): a list of parent or child concept nodes, respectively\n", + "\n", + "A progress bar is displayed during execution to indicate the progress of computing the concept's hierarchical relationships." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d539b8df-2bf4-42ec-abc5-36fa0238cea1", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2452988eafb64ccd8caf3eec8004c453", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Concept Hierarchy: 0%| | 0/3 [00:00=1.1.1 (from BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for duckdb<2.0.0,>=1.1.1 from https://files.pythonhosted.org/packages/bf/56/f627b6fcd4aa34015a15449d852ccb78d7cc6eda654aa20c1d378e99fa76/duckdb-1.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached duckdb-1.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (762 bytes)\n", - "Collecting duckdb-engine<0.14.0,>=0.13.2 (from BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for duckdb-engine<0.14.0,>=0.13.2 from https://files.pythonhosted.org/packages/5f/81/571c0373978d4e987ec2437bfb16adce6cf3b4a05761a76f1c06e859b668/duckdb_engine-0.13.5-py3-none-any.whl.metadata\n", - " Using cached duckdb_engine-0.13.5-py3-none-any.whl.metadata (8.0 kB)\n", - "Collecting ipytree<0.3.0,>=0.2.2 (from BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for ipytree<0.3.0,>=0.2.2 from https://files.pythonhosted.org/packages/e4/03/35cf1742598d784e96153175233318a2332f71863e55ad1007c9264c1a7a/ipytree-0.2.2-py2.py3-none-any.whl.metadata\n", - " Using cached 
ipytree-0.2.2-py2.py3-none-any.whl.metadata (849 bytes)\n", - "Collecting ipywidgets<9.0.0,>=8.1.5 (from BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for ipywidgets<9.0.0,>=8.1.5 from https://files.pythonhosted.org/packages/22/2d/9c0b76f2f9cc0ebede1b9371b6f317243028ed60b90705863d493bae622e/ipywidgets-8.1.5-py3-none-any.whl.metadata\n", - " Using cached ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)\n", - "Collecting numpy==1.24.4 (from BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for numpy==1.24.4 from https://files.pythonhosted.org/packages/22/97/dfb1a31bb46686f09e68ea6ac5c63fdee0d22d7b23b8f3f7ea07712869ef/numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)\n", - "Collecting pandas==2.0.3 (from BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for pandas==2.0.3 from https://files.pythonhosted.org/packages/d0/28/88b81881c056376254618fad622a5e94b5126db8c61157ea1910cd1c040a/pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)\n", - "Collecting psycopg2<3.0.0,>=2.9.1 (from BiasAnalyzer==0.1.0)\n", - " Using cached psycopg2-2.9.10-cp311-cp311-linux_x86_64.whl\n", - "Collecting pydantic<3.0.0,>=2.9.2 (from BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for pydantic<3.0.0,>=2.9.2 from https://files.pythonhosted.org/packages/df/e4/ba44652d562cbf0bf320e0f3810206149c8a4e99cdbf66da82e97ab53a15/pydantic-2.9.2-py3-none-any.whl.metadata\n", - " Using cached pydantic-2.9.2-py3-none-any.whl.metadata (149 kB)\n", - "Collecting pyyaml<7.0.0,>=6.0.2 (from BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for pyyaml<7.0.0,>=6.0.2 from 
https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)\n", - "Collecting scipy==1.10.1 (from BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for scipy==1.10.1 from https://files.pythonhosted.org/packages/21/cd/fe2d4af234b80dc08c911ce63fdaee5badcdde3e9bcd9a68884580652ef0/scipy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached scipy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (58 kB)\n", - "Collecting sqlalchemy<3.0.0,>=2.0.35 (from BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for sqlalchemy<3.0.0,>=2.0.35 from https://files.pythonhosted.org/packages/b4/5f/95e0ed74093ac3c0db6acfa944d4d8ac6284ef5e1136b878a327ea1f975a/SQLAlchemy-2.0.36-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached SQLAlchemy-2.0.36-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)\n", - "Collecting python-dateutil>=2.8.2 (from pandas==2.0.3->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for python-dateutil>=2.8.2 from https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata\n", - " Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)\n", - "Collecting pytz>=2020.1 (from pandas==2.0.3->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for pytz>=2020.1 from https://files.pythonhosted.org/packages/11/c3/005fcca25ce078d2cc29fd559379817424e94885510568bc1bc53d7d5846/pytz-2024.2-py2.py3-none-any.whl.metadata\n", - " Using cached pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)\n", - "Collecting tzdata>=2022.1 (from 
pandas==2.0.3->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for tzdata>=2022.1 from https://files.pythonhosted.org/packages/a6/ab/7e5f53c3b9d14972843a647d8d7a853969a58aecc7559cb3267302c94774/tzdata-2024.2-py2.py3-none-any.whl.metadata\n", - " Using cached tzdata-2024.2-py2.py3-none-any.whl.metadata (1.4 kB)\n", - "Collecting packaging>=21 (from duckdb-engine<0.14.0,>=0.13.2->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for packaging>=21 from https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl.metadata\n", - " Using cached packaging-24.1-py3-none-any.whl.metadata (3.2 kB)\n", - "Collecting comm>=0.1.3 (from ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for comm>=0.1.3 from https://files.pythonhosted.org/packages/e6/75/49e5bfe642f71f272236b5b2d2691cf915a7283cc0ceda56357b61daa538/comm-0.2.2-py3-none-any.whl.metadata\n", - " Using cached comm-0.2.2-py3-none-any.whl.metadata (3.7 kB)\n", - "Collecting ipython>=6.1.0 (from ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for ipython>=6.1.0 from https://files.pythonhosted.org/packages/c5/a5/c15ed187f1b3fac445bb42a2dedd8dec1eee1718b35129242049a13a962f/ipython-8.29.0-py3-none-any.whl.metadata\n", - " Using cached ipython-8.29.0-py3-none-any.whl.metadata (5.0 kB)\n", - "Collecting traitlets>=4.3.1 (from ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for traitlets>=4.3.1 from https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl.metadata\n", - " Using cached traitlets-5.14.3-py3-none-any.whl.metadata (10 kB)\n", - "Collecting widgetsnbextension~=4.0.12 (from ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for widgetsnbextension~=4.0.12 from 
https://files.pythonhosted.org/packages/21/02/88b65cc394961a60c43c70517066b6b679738caf78506a5da7b88ffcb643/widgetsnbextension-4.0.13-py3-none-any.whl.metadata\n", - " Using cached widgetsnbextension-4.0.13-py3-none-any.whl.metadata (1.6 kB)\n", - "Collecting jupyterlab-widgets~=3.0.12 (from ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for jupyterlab-widgets~=3.0.12 from https://files.pythonhosted.org/packages/a9/93/858e87edc634d628e5d752ba944c2833133a28fa87bb093e6832ced36a3e/jupyterlab_widgets-3.0.13-py3-none-any.whl.metadata\n", - " Using cached jupyterlab_widgets-3.0.13-py3-none-any.whl.metadata (4.1 kB)\n", - "Collecting annotated-types>=0.6.0 (from pydantic<3.0.0,>=2.9.2->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for annotated-types>=0.6.0 from https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl.metadata\n", - " Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n", - "Collecting pydantic-core==2.23.4 (from pydantic<3.0.0,>=2.9.2->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for pydantic-core==2.23.4 from https://files.pythonhosted.org/packages/44/31/a3899b5ce02c4316865e390107f145089876dff7e1dfc770a231d836aed8/pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", - "Collecting typing-extensions>=4.6.1 (from pydantic<3.0.0,>=2.9.2->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for typing-extensions>=4.6.1 from https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl.metadata\n", - " Using cached typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)\n", - "Collecting greenlet!=0.4.17 (from 
sqlalchemy<3.0.0,>=2.0.35->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for greenlet!=0.4.17 from https://files.pythonhosted.org/packages/f7/4b/1c9695aa24f808e156c8f4813f685d975ca73c000c2a5056c514c64980f6/greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata\n", - " Using cached greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (3.8 kB)\n", - "Collecting decorator (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for decorator from https://files.pythonhosted.org/packages/d5/50/83c593b07763e1161326b3b8c6686f0f4b0f24d5526546bee538c89837d6/decorator-5.1.1-py3-none-any.whl.metadata\n", - " Using cached decorator-5.1.1-py3-none-any.whl.metadata (4.0 kB)\n", - "Collecting jedi>=0.16 (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for jedi>=0.16 from https://files.pythonhosted.org/packages/20/9f/bc63f0f0737ad7a60800bfd472a4836661adae21f9c2535f3957b1e54ceb/jedi-0.19.1-py2.py3-none-any.whl.metadata\n", - " Using cached jedi-0.19.1-py2.py3-none-any.whl.metadata (22 kB)\n", - "Collecting matplotlib-inline (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for matplotlib-inline from https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl.metadata\n", - " Using cached matplotlib_inline-0.1.7-py3-none-any.whl.metadata (3.9 kB)\n", - "Collecting prompt-toolkit<3.1.0,>=3.0.41 (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for prompt-toolkit<3.1.0,>=3.0.41 from https://files.pythonhosted.org/packages/a9/6a/fd08d94654f7e67c52ca30523a178b3f8ccc4237fce4be90d39c938a831a/prompt_toolkit-3.0.48-py3-none-any.whl.metadata\n", - " Using cached 
prompt_toolkit-3.0.48-py3-none-any.whl.metadata (6.4 kB)\n", - "Collecting pygments>=2.4.0 (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for pygments>=2.4.0 from https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl.metadata\n", - " Using cached pygments-2.18.0-py3-none-any.whl.metadata (2.5 kB)\n", - "Collecting stack-data (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for stack-data from https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl.metadata\n", - " Using cached stack_data-0.6.3-py3-none-any.whl.metadata (18 kB)\n", - "Collecting pexpect>4.3 (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for pexpect>4.3 from https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl.metadata\n", - " Using cached pexpect-4.9.0-py2.py3-none-any.whl.metadata (2.5 kB)\n", - "Collecting six>=1.5 (from python-dateutil>=2.8.2->pandas==2.0.3->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for six>=1.5 from https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl.metadata\n", - " Using cached six-1.16.0-py2.py3-none-any.whl.metadata (1.8 kB)\n", - "Collecting parso<0.9.0,>=0.8.3 (from jedi>=0.16->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for parso<0.9.0,>=0.8.3 from https://files.pythonhosted.org/packages/c6/ac/dac4a63f978e4dcb3c6d3a78c4d8e0192a113d288502a1216950c41b1027/parso-0.8.4-py2.py3-none-any.whl.metadata\n", - " Using cached parso-0.8.4-py2.py3-none-any.whl.metadata (7.7 kB)\n", 
- "Collecting ptyprocess>=0.5 (from pexpect>4.3->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for ptyprocess>=0.5 from https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl.metadata\n", - " Using cached ptyprocess-0.7.0-py2.py3-none-any.whl.metadata (1.3 kB)\n", - "Collecting wcwidth (from prompt-toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for wcwidth from https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl.metadata\n", - " Using cached wcwidth-0.2.13-py2.py3-none-any.whl.metadata (14 kB)\n", - "Collecting executing>=1.2.0 (from stack-data->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for executing>=1.2.0 from https://files.pythonhosted.org/packages/b5/fd/afcd0496feca3276f509df3dbd5dae726fcc756f1a08d9e25abe1733f962/executing-2.1.0-py2.py3-none-any.whl.metadata\n", - " Using cached executing-2.1.0-py2.py3-none-any.whl.metadata (8.9 kB)\n", - "Collecting asttokens>=2.1.0 (from stack-data->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for asttokens>=2.1.0 from https://files.pythonhosted.org/packages/45/86/4736ac618d82a20d87d2f92ae19441ebc7ac9e7a581d7e58bbe79233b24a/asttokens-2.4.1-py2.py3-none-any.whl.metadata\n", - " Using cached asttokens-2.4.1-py2.py3-none-any.whl.metadata (5.2 kB)\n", - "Collecting pure-eval (from stack-data->ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->BiasAnalyzer==0.1.0)\n", - " Obtaining dependency information for pure-eval from https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl.metadata\n", - " Using cached 
pure_eval-0.2.3-py3-none-any.whl.metadata (6.3 kB)\n", - "Using cached numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n", - "Using cached pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.2 MB)\n", - "Using cached scipy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.1 MB)\n", - "Using cached duckdb-1.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (20.1 MB)\n", - "Using cached duckdb_engine-0.13.5-py3-none-any.whl (48 kB)\n", - "Using cached ipytree-0.2.2-py2.py3-none-any.whl (1.3 MB)\n", - "Using cached ipywidgets-8.1.5-py3-none-any.whl (139 kB)\n", - "Using cached pydantic-2.9.2-py3-none-any.whl (434 kB)\n", - "Using cached pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)\n", - "Using cached PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (762 kB)\n", - "Using cached SQLAlchemy-2.0.36-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)\n", - "Using cached annotated_types-0.7.0-py3-none-any.whl (13 kB)\n", - "Using cached comm-0.2.2-py3-none-any.whl (7.2 kB)\n", - "Using cached greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (602 kB)\n", - "Using cached ipython-8.29.0-py3-none-any.whl (819 kB)\n", - "Using cached jupyterlab_widgets-3.0.13-py3-none-any.whl (214 kB)\n", - "Using cached packaging-24.1-py3-none-any.whl (53 kB)\n", - "Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl (229 kB)\n", - "Using cached pytz-2024.2-py2.py3-none-any.whl (508 kB)\n", - "Using cached traitlets-5.14.3-py3-none-any.whl (85 kB)\n", - "Using cached typing_extensions-4.12.2-py3-none-any.whl (37 kB)\n", - "Using cached tzdata-2024.2-py2.py3-none-any.whl (346 kB)\n", - "Using cached widgetsnbextension-4.0.13-py3-none-any.whl (2.3 MB)\n", - "Using cached jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)\n", - "Using cached pexpect-4.9.0-py2.py3-none-any.whl (63 kB)\n", - "Using 
cached prompt_toolkit-3.0.48-py3-none-any.whl (386 kB)\n", - "Using cached pygments-2.18.0-py3-none-any.whl (1.2 MB)\n", - "Using cached six-1.16.0-py2.py3-none-any.whl (11 kB)\n", - "Using cached decorator-5.1.1-py3-none-any.whl (9.1 kB)\n", - "Using cached matplotlib_inline-0.1.7-py3-none-any.whl (9.9 kB)\n", - "Using cached stack_data-0.6.3-py3-none-any.whl (24 kB)\n", - "Using cached asttokens-2.4.1-py2.py3-none-any.whl (27 kB)\n", - "Using cached executing-2.1.0-py2.py3-none-any.whl (25 kB)\n", - "Using cached parso-0.8.4-py2.py3-none-any.whl (103 kB)\n", - "Using cached ptyprocess-0.7.0-py2.py3-none-any.whl (13 kB)\n", - "Using cached pure_eval-0.2.3-py3-none-any.whl (11 kB)\n", - "Using cached wcwidth-0.2.13-py2.py3-none-any.whl (34 kB)\n", - "Building wheels for collected packages: BiasAnalyzer\n", - " Building wheel for BiasAnalyzer (pyproject.toml) ... \u001B[?25ldone\n", - "\u001B[?25h Created wheel for BiasAnalyzer: filename=biasanalyzer-0.1.0-py3-none-any.whl size=12482 sha256=254ea1fa17b7c1706a4d4e4ed711dd7128601c09a1c3c36c9ec903ed842441af\n", - " Stored in directory: /home/hyi/temp/pip-ephem-wheel-cache-wgmpfyq9/wheels/25/75/4e/079d96d69cc58148ce31d3d44f858e4db5f689604112dcb7c3\n", - "Successfully built BiasAnalyzer\n", - "Installing collected packages: wcwidth, pytz, pure-eval, ptyprocess, widgetsnbextension, tzdata, typing-extensions, traitlets, six, pyyaml, pygments, psycopg2, prompt-toolkit, pexpect, parso, packaging, numpy, jupyterlab-widgets, greenlet, executing, duckdb, decorator, annotated-types, sqlalchemy, scipy, python-dateutil, pydantic-core, matplotlib-inline, jedi, comm, asttokens, stack-data, pydantic, pandas, duckdb-engine, ipython, ipywidgets, ipytree, BiasAnalyzer\n", - "Successfully installed BiasAnalyzer-0.1.0 annotated-types-0.7.0 asttokens-2.4.1 comm-0.2.2 decorator-5.1.1 duckdb-1.1.3 duckdb-engine-0.13.5 executing-2.1.0 greenlet-3.1.1 ipython-8.29.0 ipytree-0.2.2 ipywidgets-8.1.5 jedi-0.19.1 jupyterlab-widgets-3.0.13 
matplotlib-inline-0.1.7 numpy-1.24.4 packaging-24.1 pandas-2.0.3 parso-0.8.4 pexpect-4.9.0 prompt-toolkit-3.0.48 psycopg2-2.9.10 ptyprocess-0.7.0 pure-eval-0.2.3 pydantic-2.9.2 pydantic-core-2.23.4 pygments-2.18.0 python-dateutil-2.9.0.post0 pytz-2024.2 pyyaml-6.0.2 scipy-1.10.1 six-1.16.0 sqlalchemy-2.0.36 stack-data-0.6.3 traitlets-5.14.3 typing-extensions-4.12.2 tzdata-2024.2 wcwidth-0.2.13 widgetsnbextension-4.0.13\n" - ] - } - ], - "source": [ - "# Have to specify TMPDIR and target in pip install command to work around the kernel crash issue due to \n", - "# the small ephemeral local storage quota allocated to /tmp which is used by default by pip install\n", - "!TMPDIR=/home/hyi/temp pip install git+https://github.com/vaclab/BiasAnalyzer.git --target /home/hyi/temp --upgrade" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "9ce3b87c-0754-4eae-9f85-8210104e2b0b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# append the target folder where HealthDataBias module was installed to PYTHONPATH\n", - "import sys\n", - "sys.path.append('/home/hyi/temp')\n", - "import pandas as pd\n", - "pd.set_option('display.max_rows', None)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "729e8803-74f8-4180-aa8b-0e44567f8aeb", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from biasanalyzer.api import BIAS" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "548223ed-8948-461e-b9d6-40a0ec7fc89f", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "no configuration file specified. 
Call set_config(config_file_path) next to specify configurations\n", - "Cohort Definition table created.\n", - "Cohort table created.\n" - ] - } - ], - "source": [ - "# create an object of BIAS class\n", - "bias = BIAS()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "7d440d9f-c7fa-4ef1-ad66-31274ebef4ea", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "configuration specified in /home/hyi/bias/config/config.yaml loaded successfully\n" - ] - } - ], - "source": [ - "bias.set_config('/home/hyi/bias/config/config.yaml')" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "283156f8-63da-42a5-bbd7-ee2b7719652c", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Connected to the OMOP CDM database (read-only).\n" - ] - } - ], - "source": [ - "# the configuration file includes root_omop_cdm_database configuration info with an example shown \n", - "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/config/test_config.yaml\n", - "bias.set_root_omop()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "9a52ab5f-57a8-4942-8a03-ec86651e919e", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " domain_id vocabulary_id\n", - "0 Condition HCPCS\n", - "1 Condition ICD10\n", - "2 Condition ICD10CM\n", - "3 Condition ICD9CM\n", - "4 Condition ICDO3\n", - "5 Condition OMOP Extension\n", - "6 Condition SNOMED\n", - "7 Condition/Device ICD10CM\n", - "8 Condition/Meas ICD10CM\n", - "9 Condition Status Condition Status\n", - "10 Cost Cost\n", - "11 Currency Currency\n", - "12 Device HCPCS\n", - "13 Device ICD10PCS\n", - "14 Device NDC\n", - "15 Device SNOMED\n", - "16 Device SPL\n", - "17 Drug ATC\n", - "18 Drug HCPCS\n", - "19 Drug ICD10PCS\n", - "20 Drug NDC\n", - "21 Drug RxNorm\n", - "22 Drug RxNorm Extension\n", - "23 
Drug SNOMED\n", - "24 Drug SPL\n", - "25 Episode Episode\n", - "26 Ethnicity Ethnicity\n", - "27 Gender Gender\n", - "28 Gender SNOMED\n", - "29 Geography OSM\n", - "30 Geography US Census\n", - "31 Measurement HCPCS\n", - "32 Measurement ICD10\n", - "33 Measurement ICD10CM\n", - "34 Measurement ICD9CM\n", - "35 Measurement LOINC\n", - "36 Measurement OMOP Extension\n", - "37 Measurement SNOMED\n", - "38 Meas Value LOINC\n", - "39 Meas Value SNOMED\n", - "40 Meas Value Operator SNOMED\n", - "41 Metadata CDM\n", - "42 Metadata Concept Class\n", - "43 Metadata Domain\n", - "44 Metadata Metadata\n", - "45 Metadata None\n", - "46 Metadata Relationship\n", - "47 Metadata SNOMED\n", - "48 Metadata Vocabulary\n", - "49 Observation HCPCS\n", - "50 Observation ICD10\n", - "51 Observation ICD10CM\n", - "52 Observation ICD9CM\n", - "53 Observation ICDO3\n", - "54 Observation LOINC\n", - "55 Observation NUCC\n", - "56 Observation OMOP Extension\n", - "57 Observation SNOMED\n", - "58 Observation SPL\n", - "59 Observation UB04 Pri Typ of Adm\n", - "60 Observation UB04 Typ bill\n", - "61 Payer PHDSC\n", - "62 Plan Plan\n", - "63 Plan Stop Reason Plan Stop Reason\n", - "64 Procedure HCPCS\n", - "65 Procedure ICD10\n", - "66 Procedure ICD10CM\n", - "67 Procedure ICD10PCS\n", - "68 Procedure ICD9CM\n", - "69 Procedure ICD9Proc\n", - "70 Procedure SNOMED\n", - "71 Provider ABMS\n", - "72 Provider Medicare Specialty\n", - "73 Provider NUCC\n", - "74 Provider SNOMED\n", - "75 Race Race\n", - "76 Race SNOMED\n", - "77 Relationship SNOMED\n", - "78 Revenue Code Korean Revenue Code\n", - "79 Revenue Code Revenue Code\n", - "80 Route SNOMED\n", - "81 Spec Anatomic Site SNOMED\n", - "82 Spec Disease Status SNOMED\n", - "83 Specimen SNOMED\n", - "84 Sponsor Sponsor\n", - "85 Type Concept Condition Type\n", - "86 Type Concept Cost Type\n", - "87 Type Concept Death Type\n", - "88 Type Concept Device Type\n", - "89 Type Concept Drug Type\n", - "90 Type Concept Meas Type\n", - "91 Type Concept 
Note Type\n", - "92 Type Concept Observation Type\n", - "93 Type Concept Obs Period Type\n", - "94 Type Concept Procedure Type\n", - "95 Type Concept SNOMED\n", - "96 Type Concept Type Concept\n", - "97 Type Concept Visit Type\n", - "98 Unit SNOMED\n", - "99 Unit UCUM\n", - "100 Visit CMS Place of Service\n", - "101 Visit Medicare Specialty\n", - "102 Visit NUCC\n", - "103 Visit SNOMED\n", - "104 Visit UB04 Point of Origin\n", - "105 Visit UB04 Pt dis status\n", - "106 Visit UB04 Typ bill\n", - "107 Visit Visit\n" - ] - } - ], - "source": [ - "domains = bias.get_domains_and_vocabularies()\n", - "print(pd.DataFrame(domains))" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "b1415805-b065-40b8-b2cd-6db6f5f9ca41", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " concept_id concept_name \\\n", - "0 703440 COVID-19 confirmed using clinical diagnostic c... \n", - "1 37311061 COVID-19 \n", - "2 37310269 COVID-19 \n", - "3 703447 High risk category for developing complication... \n", - "4 703446 Moderate risk category for developing complica... \n", - "5 703445 Low risk category for developing complication ... 
\n", - "6 703441 COVID-19 confirmed by laboratory test \n", - "\n", - " valid_start_date valid_end_date \n", - "0 2020-04-01 2099-12-31 \n", - "1 2020-01-31 2099-12-31 \n", - "2 2020-02-04 2020-10-28 \n", - "3 2020-04-01 2099-12-31 \n", - "4 2020-04-01 2099-12-31 \n", - "5 2020-04-01 2099-12-31 \n", - "6 2020-04-01 2099-12-31 \n" - ] - } - ], - "source": [ - "concepts = bias.get_concepts(\"COVID-19\", \"Condition\", \"SNOMED\")\n", - "print(pd.DataFrame(concepts))" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "d54e39da-6f78-4dc1-91ae-a8c26852582a", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# get parent and children concept hierarchical tree for COVID-19\n", - "parent_concept_tree, children_concept_tree = bias.get_concept_hierarchy(37311061)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "00f036eb", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "parent concept hierarchy for COVID-19 in text format:\n", - "🔼 COVID-19 (ID: 37311061, Code: 840539006)\n", - " 🔼 Clinical finding (ID: 441840, Code: 404684003)\n", - " 🔼 Viral disease (ID: 440029, Code: 34014006)\n", - " 🔼 Disease (ID: 4274025, Code: 64572001)\n", - " 🔼 Coronavirus infection (ID: 439676, Code: 186747009)\n", - " 🔼 Disease due to Coronaviridae (ID: 4100065, Code: 27619001)\n", - " 🔼 Disorder due to infection (ID: 432250, Code: 40733004)\n", - "\n", - "children concept hierarchy for COVID-19 in text format:\n", - "🔽 COVID-19 (ID: 37311061, Code: 840539006)\n", - " 🔽 Lymphocytopenia due to Severe acute respiratory syndrome coronavirus 2 (ID: 3661631, Code: 866151004)\n", - " 🔽 Otitis media due to disease caused by Severe acute respiratory syndrome coronavirus 2 (ID: 37310254, Code: 1240521000000100)\n", - " 🔽 Respiratory infection caused by COVID-19 (ID: 756039, Code: OMOP4873907)\n", - " 🔽 Acute bronchitis caused by SARS-CoV-2 (ID: 3661405, Code: 138389411000119105)\n", - 
" 🔽 Pneumonia caused by SARS-CoV-2 (ID: 3661408, Code: 882784691000119100)\n", - " 🔽 Lower respiratory infection caused by SARS-CoV-2 (ID: 3663281, Code: 880529761000119102)\n", - " 🔽 Pneumonia caused by SARS-CoV-2 (ID: 3661408, Code: 882784691000119100)\n", - " 🔽 Acute bronchitis caused by SARS-CoV-2 (ID: 3661405, Code: 138389411000119105)\n", - " 🔽 Bronchitis caused by COVID-19 (ID: 756031, Code: OMOP4873909)\n", - " 🔽 Acute bronchitis caused by SARS-CoV-2 (ID: 3661405, Code: 138389411000119105)\n", - " 🔽 Bronchitis caused by COVID-19 (ID: 756031, Code: OMOP4873909)\n", - " 🔽 Acute bronchitis caused by SARS-CoV-2 (ID: 3661405, Code: 138389411000119105)\n", - " 🔽 Infection of upper respiratory tract caused by Severe acute respiratory syndrome coronavirus 2 (ID: 37310286, Code: 1240541000000107)\n", - " 🔽 Encephalopathy due to disease caused by Severe acute respiratory syndrome coronavirus 2 (ID: 37310284, Code: 1240561000000108)\n", - " 🔽 Cardiomyopathy due to disease caused by Severe acute respiratory syndrome coronavirus 2 (ID: 3656667, Code: 119731000146105)\n", - " 🔽 Acute bronchitis caused by SARS-CoV-2 (ID: 3661405, Code: 138389411000119105)\n", - " 🔽 Acute kidney injury due to disease caused by Severe acute respiratory syndrome coronavirus 2 (ID: 3661748, Code: 870589006)\n", - " 🔽 Thrombocytopenia due to Severe acute respiratory syndrome coronavirus 2 (ID: 3661632, Code: 866152006)\n", - " 🔽 Conjunctivitis due to disease caused by Severe acute respiratory syndrome coronavirus 2 (ID: 3656668, Code: 119741000146102)\n", - " 🔽 Pneumonia caused by SARS-CoV-2 (ID: 3661408, Code: 882784691000119100)\n", - " 🔽 Lower respiratory infection caused by SARS-CoV-2 (ID: 3663281, Code: 880529761000119102)\n", - " 🔽 Pneumonia caused by SARS-CoV-2 (ID: 3661408, Code: 882784691000119100)\n", - " 🔽 Acute bronchitis caused by SARS-CoV-2 (ID: 3661405, Code: 138389411000119105)\n", - " 🔽 Bronchitis caused by COVID-19 (ID: 756031, Code: OMOP4873909)\n", - " 🔽 Acute bronchitis 
caused by SARS-CoV-2 (ID: 3661405, Code: 138389411000119105)\n", - " 🔽 Gastroenteritis caused by SARS-CoV-2 (severe acute respiratory syndrome coronavirus 2) (ID: 37310283, Code: 1240571000000101)\n", - " 🔽 Fever caused by Severe acute respiratory syndrome coronavirus 2 (ID: 3661885, Code: 119751000146104)\n", - " 🔽 Acute respiratory distress syndrome due to disease caused by Severe acute respiratory syndrome coronavirus 2 (ID: 3661406, Code: 674814021000119106)\n", - " 🔽 Myocarditis due to disease caused by Severe acute respiratory syndrome coronavirus 2 (ID: 37310287, Code: 1240531000000103)\n", - " 🔽 Rhabdomyolysis due to disease caused by Severe acute respiratory syndrome coronavirus 2 (ID: 3655977, Code: 870591003)\n", - " 🔽 Bronchitis caused by COVID-19 (ID: 756031, Code: OMOP4873909)\n", - " 🔽 Acute bronchitis caused by SARS-CoV-2 (ID: 3661405, Code: 138389411000119105)\n", - " 🔽 Asymptomatic SARS-CoV-2 (ID: 3662381, Code: 189486241000119100)\n", - " 🔽 Infection of upper respiratory tract caused by Severe acute respiratory syndrome coronavirus 2 (ID: 37310286, Code: 1240541000000107)\n", - " 🔽 Sepsis due to disease caused by Severe acute respiratory syndrome coronavirus 2 (ID: 3655975, Code: 870588003)\n", - " 🔽 Dyspnea caused by Severe acute respiratory syndrome coronavirus 2 (ID: 3656669, Code: 119981000146107)\n", - " 🔽 Acute hypoxemic respiratory failure due to disease caused by Severe acute respiratory syndrome coronavirus 2 (ID: 3655976, Code: 870590002)\n", - "\n" - ] - } - ], - "source": [ - "print('parent concept hierarchy for COVID-19 in text format:')\n", - "print(bias.display_concept_tree(parent_concept_tree))\n", - "print('children concept hierarchy for COVID-19 in text format:')\n", - "print(bias.display_concept_tree(children_concept_tree))" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "e3f5ace2-6cc4-4940-a067-e1a3fc14e1ce", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "parent concept hierarchy for COVID-19 in widget tree format:\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4969f2d9c4f6438ba5557588332be0aa", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(Label(value='Concept Hierarchy'), Tree(nodes=(Node(name='🔼 COVID-19 (ID: 37311061, Code: 840539…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "children concept hierarchy for COVID-19 in widget tree format:\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c476950289304222b749351a48121387", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(Label(value='Concept Hierarchy'), Tree(nodes=(Node(name='🔽 COVID-19 (ID: 37311061, Code: 840539…" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(f'parent concept hierarchy for COVID-19 in widget tree format:')\n", - "bias.display_concept_tree(parent_concept_tree, show_in_text_format=False)\n", - "print(f'children concept hierarchy for COVID-19 in widget tree format:')\n", - "bias.display_concept_tree(children_concept_tree, show_in_text_format=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "8be5061b-cfdf-4dc0-9ef8-f18277ab9fbe", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Connection to BiasDatabase closed.\n", - "Connection to the OMOP CDM database closed.\n" - ] - } - ], - "source": [ - "bias.cleanup()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7ad0b7b-21dc-4572-af21-fe1580361999", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d03cf95-3c68-4eee-be41-5482dea68b84", - "metadata": { - "tags": [] - }, - "outputs": [], - 
"source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "128c7b02-faef-4a35-97a6-c92baa5a37dd", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From f87c9142a513d55db689ebe3e578c8e25f8c8113 Mon Sep 17 00:00:00 2001 From: hyi Date: Fri, 20 Jun 2025 16:56:04 -0400 Subject: [PATCH 09/10] linked tutorials to readme --- README.md | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a2d5103..f354010 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ containing a list of the concept's children or parents in the hierarchy. - Call `bias.display_concept_tree(parent_concept_tree)` and `bias.display_concept_tree(children_concept_tree)` to display the concept hierarchical tree in an indented text format. If ipytree widget is installed and supported in a Jupyter notebook environment, you can set `show_in_text_format` input parameter to `False` -(e.g., call `bias.display_concept_tree(parent_concept_tree, show_in_text_format=False)`)to leverage the tree widget for displaying +(e.g., call `bias.display_concept_tree(parent_concept_tree, show_in_text_format=False)`) to leverage the tree widget for displaying the hierarchy in a tree that can be expanded and collapsed on demand interactively. In addition to exploring the concepts using BiasAnalyzer APIs, the main functionalities of the BiasAnalyzer is @@ -88,10 +88,13 @@ The following code snippets show some examples. 
 ```
 Note that currently the `get_stats()` method only returns statistics of age, gender, race, and ethnicity of a cohort
 and `get_distributions()` method only returns distribution of age and gender in a cohort.
-- You can also get patient counts and prevalence with each diagnostic condition concept code in a cohort by accessing
+- You can also explore concept prevalence within a cohort - a key step in identifying potential biases during
+cohort selection. A concept refers to a coded term from a standardized medical vocabulary, uniquely identified by a
+concept ID. All clinical events in OMOP, such as conditions, drug exposures, procedures, measurements, and events, are
+represented as concepts. You can get patient counts and prevalence associated with each concept by accessing
 the method `get_concept_stats()` with a code snippet example shown below.
 ```angular2html
-    cohort_concepts = baseline_cohort_data.get_concept_stats()
+    cohort_concepts = baseline_cohort_data.get_concept_stats(concept_type='condition_occurrence')
     print(pd.DataFrame(cohort_concepts["condition_occurrence"]))
 ```
 - There is also an API method that enables users to compare distributions of two cohorts by calling `bias.compare_cohorts(cohort1_id, cohort2_id)`
@@ -99,4 +102,21 @@ where cohort1_id and cohort2_id are integers and can be obtained from metadata o
 only hellinger distances between distributions of two cohorts are computed.
 
 - After all analysis is done, please make sure to close database connections and do necessary cleanups by calling
-the API method `bias.cleanup()`.
\ No newline at end of file
+the API method `bias.cleanup()`.
+
+--
+
+## 📘 Tutorial Notebooks
+
+To help users get started with the `BiasAnalyzer` python package, four Jupyter notebooks are
+provided in the [`notebooks/`](https://github.com/VACLab/BiasAnalyzer/tree/main/notebooks)
+directory. These tutorials walk users through key features and workflows with illustrative examples.
+ +| Tutorial | Description | +|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [BiasAnalyzerCohortsTutorial.ipynb](https://github.com/VACLab/BiasAnalyzer/blob/main/notebooks/BiasAnalyzerCohortsTutorial.ipynb) | Demonstrates how to create baseline and study cohorts, retrieve cohort statistics, and compare cohort distributions. | +| [BiasAnalyzerAsyncCohortsTutorial.ipynb](https://github.com/VACLab/BiasAnalyzer/blob/main/notebooks/BiasAnalyzerAsyncCohortsTutorial.ipynb) | As a companion to the Cohort tutorial above, demonstrates how to create and analyze cohorts asynchronously for improved performance and responsiveness when working with large datasets or complex cohort definitions. | +| [BiasAnalyzerCohortConceptTutorial.ipynb](https://github.com/VACLab/BiasAnalyzer/blob/main/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb) | Demonstrates how to explore clinical concept prevalence within a cohort, helping users analyze clinical concept prevalence and identify potential cohort selection biases. | +| [BiasAnalyzerConceptBrowsingTutorial.ipynb](https://github.com/VACLab/BiasAnalyzer/blob/main/notebooks/BiasAnalyzerConceptBrowsingTutorial.ipynb) | Guides users through browsing OMOP concepts, domains, and vocabularies, including how to retrieve and visualize concept hierarchies. | + +These tutorials are designed to be run in a Jupyter environment with access to an OMOP-compatible postgreSQL or DuckDB database. 
From f425071eef1641538213d30a86b40aa5c97e3538 Mon Sep 17 00:00:00 2001 From: hyi Date: Fri, 20 Jun 2025 16:57:33 -0400 Subject: [PATCH 10/10] minor updates to readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f354010..fb444c1 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ only hellinger distances between distributions of two cohorts are computed. - After all analysis is done, please make sure to close database connections and do necessary cleanups by calling the API method `bias.cleanup()`. --- +--- ## 📘 Tutorial Notebooks @@ -119,4 +119,4 @@ directory. These tutorials walk users through key features and workflows with il | [BiasAnalyzerCohortConceptTutorial.ipynb](https://github.com/VACLab/BiasAnalyzer/blob/main/notebooks/BiasAnalyzerCohortConceptTutorial.ipynb) | Demonstrates how to explore clinical concept prevalence within a cohort, helping users analyze clinical concept prevalence and identify potential cohort selection biases. | | [BiasAnalyzerConceptBrowsingTutorial.ipynb](https://github.com/VACLab/BiasAnalyzer/blob/main/notebooks/BiasAnalyzerConceptBrowsingTutorial.ipynb) | Guides users through browsing OMOP concepts, domains, and vocabularies, including how to retrieve and visualize concept hierarchies. | -These tutorials are designed to be run in a Jupyter environment with access to an OMOP-compatible postgreSQL or DuckDB database. +These tutorials are designed to run in a Jupyter environment with access to an OMOP-compatible postgreSQL or DuckDB database.