From 6e02dbe405ee35afd28dc6e5149015c3c5baeb13 Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Thu, 5 Jun 2025 23:01:46 -0400
Subject: [PATCH 01/12] added more tests with test structure refactoring

---
 README.md                                     |   2 +-
 biasanalyzer/api.py                           |  63 +++++++++---------
 biasanalyzer/utils.py                         |  24 +++++++
 ...asAnalyzerTestingAsyncCohortCreation.ipynb |   2 +-
 ...iasAnalyzerTestingCohortConceptStats.ipynb |  20 +++---
 notebooks/BiasAnalyzerTestingCohorts.ipynb    |  20 +++---
 .../BiasAnalyzerTestingConceptBrowsing.ipynb  |  14 ++--
 .../config/.test_config_postgresql.yaml.swp   | Bin 0 -> 12288 bytes
 tests/assets/{ => config}/test_config.yaml    |   0
 .../assets/config/test_config_postgresql.yaml |   9 +++
 .../test_config_unsupported_db_type.yaml      |   9 +++
 tests/assets/config/test_invalid_config.yaml  |   8 +++
 tests/{query_based => }/conftest.py           |  11 ++-
 tests/test_biasanalyzer.py                    |   5 --
 tests/test_biasanalyzer_api.py                |  38 +++++++++++
 tests/test_config.py                          |  15 +----
 16 files changed, 160 insertions(+), 80 deletions(-)
 create mode 100644 tests/assets/config/.test_config_postgresql.yaml.swp
 rename tests/assets/{ => config}/test_config.yaml (100%)
 create mode 100644 tests/assets/config/test_config_postgresql.yaml
 create mode 100644 tests/assets/config/test_config_unsupported_db_type.yaml
 create mode 100644 tests/assets/config/test_invalid_config.yaml
 rename tests/{query_based => }/conftest.py (97%)
 delete mode 100644 tests/test_biasanalyzer.py
 create mode 100644 tests/test_biasanalyzer_api.py

diff --git a/README.md b/README.md
index 4b92f80..a2d5103 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ to install the python package from this github repo.
 - Run `bias = BIAS()` to create an object of the imported BIAS class.
 - Create a config.yaml file for specifying OMOP database connection configuration information. 
 The config.yaml file must include root_omop_cdm_database key. 
-- [A test OMOP database configuration yaml file](https://github.com/VACLab/BiasAnalyzer/blob/main/tests/assets/test_config.yaml) 
+- [A test OMOP database configuration yaml file](https://github.com/VACLab/BiasAnalyzer/blob/main/tests/assets/config/test_config.yaml) 
 can serve as an example. Another config.yaml example for connecting to a OMOP postgreSQL database 
 is also copied below for reference.
   ```angular2html
diff --git a/biasanalyzer/api.py b/biasanalyzer/api.py
index 6caadce..a79cb25 100644
--- a/biasanalyzer/api.py
+++ b/biasanalyzer/api.py
@@ -6,7 +6,7 @@
 from ipywidgets import VBox, Label
 from ipytree import Tree, Node
 from IPython.display import display
-from biasanalyzer.utils import get_direction_arrow
+from biasanalyzer.utils import get_direction_arrow, notify_users
 
 
 class BIAS:
@@ -26,24 +26,24 @@ def __new__(cls, config_file_path=None):
 
     def set_config(self, config_file_path: str):
         if config_file_path is None:
-            print('no configuration file specified. '
-                  'Call set_config(config_file_path) next to specify configurations')
+            notify_users('no configuration file specified. '
+                         'Call set_config(config_file_path) next to specify configurations')
         else:
             try:
                 self.config = load_config(config_file_path)
-                print(f'configuration specified in {config_file_path} loaded successfully')
+                notify_users(f'configuration specified in {config_file_path} loaded successfully')
             except FileNotFoundError:
-                print('specified configuration file does not exist. '
-                      'Call set_config(config_file_path) next to specify a valid '
-                      'configuration file')
+                notify_users('specified configuration file does not exist. '
+                             'Call set_config(config_file_path) next to specify a valid configuration file',
+                             level='error')
             except ValidationError as ex:
-                print(f'configuration yaml file is not valid with validation error: {ex}')
+                notify_users(f'configuration yaml file is not valid with validation error: {ex}', level='error')
 
     def set_root_omop(self):
         if not self.config:
-            print('no valid configuration to set root OMOP CDM data. '
-                  'Call set_config(config_file_path) to specify configurations first.')
-        elif 'root_omop_cdm_database' in self.config:
+            notify_users('no valid configuration to set root OMOP CDM data. '
+                         'Call set_config(config_file_path) to specify configurations first.')
+        else:
             db_type = self.config['root_omop_cdm_database']['database_type']
             if db_type == 'postgresql':
                 user = self.config['root_omop_cdm_database']['username']
@@ -65,14 +65,12 @@ def set_root_omop(self):
                 self.bias_db = BiasDatabase(db_path)
                 self.bias_db.omop_cdm_db_url = db_path
             else:
-                print(f"Unsupported database type: {db_type}")
-        else:
-            print('Configuration file must include configuration values for root_omop_cdm_database key.')
+                notify_users(f"Unsupported database type: {db_type}")
 
     def _set_cohort_action(self):
         if self.omop_cdm_db is None:
-            print('A valid OMOP CDM must be set before creating a cohort. '
-                  'Call set_root_omop first to set a valid root OMOP CDM')
+            notify_users('A valid OMOP CDM must be set before creating a cohort. '
+                         'Call set_root_omop first to set a valid root OMOP CDM')
             return None
         if self.cohort_action is None:
             self.cohort_action = CohortAction(self.omop_cdm_db, self.bias_db)
@@ -80,25 +78,25 @@ def _set_cohort_action(self):
 
     def get_domains_and_vocabularies(self):
         if self.omop_cdm_db is None:
-            print('A valid OMOP CDM must be set before getting domains. '
-                  'Call set_root_omop first to set a valid root OMOP CDM')
+            notify_users('A valid OMOP CDM must be set before getting domains. '
+                         'Call set_root_omop first to set a valid root OMOP CDM')
             return None
         return self.omop_cdm_db.get_domains_and_vocabularies()
 
     def get_concepts(self, search_term, domain=None, vocabulary=None):
         if self.omop_cdm_db is None:
-            print('A valid OMOP CDM must be set before getting concepts. '
-                  'Call set_root_omop first to set a valid root OMOP CDM')
+            notify_users('A valid OMOP CDM must be set before getting concepts. '
+                         'Call set_root_omop first to set a valid root OMOP CDM')
             return None
         if domain is None and vocabulary is None:
-            print('either domain or vocabulary must be set to constrain the number of returned concepts')
+            notify_users('either domain or vocabulary must be set to constrain the number of returned concepts')
             return None
         return self.omop_cdm_db.get_concepts(search_term, domain, vocabulary)
 
     def get_concept_hierarchy(self, concept_id):
         if self.omop_cdm_db is None:
-            print('A valid OMOP CDM must be set before getting concepts. '
-                  'Call set_root_omop first to set a valid root OMOP CDM')
+            notify_users('A valid OMOP CDM must be set before getting concepts. '
+                         'Call set_root_omop first to set a valid root OMOP CDM')
             return None
         return self.omop_cdm_db.get_concept_hierarchy(concept_id)
 
@@ -134,7 +132,7 @@ def display_concept_tree(self, concept_tree: dict, level: int = 0, show_in_text_
             elif 'children' in concept_tree:
                 tree_type = 'children'
             else:
-                print('The input concept tree must contain parents or children key as the type of the tree.')
+                notify_users('The input concept tree must contain parents or children key as the type of the tree.')
                 return ''
 
         if show_in_text_format:
@@ -178,12 +176,12 @@ def create_cohort(self, cohort_name: str, cohort_desc: str, query_or_yaml_file:
             created_cohort = c_action.create_cohort(cohort_name, cohort_desc, query_or_yaml_file, created_by)
             if created_cohort is not None:
                 if delay > 0:
-                    print(f"[DEBUG] Simulating long-running task with {delay} seconds delay...")
+                    notify_users(f"[DEBUG] Simulating long-running task with {delay} seconds delay...")
                     time.sleep(delay)
-                print('cohort created successfully')
+                notify_users('cohort created successfully')
             return created_cohort
         else:
-            print('failed to create a valid cohort action object')
+            notify_users('failed to create a valid cohort action object')
             return None
 
 
@@ -192,11 +190,14 @@ def compare_cohorts(self, cohort_id1, cohort_id2):
         if c_action:
             return c_action.compare_cohorts(cohort_id1, cohort_id2)
         else:
-            print('failed to create a valid cohort action object')
+            notify_users('failed to create a valid cohort action object')
             return None
 
 
     def cleanup(self):
-        self.bias_db.close()
-        self.omop_cdm_db.close()
-        del self.cohort_action
+        if self.bias_db:
+            self.bias_db.close()
+        if self.omop_cdm_db:
+            self.omop_cdm_db.close()
+        # reset instead of `del` so later `self.cohort_action is None` checks (and a repeated cleanup) still work
+        self.cohort_action = None
diff --git a/biasanalyzer/utils.py b/biasanalyzer/utils.py
index e89f604..faa9953 100644
--- a/biasanalyzer/utils.py
+++ b/biasanalyzer/utils.py
@@ -1,5 +1,29 @@
 import numpy as np
 import re
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+def notify_users(message: str, level: str = "info"):
+    """
+    Notify users by printing the message and also logging it through this module's logger.
+    :param message: message to show
+    :param level: logging level name, case-insensitive: 'info', 'warning', 'error', or 'debug';
+        any other value falls back to the logger's info level
+    """
+
+    print(message)
+
+    log_func = {
+        "info": logger.info,
+        "warning": logger.warning,
+        "error": logger.error,
+        "debug": logger.debug,
+    }.get(level.lower(), logger.info)
+
+    log_func(message)
 
 
 def get_direction_arrow(tree_type):
diff --git a/notebooks/BiasAnalyzerTestingAsyncCohortCreation.ipynb b/notebooks/BiasAnalyzerTestingAsyncCohortCreation.ipynb
index 3f15471..763bbc1 100644
--- a/notebooks/BiasAnalyzerTestingAsyncCohortCreation.ipynb
+++ b/notebooks/BiasAnalyzerTestingAsyncCohortCreation.ipynb
@@ -72,7 +72,7 @@
    ],
    "source": [
     "# the configuration file includes root_omop_cdm_database configuration info with an example shown \n",
-    "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/test_config.yaml\n",
+    "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/config/test_config.yaml\n",
     "bias.set_root_omop()"
    ]
   },
diff --git a/notebooks/BiasAnalyzerTestingCohortConceptStats.ipynb b/notebooks/BiasAnalyzerTestingCohortConceptStats.ipynb
index af88f81..1f15809 100644
--- a/notebooks/BiasAnalyzerTestingCohortConceptStats.ipynb
+++ b/notebooks/BiasAnalyzerTestingCohortConceptStats.ipynb
@@ -16,10 +16,10 @@
       "  Cloning https://github.com/vaclab/BiasAnalyzer.git to ./temp/pip-req-build-vlj8e8fz\n",
       "  Running command git clone --filter=blob:none --quiet https://github.com/vaclab/BiasAnalyzer.git /home/hyi/temp/pip-req-build-vlj8e8fz\n",
       "  Resolved https://github.com/vaclab/BiasAnalyzer.git to commit 8d821839e93b1d9a208c5c66352ee66db60d1e53\n",
-      "  Installing build dependencies ... \u001b[?25ldone\n",
-      "\u001b[?25h  Getting requirements to build wheel ... \u001b[?25ldone\n",
-      "\u001b[?25h  Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
-      "\u001b[?25hCollecting duckdb<2.0.0,>=1.1.1 (from biasanalyzer==0.1.0)\n",
+      "  Installing build dependencies ... \u001B[?25ldone\n",
+      "\u001B[?25h  Getting requirements to build wheel ... \u001B[?25ldone\n",
+      "\u001B[?25h  Preparing metadata (pyproject.toml) ... \u001B[?25ldone\n",
+      "\u001B[?25hCollecting duckdb<2.0.0,>=1.1.1 (from biasanalyzer==0.1.0)\n",
       "  Obtaining dependency information for duckdb<2.0.0,>=1.1.1 from https://files.pythonhosted.org/packages/50/52/6e6f5b5b07841cec334ca6b98f2e02b7bb54ab3b99c49aa3a161cc0b4b37/duckdb-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n",
       "  Using cached duckdb-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (966 bytes)\n",
       "Collecting duckdb-engine<0.14.0,>=0.13.2 (from biasanalyzer==0.1.0)\n",
@@ -182,14 +182,14 @@
       "Using cached pure_eval-0.2.3-py3-none-any.whl (11 kB)\n",
       "Using cached wcwidth-0.2.13-py2.py3-none-any.whl (34 kB)\n",
       "Building wheels for collected packages: biasanalyzer\n",
-      "  Building wheel for biasanalyzer (pyproject.toml) ... \u001b[?25ldone\n",
-      "\u001b[?25h  Created wheel for biasanalyzer: filename=biasanalyzer-0.1.0-py3-none-any.whl size=25475 sha256=1982c82749337f81db1a730b8cc25c049d0c0788cd6b782f69ce8be1d92a397c\n",
+      "  Building wheel for biasanalyzer (pyproject.toml) ... \u001B[?25ldone\n",
+      "\u001B[?25h  Created wheel for biasanalyzer: filename=biasanalyzer-0.1.0-py3-none-any.whl size=25475 sha256=1982c82749337f81db1a730b8cc25c049d0c0788cd6b782f69ce8be1d92a397c\n",
       "  Stored in directory: /home/hyi/temp/pip-ephem-wheel-cache-f_9rcqkk/wheels/25/75/4e/079d96d69cc58148ce31d3d44f858e4db5f689604112dcb7c3\n",
       "Successfully built biasanalyzer\n",
       "Installing collected packages: wcwidth, pytz, pure-eval, ptyprocess, widgetsnbextension, tzdata, typing-extensions, traitlets, six, pyyaml, pygments, psycopg2, prompt_toolkit, pexpect, parso, packaging, numpy, MarkupSafe, jupyterlab-widgets, greenlet, executing, duckdb, decorator, asttokens, annotated-types, stack_data, sqlalchemy, scipy, python-dateutil, pydantic-core, matplotlib-inline, jinja2, jedi, ipython-pygments-lexers, comm, pydantic, pandas, ipython, duckdb-engine, ipywidgets, ipytree, biasanalyzer\n",
-      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
-      "ipympl 0.9.3 requires ipython<9, but you have ipython 9.0.2 which is incompatible.\u001b[0m\u001b[31m\n",
-      "\u001b[0mSuccessfully installed MarkupSafe-3.0.2 annotated-types-0.7.0 asttokens-3.0.0 biasanalyzer-0.1.0 comm-0.2.2 decorator-5.2.1 duckdb-1.2.1 duckdb-engine-0.13.6 executing-2.2.0 greenlet-3.1.1 ipython-9.0.2 ipython-pygments-lexers-1.1.1 ipytree-0.2.2 ipywidgets-8.1.5 jedi-0.19.2 jinja2-3.1.5 jupyterlab-widgets-3.0.13 matplotlib-inline-0.1.7 numpy-1.24.4 packaging-24.2 pandas-2.0.3 parso-0.8.4 pexpect-4.9.0 prompt_toolkit-3.0.50 psycopg2-2.9.10 ptyprocess-0.7.0 pure-eval-0.2.3 pydantic-2.10.6 pydantic-core-2.27.2 pygments-2.19.1 python-dateutil-2.9.0.post0 pytz-2025.1 pyyaml-6.0.2 scipy-1.10.1 six-1.17.0 sqlalchemy-2.0.39 stack_data-0.6.3 traitlets-5.14.3 typing-extensions-4.12.2 tzdata-2025.1 wcwidth-0.2.13 widgetsnbextension-4.0.13\n"
+      "\u001B[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+      "ipympl 0.9.3 requires ipython<9, but you have ipython 9.0.2 which is incompatible.\u001B[0m\u001B[31m\n",
+      "\u001B[0mSuccessfully installed MarkupSafe-3.0.2 annotated-types-0.7.0 asttokens-3.0.0 biasanalyzer-0.1.0 comm-0.2.2 decorator-5.2.1 duckdb-1.2.1 duckdb-engine-0.13.6 executing-2.2.0 greenlet-3.1.1 ipython-9.0.2 ipython-pygments-lexers-1.1.1 ipytree-0.2.2 ipywidgets-8.1.5 jedi-0.19.2 jinja2-3.1.5 jupyterlab-widgets-3.0.13 matplotlib-inline-0.1.7 numpy-1.24.4 packaging-24.2 pandas-2.0.3 parso-0.8.4 pexpect-4.9.0 prompt_toolkit-3.0.50 psycopg2-2.9.10 ptyprocess-0.7.0 pure-eval-0.2.3 pydantic-2.10.6 pydantic-core-2.27.2 pygments-2.19.1 python-dateutil-2.9.0.post0 pytz-2025.1 pyyaml-6.0.2 scipy-1.10.1 six-1.17.0 sqlalchemy-2.0.39 stack_data-0.6.3 traitlets-5.14.3 typing-extensions-4.12.2 tzdata-2025.1 wcwidth-0.2.13 widgetsnbextension-4.0.13\n"
      ]
     }
    ],
@@ -291,7 +291,7 @@
    ],
    "source": [
     "# the configuration file includes root_omop_cdm_database configuration info with an example shown \n",
-    "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/test_config.yaml\n",
+    "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/config/test_config.yaml\n",
     "bias.set_root_omop()"
    ]
   },
diff --git a/notebooks/BiasAnalyzerTestingCohorts.ipynb b/notebooks/BiasAnalyzerTestingCohorts.ipynb
index 5c62812..7810c49 100644
--- a/notebooks/BiasAnalyzerTestingCohorts.ipynb
+++ b/notebooks/BiasAnalyzerTestingCohorts.ipynb
@@ -16,10 +16,10 @@
       "  Cloning https://github.com/vaclab/BiasAnalyzer.git to ./temp/pip-req-build-sqm_zvhy\n",
       "  Running command git clone --filter=blob:none --quiet https://github.com/vaclab/BiasAnalyzer.git /home/hyi/temp/pip-req-build-sqm_zvhy\n",
       "  Resolved https://github.com/vaclab/BiasAnalyzer.git to commit 8d821839e93b1d9a208c5c66352ee66db60d1e53\n",
-      "  Installing build dependencies ... \u001b[?25ldone\n",
-      "\u001b[?25h  Getting requirements to build wheel ... \u001b[?25ldone\n",
-      "\u001b[?25h  Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
-      "\u001b[?25hCollecting duckdb<2.0.0,>=1.1.1 (from biasanalyzer==0.1.0)\n",
+      "  Installing build dependencies ... \u001B[?25ldone\n",
+      "\u001B[?25h  Getting requirements to build wheel ... \u001B[?25ldone\n",
+      "\u001B[?25h  Preparing metadata (pyproject.toml) ... \u001B[?25ldone\n",
+      "\u001B[?25hCollecting duckdb<2.0.0,>=1.1.1 (from biasanalyzer==0.1.0)\n",
       "  Obtaining dependency information for duckdb<2.0.0,>=1.1.1 from https://files.pythonhosted.org/packages/50/52/6e6f5b5b07841cec334ca6b98f2e02b7bb54ab3b99c49aa3a161cc0b4b37/duckdb-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n",
       "  Using cached duckdb-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (966 bytes)\n",
       "Collecting duckdb-engine<0.14.0,>=0.13.2 (from biasanalyzer==0.1.0)\n",
@@ -182,14 +182,14 @@
       "Using cached pure_eval-0.2.3-py3-none-any.whl (11 kB)\n",
       "Using cached wcwidth-0.2.13-py2.py3-none-any.whl (34 kB)\n",
       "Building wheels for collected packages: biasanalyzer\n",
-      "  Building wheel for biasanalyzer (pyproject.toml) ... \u001b[?25ldone\n",
-      "\u001b[?25h  Created wheel for biasanalyzer: filename=biasanalyzer-0.1.0-py3-none-any.whl size=25475 sha256=1982c82749337f81db1a730b8cc25c049d0c0788cd6b782f69ce8be1d92a397c\n",
+      "  Building wheel for biasanalyzer (pyproject.toml) ... \u001B[?25ldone\n",
+      "\u001B[?25h  Created wheel for biasanalyzer: filename=biasanalyzer-0.1.0-py3-none-any.whl size=25475 sha256=1982c82749337f81db1a730b8cc25c049d0c0788cd6b782f69ce8be1d92a397c\n",
       "  Stored in directory: /home/hyi/temp/pip-ephem-wheel-cache-7pwouolk/wheels/25/75/4e/079d96d69cc58148ce31d3d44f858e4db5f689604112dcb7c3\n",
       "Successfully built biasanalyzer\n",
       "Installing collected packages: wcwidth, pytz, pure-eval, ptyprocess, widgetsnbextension, tzdata, typing-extensions, traitlets, six, pyyaml, pygments, psycopg2, prompt_toolkit, pexpect, parso, packaging, numpy, MarkupSafe, jupyterlab-widgets, greenlet, executing, duckdb, decorator, asttokens, annotated-types, stack_data, sqlalchemy, scipy, python-dateutil, pydantic-core, matplotlib-inline, jinja2, jedi, ipython-pygments-lexers, comm, pydantic, pandas, ipython, duckdb-engine, ipywidgets, ipytree, biasanalyzer\n",
-      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
-      "ipympl 0.9.3 requires ipython<9, but you have ipython 9.0.2 which is incompatible.\u001b[0m\u001b[31m\n",
-      "\u001b[0mSuccessfully installed MarkupSafe-3.0.2 annotated-types-0.7.0 asttokens-3.0.0 biasanalyzer-0.1.0 comm-0.2.2 decorator-5.2.1 duckdb-1.2.1 duckdb-engine-0.13.6 executing-2.2.0 greenlet-3.1.1 ipython-9.0.2 ipython-pygments-lexers-1.1.1 ipytree-0.2.2 ipywidgets-8.1.5 jedi-0.19.2 jinja2-3.1.5 jupyterlab-widgets-3.0.13 matplotlib-inline-0.1.7 numpy-1.24.4 packaging-24.2 pandas-2.0.3 parso-0.8.4 pexpect-4.9.0 prompt_toolkit-3.0.50 psycopg2-2.9.10 ptyprocess-0.7.0 pure-eval-0.2.3 pydantic-2.10.6 pydantic-core-2.27.2 pygments-2.19.1 python-dateutil-2.9.0.post0 pytz-2025.1 pyyaml-6.0.2 scipy-1.10.1 six-1.17.0 sqlalchemy-2.0.39 stack_data-0.6.3 traitlets-5.14.3 typing-extensions-4.12.2 tzdata-2025.1 wcwidth-0.2.13 widgetsnbextension-4.0.13\n",
+      "\u001B[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+      "ipympl 0.9.3 requires ipython<9, but you have ipython 9.0.2 which is incompatible.\u001B[0m\u001B[31m\n",
+      "\u001B[0mSuccessfully installed MarkupSafe-3.0.2 annotated-types-0.7.0 asttokens-3.0.0 biasanalyzer-0.1.0 comm-0.2.2 decorator-5.2.1 duckdb-1.2.1 duckdb-engine-0.13.6 executing-2.2.0 greenlet-3.1.1 ipython-9.0.2 ipython-pygments-lexers-1.1.1 ipytree-0.2.2 ipywidgets-8.1.5 jedi-0.19.2 jinja2-3.1.5 jupyterlab-widgets-3.0.13 matplotlib-inline-0.1.7 numpy-1.24.4 packaging-24.2 pandas-2.0.3 parso-0.8.4 pexpect-4.9.0 prompt_toolkit-3.0.50 psycopg2-2.9.10 ptyprocess-0.7.0 pure-eval-0.2.3 pydantic-2.10.6 pydantic-core-2.27.2 pygments-2.19.1 python-dateutil-2.9.0.post0 pytz-2025.1 pyyaml-6.0.2 scipy-1.10.1 six-1.17.0 sqlalchemy-2.0.39 stack_data-0.6.3 traitlets-5.14.3 typing-extensions-4.12.2 tzdata-2025.1 wcwidth-0.2.13 widgetsnbextension-4.0.13\n",
       "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.11/site-packages (4.12.2)\n"
      ]
     }
@@ -288,7 +288,7 @@
    ],
    "source": [
     "# the configuration file includes root_omop_cdm_database configuration info with an example shown \n",
-    "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/test_config.yaml\n",
+    "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/config/test_config.yaml\n",
     "bias.set_root_omop()"
    ]
   },
diff --git a/notebooks/BiasAnalyzerTestingConceptBrowsing.ipynb b/notebooks/BiasAnalyzerTestingConceptBrowsing.ipynb
index b4627c0..c387145 100644
--- a/notebooks/BiasAnalyzerTestingConceptBrowsing.ipynb
+++ b/notebooks/BiasAnalyzerTestingConceptBrowsing.ipynb
@@ -16,10 +16,10 @@
       "  Cloning https://github.com/vaclab/BiasAnalyzer.git to ./temp/pip-req-build-2mkwyv9w\n",
       "  Running command git clone --filter=blob:none --quiet https://github.com/vaclab/BiasAnalyzer.git /home/hyi/temp/pip-req-build-2mkwyv9w\n",
       "  Resolved https://github.com/vaclab/BiasAnalyzer.git to commit a3d43525ddd2b934d8a094901f7ad62c52f2e724\n",
-      "  Installing build dependencies ... \u001b[?25ldone\n",
-      "\u001b[?25h  Getting requirements to build wheel ... \u001b[?25ldone\n",
-      "\u001b[?25h  Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
-      "\u001b[?25hCollecting duckdb<2.0.0,>=1.1.1 (from BiasAnalyzer==0.1.0)\n",
+      "  Installing build dependencies ... \u001B[?25ldone\n",
+      "\u001B[?25h  Getting requirements to build wheel ... \u001B[?25ldone\n",
+      "\u001B[?25h  Preparing metadata (pyproject.toml) ... \u001B[?25ldone\n",
+      "\u001B[?25hCollecting duckdb<2.0.0,>=1.1.1 (from BiasAnalyzer==0.1.0)\n",
       "  Obtaining dependency information for duckdb<2.0.0,>=1.1.1 from https://files.pythonhosted.org/packages/bf/56/f627b6fcd4aa34015a15449d852ccb78d7cc6eda654aa20c1d378e99fa76/duckdb-1.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n",
       "  Using cached duckdb-1.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (762 bytes)\n",
       "Collecting duckdb-engine<0.14.0,>=0.13.2 (from BiasAnalyzer==0.1.0)\n",
@@ -170,8 +170,8 @@
       "Using cached pure_eval-0.2.3-py3-none-any.whl (11 kB)\n",
       "Using cached wcwidth-0.2.13-py2.py3-none-any.whl (34 kB)\n",
       "Building wheels for collected packages: BiasAnalyzer\n",
-      "  Building wheel for BiasAnalyzer (pyproject.toml) ... \u001b[?25ldone\n",
-      "\u001b[?25h  Created wheel for BiasAnalyzer: filename=biasanalyzer-0.1.0-py3-none-any.whl size=12482 sha256=254ea1fa17b7c1706a4d4e4ed711dd7128601c09a1c3c36c9ec903ed842441af\n",
+      "  Building wheel for BiasAnalyzer (pyproject.toml) ... \u001B[?25ldone\n",
+      "\u001B[?25h  Created wheel for BiasAnalyzer: filename=biasanalyzer-0.1.0-py3-none-any.whl size=12482 sha256=254ea1fa17b7c1706a4d4e4ed711dd7128601c09a1c3c36c9ec903ed842441af\n",
       "  Stored in directory: /home/hyi/temp/pip-ephem-wheel-cache-wgmpfyq9/wheels/25/75/4e/079d96d69cc58148ce31d3d44f858e4db5f689604112dcb7c3\n",
       "Successfully built BiasAnalyzer\n",
       "Installing collected packages: wcwidth, pytz, pure-eval, ptyprocess, widgetsnbextension, tzdata, typing-extensions, traitlets, six, pyyaml, pygments, psycopg2, prompt-toolkit, pexpect, parso, packaging, numpy, jupyterlab-widgets, greenlet, executing, duckdb, decorator, annotated-types, sqlalchemy, scipy, python-dateutil, pydantic-core, matplotlib-inline, jedi, comm, asttokens, stack-data, pydantic, pandas, duckdb-engine, ipython, ipywidgets, ipytree, BiasAnalyzer\n",
@@ -274,7 +274,7 @@
    ],
    "source": [
     "# the configuration file includes root_omop_cdm_database configuration info with an example shown \n",
-    "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/test_config.yaml\n",
+    "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/config/test_config.yaml\n",
     "bias.set_root_omop()"
    ]
   },
diff --git a/tests/assets/config/.test_config_postgresql.yaml.swp b/tests/assets/config/.test_config_postgresql.yaml.swp
new file mode 100644
index 0000000000000000000000000000000000000000..5feb484dcde0cc44c29cdbe4c06e5f0b1503c846
GIT binary patch
literal 12288
zcmeI&O>Pr06bEoOI~Fa7Uf_`~SfDh`q>*TLRH70KmLOPJPV8hx&5T{!qcSX7jsX|o
z7#stx!*epD6r{F`u0VfFPtN$`dHmZ&^JYiy-@K)lha<u5i4glAPV+C%b+10_iQHDp
zrbrWaH=WPjXp#*k$AgnWmW^*)eYHtAJS|jsRjF0;+4x~JA%>v}!EiUX)uLFg$K>X>
zbT-7LH{oJ+*r;-KTW*X)00Q?3?22(VI!db!`p;<p>AU+jU>*VxfB*y_009U<00I#B
zj|8II5!Za5UVEoUZNKwZ-#(&)00bZa0SG_<0uX=z1Rwwb2teQ;6i`}-k9$IV<4E8C
zyZ`^c9t-i4;|GW2c+K&GV~^u|yZ&q1g9ZTzKmY;|fB*y_009U<00IzrhyanZK2B+J
zJkA7>R#DAVFjMMpE5iH4?SSfFNKwd@H<~I{8d_LSs-mJAl%}-4AzjbU^^AD9OSE0M
z&By2DTyCCHOeqznOFfg_2v0gLQ@^xcx0Br?&+sew8{4`^p6Sk$v2mPrdl;ox=c;!q
zB1KEKGGrGNb3^gcQm9?JKVK;A3x8}(w^-jtcx$7yrFC+yOW7SY<?>!KpH%5qhBjsN
PwO6sQ)f(nz`MvlJSW~dR

literal 0
HcmV?d00001

diff --git a/tests/assets/test_config.yaml b/tests/assets/config/test_config.yaml
similarity index 100%
rename from tests/assets/test_config.yaml
rename to tests/assets/config/test_config.yaml
diff --git a/tests/assets/config/test_config_postgresql.yaml b/tests/assets/config/test_config_postgresql.yaml
new file mode 100644
index 0000000..dcf1794
--- /dev/null
+++ b/tests/assets/config/test_config_postgresql.yaml
@@ -0,0 +1,9 @@
+# example configuration for BiasAnalyzer
+
+root_omop_cdm_database:
+  database_type: postgresql   # set it to one of the two supported types: postgresql or duckdb
+  username: test_username
+  password: test_password
+  hostname: test_db_hostname
+  database: "postgresql"    # use a shared name for an in-memory duckdb or database name for postgresql
+  port: 5432
diff --git a/tests/assets/config/test_config_unsupported_db_type.yaml b/tests/assets/config/test_config_unsupported_db_type.yaml
new file mode 100644
index 0000000..5366b88
--- /dev/null
+++ b/tests/assets/config/test_config_unsupported_db_type.yaml
@@ -0,0 +1,9 @@
+# test configuration with a deliberately unsupported database_type
+
+root_omop_cdm_database:
+  database_type: unsupported_db_type   # intentionally not one of the supported types (postgresql, duckdb)
+  username: test_username
+  password: test_password
+  hostname: test_db_hostname
+  database: "unsupported_db"    # use a shared name for an in-memory duckdb or database name for postgresql
+  port: 5432
diff --git a/tests/assets/config/test_invalid_config.yaml b/tests/assets/config/test_invalid_config.yaml
new file mode 100644
index 0000000..36f139f
--- /dev/null
+++ b/tests/assets/config/test_invalid_config.yaml
@@ -0,0 +1,8 @@
+# deliberately invalid configuration used to test config validation errors
+
+root_omop_cdm_database:
+  invalid_database_type: duckdb   # intentionally misnamed key; the expected key is database_type
+  invalid_username: test_username
+  password: test_password
+  hostname: test_db_hostname
+  port: unsupported
diff --git a/tests/query_based/conftest.py b/tests/conftest.py
similarity index 97%
rename from tests/query_based/conftest.py
rename to tests/conftest.py
index 4cabcdb..88edef8 100644
--- a/tests/query_based/conftest.py
+++ b/tests/conftest.py
@@ -5,9 +5,17 @@
 import os
 
 
+@pytest.fixture
+def fresh_bias_obj():
+    """Yield a BIAS() created without a config file (for invalid-config tests); cleanup() runs on teardown."""
+    bias = BIAS()
+    yield bias
+    bias.cleanup()
+
+
 @pytest.fixture(scope="session")
 def test_db():
-    config_file = os.path.join(os.path.dirname(__file__), '..', 'assets', 'test_config.yaml')
+    config_file = os.path.join(os.path.dirname(__file__), 'assets', 'config', 'test_config.yaml')
     config = load_config(config_file)
     db_path = config['root_omop_cdm_database']['database']
     conn = duckdb.connect(db_path)
@@ -241,6 +249,7 @@ def test_db():
 
     # mock configuration file
     bias = BIAS()
+
     bias.set_config(config_file)
     bias.set_root_omop()
 
diff --git a/tests/test_biasanalyzer.py b/tests/test_biasanalyzer.py
deleted file mode 100644
index 9d8eab4..0000000
--- a/tests/test_biasanalyzer.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from biasanalyzer import __version__
-
-
-def test_version():
-    assert __version__ == '0.1.0'
diff --git a/tests/test_biasanalyzer_api.py b/tests/test_biasanalyzer_api.py
new file mode 100644
index 0000000..d9b4123
--- /dev/null
+++ b/tests/test_biasanalyzer_api.py
@@ -0,0 +1,38 @@
+import os
+import pytest
+from biasanalyzer import __version__
+import logging
+
+
+def test_version():
+    assert __version__ == '0.1.0'
+
+def test_set_config(caplog, fresh_bias_obj):
+    """set_config() logs an error for a missing file and for a file that fails validation."""
+    caplog.clear()
+    with caplog.at_level(logging.ERROR):
+        fresh_bias_obj.set_config('non_existent_config_file.yaml')
+    assert 'does not exist' in caplog.text
+
+    caplog.clear()
+    with caplog.at_level(logging.ERROR):
+        invalid_config_file = os.path.join(os.path.dirname(__file__), 'assets', 'config',
+                                           'test_invalid_config.yaml')
+        fresh_bias_obj.set_config(invalid_config_file)
+    assert 'is not valid' in caplog.text
+
+
+def test_set_root_omop(caplog, fresh_bias_obj):
+    """set_root_omop() reports a missing configuration and an unsupported database type."""
+    caplog.clear()
+    with caplog.at_level(logging.INFO):
+        fresh_bias_obj.set_root_omop()
+    assert 'no valid configuration' in caplog.text
+
+    caplog.clear()
+    with caplog.at_level(logging.INFO):
+        config_file_with_unsupported_db_type = os.path.join(os.path.dirname(__file__), 'assets', 'config',
+                                                            'test_config_unsupported_db_type.yaml')
+        fresh_bias_obj.set_config(config_file_with_unsupported_db_type)
+        fresh_bias_obj.set_root_omop()
+    assert 'Unsupported database type' in caplog.text
diff --git a/tests/test_config.py b/tests/test_config.py
index eb9aeb0..060b940 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1,13 +1,11 @@
 import os
 
-from numpy.ma.testutils import assert_equal
-
 from biasanalyzer.config import load_config, load_cohort_creation_config
 
 
 def test_load_config():
     try:
-        config = load_config(os.path.join(os.path.dirname(__file__), 'assets', 'test_config.yaml'))
+        config = load_config(os.path.join(os.path.dirname(__file__), 'assets', 'config', 'test_config.yaml'))
     except Exception as e:
         assert False, f"load_config() raised an exception: {e}"
 
@@ -45,14 +43,3 @@ def test_load_cohort_creation_config():
     in_events = config.get('inclusion_criteria')['temporal_events']
     assert 'operator' in in_events[0]
     assert 'events' in in_events[0]
-
-    # ex_events = config.get('exclusion_criteria')['temporal_events']
-    # ex_demographics = config.get('exclusion_criteria').get('demographics')
-    # assert 'operator' in ex_events[0]
-    # assert 'events' in ex_events[0]
-    # assert 'event_type' in ex_events[0]['events'][0]
-    # assert_equal(ex_events[0]['events'][0]['event_type'], 'condition_occurrence',
-    #              'exclusion event type is not condition_occurrence')
-    # assert 'min_birth_year' in ex_demographics
-    # assert 'gender' not in ex_demographics
-    # assert 'max_birth_year' not in ex_demographics

From 4083a7cf0b4712e1774591aca4bfb5d89a102720 Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Fri, 6 Jun 2025 11:26:40 -0400
Subject: [PATCH 02/12] get mock postgresql tests working

---
 .../config/.test_config_postgresql.yaml.swp   | Bin 12288 -> 0 bytes
 .../assets/config/test_config_postgresql.yaml |   9 ---
 tests/test_biasanalyzer_api.py                |  52 +++++++++++++++++-
 3 files changed, 49 insertions(+), 12 deletions(-)
 delete mode 100644 tests/assets/config/.test_config_postgresql.yaml.swp
 delete mode 100644 tests/assets/config/test_config_postgresql.yaml

diff --git a/tests/assets/config/.test_config_postgresql.yaml.swp b/tests/assets/config/.test_config_postgresql.yaml.swp
deleted file mode 100644
index 5feb484dcde0cc44c29cdbe4c06e5f0b1503c846..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 12288
zcmeI&O>Pr06bEoOI~Fa7Uf_`~SfDh`q>*TLRH70KmLOPJPV8hx&5T{!qcSX7jsX|o
z7#stx!*epD6r{F`u0VfFPtN$`dHmZ&^JYiy-@K)lha<u5i4glAPV+C%b+10_iQHDp
zrbrWaH=WPjXp#*k$AgnWmW^*)eYHtAJS|jsRjF0;+4x~JA%>v}!EiUX)uLFg$K>X>
zbT-7LH{oJ+*r;-KTW*X)00Q?3?22(VI!db!`p;<p>AU+jU>*VxfB*y_009U<00I#B
zj|8II5!Za5UVEoUZNKwZ-#(&)00bZa0SG_<0uX=z1Rwwb2teQ;6i`}-k9$IV<4E8C
zyZ`^c9t-i4;|GW2c+K&GV~^u|yZ&q1g9ZTzKmY;|fB*y_009U<00IzrhyanZK2B+J
zJkA7>R#DAVFjMMpE5iH4?SSfFNKwd@H<~I{8d_LSs-mJAl%}-4AzjbU^^AD9OSE0M
z&By2DTyCCHOeqznOFfg_2v0gLQ@^xcx0Br?&+sew8{4`^p6Sk$v2mPrdl;ox=c;!q
zB1KEKGGrGNb3^gcQm9?JKVK;A3x8}(w^-jtcx$7yrFC+yOW7SY<?>!KpH%5qhBjsN
PwO6sQ)f(nz`MvlJSW~dR

diff --git a/tests/assets/config/test_config_postgresql.yaml b/tests/assets/config/test_config_postgresql.yaml
deleted file mode 100644
index dcf1794..0000000
--- a/tests/assets/config/test_config_postgresql.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-# example configuration for BiasAnalyzer
-
-root_omop_cdm_database:
-  database_type: postgresql   # set it to one of the two supported types: postgresql or duckdb
-  username: test_username
-  password: test_password
-  hostname: test_db_hostname
-  database: "postgresql"    # use a shared name for an in-memory duckdb or database name for postgresql
-  port: 5432
diff --git a/tests/test_biasanalyzer_api.py b/tests/test_biasanalyzer_api.py
index d9b4123..08792cb 100644
--- a/tests/test_biasanalyzer_api.py
+++ b/tests/test_biasanalyzer_api.py
@@ -1,7 +1,8 @@
 import os
 import pytest
-from biasanalyzer import __version__
 import logging
+from unittest.mock import patch
+from biasanalyzer import __version__
 
 
 def test_version():
@@ -21,9 +22,8 @@ def test_set_config(caplog, fresh_bias_obj):
         fresh_bias_obj.set_config(invalid_config_file)
     assert 'is not valid' in caplog.text
 
-
 @pytest.mark.usefixtures
-def test_set_root_omop(caplog, fresh_bias_obj):
+def test_set_root_omop(monkeypatch, caplog, fresh_bias_obj):
     caplog.clear()
     with caplog.at_level(logging.INFO):
         fresh_bias_obj.set_root_omop()
@@ -36,3 +36,49 @@ def test_set_root_omop(caplog, fresh_bias_obj):
         fresh_bias_obj.set_config(config_file_with_unsupported_db_type)
         fresh_bias_obj.set_root_omop()
     assert 'Unsupported database type' in caplog.text
+
+    # Create a fake postgresql config
+    config = {
+        "root_omop_cdm_database": {
+            "database_type": "postgresql",
+            "username": "testuser",
+            "password": "testpass",
+            "hostname": "localhost",
+            "port": 5432,
+            "database": "testdb"
+        }
+    }
+
+    # Patch the config parser to return this directly instead of reading a file
+    monkeypatch.setattr(fresh_bias_obj, "set_config", lambda x: setattr(fresh_bias_obj, "config", config))
+
+    # Patch OMOPCDMDatabase to avoid real DB connection
+    class MockOMOPCDMDatabase:
+        def __init__(self, db_url):
+            self.db_url = db_url
+        def close(self):
+            pass
+
+    monkeypatch.setattr("biasanalyzer.api.OMOPCDMDatabase", MockOMOPCDMDatabase)
+
+    # Patch BiasDatabase with a mock that exposes only the members used here
+    class MockBiasDatabase:
+        def __init__(self, path):
+            self.omop_cdm_db_url = None
+
+        def load_postgres_extension(self):
+            pass
+
+        def close(self):
+            pass
+
+    monkeypatch.setattr("biasanalyzer.api.BiasDatabase", MockBiasDatabase)
+
+    # Run
+    fresh_bias_obj.set_config("dummy.yaml")  # This will now inject the mocked config
+    fresh_bias_obj.set_root_omop()
+
+    # Check values
+    assert fresh_bias_obj.omop_cdm_db.db_url == "postgresql://testuser:testpass@localhost:5432/testdb"
+    assert fresh_bias_obj.bias_db is not None
+    assert fresh_bias_obj.bias_db.omop_cdm_db_url == "postgresql://testuser:testpass@localhost:5432/testdb"

From 645b9e15720dc3096a351ad0a3692f632102f23a Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Fri, 6 Jun 2025 15:00:40 -0400
Subject: [PATCH 03/12] changed BIAS() from Singleton class to normal class and
 added more tests

---
 biasanalyzer/api.py            | 16 ++++++---------
 tests/conftest.py              | 23 +++++++++++-----------
 tests/test_biasanalyzer_api.py | 36 ++++++++++++++++++++++++++++++----
 3 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/biasanalyzer/api.py b/biasanalyzer/api.py
index a79cb25..a9c4d2c 100644
--- a/biasanalyzer/api.py
+++ b/biasanalyzer/api.py
@@ -10,19 +10,14 @@
 
 
 class BIAS:
-    _instance = None
-
-    def __init__(self):
-        self.config = {}
+    def __init__(self, config_file_path=None):
         self.bias_db = None
         self.omop_cdm_db = None
         self.cohort_action = None
-
-    def __new__(cls, config_file_path=None):
-        if cls._instance is None:
-            cls._instance = super(BIAS, cls).__new__(cls)
-            cls._instance.set_config(config_file_path)
-        return cls._instance
+        if config_file_path is None:
+            self.config = {}
+        else:
+            self.set_config(config_file_path)
 
     def set_config(self, config_file_path: str):
         if config_file_path is None:
@@ -77,6 +72,7 @@ def _set_cohort_action(self):
         return self.cohort_action
 
     def get_domains_and_vocabularies(self):
+        print(f'self.omop_cdm_db: {self.omop_cdm_db}')
         if self.omop_cdm_db is None:
             notify_users('A valid OMOP CDM must be set before getting domains. '
                          'Call set_root_omop first to set a valid root OMOP CDM')
diff --git a/tests/conftest.py b/tests/conftest.py
index 88edef8..33aa222 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -33,7 +33,8 @@ def test_db():
                 concept_id INTEGER PRIMARY KEY,
                 concept_name TEXT,
                 concept_code TEXT,
-                vocabulary_id TEXT
+                vocabulary_id TEXT,
+                domain_id TEXT
             );
         """)
     conn.execute("""
@@ -127,17 +128,17 @@ def test_db():
     result = conn.execute("SELECT COUNT(*) FROM concept").fetchone()
     if result[0] == 0:
         conn.execute("""
-                INSERT INTO concept (concept_id, concept_name, concept_code, vocabulary_id)
+                INSERT INTO concept (concept_id, concept_name, concept_code, vocabulary_id, domain_id)
                 VALUES
-                    (4274025, 'Disease', '64572001', 'SNOMED'), 
-                    (1, 'Diabetes Mellitus', 'E10-E14', 'ICD10CM'), 
-                    (2, 'Type 1 Diabetes Mellitus', 'E10', 'ICD10CM'),
-                    (3, 'Type 2 Diabetes Mellitus', 'E11', 'ICD10CM'), 
-                    (4, 'Diabetic Retinopathy', 'E10.3/E11.3', 'ICD10CM'), 
-                    (5, 'Fever', 'R50.9', 'ICD10CM'),
-                    (37311061, 'COVID-19', '840539006', 'SNOMED'),
-                    (4041664, 'Difficulty breathing', '230145002', 'SNOMED'),
-                    (316139, 'Heart failure', '84114007', 'SNOMED');
+                    (4274025, 'Disease', '64572001', 'SNOMED', 'Condition'), 
+                    (1, 'Diabetes Mellitus', 'E10-E14', 'ICD10CM', 'Condition'), 
+                    (2, 'Type 1 Diabetes Mellitus', 'E10', 'ICD10CM', 'Condition'),
+                    (3, 'Type 2 Diabetes Mellitus', 'E11', 'ICD10CM', 'Condition'), 
+                    (4, 'Diabetic Retinopathy', 'E10.3/E11.3', 'ICD10CM', 'Condition'), 
+                    (5, 'Fever', 'R50.9', 'ICD10CM', 'Condition'),
+                    (37311061, 'COVID-19', '840539006', 'SNOMED', 'Condition'),
+                    (4041664, 'Difficulty breathing', '230145002', 'SNOMED', 'Condition'),
+                    (316139, 'Heart failure', '84114007', 'SNOMED', 'Condition');
             """)
 
     # Insert hierarchical relationships as needed
diff --git a/tests/test_biasanalyzer_api.py b/tests/test_biasanalyzer_api.py
index 08792cb..22b78a7 100644
--- a/tests/test_biasanalyzer_api.py
+++ b/tests/test_biasanalyzer_api.py
@@ -1,14 +1,12 @@
 import os
 import pytest
 import logging
-from unittest.mock import patch
 from biasanalyzer import __version__
 
 
 def test_version():
     assert __version__ == '0.1.0'
 
-@pytest.mark.usefixtures
 def test_set_config(caplog, fresh_bias_obj):
     caplog.clear()
     with caplog.at_level(logging.ERROR):
@@ -22,7 +20,6 @@ def test_set_config(caplog, fresh_bias_obj):
         fresh_bias_obj.set_config(invalid_config_file)
     assert 'is not valid' in caplog.text
 
-@pytest.mark.usefixtures
 def test_set_root_omop(monkeypatch, caplog, fresh_bias_obj):
     caplog.clear()
     with caplog.at_level(logging.INFO):
@@ -50,7 +47,7 @@ def test_set_root_omop(monkeypatch, caplog, fresh_bias_obj):
     }
 
     # Patch the config parser to return this directly instead of reading a file
-    monkeypatch.setattr(fresh_bias_obj, "set_config", lambda x: setattr(fresh_bias_obj, "config", config))
+    monkeypatch.setattr(fresh_bias_obj, "config", config)
 
     # Patch OMOPCDMDatabase to avoid real DB connection
     class MockOMOPCDMDatabase:
@@ -82,3 +79,34 @@ def close(self):
     assert fresh_bias_obj.omop_cdm_db.db_url == "postgresql://testuser:testpass@localhost:5432/testdb"
     assert fresh_bias_obj.bias_db is not None
     assert fresh_bias_obj.bias_db.omop_cdm_db_url == "postgresql://testuser:testpass@localhost:5432/testdb"
+
+def test_set_cohort_action(caplog, fresh_bias_obj):
+    caplog.clear()
+    with caplog.at_level(logging.INFO):
+        fresh_bias_obj._set_cohort_action()
+    assert 'valid OMOP CDM must be set' in caplog.text
+
+def test_get_domains_and_vocabularies_invalid(caplog, fresh_bias_obj):
+    caplog.clear()
+    with caplog.at_level(logging.INFO):
+        fresh_bias_obj.get_domains_and_vocabularies()
+    assert 'valid OMOP CDM must be set' in caplog.text
+
+def test_get_domains_and_vocabularies(test_db):
+    domains_and_vocabularies = test_db.get_domains_and_vocabularies()
+    print(f'domains_and_vocabs: {domains_and_vocabularies}', flush=True)
+    expected = [{'domain_id': 'Condition', 'vocabulary_id': 'ICD10CM'},
+                {'domain_id': 'Condition', 'vocabulary_id': 'SNOMED'}]
+    assert domains_and_vocabularies == expected
+
+def test_get_concepts(caplog, fresh_bias_obj):
+    caplog.clear()
+    with caplog.at_level(logging.INFO):
+        fresh_bias_obj.get_concepts('dummy')
+    assert 'valid OMOP CDM must be set' in caplog.text
+
+def test_get_concept_hierarchy(caplog, fresh_bias_obj):
+    caplog.clear()
+    with caplog.at_level(logging.INFO):
+        fresh_bias_obj.get_concept_hierarchy('dummy')
+    assert 'valid OMOP CDM must be set' in caplog.text

From 07dd5aa48e04a0f387d0cfd8eaf9674f31f3b8ab Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Fri, 6 Jun 2025 18:07:04 -0400
Subject: [PATCH 04/12] added more tests

---
 biasanalyzer/api.py            |   2 +-
 biasanalyzer/database.py       | 116 +++++++++++++++++++++++----------
 tests/conftest.py              |  27 ++++----
 tests/test_biasanalyzer_api.py |  44 +++++++++++--
 4 files changed, 137 insertions(+), 52 deletions(-)

diff --git a/biasanalyzer/api.py b/biasanalyzer/api.py
index a9c4d2c..ade0e32 100644
--- a/biasanalyzer/api.py
+++ b/biasanalyzer/api.py
@@ -20,7 +20,7 @@ def __init__(self, config_file_path=None):
             self.set_config(config_file_path)
 
     def set_config(self, config_file_path: str):
-        if config_file_path is None:
+        if not config_file_path:
             notify_users('no configuration file specified. '
                          'Call set_config(config_file_path) next to specify configurations')
         else:
diff --git a/biasanalyzer/database.py b/biasanalyzer/database.py
index 3f36158..ccdfb72 100644
--- a/biasanalyzer/database.py
+++ b/biasanalyzer/database.py
@@ -372,45 +372,90 @@ def get_domains_and_vocabularies(self) -> list:
         return self.execute_query(query)
 
     def get_concepts(self, search_term: str, domain: Optional[str], vocab: Optional[str]) -> list:
-        # find a concept ID based on a search term
         search_term_exact = search_term.lower()
         search_term_suffix = f'{search_term_exact} '
         search_term_prefix = f' {search_term_exact}'
         search_term_prefix_suffix = f' {search_term_exact} '
-        param_set = {
-            "search_term_exact": search_term_exact,
-            "search_term_prefix": search_term_prefix,
-            "search_term_suffix": search_term_suffix,
-            "search_term_prefix_suffix": search_term_prefix_suffix
-        }
-        if domain is not None and vocab is not None:
-            condition_str = "domain_id = :domain and vocabulary_id = :vocabulary"
-            param_set['domain'] = domain
-            param_set['vocabulary'] = vocab
-        elif domain is None:
-            condition_str = "vocabulary_id = :vocabulary"
-            param_set['vocabulary'] = vocab
-        else:
-            # vocab is None
-            condition_str = "domain_id = :domain"
-            param_set['domain'] = domain
 
-        query = f"""
-        SELECT concept_id, concept_name, valid_start_date, valid_end_date, domain_id, vocabulary_id FROM concept 
-        where {condition_str} and 
-        (LOWER(concept_name) = :search_term_exact or LOWER(concept_name) LIKE '%' || :search_term_prefix
-        or LOWER(concept_name) LIKE :search_term_suffix || '%'
-        or LOWER(concept_name) LIKE '%' || :search_term_prefix_suffix || '%')
-        ORDER BY concept_id
-        """
+        if self._database_type == 'duckdb':
+            # Use positional parameters and ? as placeholder to meet duckdb syntax requirement
+            base_query = """
+                         SELECT concept_id, concept_name, valid_start_date, valid_end_date, domain_id, vocabulary_id \
+                         FROM concept
+                         WHERE {condition_str} \
+                           AND (
+                             LOWER (concept_name) = ? \
+                            OR
+                             LOWER (concept_name) LIKE '%' || ? \
+                            OR
+                             LOWER (concept_name) LIKE ? || '%' \
+                            OR
+                             LOWER (concept_name) LIKE '%' || ? || '%'
+                             )
+                         ORDER BY concept_id \
+                         """
+
+            if domain is not None and vocab is not None:
+                condition_str = "domain_id = ? AND vocabulary_id = ?"
+                params = [domain, vocab, search_term_exact, search_term_prefix, search_term_suffix,
+                              search_term_prefix_suffix]
+            elif domain is None:
+                condition_str = "vocabulary_id = ?"
+                params = [vocab, search_term_exact, search_term_prefix, search_term_suffix,
+                              search_term_prefix_suffix]
+            else:
+                condition_str = "domain_id = ?"
+                params = [domain, search_term_exact, search_term_prefix, search_term_suffix,
+                              search_term_prefix_suffix]
 
-        return self.execute_query(query, params=param_set)
+        else:
+            # Use named parameters with :param_name syntax for SQLAlchemy/PostgreSQL
+            base_query = """
+                         SELECT concept_id, concept_name, valid_start_date, valid_end_date, domain_id, vocabulary_id \
+                         FROM concept
+                         WHERE {condition_str} \
+                           AND (
+                             LOWER (concept_name) = :search_term_exact \
+                            OR
+                             LOWER (concept_name) LIKE '%' || :search_term_prefix \
+                            OR
+                             LOWER (concept_name) LIKE :search_term_suffix || '%' \
+                            OR
+                             LOWER (concept_name) LIKE '%' || :search_term_prefix_suffix || '%'
+                             )
+                         ORDER BY concept_id \
+                         """
+
+            params = {
+                "search_term_exact": search_term_exact,
+                "search_term_prefix": search_term_prefix,
+                "search_term_suffix": search_term_suffix,
+                "search_term_prefix_suffix": search_term_prefix_suffix
+            }
+
+            if domain is not None and vocab is not None:
+                condition_str = "domain_id = :domain AND vocabulary_id = :vocabulary"
+                params['domain'] = domain
+                params['vocabulary'] = vocab
+            elif domain is None:
+                condition_str = "vocabulary_id = :vocabulary"
+                params['vocabulary'] = vocab
+            else:
+                condition_str = "domain_id = :domain"
+                params['domain'] = domain
+
+        query = base_query.format(condition_str=condition_str)
+        return self.execute_query(query, params=params)
 
     def get_concept_hierarchy(self, concept_id: int):
         """
         Retrieves the full concept hierarchy (ancestors and descendants) for a given concept_id
         and organizes it into a nested dictionary to represent the tree structure.
         """
+        if not isinstance(concept_id, int):
+            # concept_id is interpolated directly into the SQL text below, so reject non-ints to avoid SQL injection
+            raise ValueError("concept_id must be an integer")
+
         stages = [
             "Queried concept hierarchy",
             "Fetched concept details",
@@ -419,11 +464,12 @@ def get_concept_hierarchy(self, concept_id: int):
         progress = tqdm(total=len(stages), desc="Concept Hierarchy", unit="stage")
 
         progress.set_postfix_str(stages[0])
-        query = """
+        # Inline the concept_id directly into the query
+        query = f"""
                 WITH RECURSIVE concept_hierarchy AS (
                     SELECT ancestor_concept_id, descendant_concept_id, min_levels_of_separation
                     FROM concept_ancestor
-                    WHERE ancestor_concept_id = :concept_id OR descendant_concept_id = :concept_id
+                    WHERE ancestor_concept_id = {concept_id} OR descendant_concept_id = {concept_id}
 
                     UNION
 
@@ -434,9 +480,9 @@ def get_concept_hierarchy(self, concept_id: int):
                 SELECT ancestor_concept_id, descendant_concept_id
                 FROM concept_hierarchy
                 WHERE min_levels_of_separation > 0
-                """
+            """
+        results = self.execute_query(query)
 
-        results = self.execute_query(query, params={"concept_id": concept_id})
         progress.update(1)
 
         progress.set_postfix_str(stages[1])
@@ -445,13 +491,15 @@ def get_concept_hierarchy(self, concept_id: int):
         # Fetch details of each concept
         concept_details = {}
         if concept_ids:
-            query = """
+            # Convert set of integers to comma-separated string
+            concept_ids_str = ", ".join(str(cid) for cid in concept_ids)
+            query = f"""
                     SELECT concept_id, concept_name, vocabulary_id, concept_code
                     FROM concept
-                    WHERE concept_id IN :concept_ids
+                    WHERE concept_id IN ({concept_ids_str})
                     """
 
-            result = self.execute_query(query, params={"concept_ids": tuple(concept_ids)})
+            result = self.execute_query(query)
             concept_details = {row['concept_id']: row for row in result}
         progress.update(1)
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 33aa222..4fc856c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -32,6 +32,8 @@ def test_db():
             CREATE TABLE IF NOT EXISTS concept (
                 concept_id INTEGER PRIMARY KEY,
                 concept_name TEXT,
+                valid_start_date DATE, 
+                valid_end_date DATE,
                 concept_code TEXT,
                 vocabulary_id TEXT,
                 domain_id TEXT
@@ -128,17 +130,18 @@ def test_db():
     result = conn.execute("SELECT COUNT(*) FROM concept").fetchone()
     if result[0] == 0:
         conn.execute("""
-                INSERT INTO concept (concept_id, concept_name, concept_code, vocabulary_id, domain_id)
+                INSERT INTO concept (concept_id, concept_name, valid_start_date, valid_end_date, concept_code, 
+                                     vocabulary_id, domain_id)
                 VALUES
-                    (4274025, 'Disease', '64572001', 'SNOMED', 'Condition'), 
-                    (1, 'Diabetes Mellitus', 'E10-E14', 'ICD10CM', 'Condition'), 
-                    (2, 'Type 1 Diabetes Mellitus', 'E10', 'ICD10CM', 'Condition'),
-                    (3, 'Type 2 Diabetes Mellitus', 'E11', 'ICD10CM', 'Condition'), 
-                    (4, 'Diabetic Retinopathy', 'E10.3/E11.3', 'ICD10CM', 'Condition'), 
-                    (5, 'Fever', 'R50.9', 'ICD10CM', 'Condition'),
-                    (37311061, 'COVID-19', '840539006', 'SNOMED', 'Condition'),
-                    (4041664, 'Difficulty breathing', '230145002', 'SNOMED', 'Condition'),
-                    (316139, 'Heart failure', '84114007', 'SNOMED', 'Condition');
+                    (4274025, 'Disease', '2012-04-01', '2020-04-01', '64572001', 'SNOMED', 'Condition'), 
+                    (1, 'Diabetes Mellitus', '2012-04-01', '2020-04-01', 'E10-E14', 'ICD10CM', 'Condition'), 
+                    (2, 'Type 1 Diabetes Mellitus', '2012-04-01', '2020-04-01', 'E10', 'ICD10CM', 'Condition'),
+                    (3, 'Type 2 Diabetes Mellitus', '2012-04-01', '2020-04-01', 'E11', 'ICD10CM', 'Condition'), 
+                    (4, 'Diabetic Retinopathy', '2012-04-01', '2020-04-01', 'E10.3/E11.3', 'ICD10CM', 'Condition'), 
+                    (5, 'Fever', '2012-04-01', '2020-04-01', 'R50.9', 'ICD10CM', 'Condition'),
+                    (37311061, 'COVID-19', '2012-04-01', '2020-04-01', '840539006', 'SNOMED', 'Condition'),
+                    (4041664, 'Difficulty breathing', '2012-04-01', '2020-04-01', '230145002', 'SNOMED', 'Condition'),
+                    (316139, 'Heart failure', '2012-04-01', '2020-04-01', '84114007', 'SNOMED', 'Condition');
             """)
 
     # Insert hierarchical relationships as needed
@@ -249,9 +252,7 @@ def test_db():
 
 
     # mock configuration file
-    bias = BIAS()
-
-    bias.set_config(config_file)
+    bias = BIAS(config_file_path=config_file)
     bias.set_root_omop()
 
     yield bias  # Provide the connection to the test
diff --git a/tests/test_biasanalyzer_api.py b/tests/test_biasanalyzer_api.py
index 22b78a7..fc594f7 100644
--- a/tests/test_biasanalyzer_api.py
+++ b/tests/test_biasanalyzer_api.py
@@ -1,6 +1,8 @@
 import os
-import pytest
+import datetime
 import logging
+import pytest
+
 from biasanalyzer import __version__
 
 
@@ -8,6 +10,11 @@ def test_version():
     assert __version__ == '0.1.0'
 
 def test_set_config(caplog, fresh_bias_obj):
+    caplog.clear()
+    with caplog.at_level(logging.INFO):
+        fresh_bias_obj.set_config('')
+    assert 'no configuration file specified' in caplog.text
+
     caplog.clear()
     with caplog.at_level(logging.ERROR):
         fresh_bias_obj.set_config('non_existent_config_file.yaml')
@@ -94,19 +101,48 @@ def test_get_domains_and_vocabularies_invalid(caplog, fresh_bias_obj):
 
 def test_get_domains_and_vocabularies(test_db):
     domains_and_vocabularies = test_db.get_domains_and_vocabularies()
-    print(f'domains_and_vocabs: {domains_and_vocabularies}', flush=True)
     expected = [{'domain_id': 'Condition', 'vocabulary_id': 'ICD10CM'},
                 {'domain_id': 'Condition', 'vocabulary_id': 'SNOMED'}]
     assert domains_and_vocabularies == expected
 
-def test_get_concepts(caplog, fresh_bias_obj):
+def test_get_concepts_no_omop_cdm(caplog, fresh_bias_obj):
     caplog.clear()
     with caplog.at_level(logging.INFO):
         fresh_bias_obj.get_concepts('dummy')
     assert 'valid OMOP CDM must be set' in caplog.text
 
-def test_get_concept_hierarchy(caplog, fresh_bias_obj):
+def test_get_concepts_no_domain_and_vocab(caplog, test_db):
+    caplog.clear()
+    with caplog.at_level(logging.INFO):
+        test_db.get_concepts('dummy')
+    assert 'either domain or vocabulary must be set' in caplog.text
+
+def test_get_concepts(test_db):
+    concepts = test_db.get_concepts('Heart failure', domain='Condition', vocabulary='SNOMED')
+    print(f'concepts: {concepts}', flush=True)
+    expected = [{'concept_id': 316139, 'concept_name': 'Heart failure',
+                 'valid_start_date': datetime.date(2012, 4, 1),
+                 'valid_end_date': datetime.date(2020, 4, 1),
+                 'domain_id': 'Condition', 'vocabulary_id': 'SNOMED'}]
+    assert concepts == expected
+
+def test_get_concept_hierarchy_no_omop_cdm(caplog, fresh_bias_obj):
     caplog.clear()
     with caplog.at_level(logging.INFO):
         fresh_bias_obj.get_concept_hierarchy('dummy')
     assert 'valid OMOP CDM must be set' in caplog.text
+
+def test_get_concept_hierarchy(test_db):
+    with pytest.raises(ValueError):
+        test_db.get_concept_hierarchy('not_int_str')
+        
+    hierarchy = test_db.get_concept_hierarchy(2)
+    print(f'hierarchy: {hierarchy}', flush=True)
+    expected = ({'details': {'concept_id': 2, 'concept_name': 'Type 1 Diabetes Mellitus', 'vocabulary_id': 'ICD10CM',
+                             'concept_code': 'E10'}, 'parents': [{'details': {'concept_id': 1, 'concept_name':
+        'Diabetes Mellitus', 'vocabulary_id': 'ICD10CM', 'concept_code': 'E10-E14'}, 'parents': []}]},
+                {'details': {'concept_id': 2, 'concept_name': 'Type 1 Diabetes Mellitus', 'vocabulary_id': 'ICD10CM',
+                             'concept_code': 'E10'}, 'children': [{'details': {'concept_id': 4, 'concept_name':
+                    'Diabetic Retinopathy', 'vocabulary_id': 'ICD10CM', 'concept_code': 'E10.3/E11.3'},
+                                                                   'children': []}]})
+    assert hierarchy == expected

From 2b61ae89518a84dde53246b2589a756c8832268d Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Fri, 6 Jun 2025 22:58:23 -0400
Subject: [PATCH 05/12] added more tests

---
 biasanalyzer/database.py                  | 50 +++++++++++------------
 biasanalyzer/sql.py                       |  2 +
 tests/query_based/test_cohort_creation.py | 45 ++++++++++----------
 tests/test_biasanalyzer_api.py            | 14 ++++++-
 4 files changed, 64 insertions(+), 47 deletions(-)

diff --git a/biasanalyzer/database.py b/biasanalyzer/database.py
index ccdfb72..5ed6749 100644
--- a/biasanalyzer/database.py
+++ b/biasanalyzer/database.py
@@ -8,7 +8,7 @@
 from sqlalchemy import create_engine, text
 from biasanalyzer.models import Cohort, CohortDefinition
 from biasanalyzer.sql import *
-from biasanalyzer.utils import build_concept_hierarchy, print_hierarchy, find_roots
+from biasanalyzer.utils import build_concept_hierarchy, print_hierarchy, find_roots, notify_users
 
 
 class BiasDatabase:
@@ -51,7 +51,7 @@ def _create_cohort_definition_table(self):
             self.conn.execute('CREATE SEQUENCE id_sequence START 1')
         except duckdb.Error as e:
             if "already exists" in str(e).lower():
-                print("Sequence already exists, skipping creation.")
+                notify_users("Sequence already exists, skipping creation.")
             else:
                 raise
         self.conn.execute('''
@@ -65,7 +65,7 @@ def _create_cohort_definition_table(self):
                       PRIMARY KEY (id)
                       )
                 ''')
-        print("Cohort Definition table created.")
+        notify_users("Cohort Definition table created.")
 
     def _create_cohort_table(self):
         self.conn.execute('''
@@ -83,10 +83,10 @@ def _create_cohort_table(self):
             ''')
         except duckdb.Error as e:
             if "already exists" in str(e).lower():
-                print("Index already exists, skipping creation.")
+                notify_users("Index already exists, skipping creation.")
             else:
                 raise
-        print("Cohort table created.")
+        notify_users("Cohort table created.")
 
     def load_postgres_extension(self):
         self.conn.execute("INSTALL postgres_scanner;")
@@ -104,7 +104,7 @@ def create_cohort_definition(self, cohort_definition: CohortDefinition, progress
             cohort_definition.created_by
         ))
         if progress_obj is None:
-            print("Cohort definition inserted successfully.")
+            notify_users("Cohort definition inserted successfully.")
         else:
             progress_obj.write("Cohort definition inserted successfully.")
         self.conn.execute("SELECT id from cohort_definition ORDER BY id DESC LIMIT 1")
@@ -194,7 +194,7 @@ def get_cohort_basic_stats(self, cohort_definition_id: int, variable=''):
                                          f"Valid variables are {self.__class__.stats_queries.keys()}")
                     stats_query = query_str.format(cohort_definition_id)
                 else:
-                    print("Cannot connect to the OMOP database to query person table")
+                    notify_users("Cannot connect to the OMOP database to query person table")
                     return None
             else:
                 # Query the cohort data to get basic statistics
@@ -225,7 +225,7 @@ def get_cohort_basic_stats(self, cohort_definition_id: int, variable=''):
             return self._execute_query(stats_query)
 
         except Exception as e:
-            print(f"Error computing cohort basic statistics: {e}")
+            notify_users(f"Error computing cohort basic statistics: {e}", level='error')
             return None
 
     @property
@@ -239,16 +239,16 @@ def get_cohort_distributions(self, cohort_definition_id: int, variable: str):
         try:
             if self._create_omop_table('person'):
                 query_str = self.__class__.distribution_queries.get(variable)
-                if query_str is None:
+                if not query_str:
                     raise ValueError(f"Distribution for variable '{variable}' is not available. "
                                      f"Valid variables are {self.__class__.distribution_queries.keys()}")
                 query = query_str.format(cohort_definition_id)
                 return self._execute_query(query)
             else:
-                print("Cannot connect to the OMOP database to query person table")
+                notify_users("Cannot connect to the OMOP database to query person table")
                 return None
         except Exception as e:
-            print(f"Error computing cohort {variable} distributions: {e}")
+            notify_users(f"Error computing cohort {variable} distributions: {e}", level='error')
             return None
 
     def get_cohort_concept_stats(self, cohort_definition_id: int,
@@ -259,8 +259,8 @@ def get_cohort_concept_stats(self, cohort_definition_id: int,
         """
         concept_stats = {}
         if concept_type not in self.__class__.cohort_concept_queries:
-            print(f"input {concept_type} is not a valid concept type. "
-                  f"Supported concept types are: {self.__class__.cohort_concept_queries.keys()}")
+            notify_users(f"input {concept_type} is not a valid concept type. "
+                         f"Supported concept types are: {self.__class__.cohort_concept_queries.keys()}", level='error')
             return concept_stats
         try:
             if self._create_omop_table('concept') and self._create_omop_table('concept_ancestor'):
@@ -279,26 +279,26 @@ def get_cohort_concept_stats(self, cohort_definition_id: int,
                     filtered_cs_df = cs_df[cs_df['ancestor_concept_id'] != cs_df['descendant_concept_id']]
                     roots = find_roots(filtered_cs_df)
                     hierarchy = build_concept_hierarchy(filtered_cs_df)
-                    print(f'cohort concept hierarchy for {concept_type} with root concept ids {roots}:')
+                    notify_users(f'cohort concept hierarchy for {concept_type} with root concept ids {roots}:')
                     for root in roots:
                         root_detail = cs_df[(cs_df['ancestor_concept_id'] == root)
                                   & (cs_df['descendant_concept_id'] == root)]['details'].iloc[0]
                         print_hierarchy(hierarchy, parent=root, level=0, parent_details=root_detail)
                     return concept_stats
                 else:
-                    print(f"Cannot connect to the OMOP database to query {concept_type} table")
+                    notify_users(f"Cannot connect to the OMOP database to query {concept_type} table")
                     return concept_stats
             else:
-                print("Cannot connect to the OMOP database to query concept table")
+                notify_users("Cannot connect to the OMOP database to query concept table")
                 return concept_stats
         except Exception as e:
-            print(f"Error computing cohort concept stats: {e}")
+            notify_users(f"Error computing cohort concept stats: {e}", level='error')
             return concept_stats
 
     def close(self):
         self.conn.close()
         BiasDatabase._instance = None
-        print("Connection to BiasDatabase closed.")
+        notify_users("Connection to BiasDatabase closed.")
 
 
 class OMOPCDMDatabase:
@@ -315,9 +315,9 @@ def _initialize(self, db_url):
             # Handle DuckDB connection
             try:
                 self.engine = duckdb.connect(db_url)
-                print(f"Connected to the DuckDB database: {db_url}.")
+                notify_users(f"Connected to the DuckDB database: {db_url}.")
             except duckdb.Error as e:
-                print(f"Failed to connect to DuckDB: {e}")
+                notify_users(f"Failed to connect to DuckDB: {e}", level='error')
             self.Session = self.engine  # Use engine directly for DuckDB
             self._database_type = 'duckdb'
         try:
@@ -327,10 +327,10 @@ def _initialize(self, db_url):
                 connect_args={'options': '-c default_transaction_read_only=on'}  # Enforce read-only transactions
             )
             self.Session = sessionmaker(bind=self.engine)
-            print("Connected to the OMOP CDM database (read-only).")
+            notify_users("Connected to the OMOP CDM database (read-only).")
             self._database_type = 'postgresql'
         except SQLAlchemyError as e:
-            print(f"Failed to connect to the database: {e}")
+            notify_users(f"Failed to connect to the database: {e}", level='error')
 
     def get_session(self):
         if self._database_type == 'duckdb':
@@ -357,10 +357,10 @@ def execute_query(self, query, params=None):
             return [dict(zip(headers, row)) for row in results]
 
         except duckdb.Error as e:
-            print(f"Error executing query: {e}")
+            notify_users(f"Error executing query: {e}", level='error')
             return []
         except SQLAlchemyError as e:
-            print(f"Error executing query: {e}")
+            notify_users(f"Error executing query: {e}", level='error')
             omop_session.close()
             return []
 
@@ -534,4 +534,4 @@ def close(self):
         else:
             self.engine.dispose()
         OMOPCDMDatabase._instance = None
-        print("Connection to the OMOP CDM database closed.")
+        notify_users("Connection to the OMOP CDM database closed.")
diff --git a/biasanalyzer/sql.py b/biasanalyzer/sql.py
index 8aa39c4..a87c665 100644
--- a/biasanalyzer/sql.py
+++ b/biasanalyzer/sql.py
@@ -5,6 +5,7 @@
         SELECT p.person_id, 
                EXTRACT(YEAR FROM
                    COALESCE(
+                       c.cohort_start_date,
                        c.cohort_end_date,
                        CURRENT_DATE
                    )
@@ -82,6 +83,7 @@
         SELECT p.person_id,
             EXTRACT(YEAR FROM
                    COALESCE(
+                       c.cohort_start_date,
                        c.cohort_end_date,
                        CURRENT_DATE
                    )
diff --git a/tests/query_based/test_cohort_creation.py b/tests/query_based/test_cohort_creation.py
index 12a9d11..1334402 100644
--- a/tests/query_based/test_cohort_creation.py
+++ b/tests/query_based/test_cohort_creation.py
@@ -1,12 +1,13 @@
 import pytest
 import os
 import datetime
+import logging
 from numpy.ma.testutils import assert_equal
 
 
-@pytest.mark.usefixtures
-def test_cohort_creation_baseline(test_db):
+def test_cohort_creation_baseline(caplog, test_db):
     bias = test_db
+    
     cohort = bias.create_cohort(
         "COVID-19 patient",
         "Cohort of young female patients",
@@ -16,11 +17,18 @@ def test_cohort_creation_baseline(test_db):
     )
     # Test cohort object and methods
     assert cohort is not None, "Cohort creation failed"
-    print(f'metadata: {cohort.metadata}')
     assert cohort.metadata is not None, "Cohort creation wrongly returned None metadata"
     assert 'creation_info' in cohort.metadata, "Cohort creation does not contain 'creation_info' key"
     assert cohort.data is not None, "Cohort creation wrongly returned None data"
-    print(f'baseline cohort data: {cohort.data}', flush=True)
+    caplog.clear()
+    with caplog.at_level(logging.ERROR):
+        cohort.get_distributions('ethnicity')
+    assert "Distribution for variable 'ethnicity' is not available" in caplog.text
+
+    assert len(cohort.get_distributions('age')) == 10, "Cohort get_distribution('age') does not return 10 age_bin items"
+    assert len(cohort.get_distributions('gender')) == 3, ("Cohort get_distribution('gender') does not return "
+                                                          "3 gender_bin items")
+
     patient_ids = set([item['subject_id'] for item in cohort.data])
     assert_equal(len(patient_ids), 5)
     assert_equal(patient_ids, {106, 108, 110, 111, 112})
@@ -39,7 +47,6 @@ def test_cohort_creation_baseline(test_db):
                  "Incorrect cohort_end_date for patient 108")
 
 
-@pytest.mark.usefixtures
 def test_cohort_creation_study(test_db):
     bias = test_db
     cohort = bias.create_cohort(
@@ -51,8 +58,6 @@ def test_cohort_creation_study(test_db):
     )
     # Test cohort object and methods
     assert cohort is not None, "Cohort creation failed"
-    print(f'metadata: {cohort.metadata}')
-    print(f'data: {cohort.data}')
     assert cohort.metadata is not None, "Cohort creation wrongly returned None metadata"
     assert 'creation_info' in cohort.metadata, "Cohort creation does not contain 'creation_info' key"
     assert cohort.data is not None, "Cohort creation wrongly returned None data"
@@ -60,20 +65,21 @@ def test_cohort_creation_study(test_db):
     assert_equal(len(patient_ids), 4)
     assert_equal(patient_ids, {108, 110, 111, 112})
 
-@pytest.mark.usefixtures
-def test_cohort_creation_study2(test_db):
+def test_cohort_creation_study2(caplog, test_db):
     bias = test_db
-    cohort = bias.create_cohort(
-        "COVID-19 patient",
-        "Cohort of young female patients with no COVID-19",
-        os.path.join(os.path.dirname(__file__), '..', 'assets', 'cohort_creation',
-                    'test_cohort_creation_condition_occurrence_config_study2.yaml'),
-        "test_user"
-    )
+    caplog.clear()
+    with caplog.at_level(logging.INFO):
+        cohort = bias.create_cohort(
+            "COVID-19 patient",
+            "Cohort of young female patients with no COVID-19",
+            os.path.join(os.path.dirname(__file__), '..', 'assets', 'cohort_creation',
+                        'test_cohort_creation_condition_occurrence_config_study2.yaml'),
+            "test_user",
+            delay=1
+        )
+    assert 'Simulating long-running task' in caplog.text
     # Test cohort object and methods
     assert cohort is not None, "Cohort creation failed"
-    print(f'metadata: {cohort.metadata}')
-    print(f'data: {cohort.data}')
     assert cohort.metadata is not None, "Cohort creation wrongly returned None metadata"
     assert 'creation_info' in cohort.metadata, "Cohort creation does not contain 'creation_info' key"
     assert cohort.data is not None, "Cohort creation wrongly returned None data"
@@ -81,7 +87,6 @@ def test_cohort_creation_study2(test_db):
     assert_equal(len(patient_ids), 1)
     assert_equal(patient_ids, {106})
 
-@pytest.mark.usefixtures
 def test_cohort_creation_all(test_db):
     bias = test_db
     cohort = bias.create_cohort(
@@ -95,7 +100,6 @@ def test_cohort_creation_all(test_db):
     )
     # Test cohort object and methods
     assert cohort is not None, "Cohort creation failed"
-    print(f'metadata: {cohort.metadata}')
     assert cohort.metadata is not None, "Cohort creation wrongly returned None metadata"
     assert 'creation_info' in cohort.metadata, "Cohort creation does not contain 'creation_info' key"
     stats = cohort.get_stats()
@@ -106,7 +110,6 @@ def test_cohort_creation_all(test_db):
     assert_equal(len(patient_ids), 2)
     assert_equal(patient_ids, {108, 110})
 
-@pytest.mark.usefixtures
 def test_cohort_creation_mixed_domains(test_db):
     """
     Test cohort creation with mixed domains (condition, drug, visit, procedure).
diff --git a/tests/test_biasanalyzer_api.py b/tests/test_biasanalyzer_api.py
index fc594f7..382ee5b 100644
--- a/tests/test_biasanalyzer_api.py
+++ b/tests/test_biasanalyzer_api.py
@@ -93,6 +93,18 @@ def test_set_cohort_action(caplog, fresh_bias_obj):
         fresh_bias_obj._set_cohort_action()
     assert 'valid OMOP CDM must be set' in caplog.text
 
+def test_create_cohort_with_no_action(caplog, fresh_bias_obj):
+    caplog.clear()
+    with caplog.at_level(logging.INFO):
+        fresh_bias_obj.create_cohort('test', 'test', 'test.yaml', 'test')
+    assert 'failed to create a valid cohort action object' in caplog.text
+
+def test_compare_cohort_with_no_action(caplog, fresh_bias_obj):
+    caplog.clear()
+    with caplog.at_level(logging.INFO):
+        fresh_bias_obj.compare_cohorts(1, 2)
+    assert 'failed to create a valid cohort action object' in caplog.text
+
 def test_get_domains_and_vocabularies_invalid(caplog, fresh_bias_obj):
     caplog.clear()
     with caplog.at_level(logging.INFO):
@@ -135,7 +147,7 @@ def test_get_concept_hierarchy_no_omop_cdm(caplog, fresh_bias_obj):
 def test_get_concept_hierarchy(test_db):
     with pytest.raises(ValueError):
         test_db.get_concept_hierarchy('not_int_str')
-        
+
     hierarchy = test_db.get_concept_hierarchy(2)
     print(f'hierarchy: {hierarchy}', flush=True)
     expected = ({'details': {'concept_id': 2, 'concept_name': 'Type 1 Diabetes Mellitus', 'vocabulary_id': 'ICD10CM',

From 782e4614985c55a4def7f129b0f9deead9b0f922 Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Sat, 7 Jun 2025 12:09:58 -0400
Subject: [PATCH 06/12] Move build_concept_tree to utils; add coverage config and tests

---
 .coveragerc                               | 12 ++++++
 .github/workflows/test.yml                |  2 +-
 biasanalyzer/api.py                       | 29 ++-----------
 biasanalyzer/utils.py                     | 23 ++++++++++
 tests/query_based/test_cohort_creation.py | 23 ++++++++++
 tests/test_biasanalyzer_api.py            | 52 +++++++++++++++++++++++
 6 files changed, 115 insertions(+), 26 deletions(-)
 create mode 100644 .coveragerc

diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..384b44e
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,12 @@
+[run]
+omit =
+    */module_test.py
+
+[report]
+exclude_lines =
+    pragma: no cover
+    if __name__ == .__main__.:
+
+[html]
+directory = coverage_html_report
+
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8f19c7e..24c2fc5 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -39,4 +39,4 @@ jobs:
     # Step 5: Run Tests
     - name: Run tests
       run: |
-        poetry run pytest -s --cov=biasanalyzer
+        poetry run pytest -s --cov=biasanalyzer --cov-config=.coveragerc
diff --git a/biasanalyzer/api.py b/biasanalyzer/api.py
index ade0e32..50fca55 100644
--- a/biasanalyzer/api.py
+++ b/biasanalyzer/api.py
@@ -4,9 +4,9 @@
 from biasanalyzer.cohort import CohortAction
 from biasanalyzer.config import load_config
 from ipywidgets import VBox, Label
-from ipytree import Tree, Node
+from ipytree import Tree
 from IPython.display import display
-from biasanalyzer.utils import get_direction_arrow, notify_users
+from biasanalyzer.utils import get_direction_arrow, notify_users, build_concept_tree
 
 
 class BIAS:
@@ -96,27 +96,6 @@ def get_concept_hierarchy(self, concept_id):
             return None
         return self.omop_cdm_db.get_concept_hierarchy(concept_id)
 
-    def _build_concept_tree(self, concept_tree: dict, tree_type: str) -> Node:
-        """
-            Recursively builds an ipytree Node for a given concept tree.
-            """
-        # Extract concept details
-        details = concept_tree.get("details", {})
-        concept_name = details.get("concept_name", "Unknown Concept")
-        concept_id = details.get("concept_id", "")
-        concept_code = details.get("concept_code", "")
-        direction_arrow = get_direction_arrow(tree_type)
-        # Create a label for the current concept
-        label_text = f"{direction_arrow} {concept_name} (ID: {concept_id}, Code: {concept_code})"
-        node = Node(label_text)
-
-        # Recursively add child nodes
-        for child in concept_tree.get(tree_type, []):
-            child_node = self._build_concept_tree(child, tree_type)
-            node.add_node(child_node)
-
-        return node
-
     def display_concept_tree(self, concept_tree: dict, level: int = 0, show_in_text_format=True, tree_type=None):
         """
         Recursively prints the concept hierarchy tree in an indented format for display.
@@ -146,12 +125,12 @@ def display_concept_tree(self, concept_tree: dict, level: int = 0, show_in_text_
         else:
             # Extract concept details
             # Build the root tree node
-            root_node = self._build_concept_tree(concept_tree, tree_type)
+            root_node = build_concept_tree(concept_tree, tree_type)
             tree = Tree()
             tree.add_node(root_node)
             tree.opened = True
             display(VBox([Label("Concept Hierarchy"), tree]))
-            return None
+            return root_node
 
 
     def create_cohort(self, cohort_name: str, cohort_desc: str, query_or_yaml_file: str, created_by: str,
diff --git a/biasanalyzer/utils.py b/biasanalyzer/utils.py
index faa9953..c2a2e2b 100644
--- a/biasanalyzer/utils.py
+++ b/biasanalyzer/utils.py
@@ -1,5 +1,6 @@
 import numpy as np
 import re
+from ipytree import Node
 import logging
 
 
@@ -67,6 +68,28 @@ def build_concept_hierarchy(df, parent_col="ancestor_concept_id", child_col="des
     return hierarchy
 
 
+def build_concept_tree(concept_tree: dict, tree_type: str) -> Node:
+    """
+        Recursively builds an ipytree Node for a given concept tree.
+        """
+    # Extract concept details
+    details = concept_tree.get("details", {})
+    concept_name = details.get("concept_name", "Unknown Concept")
+    concept_id = details.get("concept_id", "")
+    concept_code = details.get("concept_code", "")
+    direction_arrow = get_direction_arrow(tree_type)
+    # Create a label for the current concept
+    label_text = f"{direction_arrow} {concept_name} (ID: {concept_id}, Code: {concept_code})"
+    node = Node(label_text)
+
+    # Recursively add child nodes
+    for child in concept_tree.get(tree_type, []):
+        child_node = build_concept_tree(child, tree_type)
+        node.add_node(child_node)
+
+    return node
+
+
 def find_roots(df, parent_col="ancestor_concept_id", child_col="descendant_concept_id"):
     """
     Finds root nodes in the hierarchy. Roots are nodes that are parents
diff --git a/tests/query_based/test_cohort_creation.py b/tests/query_based/test_cohort_creation.py
index 1334402..7878b65 100644
--- a/tests/query_based/test_cohort_creation.py
+++ b/tests/query_based/test_cohort_creation.py
@@ -143,3 +143,26 @@ def test_cohort_creation_mixed_domains(test_db):
     end_dates = [item['cohort_end_date'] for item in cohort.data]
     assert_equal(len(end_dates), 2)
     assert_equal(end_dates, [datetime.date(2020, 6, 20), datetime.date(2020, 6, 20)])
+
+def test_cohort_comparison(test_db):
+    bias = test_db
+    cohort_base = bias.create_cohort(
+        "COVID-19 patient",
+        "Cohort of young female patients",
+        os.path.join(os.path.dirname(__file__), '..', 'assets', 'cohort_creation',
+                     'test_cohort_creation_condition_occurrence_config_baseline.yaml'),
+        "test_user"
+    )
+    cohort_study = bias.create_cohort(
+        "Female diabetes patients born between 1970 and 2000",
+        "Cohort of female patients with diabetes who had insulin prescribed 0-30 days after diagnosis "
+        "and have at least one outpatient or emergency visit and underwent a blood test before 12/31/2020, "
+        "with patients born after 1995 and with cardiac surgery excluded",
+        os.path.join(os.path.dirname(__file__), '..', 'assets', 'cohort_creation',
+                     'test_cohort_creation_config.yaml'),
+        "test_user"
+    )
+    results = bias.compare_cohorts(cohort_base.cohort_id, cohort_study.cohort_id)
+    print(f'results: {results}', flush=True)
+    assert {'gender_hellinger_distance': 0.0} in results
+    assert any('age_hellinger_distance' in r for r in results)
diff --git a/tests/test_biasanalyzer_api.py b/tests/test_biasanalyzer_api.py
index 382ee5b..3d1c241 100644
--- a/tests/test_biasanalyzer_api.py
+++ b/tests/test_biasanalyzer_api.py
@@ -2,6 +2,7 @@
 import datetime
 import logging
 import pytest
+from ipytree import Node
 
 from biasanalyzer import __version__
 
@@ -158,3 +159,54 @@ def test_get_concept_hierarchy(test_db):
                     'Diabetic Retinopathy', 'vocabulary_id': 'ICD10CM', 'concept_code': 'E10.3/E11.3'},
                                                                    'children': []}]})
     assert hierarchy == expected
+
+def test_display_concept_tree_text_format(capsys, test_db):
+    sample_tree = {
+        "details": {
+            "concept_id": 123,
+            "concept_name": "Hypertension",
+            "concept_code": "I10"
+        }
+    }
+    test_db.display_concept_tree(sample_tree)
+    captured = capsys.readouterr()
+    assert "concept tree must contain parents or children key" in captured.out
+
+    sample_tree['children'] = [{
+        "details": {
+            "concept_id": 456,
+            "concept_name": "Essential Hypertension",
+            "concept_code": "I10.0"
+            },
+        "children": []
+        }]
+    test_db.display_concept_tree(sample_tree, show_in_text_format=True)
+    captured = capsys.readouterr()
+    assert "Hypertension (ID: 123" in captured.out
+    assert "Essential Hypertension (ID: 456" in captured.out
+
+def test_display_concept_tree_widget(test_db):
+    sample_tree = {
+        "details": {
+            "concept_id": 456,
+            "concept_name": "Essential Hypertension",
+            "concept_code": "I10.0"
+        },
+        "parents": [{
+            "details": {
+                "concept_id": 123,
+                "concept_name": "Hypertension",
+                "concept_code": "I10"
+                },
+            "parents": []
+        }]
+    }
+
+    tree_output = test_db.display_concept_tree(sample_tree, show_in_text_format=False)
+    assert tree_output is not None
+    print(tree_output)
+    assert isinstance(tree_output, Node)
+    assert "Essential Hypertension" in tree_output.name
+    assert len(tree_output.nodes) == 1
+    parent_node = tree_output.nodes[0]
+    assert "Hypertension" in parent_node.name

From 6ee464de0054e7f2935c4161be5fcd40774f39a1 Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Sat, 7 Jun 2025 14:34:33 -0400
Subject: [PATCH 07/12] Use notify_users in cohort.py; add invalid cohort tests

---
 .coveragerc                               |  2 +-
 biasanalyzer/cohort.py                    | 18 ++++----
 tests/query_based/test_cohort_creation.py | 53 +++++++++++++++++++++--
 3 files changed, 60 insertions(+), 13 deletions(-)

diff --git a/.coveragerc b/.coveragerc
index 384b44e..b1be6ff 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,7 +1,7 @@
 [run]
 omit =
     */module_test.py
-
+    biasanalyzer/background/threading_utils.py
 [report]
 exclude_lines =
     pragma: no cover
diff --git a/biasanalyzer/cohort.py b/biasanalyzer/cohort.py
index a36853d..6ca1d75 100644
--- a/biasanalyzer/cohort.py
+++ b/biasanalyzer/cohort.py
@@ -4,10 +4,10 @@
 from datetime import datetime
 from tqdm.auto import tqdm
 from pydantic import ValidationError
-from biasanalyzer.models import CohortDefinition, Cohort
+from biasanalyzer.models import CohortDefinition
 from biasanalyzer.config import load_cohort_creation_config
 from biasanalyzer.database import OMOPCDMDatabase, BiasDatabase
-from biasanalyzer.utils import hellinger_distance, clean_string
+from biasanalyzer.utils import hellinger_distance, clean_string, notify_users
 from biasanalyzer.cohort_query_builder import CohortQueryBuilder
 
 
@@ -99,12 +99,11 @@ def create_cohort(self, cohort_name: str, description: str, query_or_yaml_file:
                 cohort_config = load_cohort_creation_config(query_or_yaml_file)
                 tqdm.write(f'configuration specified in {query_or_yaml_file} loaded successfully')
             except FileNotFoundError:
-                print('specified cohort creation configuration file does not exist. Make sure '
-                      'the configuration file name with path is specified correctly.')
+                notify_users('specified cohort creation configuration file does not exist. Make sure '
+                             'the configuration file name with path is specified correctly.')
                 return None
             except ValidationError as ex:
-                print(f'cohort creation configuration yaml file is not valid with '
-                      f'validation error: {ex}')
+                notify_users(f'cohort creation configuration yaml file is not valid with validation error: {ex}')
                 return None
 
             query = self._query_builder.build_query(cohort_config)
@@ -139,11 +138,12 @@ def create_cohort(self, cohort_name: str, description: str, query_or_yaml_file:
             tqdm.write(f"Cohort {cohort_name} successfully created.")
             return CohortData(cohort_id=cohort_def_id, bias_db=self.bias_db, omop_db=self.omop_db)
         except duckdb.Error as e:
-            print(f"Error executing query: {e}")
+            notify_users(f"Error executing query: {e}")
             return None
         except SQLAlchemyError as e:
-            print(f"Error executing query: {e}")
-            omop_session.close()
+            notify_users(f"Error executing query: {e}")
+            if omop_session is not None:
+                omop_session.close()
             return None
 
     def compare_cohorts(self, cohort_id_1: int, cohort_id_2: int):
diff --git a/tests/query_based/test_cohort_creation.py b/tests/query_based/test_cohort_creation.py
index 7878b65..6c6acbb 100644
--- a/tests/query_based/test_cohort_creation.py
+++ b/tests/query_based/test_cohort_creation.py
@@ -1,13 +1,12 @@
-import pytest
 import os
 import datetime
 import logging
+from sqlalchemy.exc import SQLAlchemyError
 from numpy.ma.testutils import assert_equal
 
 
 def test_cohort_creation_baseline(caplog, test_db):
     bias = test_db
-    
     cohort = bias.create_cohort(
         "COVID-19 patient",
         "Cohort of young female patients",
@@ -163,6 +162,54 @@ def test_cohort_comparison(test_db):
         "test_user"
     )
     results = bias.compare_cohorts(cohort_base.cohort_id, cohort_study.cohort_id)
-    print(f'results: {results}', flush=True)
     assert {'gender_hellinger_distance': 0.0} in results
     assert any('age_hellinger_distance' in r for r in results)
+
+def test_cohort_invalid(caplog, test_db):
+    caplog.clear()
+    with caplog.at_level(logging.INFO):
+        invalid_cohort = test_db.create_cohort('invalid_cohort', 'invalid_cohort',
+                                               'invalid_yaml_file.yml',
+                                               'invalid_created_by')
+    assert 'cohort creation configuration file does not exist' in caplog.text
+    assert invalid_cohort is None
+
+    caplog.clear()
+    with caplog.at_level(logging.INFO):
+        invalid_cohort = test_db.create_cohort('invalid_cohort', 'invalid_cohort',
+                                               os.path.join(os.path.dirname(__file__), '..', 'assets', 'config',
+                                                            'test_config.yaml'), 'invalid_created_by')
+    assert 'configuration yaml file is not valid' in caplog.text
+    assert invalid_cohort is None
+
+    with caplog.at_level(logging.INFO):
+        invalid_cohort = test_db.create_cohort('invalid_cohort', 'invalid_cohort',
+                                               'INVALID SQL QUERY STRING',
+                                               'invalid_created_by')
+    assert 'Error executing query:' in caplog.text
+    assert invalid_cohort is None
+
+def test_create_cohort_sqlalchemy_error(monkeypatch, fresh_bias_obj):
+    # Mock omop_db methods
+    class MockOmopDB:
+        def get_session(self):
+            return self  # not used after error
+        def execute_query(self, query):
+            raise SQLAlchemyError("Mocked SQLAlchemy error")
+        def close(self):
+            pass
+
+    class MockBiasDB:
+        def create_cohort_definition(self, *args, **kwargs):
+            pass
+        def create_cohort_in_bulk(self, *args, **kwargs):
+            pass
+        def close(self):
+            pass
+
+    fresh_bias_obj.omop_cdm_db = MockOmopDB()
+    fresh_bias_obj.bias_db = MockBiasDB()
+
+    result = fresh_bias_obj.create_cohort("test", "desc", "SELECT * FROM person", "test_user")
+
+    assert result is None

From ca6733bcc0cf447c71222197421400f7a1a7e781 Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Sat, 7 Jun 2025 18:53:06 -0400
Subject: [PATCH 08/12] Handle multiple temporal event groups without operator; add test

---
 biasanalyzer/cohort_query_builder.py          | 38 ++++++++-----
 ...iple_temporal_groups_without_operator.yaml | 57 +++++++++++++++++++
 tests/query_based/test_cohort_creation.py     | 17 ++++++
 3 files changed, 97 insertions(+), 15 deletions(-)
 create mode 100644 tests/assets/cohort_creation/test_cohort_creation_multiple_temporal_groups_without_operator.yaml

diff --git a/biasanalyzer/cohort_query_builder.py b/biasanalyzer/cohort_query_builder.py
index 95f411e..860970a 100644
--- a/biasanalyzer/cohort_query_builder.py
+++ b/biasanalyzer/cohort_query_builder.py
@@ -9,14 +9,14 @@ class CohortQueryBuilder:
     def __init__(self):
         """Get the path to SQL templates, whether running from source or installed."""
         try:
-            if sys.version_info >= (3, 9):
+            if sys.version_info >= (3, 9): # pragma: no cover
                 # Python 3.9+: Use importlib.resources.files()
                 template_path = importlib.resources.files("biasanalyzer").joinpath("sql_templates")
             else:
                 # Python 3.8: Use importlib.resources.path() (context manager)
                 with importlib.resources.path("biasanalyzer", "sql_templates") as p:
                     template_path = str(p)
-        except ModuleNotFoundError:
+        except ModuleNotFoundError: # pragma: no cover
             template_path = os.path.join(os.path.dirname(__file__), "sql_templates")
 
         print(f'template_path: {template_path}')
@@ -117,7 +117,7 @@ def render_event_group(event_group, alias_prefix="evt"):
                 event_sql = CohortQueryBuilder.render_event_group(event, f"{alias_prefix}_{i}")
                 if event_sql:
                     queries.append(event_sql)
-            if not queries:
+            if not queries: # pragma: no cover
                 return ""
 
             if event_group["operator"] == "AND":
@@ -150,9 +150,6 @@ def render_event_group(event_group, alias_prefix="evt"):
             elif event_group["operator"] == "OR":
                 return f"SELECT person_id, event_start_date, event_end_date FROM ({' UNION '.join(queries)}) AS {alias_prefix}_or"
             elif event_group["operator"] == "NOT":
-                if len(queries) != 1:
-                    raise ValueError("NOT operator expects exactly one event subquery")
-                    # Keep the full subquery with dates for consistency, but use it as a filter
                 not_query = queries[0]
                 # Return a query that selects all persons from a base table (e.g., person),
                 # excluding those in the NOT subquery, while allowing dates from other criteria
@@ -187,10 +184,6 @@ def render_event_group(event_group, alias_prefix="evt"):
                                         FROM ({queries[0]}) AS {alias_prefix}_0
                                         WHERE event_start_date < DATE '{timestamp}'
                                     """
-                    else:
-                        print(f"Error: event_group: {event_group} with BEFORE operator only "
-                              f"has one query event {queries}")
-                        return ''
                 elif len(queries) == 2:
                     event_group = TemporalEventGroup(**event_group)
                     e1_alias = f"e1_{alias_prefix}"
@@ -213,7 +206,7 @@ def render_event_group(event_group, alias_prefix="evt"):
                                     AND {e1_alias}.event_start_date < {e2_alias}.event_start_date
                                     {interval_sql}
                             """
-            return ""
+            return ""  # pragma: no cover
 
     def temporal_event_filter(self, event_groups, alias='c'):
         """
@@ -236,15 +229,30 @@ def temporal_event_filter(self, event_groups, alias='c'):
                     filters.append(f"AND {alias}.person_id IN (SELECT person_id FROM ({group_sql}) AS ex_subquery_{i})")
                 else:
                     filters.append(f"({group_sql})")
-        if not filters:
+        if not filters:  # pragma: no cover
             return ""
         if alias == 'ex':
             # For exclusion, combine with AND as filters
             return " ".join(filters)
         else:
-            # For inclusion, combine as a single subquery (assuming one event group for simplicity)
-            # If multiple groups, may need UNION or further logic
+            # For inclusion, handle both single event group case with operator defined and multiple event group
+            # case with no operator defined
             if len(filters) > 1:
+                # For multiple temporal event group case with no operator defined, use "OR" operator by default
+                # An example YAML block for multiple temporal event group is shown below for reference, in which
+                # case, patients who satisfy either group (condition 37311061 or drug 67890) will be included:
+                # inclusion_criteria:
+                #   temporal_events:
+                #     - operator: AND
+                #       events:
+                #         - event_type: condition_occurrence
+                #           event_concept_id: 37311061
+                #     - operator: AND
+                #       events:
+                #         - event_type: drug_exposure
+                #           event_concept_id: 67890
                 return (f"SELECT person_id, event_start_date, event_end_date FROM "
                         f"({' UNION ALL '.join(filters)}) AS combined_events")
-            return filters[0]  # Single event group case
+
+            # Single event group case with operator defined
+            return filters[0]
diff --git a/tests/assets/cohort_creation/test_cohort_creation_multiple_temporal_groups_without_operator.yaml b/tests/assets/cohort_creation/test_cohort_creation_multiple_temporal_groups_without_operator.yaml
new file mode 100644
index 0000000..a4df661
--- /dev/null
+++ b/tests/assets/cohort_creation/test_cohort_creation_multiple_temporal_groups_without_operator.yaml
@@ -0,0 +1,57 @@
+inclusion_criteria:
+  demographics:                     # Optional
+    gender: 'female'                # accepted values: female or male, optional field
+    min_birth_year: 2000            # Born at the year of 2000 or after, optional field
+    max_birth_year: 2020            # Born at the year of 2020 or before, optional field
+  temporal_events:
+    # No top-level operator is defined over the OR event group and the BEFORE event group below.
+    # As a convenience, multiple temporal event groups with no explicit top-level operator are
+    # combined with OR by default. Therefore, the temporal events below capture a cohort of
+    # patients who either have difficulty breathing 2 to 5 days before a COVID diagnosis made
+    # between 3/15/20 and 12/11/20, or have at least one emergency room visit or at least
+    # two inpatient visits.
+    - operator: 'OR'
+      events:
+        - event_type: 'visit_occurrence'
+          event_concept_id: 9201    # inpatient visit
+          event_instance: 2         # Optional, minimum number of occurrences the event has happened
+        - event_type: 'visit_occurrence'
+          event_concept_id: 9203    # Emergency Room visit
+    # The operator BEFORE event below captures the condition that difficulty breathing happens
+    # 2 to 5 days before a COVID diagnosis between 3/15/20 and 12/11/20
+    - operator: 'BEFORE'
+      # interval is an inclusive [lower, upper] list; either bound may be null to indicate no lower
+      # or no upper bound, respectively
+      interval: [2, 5]  # 2 to 5 days between difficulty breathing and COVID diagnosis 3/15/20 - 12/11/20
+      events:
+        - event_type: 'condition_occurrence'
+          event_concept_id: 4041664  # difficulty breathing
+        # The operator AND event below captures a COVID diagnosis between 3/15/20 and 12/11/20
+        - operator: 'AND'
+          events:
+          - operator: 'BEFORE'
+            events:
+              - event_type: date
+                timestamp: '2020-03-15'
+              - event_type: 'condition_occurrence'
+                event_concept_id: 37311061 # COVID condition
+          - operator: 'BEFORE'
+            events:
+              - event_type: 'condition_occurrence'
+                event_concept_id: 37311061 # COVID condition
+              - event_type: date
+                timestamp: '2020-12-11' # when the first COVID vaccine was given
+
+exclusion_criteria:
+  # Among those patients meeting the inclusion criteria, patients born in 2010 or later and with
+  # heart failure conditions will be excluded from the cohort
+  demographics:  # Optional
+    min_birth_year: 2010
+  temporal_events:
+    # AND and OR operators allow one or more defined events, but the NOT operator only allows
+    # one event; if the NOT operator were used here, patients with no heart failure conditions
+    # would be excluded
+    - operator: 'AND'
+      events:
+        - event_type: 'condition_occurrence'
+          event_concept_id: 316139    # Exclude patients with heart failure
diff --git a/tests/query_based/test_cohort_creation.py b/tests/query_based/test_cohort_creation.py
index 6c6acbb..7a39090 100644
--- a/tests/query_based/test_cohort_creation.py
+++ b/tests/query_based/test_cohort_creation.py
@@ -109,6 +109,23 @@ def test_cohort_creation_all(test_db):
     assert_equal(len(patient_ids), 2)
     assert_equal(patient_ids, {108, 110})
 
+def test_cohort_creation_multiple_temporary_groups_with_no_operator(test_db):
+    bias = test_db
+    cohort = bias.create_cohort(
+        "Patients with COVID or other emergency conditions",
+        "Cohort of young female patients who either have COVID-19 with difficulty breathing 2 to 5 days "
+        "before a COVID diagnosis 3/15/20-12/11/20 OR have at least one emergency room visit or at least "
+        "two inpatient visits",
+        os.path.join(os.path.dirname(__file__), '..', 'assets', 'cohort_creation',
+                     'test_cohort_creation_multiple_temporal_groups_without_operator.yaml'),
+        "test_user"
+    )
+    # Test cohort object and methods
+    patient_ids = set([item['subject_id'] for item in cohort.data])
+    print(f'patient_ids: {patient_ids}', flush=True)
+    assert_equal(len(patient_ids), 2)
+    assert_equal(patient_ids, {108, 110})
+
 def test_cohort_creation_mixed_domains(test_db):
     """
     Test cohort creation with mixed domains (condition, drug, visit, procedure).

From b125b6b626359b97a249f196b1554315bd8769e6 Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Sat, 7 Jun 2025 20:44:16 -0400
Subject: [PATCH 09/12] added more tests

---
 biasanalyzer/models.py                    | 27 +++++--------
 tests/query_based/test_cohort_creation.py | 49 +++++++++++++++++++++++
 2 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/biasanalyzer/models.py b/biasanalyzer/models.py
index 9c114cc..2ec6140 100644
--- a/biasanalyzer/models.py
+++ b/biasanalyzer/models.py
@@ -125,26 +125,19 @@ class TemporalEventGroup(BaseModel):
     events: List[Union[TemporalEvent, "TemporalEventGroup"]]  # A list of events or nested operators
     interval: Optional[List[Union[int, None]]] = None  # [start, end] interval only applying for BEFORE operator
 
-    @field_validator("interval", mode="before")
-    def validate_interval_structure(cls, value):
-        """Ensure interval is a list with exactly two elements, or None."""
-        if value is None:
-            return value
-        if not isinstance(value, list) or len(value) != 2:
-            raise ValueError("Interval must be a list with exactly two elements: [start, end].")
-        return value
-
     @model_validator(mode="before")
     def validate_interval_logic(cls, values):
-        operator = values.get("operator")
+        """
+        Validate interval structure and logic for all operators, though only used for BEFORE.
+        Ensures interval is None or a list of two elements [start, end], with start <= end if both are integers.
+        For AND, OR, NOT, interval is validated but ignored in SQL generation.
+        """
         interval = values.get("interval")
-        """Ensure interval is logically consistent when operator is 'BEFORE'."""
-        if operator == "BEFORE" and interval is not None:
+        """Ensure interval is logically consistent which is only used for operator 'BEFORE'."""
+        if interval is not None:
+            if not isinstance(interval, list) or len(interval) != 2:
+                raise ValueError("Interval must be a list with exactly two elements: [start, end].")
             start, end = interval
-            if start is not None and not isinstance(start, int):
-                raise ValueError("Interval start must be an integer or None.")
-            if end is not None and not isinstance(end, int):
-                raise ValueError("Interval end must be an integer or None.")
             if start is not None and end is not None and start > end:
                 raise ValueError("Interval start cannot be greater than interval end.")
         return values
@@ -167,7 +160,7 @@ def validate_events_list(cls, values):
 
     def get_interval_sql(self, e1_alias='e1', e2_alias='e2') -> str:
         """Generate SQL for the interval."""
-        if not self.interval:
+        if not self.interval:  # pragma: no cover
             return ""
         start = self.interval[0] if self.interval[0] is not None else 0
         end = self.interval[1] if self.interval[1] is not None else 99999
diff --git a/tests/query_based/test_cohort_creation.py b/tests/query_based/test_cohort_creation.py
index 7a39090..10c7e93 100644
--- a/tests/query_based/test_cohort_creation.py
+++ b/tests/query_based/test_cohort_creation.py
@@ -1,10 +1,59 @@
 import os
 import datetime
 import logging
+import pytest
 from sqlalchemy.exc import SQLAlchemyError
 from numpy.ma.testutils import assert_equal
+from biasanalyzer.models import DemographicsCriteria, TemporalEvent, TemporalEventGroup
 
 
+def test_cohort_yaml_validation(test_db):
+    invalid_data = {
+        "gender": "female",
+        "min_birth_year": 2000,
+        "max_birth_year": 1999  # Invalid: less than min_birth_year
+    }
+    with pytest.raises(ValueError):
+        DemographicsCriteria(**invalid_data)
+
+    invalid_data = {
+        "event_type": "date",
+        "event_concept_id": "dummy"
+    }
+    # validate date event_type must have a timestamp field
+    with pytest.raises(ValueError):
+        TemporalEvent(**invalid_data)
+
+    invalid_data = {
+        "operator": "BEFORE",
+        "events": [
+            {'event_type': 'condition_occurrence',
+             'event_concept_id': 201826},
+            {'event_type': 'drug_exposure',
+             'event_concept_id': 4285892},
+        ],
+        "interval": [100, 50]
+    }
+    # validate interval start must not be greater than interval end
+    with pytest.raises(ValueError):
+        TemporalEventGroup(**invalid_data)
+
+    # validate interval must be either a list of 2 integers or a None
+    invalid_data["interval"] = [123]
+    with pytest.raises(ValueError):
+        TemporalEventGroup(**invalid_data)
+
+    # validate NOT operator cannot have more than one event
+    invalid_data["operator"] = "NOT"
+    with pytest.raises(ValueError):
+        TemporalEventGroup(**invalid_data)
+
+    # validate BEFORE operator must have two events
+    invalid_data["operator"] = "BEFORE"
+    del invalid_data["events"][1]
+    with pytest.raises(ValueError):
+        TemporalEventGroup(**invalid_data)
+
 def test_cohort_creation_baseline(caplog, test_db):
     bias = test_db
     cohort = bias.create_cohort(

From e036f8f5efea00710def8f5a231126dd9506b0b8 Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Sat, 7 Jun 2025 22:33:11 -0400
Subject: [PATCH 10/12] added more database tests

---
 biasanalyzer/database.py       |   7 +-
 tests/test_biasanalyzer_api.py |   6 +-
 tests/test_database.py         | 122 +++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+), 4 deletions(-)
 create mode 100644 tests/test_database.py

diff --git a/biasanalyzer/database.py b/biasanalyzer/database.py
index 5ed6749..1398f73 100644
--- a/biasanalyzer/database.py
+++ b/biasanalyzer/database.py
@@ -296,7 +296,8 @@ def get_cohort_concept_stats(self, cohort_definition_id: int,
             return concept_stats
 
     def close(self):
-        self.conn.close()
+        if self.conn:
+            self.conn.close()
         BiasDatabase._instance = None
         notify_users("Connection to BiasDatabase closed.")
 
@@ -316,7 +317,7 @@ def _initialize(self, db_url):
             try:
                 self.engine = duckdb.connect(db_url)
                 notify_users(f"Connected to the DuckDB database: {db_url}.")
-            except duckdb.Error as e:
+            except duckdb.Error as e:  # pragma: no cover
                 notify_users(f"Failed to connect to DuckDB: {e}", level='error')
             self.Session = self.engine  # Use engine directly for DuckDB
             self._database_type = 'duckdb'
@@ -532,6 +533,6 @@ def close(self):
         if isinstance(self.engine, duckdb.DuckDBPyConnection):
             self.engine.close()
         else:
-            self.engine.dispose()
+            self.engine.dispose()  # pragma: no cover
         OMOPCDMDatabase._instance = None
         notify_users("Connection to the OMOP CDM database closed.")
diff --git a/tests/test_biasanalyzer_api.py b/tests/test_biasanalyzer_api.py
index 3d1c241..0ed8ae3 100644
--- a/tests/test_biasanalyzer_api.py
+++ b/tests/test_biasanalyzer_api.py
@@ -132,12 +132,16 @@ def test_get_concepts_no_domain_and_vocab(caplog, test_db):
 
 def test_get_concepts(test_db):
     concepts = test_db.get_concepts('Heart failure', domain='Condition', vocabulary='SNOMED')
-    print(f'concepts: {concepts}', flush=True)
     expected = [{'concept_id': 316139, 'concept_name': 'Heart failure',
                  'valid_start_date': datetime.date(2012, 4, 1),
                  'valid_end_date': datetime.date(2020, 4, 1),
                  'domain_id': 'Condition', 'vocabulary_id': 'SNOMED'}]
     assert concepts == expected
+    concepts = test_db.get_concepts('Heart failure', vocabulary='SNOMED')
+    assert concepts == expected
+    concepts = test_db.get_concepts('Heart failure', domain='Condition')
+    print(f'concepts: {concepts}', flush=True)
+    assert concepts == expected
 
 def test_get_concept_hierarchy_no_omop_cdm(caplog, fresh_bias_obj):
     caplog.clear()
diff --git a/tests/test_database.py b/tests/test_database.py
new file mode 100644
index 0000000..1a9b074
--- /dev/null
+++ b/tests/test_database.py
@@ -0,0 +1,122 @@
+import duckdb
+import pytest
+from biasanalyzer.database import BiasDatabase
+
+
+def test_create_cohort_definition_table_error_on_sequence():
+    db = BiasDatabase(":memory:")
+    class MockConn:
+        def __init__(self):
+            self.calls = []
+
+        def execute(self, sql):
+            self.calls.append(sql)
+            if "CREATE SEQUENCE" in sql:
+                raise duckdb.Error("random error")  # simulate failure
+            return None
+
+        def close(self):
+            pass
+
+    db.conn = MockConn()
+
+    with pytest.raises(duckdb.Error, match="random error"):
+        db._create_cohort_definition_table()
+
+def test_create_cohort_definition_table_sequence_exists():
+    db = BiasDatabase(":memory:")
+    class MockConn:
+        def __init__(self):
+            self.call_count = 0
+            self.executed_sql = []
+
+        def execute(self, sql):
+            self.call_count += 1
+            self.executed_sql.append(sql)
+            if "CREATE SEQUENCE" in sql:
+                raise duckdb.Error("Sequence already exists")
+
+            return None
+
+        def close(self):
+            pass
+
+    db.conn = MockConn()
+
+    # Should handle "Index already exists" without raising
+    db._create_cohort_definition_table()
+
+    # Optional assertions
+    assert db.conn.call_count >= 2
+    assert any("CREATE SEQUENCE" in sql for sql in db.conn.executed_sql)
+
+def test_create_cohort_index_error():
+    db = BiasDatabase(":memory:")
+    class MockConn:
+        def __init__(self):
+            self.calls = []
+
+        def execute(self, sql):
+            self.calls.append(sql)
+            if "CREATE INDEX" in sql:
+                raise duckdb.Error("random error")  # simulate failure
+            return None
+
+        def close(self):
+            pass
+
+    db.conn = MockConn()
+
+    with pytest.raises(duckdb.Error, match="random error"):
+        db._create_cohort_table()
+
+def test_create_cohort_index_exists():
+    db = BiasDatabase(":memory:")
+    class MockConn:
+        def __init__(self):
+            self.call_count = 0
+            self.executed_sql = []
+
+        def execute(self, sql):
+            self.call_count += 1
+            self.executed_sql.append(sql)
+            if "CREATE INDEX" in sql:
+                raise duckdb.Error("Index already exists")
+
+            return None
+
+        def close(self):
+            pass
+
+    db.conn = MockConn()
+
+    # Should handle "Index already exists" without raising
+    db._create_cohort_table()
+
+    # Optional assertions
+    assert db.conn.call_count >= 2
+    assert any("CREATE INDEX" in sql for sql in db.conn.executed_sql)
+
+def test_create_omop_table_postgres(monkeypatch):
+    # Set up tracking dict
+    called = {"executed": False, "query": None}
+
+    # Patch before BiasDatabase instance is created
+    def mock_execute(self, query):
+        called["executed"] = True
+        called["query"] = query
+        return None
+
+    # Monkeypatch at class level first
+    monkeypatch.setattr(duckdb.DuckDBPyConnection, "execute", mock_execute)
+
+    # Now create the instance (so it uses the patched class method)
+    BiasDatabase._instance = None
+    db = BiasDatabase(":memory:")
+    db.omop_cdm_db_url = "postgresql://user:pass@localhost:5432/mydb"
+
+    result = db._create_omop_table("person")
+
+    assert result is True
+    assert called["executed"] is True
+    assert "postgres_scan" in called["query"]

From 9ceb9a88703f98caa65024ecf94fb5a2b017a5be Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Sun, 8 Jun 2025 14:27:39 -0400
Subject: [PATCH 11/12] added more tests bringing coverage to 93%

---
 tests/query_based/test_cohort_creation.py |  4 ++
 tests/test_database.py                    | 76 +++++++++++++++--------
 2 files changed, 54 insertions(+), 26 deletions(-)

diff --git a/tests/query_based/test_cohort_creation.py b/tests/query_based/test_cohort_creation.py
index 10c7e93..f497df1 100644
--- a/tests/query_based/test_cohort_creation.py
+++ b/tests/query_based/test_cohort_creation.py
@@ -63,8 +63,12 @@ def test_cohort_creation_baseline(caplog, test_db):
                      'test_cohort_creation_condition_occurrence_config_baseline.yaml'),
         "test_user"
     )
+
     # Test cohort object and methods
     assert cohort is not None, "Cohort creation failed"
+    cohort_id = cohort.cohort_id
+    assert bias.bias_db.get_cohort_definition(cohort_id)['name'] == "COVID-19 patient"
+    assert bias.bias_db.get_cohort_definition(cohort_id + 1) == {}
     assert cohort.metadata is not None, "Cohort creation wrongly returned None metadata"
     assert 'creation_info' in cohort.metadata, "Cohort creation does not contain 'creation_info' key"
     assert cohort.data is not None, "Cohort creation wrongly returned None data"
diff --git a/tests/test_database.py b/tests/test_database.py
index 1a9b074..2631c3a 100644
--- a/tests/test_database.py
+++ b/tests/test_database.py
@@ -3,7 +3,54 @@
 from biasanalyzer.database import BiasDatabase
 
 
+def test_create_omop_table_postgres(monkeypatch):
+    # Set up tracking dict
+    called = {"executed": False, "query": None}
+
+    # Patch before BiasDatabase instance is created
+    def mock_execute(self, query):
+        called["executed"] = True
+        called["query"] = query
+        return None
+
+    # Monkeypatch at class level first
+    monkeypatch.setattr(duckdb.DuckDBPyConnection, "execute", mock_execute)
+
+    # Now create the instance (so it uses the patched class method)
+    BiasDatabase._instance = None
+    db = BiasDatabase(":memory:")
+    db.omop_cdm_db_url = "postgresql://user:pass@localhost:5432/mydb"
+
+    result = db._create_omop_table("person")
+
+    assert result is True
+    assert called["executed"] is True
+    assert "postgres_scan" in called["query"]
+
+def test_load_postgres_extension_executes_twice(monkeypatch):
+    # Reset singleton to get a clean instance
+    BiasDatabase._instance = None
+    db = BiasDatabase(":memory:")
+
+    calls = []
+
+    class MockConn:
+        def execute(self, query):
+            calls.append(query)
+            return None
+
+    db.conn = MockConn()
+
+    # Run the method under test
+    db.load_postgres_extension()
+
+    # Assert that execute() was called twice
+    assert len(calls) == 2
+    assert "INSTALL postgres_scanner" in calls[0]
+    assert "LOAD postgres_scanner" in calls[1]
+
 def test_create_cohort_definition_table_error_on_sequence():
+    BiasDatabase._instance = None
     db = BiasDatabase(":memory:")
     class MockConn:
         def __init__(self):
@@ -24,6 +71,7 @@ def close(self):
         db._create_cohort_definition_table()
 
 def test_create_cohort_definition_table_sequence_exists():
+    BiasDatabase._instance = None
     db = BiasDatabase(":memory:")
     class MockConn:
         def __init__(self):
@@ -46,11 +94,11 @@ def close(self):
     # Should handle "Index already exists" without raising
     db._create_cohort_definition_table()
 
-    # Optional assertions
     assert db.conn.call_count >= 2
     assert any("CREATE SEQUENCE" in sql for sql in db.conn.executed_sql)
 
 def test_create_cohort_index_error():
+    BiasDatabase._instance = None
     db = BiasDatabase(":memory:")
     class MockConn:
         def __init__(self):
@@ -71,6 +119,7 @@ def close(self):
         db._create_cohort_table()
 
 def test_create_cohort_index_exists():
+    BiasDatabase._instance = None
     db = BiasDatabase(":memory:")
     class MockConn:
         def __init__(self):
@@ -93,30 +142,5 @@ def close(self):
     # Should handle "Index already exists" without raising
     db._create_cohort_table()
 
-    # Optional assertions
     assert db.conn.call_count >= 2
     assert any("CREATE INDEX" in sql for sql in db.conn.executed_sql)
-
-def test_create_omop_table_postgres(monkeypatch):
-    # Set up tracking dict
-    called = {"executed": False, "query": None}
-
-    # Patch before BiasDatabase instance is created
-    def mock_execute(self, query):
-        called["executed"] = True
-        called["query"] = query
-        return None
-
-    # Monkeypatch at class level first
-    monkeypatch.setattr(duckdb.DuckDBPyConnection, "execute", mock_execute)
-
-    # Now create the instance (so it uses the patched class method)
-    BiasDatabase._instance = None
-    db = BiasDatabase(":memory:")
-    db.omop_cdm_db_url = "postgresql://user:pass@localhost:5432/mydb"
-
-    result = db._create_omop_table("person")
-
-    assert result is True
-    assert called["executed"] is True
-    assert "postgres_scan" in called["query"]

From 845f2db5ec4727e33b456924a030af6ec912143f Mon Sep 17 00:00:00 2001
From: hyi <hongyi@renci.org>
Date: Sun, 8 Jun 2025 17:31:08 -0400
Subject: [PATCH 12/12] bug fixes and added tests with 100% test coverage now

---
 biasanalyzer/database.py                      | 108 ++++++++----------
 tests/query_based/test_cohort_creation.py     |   9 +-
 .../test_hierarchical_prevalence.py           |   7 +-
 tests/test_database.py                        |  28 +++++
 4 files changed, 87 insertions(+), 65 deletions(-)

diff --git a/biasanalyzer/database.py b/biasanalyzer/database.py
index 1398f73..af951bb 100644
--- a/biasanalyzer/database.py
+++ b/biasanalyzer/database.py
@@ -104,25 +104,13 @@ def create_cohort_definition(self, cohort_definition: CohortDefinition, progress
             cohort_definition.created_by
         ))
         if progress_obj is None:
-            notify_users("Cohort definition inserted successfully.")
+            notify_users("Cohort definition inserted successfully.")  # pragma: no cover
         else:
             progress_obj.write("Cohort definition inserted successfully.")
         self.conn.execute("SELECT id from cohort_definition ORDER BY id DESC LIMIT 1")
         created_cohort_id = self.conn.fetchone()[0]
         return created_cohort_id
 
-    # Method to insert cohort data
-    def create_cohort(self, cohort: Cohort):
-        self.conn.execute('''
-            INSERT INTO cohort (subject_id, cohort_definition_id, cohort_start_date, cohort_end_date)
-            VALUES (?, ?, ?, ?)
-        ''', (
-            cohort.subject_id,
-            cohort.cohort_definition_id,
-            cohort.cohort_start_date,
-            cohort.cohort_end_date
-        ))
-
     # Method to insert cohort data in bulk from a dataframe
     def create_cohort_in_bulk(self, cohort_df: pd.DataFrame):
         # make duckdb to treat cohort_df dataframe as a virtual table named "cohort_df"
@@ -161,20 +149,18 @@ def _create_omop_table(self, table_name):
                 SELECT * from postgres_scan('{self.omop_cdm_db_url}', 'public', {table_name})
             """)
             return True # success
-        elif self.omop_cdm_db_url.endswith('.duckdb'):
+        elif self.omop_cdm_db_url is None:
+            return False
+        else: # omop table is already included in duckdb
             return True
-        else:
-            return False # failure
+
 
     def _execute_query(self, query_str):
         results = self.conn.execute(query_str)
 
         headers = [desc[0] for desc in results.description]
         rows = results.fetchall()
-        if len(rows) == 0:
-            return []
-        else:
-            return [dict(zip(headers, row)) for row in rows]
+        return [dict(zip(headers, row)) for row in rows]
 
     def get_cohort_basic_stats(self, cohort_definition_id: int, variable=''):
         """
@@ -263,31 +249,28 @@ def get_cohort_concept_stats(self, cohort_definition_id: int,
                          f"Supported concept types are: {self.__class__.cohort_concept_queries.keys()}", level='error')
             return concept_stats
         try:
-            if self._create_omop_table('concept') and self._create_omop_table('concept_ancestor'):
+            if (self._create_omop_table('concept') and self._create_omop_table('concept_ancestor')
+                    and self._create_omop_table(concept_type)):
                 query_str = self.__class__.cohort_concept_queries[concept_type]['query']
-                if self._create_omop_table(concept_type):
-                    if not vocab:
-                        vocab = self.__class__.cohort_concept_queries[concept_type]['default_vocab']
-                    query = query_str.format(cid=cohort_definition_id, filter_count=filter_count,
-                                             vocab=vocab, include_hierarchy=include_hierarchy)
-                    concept_stats[concept_type] = self._execute_query(query)
-                    cs_df = pd.DataFrame(concept_stats[concept_type])
-                    # Combine concept_name and prevalence into a "details" column
-                    cs_df["details"] = cs_df.apply(
-                        lambda row: f"{row['concept_name']} (Code: {row['concept_code']}, "
-                                    f"Count: {row['count_in_cohort']}, Prevalence: {row['prevalence']:.3%})", axis=1)
-                    filtered_cs_df = cs_df[cs_df['ancestor_concept_id'] != cs_df['descendant_concept_id']]
-                    roots = find_roots(filtered_cs_df)
-                    hierarchy = build_concept_hierarchy(filtered_cs_df)
-                    notify_users(f'cohort concept hierarchy for {concept_type} with root concept ids {roots}:')
-                    for root in roots:
-                        root_detail = cs_df[(cs_df['ancestor_concept_id'] == root)
-                                  & (cs_df['descendant_concept_id'] == root)]['details'].iloc[0]
-                        print_hierarchy(hierarchy, parent=root, level=0, parent_details=root_detail)
-                    return concept_stats
-                else:
-                    notify_users(f"Cannot connect to the OMOP database to query {concept_type} table")
-                    return concept_stats
+                if not vocab:
+                    vocab = self.__class__.cohort_concept_queries[concept_type]['default_vocab']
+                query = query_str.format(cid=cohort_definition_id, filter_count=filter_count,
+                                         vocab=vocab, include_hierarchy=include_hierarchy)
+                concept_stats[concept_type] = self._execute_query(query)
+                cs_df = pd.DataFrame(concept_stats[concept_type])
+                # Combine concept_name and prevalence into a "details" column
+                cs_df["details"] = cs_df.apply(
+                    lambda row: f"{row['concept_name']} (Code: {row['concept_code']}, "
+                                f"Count: {row['count_in_cohort']}, Prevalence: {row['prevalence']:.3%})", axis=1)
+                filtered_cs_df = cs_df[cs_df['ancestor_concept_id'] != cs_df['descendant_concept_id']]
+                roots = find_roots(filtered_cs_df)
+                hierarchy = build_concept_hierarchy(filtered_cs_df)
+                notify_users(f'cohort concept hierarchy for {concept_type} with root concept ids {roots}:')
+                for root in roots:
+                    root_detail = cs_df[(cs_df['ancestor_concept_id'] == root)
+                              & (cs_df['descendant_concept_id'] == root)]['details'].iloc[0]
+                    print_hierarchy(hierarchy, parent=root, level=0, parent_details=root_detail)
+                return concept_stats
             else:
                 notify_users("Cannot connect to the OMOP database to query concept table")
                 return concept_stats
@@ -321,23 +304,25 @@ def _initialize(self, db_url):
                 notify_users(f"Failed to connect to DuckDB: {e}", level='error')
             self.Session = self.engine  # Use engine directly for DuckDB
             self._database_type = 'duckdb'
-        try:
-            self.engine = create_engine(
-                db_url,
-                echo=False,
-                connect_args={'options': '-c default_transaction_read_only=on'}  # Enforce read-only transactions
-            )
-            self.Session = sessionmaker(bind=self.engine)
-            notify_users("Connected to the OMOP CDM database (read-only).")
-            self._database_type = 'postgresql'
-        except SQLAlchemyError as e:
-            notify_users(f"Failed to connect to the database: {e}", level='error')
+        else:  # pragma: no cover
+            # Handle PostgreSQL connection
+            try:
+                self.engine = create_engine(
+                    db_url,
+                    echo=False,
+                    connect_args={'options': '-c default_transaction_read_only=on'}  # Enforce read-only transactions
+                )
+                self.Session = sessionmaker(bind=self.engine)
+                notify_users("Connected to the OMOP CDM database (read-only).")
+                self._database_type = 'postgresql'
+            except SQLAlchemyError as e:
+                notify_users(f"Failed to connect to the database: {e}", level='error')
 
     def get_session(self):
         if self._database_type == 'duckdb':
             return self.engine
-        else: # postgresql connection
-            # Provide a new session for read-only queries
+        else:  # pragma: no cover
+            # postgresql connection: provide a new session for read-only queries
             return self.Session()
 
     def execute_query(self, query, params=None):
@@ -346,7 +331,7 @@ def execute_query(self, query, params=None):
                 # DuckDB query execution
                 results = self.engine.execute(query, params).fetchall()
                 headers = [desc[0] for desc in self.engine.execute(query, params).description]
-            else:
+            else:  # pragma: no cover
                 # PostgreSQL query execution
                 omop_session = self.get_session()
                 query = text(query)
@@ -360,9 +345,10 @@ def execute_query(self, query, params=None):
         except duckdb.Error as e:
             notify_users(f"Error executing query: {e}", level='error')
             return []
-        except SQLAlchemyError as e:
+        except SQLAlchemyError as e:  # pragma: no cover
             notify_users(f"Error executing query: {e}", level='error')
-            omop_session.close()
+            if omop_session:
+                omop_session.close()
             return []
 
     def get_domains_and_vocabularies(self) -> list:
@@ -409,7 +395,7 @@ def get_concepts(self, search_term: str, domain: Optional[str], vocab: Optional[
                 params = [domain, search_term_exact, search_term_prefix, search_term_suffix,
                               search_term_prefix_suffix]
 
-        else:
+        else:  # pragma: no cover
             # Use named parameters with :param_name syntax for SQLAlchemy/PostgreSQL
             base_query = """
                          SELECT concept_id, concept_name, valid_start_date, valid_end_date, domain_id, vocabulary_id \
diff --git a/tests/query_based/test_cohort_creation.py b/tests/query_based/test_cohort_creation.py
index f497df1..8211cad 100644
--- a/tests/query_based/test_cohort_creation.py
+++ b/tests/query_based/test_cohort_creation.py
@@ -139,7 +139,7 @@ def test_cohort_creation_study2(caplog, test_db):
     assert_equal(len(patient_ids), 1)
     assert_equal(patient_ids, {106})
 
-def test_cohort_creation_all(test_db):
+def test_cohort_creation_all(caplog, test_db):
     bias = test_db
     cohort = bias.create_cohort(
         "COVID-19 patient",
@@ -156,6 +156,13 @@ def test_cohort_creation_all(test_db):
     assert 'creation_info' in cohort.metadata, "Cohort creation does not contain 'creation_info' key"
     stats = cohort.get_stats()
     assert stats is not None, "Created cohort's stats is None"
+    gender_stats = cohort.get_stats(variable='gender')
+    assert gender_stats is not None, "Created cohort's gender stats is None"
+    caplog.clear()
+    with caplog.at_level(logging.ERROR):
+        cohort.get_stats(variable='address')
+    assert 'is not available' in caplog.text
+    assert gender_stats is not None, "Created cohort's gender stats is None"
     assert cohort.data is not None, "Cohort creation wrongly returned None data"
     patient_ids = set([item['subject_id'] for item in cohort.data])
     print(f'patient_ids: {patient_ids}', flush=True)
diff --git a/tests/query_based/test_hierarchical_prevalence.py b/tests/query_based/test_hierarchical_prevalence.py
index 85ea594..ba849fa 100644
--- a/tests/query_based/test_hierarchical_prevalence.py
+++ b/tests/query_based/test_hierarchical_prevalence.py
@@ -1,6 +1,3 @@
-import pytest
-
-@pytest.mark.usefixtures
 def test_cohort_concept_hierarchical_prevalence(test_db):
     bias = test_db
     cohort_query = """
@@ -18,6 +15,10 @@ def test_cohort_concept_hierarchical_prevalence(test_db):
     )
     # Test cohort object and methods
     assert cohort is not None, "Cohort creation failed"
+    # get_concept_stats currently supports only condition_occurrence and drug_exposure concept types, so an unsupported type should yield {}
+    concept_stats = cohort.get_concept_stats(concept_type='procedure_occurrence')
+    assert concept_stats == {}
+
     include_hierarchy_flags = [True, False]
     for flag in include_hierarchy_flags:
         concept_stats = cohort.get_concept_stats(include_hierarchy=flag)
diff --git a/tests/test_database.py b/tests/test_database.py
index 2631c3a..89652ed 100644
--- a/tests/test_database.py
+++ b/tests/test_database.py
@@ -1,5 +1,6 @@
 import duckdb
 import pytest
+import logging
 from biasanalyzer.database import BiasDatabase
 
 
@@ -19,6 +20,10 @@ def mock_execute(self, query):
     # Now create the instance (so it uses the patched class method)
     BiasDatabase._instance = None
     db = BiasDatabase(":memory:")
+    db.omop_cdm_db_url = None
+    result = db._create_omop_table("person")
+    assert result is False
+
     db.omop_cdm_db_url = "postgresql://user:pass@localhost:5432/mydb"
 
     result = db._create_omop_table("person")
@@ -144,3 +149,26 @@ def close(self):
 
     assert db.conn.call_count >= 2
     assert any("CREATE INDEX" in sql for sql in db.conn.executed_sql)
+
+def test_get_cohort_concept_stats_handles_exception(caplog):
+    BiasDatabase._instance = None  # drop the cached instance so the constructor below builds a fresh one (presumably a singleton)
+    db = BiasDatabase(":memory:")
+    db.omop_cdm_db_url = 'duckdb'  # non-None placeholder; the None case is exercised in test_get_cohort_attributes_handles_exception
+    caplog.clear()  # discard earlier records so the assertion below only sees this call's log output
+    with caplog.at_level(logging.ERROR):
+        result = db.get_cohort_concept_stats(123)
+    assert 'Error computing cohort concept stats' in caplog.text  # the failure is expected to be logged rather than raised
+    assert result == {}  # and the call falls back to an empty dict
+
+def test_get_cohort_attributes_handles_exception():
+    BiasDatabase._instance = None  # drop the cached instance so the constructor below builds a fresh one (presumably a singleton)
+    db = BiasDatabase(":memory:")
+
+    db.omop_cdm_db_url = None  # no OMOP database URL configured; no cohort 123 is created in this test either
+    result_stats = db.get_cohort_basic_stats(123, variable='age')
+    assert result_stats is None  # basic stats are expected to be None in this setup
+    result = db.get_cohort_distributions(123, 'age')
+    assert result is None  # distributions likewise
+    result = db.get_cohort_concept_stats(123)
+    assert result == {}  # concept stats use an empty dict, not None, as the fallback
+