From 6e02dbe405ee35afd28dc6e5149015c3c5baeb13 Mon Sep 17 00:00:00 2001 From: hyi Date: Thu, 5 Jun 2025 23:01:46 -0400 Subject: [PATCH 01/12] added more tests with test structure refactoring --- README.md | 2 +- biasanalyzer/api.py | 63 +++++++++--------- biasanalyzer/utils.py | 24 +++++++ ...asAnalyzerTestingAsyncCohortCreation.ipynb | 2 +- ...iasAnalyzerTestingCohortConceptStats.ipynb | 20 +++--- notebooks/BiasAnalyzerTestingCohorts.ipynb | 20 +++--- .../BiasAnalyzerTestingConceptBrowsing.ipynb | 14 ++-- .../config/.test_config_postgresql.yaml.swp | Bin 0 -> 12288 bytes tests/assets/{ => config}/test_config.yaml | 0 .../assets/config/test_config_postgresql.yaml | 9 +++ .../test_config_unsupported_db_type.yaml | 9 +++ tests/assets/config/test_invalid_config.yaml | 8 +++ tests/{query_based => }/conftest.py | 11 ++- tests/test_biasanalyzer.py | 5 -- tests/test_biasanalyzer_api.py | 38 +++++++++++ tests/test_config.py | 15 +---- 16 files changed, 160 insertions(+), 80 deletions(-) create mode 100644 tests/assets/config/.test_config_postgresql.yaml.swp rename tests/assets/{ => config}/test_config.yaml (100%) create mode 100644 tests/assets/config/test_config_postgresql.yaml create mode 100644 tests/assets/config/test_config_unsupported_db_type.yaml create mode 100644 tests/assets/config/test_invalid_config.yaml rename tests/{query_based => }/conftest.py (97%) delete mode 100644 tests/test_biasanalyzer.py create mode 100644 tests/test_biasanalyzer_api.py diff --git a/README.md b/README.md index 4b92f80..a2d5103 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ to install the python package from this github repo. - Run `bias = BIAS()` to create an object of the imported BIAS class. - Create a config.yaml file for specifying OMOP database connection configuration information. The config.yaml file must include root_omop_cdm_database key. -- [A test OMOP database configuration yaml file](https://github.com/VACLab/BiasAnalyzer/blob/main/tests/assets/test_config.yaml) +- [A test OMOP database configuration yaml file](https://github.com/VACLab/BiasAnalyzer/blob/main/tests/assets/config/test_config.yaml) can serve as an example. Another config.yaml example for connecting to a OMOP postgreSQL database is also copied below for reference. ```angular2html diff --git a/biasanalyzer/api.py b/biasanalyzer/api.py index 6caadce..a79cb25 100644 --- a/biasanalyzer/api.py +++ b/biasanalyzer/api.py @@ -6,7 +6,7 @@ from ipywidgets import VBox, Label from ipytree import Tree, Node from IPython.display import display -from biasanalyzer.utils import get_direction_arrow +from biasanalyzer.utils import get_direction_arrow, notify_users class BIAS: @@ -26,24 +26,24 @@ def __new__(cls, config_file_path=None): def set_config(self, config_file_path: str): if config_file_path is None: - print('no configuration file specified. ' - 'Call set_config(config_file_path) next to specify configurations') + notify_users('no configuration file specified. ' + 'Call set_config(config_file_path) next to specify configurations') else: try: self.config = load_config(config_file_path) - print(f'configuration specified in {config_file_path} loaded successfully') + notify_users(f'configuration specified in {config_file_path} loaded successfully') except FileNotFoundError: - print('specified configuration file does not exist. ' - 'Call set_config(config_file_path) next to specify a valid ' - 'configuration file') + notify_users('specified configuration file does not exist. ' + 'Call set_config(config_file_path) next to specify a valid configuration file', + level='error') except ValidationError as ex: - print(f'configuration yaml file is not valid with validation error: {ex}') + notify_users(f'configuration yaml file is not valid with validation error: {ex}', level='error') def set_root_omop(self): if not self.config: - print('no valid configuration to set root OMOP CDM data. ' - 'Call set_config(config_file_path) to specify configurations first.') - elif 'root_omop_cdm_database' in self.config: + notify_users('no valid configuration to set root OMOP CDM data. ' + 'Call set_config(config_file_path) to specify configurations first.') + else: db_type = self.config['root_omop_cdm_database']['database_type'] if db_type == 'postgresql': user = self.config['root_omop_cdm_database']['username'] @@ -65,14 +65,12 @@ def set_root_omop(self): self.bias_db = BiasDatabase(db_path) self.bias_db.omop_cdm_db_url = db_path else: - print(f"Unsupported database type: {db_type}") - else: - print('Configuration file must include configuration values for root_omop_cdm_database key.') + notify_users(f"Unsupported database type: {db_type}") def _set_cohort_action(self): if self.omop_cdm_db is None: - print('A valid OMOP CDM must be set before creating a cohort. ' - 'Call set_root_omop first to set a valid root OMOP CDM') + notify_users('A valid OMOP CDM must be set before creating a cohort. ' + 'Call set_root_omop first to set a valid root OMOP CDM') return None if self.cohort_action is None: self.cohort_action = CohortAction(self.omop_cdm_db, self.bias_db) @@ -80,25 +78,25 @@ def _set_cohort_action(self): def get_domains_and_vocabularies(self): if self.omop_cdm_db is None: - print('A valid OMOP CDM must be set before getting domains. ' - 'Call set_root_omop first to set a valid root OMOP CDM') + notify_users('A valid OMOP CDM must be set before getting domains. ' + 'Call set_root_omop first to set a valid root OMOP CDM') return None return self.omop_cdm_db.get_domains_and_vocabularies() def get_concepts(self, search_term, domain=None, vocabulary=None): if self.omop_cdm_db is None: - print('A valid OMOP CDM must be set before getting concepts. ' - 'Call set_root_omop first to set a valid root OMOP CDM') + notify_users('A valid OMOP CDM must be set before getting concepts. ' + 'Call set_root_omop first to set a valid root OMOP CDM') return None if domain is None and vocabulary is None: - print('either domain or vocabulary must be set to constrain the number of returned concepts') + notify_users('either domain or vocabulary must be set to constrain the number of returned concepts') return None return self.omop_cdm_db.get_concepts(search_term, domain, vocabulary) def get_concept_hierarchy(self, concept_id): if self.omop_cdm_db is None: - print('A valid OMOP CDM must be set before getting concepts. ' - 'Call set_root_omop first to set a valid root OMOP CDM') + notify_users('A valid OMOP CDM must be set before getting concepts. ' + 'Call set_root_omop first to set a valid root OMOP CDM') return None return self.omop_cdm_db.get_concept_hierarchy(concept_id) @@ -134,7 +132,7 @@ def display_concept_tree(self, concept_tree: dict, level: int = 0, show_in_text_ elif 'children' in concept_tree: tree_type = 'children' else: - print('The input concept tree must contain parents or children key as the type of the tree.') + notify_users('The input concept tree must contain parents or children key as the type of the tree.') return '' if show_in_text_format: @@ -178,12 +176,12 @@ def create_cohort(self, cohort_name: str, cohort_desc: str, query_or_yaml_file: created_cohort = c_action.create_cohort(cohort_name, cohort_desc, query_or_yaml_file, created_by) if created_cohort is not None: if delay > 0: - print(f"[DEBUG] Simulating long-running task with {delay} seconds delay...") + notify_users(f"[DEBUG] Simulating long-running task with {delay} seconds delay...") time.sleep(delay) - print('cohort created successfully') + notify_users('cohort created successfully') return created_cohort else: - print('failed to create a valid cohort action object') + notify_users('failed to create a valid cohort action object') return None @@ -192,11 +190,14 @@ def compare_cohorts(self, cohort_id1, cohort_id2): if c_action: return c_action.compare_cohorts(cohort_id1, cohort_id2) else: - print('failed to create a valid cohort action object') + notify_users('failed to create a valid cohort action object') return None def cleanup(self): - self.bias_db.close() - self.omop_cdm_db.close() - del self.cohort_action + if self.bias_db: + self.bias_db.close() + if self.omop_cdm_db: + self.omop_cdm_db.close() + if self.cohort_action: + del self.cohort_action diff --git a/biasanalyzer/utils.py b/biasanalyzer/utils.py index e89f604..faa9953 100644 --- a/biasanalyzer/utils.py +++ b/biasanalyzer/utils.py @@ -1,5 +1,29 @@ import numpy as np import re +import logging + + +logger = logging.getLogger(__name__) + + +def notify_users(message: str, level: str = "info"): + """ + Notify users via both print and logging. + :param message: message to show + :param level: Logging level: 'info', 'warning', 'error' + :return: + """ + + print(message) + + log_func = { + "info": logger.info, + "warning": logger.warning, + "error": logger.error, + "debug": logger.debug, + }.get(level.lower(), logger.info) + + log_func(message) def get_direction_arrow(tree_type): diff --git a/notebooks/BiasAnalyzerTestingAsyncCohortCreation.ipynb b/notebooks/BiasAnalyzerTestingAsyncCohortCreation.ipynb index 3f15471..763bbc1 100644 --- a/notebooks/BiasAnalyzerTestingAsyncCohortCreation.ipynb +++ b/notebooks/BiasAnalyzerTestingAsyncCohortCreation.ipynb @@ -72,7 +72,7 @@ ], "source": [ "# the configuration file includes root_omop_cdm_database configuration info with an example shown \n", - "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/test_config.yaml\n", + "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/config/test_config.yaml\n", "bias.set_root_omop()" ] }, diff --git a/notebooks/BiasAnalyzerTestingCohortConceptStats.ipynb b/notebooks/BiasAnalyzerTestingCohortConceptStats.ipynb index af88f81..1f15809 100644 --- a/notebooks/BiasAnalyzerTestingCohortConceptStats.ipynb +++ b/notebooks/BiasAnalyzerTestingCohortConceptStats.ipynb @@ -16,10 +16,10 @@ " Cloning https://github.com/vaclab/BiasAnalyzer.git to ./temp/pip-req-build-vlj8e8fz\n", " Running command git clone --filter=blob:none --quiet https://github.com/vaclab/BiasAnalyzer.git /home/hyi/temp/pip-req-build-vlj8e8fz\n", " Resolved https://github.com/vaclab/BiasAnalyzer.git to commit 8d821839e93b1d9a208c5c66352ee66db60d1e53\n", - " Installing build dependencies ... \u001b[?25ldone\n", - "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", - "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n", - "\u001b[?25hCollecting duckdb<2.0.0,>=1.1.1 (from biasanalyzer==0.1.0)\n", + " Installing build dependencies ... \u001B[?25ldone\n", + "\u001B[?25h Getting requirements to build wheel ... \u001B[?25ldone\n", + "\u001B[?25h Preparing metadata (pyproject.toml) ... \u001B[?25ldone\n", + "\u001B[?25hCollecting duckdb<2.0.0,>=1.1.1 (from biasanalyzer==0.1.0)\n", " Obtaining dependency information for duckdb<2.0.0,>=1.1.1 from https://files.pythonhosted.org/packages/50/52/6e6f5b5b07841cec334ca6b98f2e02b7bb54ab3b99c49aa3a161cc0b4b37/duckdb-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", " Using cached duckdb-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (966 bytes)\n", "Collecting duckdb-engine<0.14.0,>=0.13.2 (from biasanalyzer==0.1.0)\n", @@ -182,14 +182,14 @@ "Using cached pure_eval-0.2.3-py3-none-any.whl (11 kB)\n", "Using cached wcwidth-0.2.13-py2.py3-none-any.whl (34 kB)\n", "Building wheels for collected packages: biasanalyzer\n", - " Building wheel for biasanalyzer (pyproject.toml) ... \u001b[?25ldone\n", - "\u001b[?25h Created wheel for biasanalyzer: filename=biasanalyzer-0.1.0-py3-none-any.whl size=25475 sha256=1982c82749337f81db1a730b8cc25c049d0c0788cd6b782f69ce8be1d92a397c\n", + " Building wheel for biasanalyzer (pyproject.toml) ... \u001B[?25ldone\n", + "\u001B[?25h Created wheel for biasanalyzer: filename=biasanalyzer-0.1.0-py3-none-any.whl size=25475 sha256=1982c82749337f81db1a730b8cc25c049d0c0788cd6b782f69ce8be1d92a397c\n", " Stored in directory: /home/hyi/temp/pip-ephem-wheel-cache-f_9rcqkk/wheels/25/75/4e/079d96d69cc58148ce31d3d44f858e4db5f689604112dcb7c3\n", "Successfully built biasanalyzer\n", "Installing collected packages: wcwidth, pytz, pure-eval, ptyprocess, widgetsnbextension, tzdata, typing-extensions, traitlets, six, pyyaml, pygments, psycopg2, prompt_toolkit, pexpect, parso, packaging, numpy, MarkupSafe, jupyterlab-widgets, greenlet, executing, duckdb, decorator, asttokens, annotated-types, stack_data, sqlalchemy, scipy, python-dateutil, pydantic-core, matplotlib-inline, jinja2, jedi, ipython-pygments-lexers, comm, pydantic, pandas, ipython, duckdb-engine, ipywidgets, ipytree, biasanalyzer\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "ipympl 0.9.3 requires ipython<9, but you have ipython 9.0.2 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed MarkupSafe-3.0.2 annotated-types-0.7.0 asttokens-3.0.0 biasanalyzer-0.1.0 comm-0.2.2 decorator-5.2.1 duckdb-1.2.1 duckdb-engine-0.13.6 executing-2.2.0 greenlet-3.1.1 ipython-9.0.2 ipython-pygments-lexers-1.1.1 ipytree-0.2.2 ipywidgets-8.1.5 jedi-0.19.2 jinja2-3.1.5 jupyterlab-widgets-3.0.13 matplotlib-inline-0.1.7 numpy-1.24.4 packaging-24.2 pandas-2.0.3 parso-0.8.4 pexpect-4.9.0 prompt_toolkit-3.0.50 psycopg2-2.9.10 ptyprocess-0.7.0 pure-eval-0.2.3 pydantic-2.10.6 pydantic-core-2.27.2 pygments-2.19.1 python-dateutil-2.9.0.post0 pytz-2025.1 pyyaml-6.0.2 scipy-1.10.1 six-1.17.0 sqlalchemy-2.0.39 stack_data-0.6.3 traitlets-5.14.3 typing-extensions-4.12.2 tzdata-2025.1 wcwidth-0.2.13 widgetsnbextension-4.0.13\n" + "\u001B[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "ipympl 0.9.3 requires ipython<9, but you have ipython 9.0.2 which is incompatible.\u001B[0m\u001B[31m\n", + "\u001B[0mSuccessfully installed MarkupSafe-3.0.2 annotated-types-0.7.0 asttokens-3.0.0 biasanalyzer-0.1.0 comm-0.2.2 decorator-5.2.1 duckdb-1.2.1 duckdb-engine-0.13.6 executing-2.2.0 greenlet-3.1.1 ipython-9.0.2 ipython-pygments-lexers-1.1.1 ipytree-0.2.2 ipywidgets-8.1.5 jedi-0.19.2 jinja2-3.1.5 jupyterlab-widgets-3.0.13 matplotlib-inline-0.1.7 numpy-1.24.4 packaging-24.2 pandas-2.0.3 parso-0.8.4 pexpect-4.9.0 prompt_toolkit-3.0.50 psycopg2-2.9.10 ptyprocess-0.7.0 pure-eval-0.2.3 pydantic-2.10.6 pydantic-core-2.27.2 pygments-2.19.1 python-dateutil-2.9.0.post0 pytz-2025.1 pyyaml-6.0.2 scipy-1.10.1 six-1.17.0 sqlalchemy-2.0.39 stack_data-0.6.3 traitlets-5.14.3 typing-extensions-4.12.2 tzdata-2025.1 wcwidth-0.2.13 widgetsnbextension-4.0.13\n" ] } ], @@ -291,7 +291,7 @@ ], "source": [ "# the configuration file includes root_omop_cdm_database configuration info with an example shown \n", - "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/test_config.yaml\n", + "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/config/test_config.yaml\n", "bias.set_root_omop()" ] }, diff --git a/notebooks/BiasAnalyzerTestingCohorts.ipynb b/notebooks/BiasAnalyzerTestingCohorts.ipynb index 5c62812..7810c49 100644 --- a/notebooks/BiasAnalyzerTestingCohorts.ipynb +++ b/notebooks/BiasAnalyzerTestingCohorts.ipynb @@ -16,10 +16,10 @@ " Cloning https://github.com/vaclab/BiasAnalyzer.git to ./temp/pip-req-build-sqm_zvhy\n", " Running command git clone --filter=blob:none --quiet https://github.com/vaclab/BiasAnalyzer.git /home/hyi/temp/pip-req-build-sqm_zvhy\n", " Resolved https://github.com/vaclab/BiasAnalyzer.git to commit 8d821839e93b1d9a208c5c66352ee66db60d1e53\n", - " Installing build dependencies ... \u001b[?25ldone\n", - "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", - "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n", - "\u001b[?25hCollecting duckdb<2.0.0,>=1.1.1 (from biasanalyzer==0.1.0)\n", + " Installing build dependencies ... \u001B[?25ldone\n", + "\u001B[?25h Getting requirements to build wheel ... \u001B[?25ldone\n", + "\u001B[?25h Preparing metadata (pyproject.toml) ... \u001B[?25ldone\n", + "\u001B[?25hCollecting duckdb<2.0.0,>=1.1.1 (from biasanalyzer==0.1.0)\n", " Obtaining dependency information for duckdb<2.0.0,>=1.1.1 from https://files.pythonhosted.org/packages/50/52/6e6f5b5b07841cec334ca6b98f2e02b7bb54ab3b99c49aa3a161cc0b4b37/duckdb-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", " Using cached duckdb-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (966 bytes)\n", "Collecting duckdb-engine<0.14.0,>=0.13.2 (from biasanalyzer==0.1.0)\n", @@ -182,14 +182,14 @@ "Using cached pure_eval-0.2.3-py3-none-any.whl (11 kB)\n", "Using cached wcwidth-0.2.13-py2.py3-none-any.whl (34 kB)\n", "Building wheels for collected packages: biasanalyzer\n", - " Building wheel for biasanalyzer (pyproject.toml) ... \u001b[?25ldone\n", - "\u001b[?25h Created wheel for biasanalyzer: filename=biasanalyzer-0.1.0-py3-none-any.whl size=25475 sha256=1982c82749337f81db1a730b8cc25c049d0c0788cd6b782f69ce8be1d92a397c\n", + " Building wheel for biasanalyzer (pyproject.toml) ... \u001B[?25ldone\n", + "\u001B[?25h Created wheel for biasanalyzer: filename=biasanalyzer-0.1.0-py3-none-any.whl size=25475 sha256=1982c82749337f81db1a730b8cc25c049d0c0788cd6b782f69ce8be1d92a397c\n", " Stored in directory: /home/hyi/temp/pip-ephem-wheel-cache-7pwouolk/wheels/25/75/4e/079d96d69cc58148ce31d3d44f858e4db5f689604112dcb7c3\n", "Successfully built biasanalyzer\n", "Installing collected packages: wcwidth, pytz, pure-eval, ptyprocess, widgetsnbextension, tzdata, typing-extensions, traitlets, six, pyyaml, pygments, psycopg2, prompt_toolkit, pexpect, parso, packaging, numpy, MarkupSafe, jupyterlab-widgets, greenlet, executing, duckdb, decorator, asttokens, annotated-types, stack_data, sqlalchemy, scipy, python-dateutil, pydantic-core, matplotlib-inline, jinja2, jedi, ipython-pygments-lexers, comm, pydantic, pandas, ipython, duckdb-engine, ipywidgets, ipytree, biasanalyzer\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "ipympl 0.9.3 requires ipython<9, but you have ipython 9.0.2 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed MarkupSafe-3.0.2 annotated-types-0.7.0 asttokens-3.0.0 biasanalyzer-0.1.0 comm-0.2.2 decorator-5.2.1 duckdb-1.2.1 duckdb-engine-0.13.6 executing-2.2.0 greenlet-3.1.1 ipython-9.0.2 ipython-pygments-lexers-1.1.1 ipytree-0.2.2 ipywidgets-8.1.5 jedi-0.19.2 jinja2-3.1.5 jupyterlab-widgets-3.0.13 matplotlib-inline-0.1.7 numpy-1.24.4 packaging-24.2 pandas-2.0.3 parso-0.8.4 pexpect-4.9.0 prompt_toolkit-3.0.50 psycopg2-2.9.10 ptyprocess-0.7.0 pure-eval-0.2.3 pydantic-2.10.6 pydantic-core-2.27.2 pygments-2.19.1 python-dateutil-2.9.0.post0 pytz-2025.1 pyyaml-6.0.2 scipy-1.10.1 six-1.17.0 sqlalchemy-2.0.39 stack_data-0.6.3 traitlets-5.14.3 typing-extensions-4.12.2 tzdata-2025.1 wcwidth-0.2.13 widgetsnbextension-4.0.13\n", + "\u001B[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "ipympl 0.9.3 requires ipython<9, but you have ipython 9.0.2 which is incompatible.\u001B[0m\u001B[31m\n", + "\u001B[0mSuccessfully installed MarkupSafe-3.0.2 annotated-types-0.7.0 asttokens-3.0.0 biasanalyzer-0.1.0 comm-0.2.2 decorator-5.2.1 duckdb-1.2.1 duckdb-engine-0.13.6 executing-2.2.0 greenlet-3.1.1 ipython-9.0.2 ipython-pygments-lexers-1.1.1 ipytree-0.2.2 ipywidgets-8.1.5 jedi-0.19.2 jinja2-3.1.5 jupyterlab-widgets-3.0.13 matplotlib-inline-0.1.7 numpy-1.24.4 packaging-24.2 pandas-2.0.3 parso-0.8.4 pexpect-4.9.0 prompt_toolkit-3.0.50 psycopg2-2.9.10 ptyprocess-0.7.0 pure-eval-0.2.3 pydantic-2.10.6 pydantic-core-2.27.2 pygments-2.19.1 python-dateutil-2.9.0.post0 pytz-2025.1 pyyaml-6.0.2 scipy-1.10.1 six-1.17.0 sqlalchemy-2.0.39 stack_data-0.6.3 traitlets-5.14.3 typing-extensions-4.12.2 tzdata-2025.1 wcwidth-0.2.13 widgetsnbextension-4.0.13\n", "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.11/site-packages (4.12.2)\n" ] } @@ -288,7 +288,7 @@ ], "source": [ "# the configuration file includes root_omop_cdm_database configuration info with an example shown \n", - "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/test_config.yaml\n", + "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/config/test_config.yaml\n", "bias.set_root_omop()" ] }, diff --git a/notebooks/BiasAnalyzerTestingConceptBrowsing.ipynb b/notebooks/BiasAnalyzerTestingConceptBrowsing.ipynb index b4627c0..c387145 100644 --- a/notebooks/BiasAnalyzerTestingConceptBrowsing.ipynb +++ b/notebooks/BiasAnalyzerTestingConceptBrowsing.ipynb @@ -16,10 +16,10 @@ " Cloning https://github.com/vaclab/BiasAnalyzer.git to ./temp/pip-req-build-2mkwyv9w\n", " Running command git clone --filter=blob:none --quiet https://github.com/vaclab/BiasAnalyzer.git /home/hyi/temp/pip-req-build-2mkwyv9w\n", " Resolved https://github.com/vaclab/BiasAnalyzer.git to commit a3d43525ddd2b934d8a094901f7ad62c52f2e724\n", - " Installing build dependencies ... \u001b[?25ldone\n", - "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", - "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n", - "\u001b[?25hCollecting duckdb<2.0.0,>=1.1.1 (from BiasAnalyzer==0.1.0)\n", + " Installing build dependencies ... \u001B[?25ldone\n", + "\u001B[?25h Getting requirements to build wheel ... \u001B[?25ldone\n", + "\u001B[?25h Preparing metadata (pyproject.toml) ... \u001B[?25ldone\n", + "\u001B[?25hCollecting duckdb<2.0.0,>=1.1.1 (from BiasAnalyzer==0.1.0)\n", " Obtaining dependency information for duckdb<2.0.0,>=1.1.1 from https://files.pythonhosted.org/packages/bf/56/f627b6fcd4aa34015a15449d852ccb78d7cc6eda654aa20c1d378e99fa76/duckdb-1.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", " Using cached duckdb-1.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (762 bytes)\n", "Collecting duckdb-engine<0.14.0,>=0.13.2 (from BiasAnalyzer==0.1.0)\n", @@ -170,8 +170,8 @@ "Using cached pure_eval-0.2.3-py3-none-any.whl (11 kB)\n", "Using cached wcwidth-0.2.13-py2.py3-none-any.whl (34 kB)\n", "Building wheels for collected packages: BiasAnalyzer\n", - " Building wheel for BiasAnalyzer (pyproject.toml) ... \u001b[?25ldone\n", - "\u001b[?25h Created wheel for BiasAnalyzer: filename=biasanalyzer-0.1.0-py3-none-any.whl size=12482 sha256=254ea1fa17b7c1706a4d4e4ed711dd7128601c09a1c3c36c9ec903ed842441af\n", + " Building wheel for BiasAnalyzer (pyproject.toml) ... \u001B[?25ldone\n", + "\u001B[?25h Created wheel for BiasAnalyzer: filename=biasanalyzer-0.1.0-py3-none-any.whl size=12482 sha256=254ea1fa17b7c1706a4d4e4ed711dd7128601c09a1c3c36c9ec903ed842441af\n", " Stored in directory: /home/hyi/temp/pip-ephem-wheel-cache-wgmpfyq9/wheels/25/75/4e/079d96d69cc58148ce31d3d44f858e4db5f689604112dcb7c3\n", "Successfully built BiasAnalyzer\n", "Installing collected packages: wcwidth, pytz, pure-eval, ptyprocess, widgetsnbextension, tzdata, typing-extensions, traitlets, six, pyyaml, pygments, psycopg2, prompt-toolkit, pexpect, parso, packaging, numpy, jupyterlab-widgets, greenlet, executing, duckdb, decorator, annotated-types, sqlalchemy, scipy, python-dateutil, pydantic-core, matplotlib-inline, jedi, comm, asttokens, stack-data, pydantic, pandas, duckdb-engine, ipython, ipywidgets, ipytree, BiasAnalyzer\n", @@ -274,7 +274,7 @@ ], "source": [ "# the configuration file includes root_omop_cdm_database configuration info with an example shown \n", - "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/test_config.yaml\n", + "# in https://github.com/hyi/HealthDataBias/blob/main/tests/assets/config/test_config.yaml\n", "bias.set_root_omop()" ] }, diff --git a/tests/assets/config/.test_config_postgresql.yaml.swp b/tests/assets/config/.test_config_postgresql.yaml.swp new file mode 100644 index 0000000000000000000000000000000000000000..5feb484dcde0cc44c29cdbe4c06e5f0b1503c846 GIT binary patch literal 12288 zcmeI&O>Pr06bEoOI~Fa7Uf_`~SfDh`q>*TLRH70KmLOPJPV8hx&5T{!qcSX7jsX|o z7#stx!*epD6r{F`u0VfFPtN$`dHmZ&^JYiy-@K)lhav}!EiUX)uLFg$K>X> zbT-7LH{oJ+*r;-KTW*X)00Q?3?22(VI!db!`p;

AU+jU>*VxfB*y_009U<00I#B zj|8II5!Za5UVEoUZNKwZ-#(&)00bZa0SG_<0uX=z1Rwwb2teQ;6i`}-k9$IV<4E8C zyZ`^c9t-i4;|GW2c+K&GV~^u|yZ&q1g9ZTzKmY;|fB*y_009U<00IzrhyanZK2B+J zJkA7>R#DAVFjMMpE5iH4?SSfFNKwd@H<~I{8d_LSs-mJAl%}-4AzjbU^^AD9OSE0M z&By2DTyCCHOeqznOFfg_2v0gLQ@^xcx0Br?&+sew8{4`^p6Sk$v2mPrdl;ox=c;!q zB1KEKGGrGNb3^gcQm9?JKVK;A3x8}(w^-jtcx$7yrFC+yOW7SY!KpH%5qhBjsN PwO6sQ)f(nz`MvlJSW~dR literal 0 HcmV?d00001 diff --git a/tests/assets/test_config.yaml b/tests/assets/config/test_config.yaml similarity index 100% rename from tests/assets/test_config.yaml rename to tests/assets/config/test_config.yaml diff --git a/tests/assets/config/test_config_postgresql.yaml b/tests/assets/config/test_config_postgresql.yaml new file mode 100644 index 0000000..dcf1794 --- /dev/null +++ b/tests/assets/config/test_config_postgresql.yaml @@ -0,0 +1,9 @@ +# example configuration for BiasAnalyzer + +root_omop_cdm_database: + database_type: postgresql # set it to one of the two supported types: postgresql or duckdb + username: test_username + password: test_password + hostname: test_db_hostname + database: "postgresql" # use a shared name for an in-memory duckdb or database name for postgresql + port: 5432 diff --git a/tests/assets/config/test_config_unsupported_db_type.yaml b/tests/assets/config/test_config_unsupported_db_type.yaml new file mode 100644 index 0000000..5366b88 --- /dev/null +++ b/tests/assets/config/test_config_unsupported_db_type.yaml @@ -0,0 +1,9 @@ +# example configuration for BiasAnalyzer + +root_omop_cdm_database: + database_type: unsupported_db_type # set it to one of the two supported types: postgresql or duckdb + username: test_username + password: test_password + hostname: test_db_hostname + database: "unsupported_db" # use a shared name for an in-memory duckdb or database name for postgresql + port: 5432 diff --git a/tests/assets/config/test_invalid_config.yaml b/tests/assets/config/test_invalid_config.yaml new file mode 100644 index 0000000..36f139f --- /dev/null +++ b/tests/assets/config/test_invalid_config.yaml @@ -0,0 +1,8 @@ +# example configuration for BiasAnalyzer + +root_omop_cdm_database: + invalid_database_type: duckdb # set it to one of the two supported types: postgresql or duckdb + invalid_username: test_username + password: test_password + hostname: test_db_hostname + port: unsupported diff --git a/tests/query_based/conftest.py b/tests/conftest.py similarity index 97% rename from tests/query_based/conftest.py rename to tests/conftest.py index 4cabcdb..88edef8 100644 --- a/tests/query_based/conftest.py +++ b/tests/conftest.py @@ -5,9 +5,17 @@ import os +@pytest.fixture +def fresh_bias_obj(): + """Provides a fresh BIAS() object with no config set — safe for testing invalid config scenarios.""" + bias = BIAS() + yield bias + bias.cleanup() + + @pytest.fixture(scope="session") def test_db(): - config_file = os.path.join(os.path.dirname(__file__), '..', 'assets', 'test_config.yaml') + config_file = os.path.join(os.path.dirname(__file__), 'assets', 'config', 'test_config.yaml') config = load_config(config_file) db_path = config['root_omop_cdm_database']['database'] conn = duckdb.connect(db_path) @@ -241,6 +249,7 @@ def test_db(): # mock configuration file bias = BIAS() + bias.set_config(config_file) bias.set_root_omop() diff --git a/tests/test_biasanalyzer.py b/tests/test_biasanalyzer.py deleted file mode 100644 index 9d8eab4..0000000 --- a/tests/test_biasanalyzer.py +++ /dev/null @@ -1,5 +0,0 @@ -from biasanalyzer import __version__ - - -def test_version(): - assert __version__ == '0.1.0' diff --git a/tests/test_biasanalyzer_api.py b/tests/test_biasanalyzer_api.py new file mode 100644 index 0000000..d9b4123 --- /dev/null +++ b/tests/test_biasanalyzer_api.py @@ -0,0 +1,38 @@ +import os +import pytest +from biasanalyzer import __version__ +import logging + + +def test_version(): + assert __version__ == '0.1.0' + +@pytest.mark.usefixtures +def test_set_config(caplog, fresh_bias_obj): + caplog.clear() + with caplog.at_level(logging.ERROR): + fresh_bias_obj.set_config('non_existent_config_file.yaml') + assert 'does not exist' in caplog.text + + caplog.clear() + with caplog.at_level(logging.ERROR): + invalid_config_file = os.path.join(os.path.dirname(__file__), 'assets', 'config', + 'test_invalid_config.yaml') + fresh_bias_obj.set_config(invalid_config_file) + assert 'is not valid' in caplog.text + + +@pytest.mark.usefixtures +def test_set_root_omop(caplog, fresh_bias_obj): + caplog.clear() + with caplog.at_level(logging.INFO): + fresh_bias_obj.set_root_omop() + assert 'no valid configuration' in caplog.text + + caplog.clear() + with caplog.at_level(logging.INFO): + config_file_with_unsupported_db_type = os.path.join(os.path.dirname(__file__), 'assets', 'config', + 'test_config_unsupported_db_type.yaml') + fresh_bias_obj.set_config(config_file_with_unsupported_db_type) + fresh_bias_obj.set_root_omop() + assert 'Unsupported database type' in caplog.text diff --git a/tests/test_config.py b/tests/test_config.py index eb9aeb0..060b940 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,13 +1,11 @@ import os -from numpy.ma.testutils import assert_equal - from biasanalyzer.config import load_config, load_cohort_creation_config def test_load_config(): try: - config = load_config(os.path.join(os.path.dirname(__file__), 'assets', 'test_config.yaml')) + config = load_config(os.path.join(os.path.dirname(__file__), 'assets', 'config', 'test_config.yaml')) except Exception as e: assert False, f"load_config() raised an exception: {e}" @@ -45,14 +43,3 @@ def test_load_cohort_creation_config(): in_events = config.get('inclusion_criteria')['temporal_events'] assert 'operator' in in_events[0] assert 'events' in in_events[0] - - # ex_events = config.get('exclusion_criteria')['temporal_events'] - # ex_demographics = config.get('exclusion_criteria').get('demographics') - # assert 'operator' in ex_events[0] - # assert 'events' in ex_events[0] - # assert 'event_type' in ex_events[0]['events'][0] - # assert_equal(ex_events[0]['events'][0]['event_type'], 'condition_occurrence', - # 'exclusion event type is not condition_occurrence') - # assert 'min_birth_year' in ex_demographics - # assert 'gender' not in ex_demographics - # assert 'max_birth_year' not in ex_demographics From 4083a7cf0b4712e1774591aca4bfb5d89a102720 Mon Sep 17 00:00:00 2001 From: hyi Date: Fri, 6 Jun 2025 11:26:40 -0400 Subject: [PATCH 02/12] get mock postgresql tests working --- .../config/.test_config_postgresql.yaml.swp | Bin 12288 -> 0 bytes .../assets/config/test_config_postgresql.yaml | 9 --- tests/test_biasanalyzer_api.py | 52 +++++++++++++++++- 3 files changed, 49 insertions(+), 12 deletions(-) delete mode 100644 tests/assets/config/.test_config_postgresql.yaml.swp delete mode 100644 tests/assets/config/test_config_postgresql.yaml diff --git a/tests/assets/config/.test_config_postgresql.yaml.swp b/tests/assets/config/.test_config_postgresql.yaml.swp deleted file mode 100644 index 5feb484dcde0cc44c29cdbe4c06e5f0b1503c846..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI&O>Pr06bEoOI~Fa7Uf_`~SfDh`q>*TLRH70KmLOPJPV8hx&5T{!qcSX7jsX|o z7#stx!*epD6r{F`u0VfFPtN$`dHmZ&^JYiy-@K)lhav}!EiUX)uLFg$K>X> zbT-7LH{oJ+*r;-KTW*X)00Q?3?22(VI!db!`p;

AU+jU>*VxfB*y_009U<00I#B zj|8II5!Za5UVEoUZNKwZ-#(&)00bZa0SG_<0uX=z1Rwwb2teQ;6i`}-k9$IV<4E8C zyZ`^c9t-i4;|GW2c+K&GV~^u|yZ&q1g9ZTzKmY;|fB*y_009U<00IzrhyanZK2B+J zJkA7>R#DAVFjMMpE5iH4?SSfFNKwd@H<~I{8d_LSs-mJAl%}-4AzjbU^^AD9OSE0M z&By2DTyCCHOeqznOFfg_2v0gLQ@^xcx0Br?&+sew8{4`^p6Sk$v2mPrdl;ox=c;!q zB1KEKGGrGNb3^gcQm9?JKVK;A3x8}(w^-jtcx$7yrFC+yOW7SY!KpH%5qhBjsN PwO6sQ)f(nz`MvlJSW~dR diff --git a/tests/assets/config/test_config_postgresql.yaml b/tests/assets/config/test_config_postgresql.yaml deleted file mode 100644 index dcf1794..0000000 --- a/tests/assets/config/test_config_postgresql.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# example configuration for BiasAnalyzer - -root_omop_cdm_database: - database_type: postgresql # set it to one of the two supported types: postgresql or duckdb - username: test_username - password: test_password - hostname: test_db_hostname - database: "postgresql" # use a shared name for an in-memory duckdb or database name for postgresql - port: 5432 diff --git a/tests/test_biasanalyzer_api.py b/tests/test_biasanalyzer_api.py index d9b4123..08792cb 100644 --- a/tests/test_biasanalyzer_api.py +++ b/tests/test_biasanalyzer_api.py @@ -1,7 +1,8 @@ import os import pytest -from biasanalyzer import __version__ import logging +from unittest.mock import patch +from biasanalyzer import __version__ def test_version(): @@ -21,9 +22,8 @@ def test_set_config(caplog, fresh_bias_obj): fresh_bias_obj.set_config(invalid_config_file) assert 'is not valid' in caplog.text - @pytest.mark.usefixtures -def test_set_root_omop(caplog, fresh_bias_obj): +def test_set_root_omop(monkeypatch, caplog, fresh_bias_obj): caplog.clear() with caplog.at_level(logging.INFO): fresh_bias_obj.set_root_omop() @@ -36,3 +36,49 @@ def test_set_root_omop(caplog, fresh_bias_obj): fresh_bias_obj.set_config(config_file_with_unsupported_db_type) fresh_bias_obj.set_root_omop() assert 'Unsupported database type' in caplog.text + + # Create a fake postgresql config + config = { + "root_omop_cdm_database": { + "database_type": "postgresql", + "username": "testuser", + "password": "testpass", + "hostname": "localhost", + "port": 5432, + "database": "testdb" + } + } + + # Patch the config parser to return this directly instead of reading a file + monkeypatch.setattr(fresh_bias_obj, "set_config", lambda x: setattr(fresh_bias_obj, "config", config)) + + # Patch OMOPCDMDatabase to avoid real DB connection + class MockOMOPCDMDatabase: + def __init__(self, db_url): + self.db_url = db_url + def close(self): + pass + + monkeypatch.setattr("biasanalyzer.api.OMOPCDMDatabase", MockOMOPCDMDatabase) + + # --- Step 3: Mock BiasDatabase and its methods --- + class MockBiasDatabase: + def __init__(self, path): + self.omop_cdm_db_url = None + + def load_postgres_extension(self): + pass + + def close(self): + pass + + monkeypatch.setattr("biasanalyzer.api.BiasDatabase", MockBiasDatabase) + + # Run + fresh_bias_obj.set_config("dummy.yaml") # This will now inject the mocked config + fresh_bias_obj.set_root_omop() + + # Check values + assert fresh_bias_obj.omop_cdm_db.db_url == "postgresql://testuser:testpass@localhost:5432/testdb" + assert fresh_bias_obj.bias_db is not None + assert fresh_bias_obj.bias_db.omop_cdm_db_url == "postgresql://testuser:testpass@localhost:5432/testdb" From 645b9e15720dc3096a351ad0a3692f632102f23a Mon Sep 17 00:00:00 2001 From: hyi Date: Fri, 6 Jun 2025 15:00:40 -0400 Subject: [PATCH 03/12] changed BIAS() from Singleton class to normal class and added more tests --- biasanalyzer/api.py | 16 ++++++--------- tests/conftest.py | 23 +++++++++++----------- tests/test_biasanalyzer_api.py | 36 ++++++++++++++++++++++++++++++---- 3 files changed, 50 insertions(+), 25 deletions(-) diff --git a/biasanalyzer/api.py b/biasanalyzer/api.py index a79cb25..a9c4d2c 100644 --- a/biasanalyzer/api.py +++ b/biasanalyzer/api.py @@ -10,19 +10,14 @@ class BIAS: - _instance = None - - def __init__(self): - self.config = {} + def __init__(self, config_file_path=None): self.bias_db = None self.omop_cdm_db = None self.cohort_action = None - - def __new__(cls, config_file_path=None): - if cls._instance is None: - cls._instance = super(BIAS, cls).__new__(cls) - cls._instance.set_config(config_file_path) - return cls._instance + if config_file_path is None: + self.config = {} + else: + self.set_config(config_file_path) def set_config(self, config_file_path: str): if config_file_path is None: @@ -77,6 +72,7 @@ def _set_cohort_action(self): return self.cohort_action def get_domains_and_vocabularies(self): + print(f'self.omop_cdm_db: {self.omop_cdm_db}') if self.omop_cdm_db is None: notify_users('A valid OMOP CDM must be set before getting domains. ' 'Call set_root_omop first to set a valid root OMOP CDM') diff --git a/tests/conftest.py b/tests/conftest.py index 88edef8..33aa222 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -33,7 +33,8 @@ def test_db(): concept_id INTEGER PRIMARY KEY, concept_name TEXT, concept_code TEXT, - vocabulary_id TEXT + vocabulary_id TEXT, + domain_id TEXT ); """) conn.execute(""" @@ -127,17 +128,17 @@ def test_db(): result = conn.execute("SELECT COUNT(*) FROM concept").fetchone() if result[0] == 0: conn.execute(""" - INSERT INTO concept (concept_id, concept_name, concept_code, vocabulary_id) + INSERT INTO concept (concept_id, concept_name, concept_code, vocabulary_id, domain_id) VALUES - (4274025, 'Disease', '64572001', 'SNOMED'), - (1, 'Diabetes Mellitus', 'E10-E14', 'ICD10CM'), - (2, 'Type 1 Diabetes Mellitus', 'E10', 'ICD10CM'), - (3, 'Type 2 Diabetes Mellitus', 'E11', 'ICD10CM'), - (4, 'Diabetic Retinopathy', 'E10.3/E11.3', 'ICD10CM'), - (5, 'Fever', 'R50.9', 'ICD10CM'), - (37311061, 'COVID-19', '840539006', 'SNOMED'), - (4041664, 'Difficulty breathing', '230145002', 'SNOMED'), - (316139, 'Heart failure', '84114007', 'SNOMED'); + (4274025, 'Disease', '64572001', 'SNOMED', 'Condition'), + (1, 'Diabetes Mellitus', 'E10-E14', 'ICD10CM', 'Condition'), + (2, 'Type 1 Diabetes Mellitus', 'E10', 'ICD10CM', 'Condition'), + (3, 'Type 2 Diabetes Mellitus', 'E11', 'ICD10CM', 'Condition'), + (4, 'Diabetic Retinopathy', 'E10.3/E11.3', 'ICD10CM', 'Condition'), + (5, 'Fever', 'R50.9', 'ICD10CM', 'Condition'), + (37311061, 'COVID-19', '840539006', 'SNOMED', 'Condition'), + (4041664, 'Difficulty breathing', '230145002', 'SNOMED', 'Condition'), + (316139, 'Heart failure', '84114007', 'SNOMED', 'Condition'); """) # Insert hierarchical relationships as needed diff --git a/tests/test_biasanalyzer_api.py b/tests/test_biasanalyzer_api.py index 08792cb..22b78a7 100644 --- a/tests/test_biasanalyzer_api.py +++ b/tests/test_biasanalyzer_api.py @@ -1,14 +1,12 @@ import os import pytest import logging -from unittest.mock import patch from biasanalyzer import __version__ def test_version(): assert __version__ == '0.1.0' -@pytest.mark.usefixtures def test_set_config(caplog, fresh_bias_obj): caplog.clear() with caplog.at_level(logging.ERROR): @@ -22,7 +20,6 @@ def test_set_config(caplog, fresh_bias_obj): fresh_bias_obj.set_config(invalid_config_file) assert 'is not valid' in caplog.text -@pytest.mark.usefixtures def test_set_root_omop(monkeypatch, caplog, fresh_bias_obj): caplog.clear() with caplog.at_level(logging.INFO): @@ -50,7 +47,7 @@ def test_set_root_omop(monkeypatch, caplog, fresh_bias_obj): } # Patch the config parser to return this directly instead of reading a file - monkeypatch.setattr(fresh_bias_obj, "set_config", lambda x: setattr(fresh_bias_obj, "config", config)) + monkeypatch.setattr(fresh_bias_obj, "config", config) # Patch OMOPCDMDatabase to avoid real DB connection class MockOMOPCDMDatabase: @@ -82,3 +79,34 @@ def close(self): assert fresh_bias_obj.omop_cdm_db.db_url == "postgresql://testuser:testpass@localhost:5432/testdb" assert fresh_bias_obj.bias_db is not None assert fresh_bias_obj.bias_db.omop_cdm_db_url == "postgresql://testuser:testpass@localhost:5432/testdb" + +def test_set_cohort_action(caplog, fresh_bias_obj): + caplog.clear() + with caplog.at_level(logging.INFO): + fresh_bias_obj._set_cohort_action() + assert 'valid OMOP CDM must be set' in caplog.text + +def test_get_domains_and_vocabularies_invalid(caplog, fresh_bias_obj): + caplog.clear() + with caplog.at_level(logging.INFO): + fresh_bias_obj.get_domains_and_vocabularies() + assert 'valid OMOP CDM must be set' in caplog.text + +def test_get_domains_and_vocabularies(test_db): + domains_and_vocabularies = test_db.get_domains_and_vocabularies() + print(f'domains_and_vocabs: {domains_and_vocabularies}', flush=True) + expected = [{'domain_id': 'Condition', 'vocabulary_id': 'ICD10CM'}, + {'domain_id': 'Condition', 'vocabulary_id': 'SNOMED'}] + assert domains_and_vocabularies == expected + +def test_get_concepts(caplog, fresh_bias_obj): + caplog.clear() + with caplog.at_level(logging.INFO): + fresh_bias_obj.get_concepts('dummy') + assert 'valid OMOP CDM must be set' in caplog.text + +def test_get_concept_hierarchy(caplog, fresh_bias_obj): + caplog.clear() + with caplog.at_level(logging.INFO): + fresh_bias_obj.get_concept_hierarchy('dummy') + assert 'valid OMOP CDM must be set' in caplog.text From 07dd5aa48e04a0f387d0cfd8eaf9674f31f3b8ab Mon Sep 17 00:00:00 2001 From: hyi Date: Fri, 6 Jun 2025 18:07:04 -0400 Subject: [PATCH 04/12] added more tests --- biasanalyzer/api.py | 2 +- biasanalyzer/database.py | 116 +++++++++++++++++++++++---------- tests/conftest.py | 27 ++++---- tests/test_biasanalyzer_api.py | 44 +++++++++++-- 4 files changed, 137 insertions(+), 52 deletions(-) diff --git a/biasanalyzer/api.py b/biasanalyzer/api.py index a9c4d2c..ade0e32 100644 --- a/biasanalyzer/api.py +++ b/biasanalyzer/api.py @@ -20,7 +20,7 @@ def __init__(self, config_file_path=None): self.set_config(config_file_path) def set_config(self, config_file_path: str): - if config_file_path is None: + if not config_file_path: notify_users('no configuration file specified. ' 'Call set_config(config_file_path) next to specify configurations') else: diff --git a/biasanalyzer/database.py b/biasanalyzer/database.py index 3f36158..ccdfb72 100644 --- a/biasanalyzer/database.py +++ b/biasanalyzer/database.py @@ -372,45 +372,90 @@ def get_domains_and_vocabularies(self) -> list: return self.execute_query(query) def get_concepts(self, search_term: str, domain: Optional[str], vocab: Optional[str]) -> list: - # find a concept ID based on a search term search_term_exact = search_term.lower() search_term_suffix = f'{search_term_exact} ' search_term_prefix = f' {search_term_exact}' search_term_prefix_suffix = f' {search_term_exact} ' - param_set = { - "search_term_exact": search_term_exact, - "search_term_prefix": search_term_prefix, - "search_term_suffix": search_term_suffix, - "search_term_prefix_suffix": search_term_prefix_suffix - } - if domain is not None and vocab is not None: - condition_str = "domain_id = :domain and vocabulary_id = :vocabulary" - param_set['domain'] = domain - param_set['vocabulary'] = vocab - elif domain is None: - condition_str = "vocabulary_id = :vocabulary" - param_set['vocabulary'] = vocab - else: - # vocab is None - condition_str = "domain_id = :domain" - param_set['domain'] = domain - query = f""" - SELECT concept_id, concept_name, valid_start_date, valid_end_date, domain_id, vocabulary_id FROM concept - where {condition_str} and - (LOWER(concept_name) = :search_term_exact or LOWER(concept_name) LIKE '%' || :search_term_prefix - or LOWER(concept_name) LIKE :search_term_suffix || '%' - or LOWER(concept_name) LIKE '%' || :search_term_prefix_suffix || '%') - ORDER BY concept_id - """ + if self._database_type == 'duckdb': + # Use positional parameters and ? as placeholder to meet duckdb syntax requirement + base_query = """ + SELECT concept_id, concept_name, valid_start_date, valid_end_date, domain_id, vocabulary_id \ + FROM concept + WHERE {condition_str} \ + AND ( + LOWER (concept_name) = ? \ + OR + LOWER (concept_name) LIKE '%' || ? \ + OR + LOWER (concept_name) LIKE ? || '%' \ + OR + LOWER (concept_name) LIKE '%' || ? || '%' + ) + ORDER BY concept_id \ + """ + + if domain is not None and vocab is not None: + condition_str = "domain_id = ? AND vocabulary_id = ?" + params = [domain, vocab, search_term_exact, search_term_prefix, search_term_suffix, + search_term_prefix_suffix] + elif domain is None: + condition_str = "vocabulary_id = ?" + params = [vocab, search_term_exact, search_term_prefix, search_term_suffix, + search_term_prefix_suffix] + else: + condition_str = "domain_id = ?" + params = [domain, search_term_exact, search_term_prefix, search_term_suffix, + search_term_prefix_suffix] - return self.execute_query(query, params=param_set) + else: + # Use named parameters with :param_name syntax for SQLAlchemy/PostgreSQL + base_query = """ + SELECT concept_id, concept_name, valid_start_date, valid_end_date, domain_id, vocabulary_id \ + FROM concept + WHERE {condition_str} \ + AND ( + LOWER (concept_name) = :search_term_exact \ + OR + LOWER (concept_name) LIKE '%' || :search_term_prefix \ + OR + LOWER (concept_name) LIKE :search_term_suffix || '%' \ + OR + LOWER (concept_name) LIKE '%' || :search_term_prefix_suffix || '%' + ) + ORDER BY concept_id \ + """ + + params = { + "search_term_exact": search_term_exact, + "search_term_prefix": search_term_prefix, + "search_term_suffix": search_term_suffix, + "search_term_prefix_suffix": search_term_prefix_suffix + } + + if domain is not None and vocab is not None: + condition_str = "domain_id = :domain AND vocabulary_id = :vocabulary" + params['domain'] = domain + params['vocabulary'] = vocab + elif domain is None: + condition_str = "vocabulary_id = :vocabulary" + params['vocabulary'] = vocab + else: + condition_str = "domain_id = :domain" + params['domain'] = domain + + query = base_query.format(condition_str=condition_str) + return self.execute_query(query, params=params) def get_concept_hierarchy(self, concept_id: int): """ Retrieves the full concept hierarchy (ancestors and descendants) for a given concept_id and organizes it into a nested dictionary to represent the tree structure. """ + if not isinstance(concept_id, int): + # this check is important to avoid SQL injection risk + raise ValueError("concept_id must be an integer") + stages = [ "Queried concept hierarchy", "Fetched concept details", @@ -419,11 +464,12 @@ def get_concept_hierarchy(self, concept_id: int): progress = tqdm(total=len(stages), desc="Concept Hierarchy", unit="stage") progress.set_postfix_str(stages[0]) - query = """ + # Inline the concept_id directly into the query + query = f""" WITH RECURSIVE concept_hierarchy AS ( SELECT ancestor_concept_id, descendant_concept_id, min_levels_of_separation FROM concept_ancestor - WHERE ancestor_concept_id = :concept_id OR descendant_concept_id = :concept_id + WHERE ancestor_concept_id = {concept_id} OR descendant_concept_id = {concept_id} UNION @@ -434,9 +480,9 @@ def get_concept_hierarchy(self, concept_id: int): SELECT ancestor_concept_id, descendant_concept_id FROM concept_hierarchy WHERE min_levels_of_separation > 0 - """ + """ + results = self.execute_query(query) - results = self.execute_query(query, params={"concept_id": concept_id}) progress.update(1) progress.set_postfix_str(stages[1]) @@ -445,13 +491,15 @@ def get_concept_hierarchy(self, concept_id: int): # Fetch details of each concept concept_details = {} if concept_ids: - query = """ + # Convert set of integers to comma-separated string + concept_ids_str = ", ".join(str(cid) for cid in concept_ids) + query = f""" SELECT concept_id, concept_name, vocabulary_id, concept_code FROM concept - WHERE concept_id IN :concept_ids + WHERE concept_id IN ({concept_ids_str}) """ - result = self.execute_query(query, params={"concept_ids": tuple(concept_ids)}) + result = self.execute_query(query) concept_details = {row['concept_id']: row for row in result} progress.update(1) diff --git a/tests/conftest.py b/tests/conftest.py index 33aa222..4fc856c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -32,6 +32,8 @@ def test_db(): CREATE TABLE IF NOT EXISTS concept ( concept_id INTEGER PRIMARY KEY, concept_name TEXT, + valid_start_date DATE, + valid_end_date DATE, concept_code TEXT, vocabulary_id TEXT, domain_id TEXT @@ -128,17 +130,18 @@ def test_db(): result = conn.execute("SELECT COUNT(*) FROM concept").fetchone() if result[0] == 0: conn.execute(""" - INSERT INTO concept (concept_id, concept_name, concept_code, vocabulary_id, domain_id) + INSERT INTO concept (concept_id, concept_name, valid_start_date, valid_end_date, concept_code, + vocabulary_id, domain_id) VALUES - (4274025, 'Disease', '64572001', 'SNOMED', 'Condition'), - (1, 'Diabetes Mellitus', 'E10-E14', 'ICD10CM', 'Condition'), - (2, 'Type 1 Diabetes Mellitus', 'E10', 'ICD10CM', 'Condition'), - (3, 'Type 2 Diabetes Mellitus', 'E11', 'ICD10CM', 'Condition'), - (4, 'Diabetic Retinopathy', 'E10.3/E11.3', 'ICD10CM', 'Condition'), - (5, 'Fever', 'R50.9', 'ICD10CM', 'Condition'), - (37311061, 'COVID-19', '840539006', 'SNOMED', 'Condition'), - (4041664, 'Difficulty breathing', '230145002', 'SNOMED', 'Condition'), - (316139, 'Heart failure', '84114007', 'SNOMED', 'Condition'); + (4274025, 'Disease', '2012-04-01', '2020-04-01', '64572001', 'SNOMED', 'Condition'), + (1, 'Diabetes Mellitus', '2012-04-01', '2020-04-01', 'E10-E14', 'ICD10CM', 'Condition'), + (2, 'Type 1 Diabetes Mellitus', '2012-04-01', '2020-04-01', 'E10', 'ICD10CM', 'Condition'), + (3, 'Type 2 Diabetes Mellitus', '2012-04-01', '2020-04-01', 'E11', 'ICD10CM', 'Condition'), + (4, 'Diabetic Retinopathy', '2012-04-01', '2020-04-01', 'E10.3/E11.3', 'ICD10CM', 'Condition'), + (5, 'Fever', '2012-04-01', '2020-04-01', 'R50.9', 'ICD10CM', 'Condition'), + (37311061, 'COVID-19', '2012-04-01', '2020-04-01', '840539006', 'SNOMED', 'Condition'), + (4041664, 'Difficulty breathing', '2012-04-01', '2020-04-01', '230145002', 'SNOMED', 'Condition'), + (316139, 'Heart failure', '2012-04-01', '2020-04-01', '84114007', 'SNOMED', 'Condition'); """) # Insert hierarchical relationships as needed @@ -249,9 +252,7 @@ def test_db(): # mock configuration file - bias = BIAS() - - bias.set_config(config_file) + bias = BIAS(config_file_path=config_file) bias.set_root_omop() yield bias # Provide the connection to the test diff --git a/tests/test_biasanalyzer_api.py b/tests/test_biasanalyzer_api.py index 22b78a7..fc594f7 100644 --- a/tests/test_biasanalyzer_api.py +++ b/tests/test_biasanalyzer_api.py @@ -1,6 +1,8 @@ import os -import pytest +import datetime import logging +import pytest + from biasanalyzer import __version__ @@ -8,6 +10,11 @@ def test_version(): assert __version__ == '0.1.0' def test_set_config(caplog, fresh_bias_obj): + caplog.clear() + with caplog.at_level(logging.INFO): + fresh_bias_obj.set_config('') + assert 'no configuration file specified' in caplog.text + caplog.clear() with caplog.at_level(logging.ERROR): fresh_bias_obj.set_config('non_existent_config_file.yaml') @@ -94,19 +101,48 @@ def test_get_domains_and_vocabularies_invalid(caplog, fresh_bias_obj): def test_get_domains_and_vocabularies(test_db): domains_and_vocabularies = test_db.get_domains_and_vocabularies() - print(f'domains_and_vocabs: {domains_and_vocabularies}', flush=True) expected = [{'domain_id': 'Condition', 'vocabulary_id': 'ICD10CM'}, {'domain_id': 'Condition', 'vocabulary_id': 'SNOMED'}] assert domains_and_vocabularies == expected -def test_get_concepts(caplog, fresh_bias_obj): +def test_get_concepts_no_omop_cdm(caplog, fresh_bias_obj): caplog.clear() with caplog.at_level(logging.INFO): fresh_bias_obj.get_concepts('dummy') assert 'valid OMOP CDM must be set' in caplog.text -def test_get_concept_hierarchy(caplog, fresh_bias_obj): +def test_get_concepts_no_domain_and_vocab(caplog, test_db): + caplog.clear() + with caplog.at_level(logging.INFO): + test_db.get_concepts('dummy') + assert 'either domain or vocabulary must be set' in caplog.text + +def test_get_concepts(test_db): + concepts = test_db.get_concepts('Heart failure', domain='Condition', vocabulary='SNOMED') + print(f'concepts: {concepts}', flush=True) + expected = [{'concept_id': 316139, 'concept_name': 'Heart failure', + 'valid_start_date': datetime.date(2012, 4, 1), + 'valid_end_date': datetime.date(2020, 4, 1), + 'domain_id': 'Condition', 'vocabulary_id': 'SNOMED'}] + assert concepts == expected + +def test_get_concept_hierarchy_no_omop_cdm(caplog, fresh_bias_obj): caplog.clear() with caplog.at_level(logging.INFO): fresh_bias_obj.get_concept_hierarchy('dummy') assert 'valid OMOP CDM must be set' in caplog.text + +def test_get_concept_hierarchy(test_db): + with pytest.raises(ValueError): + test_db.get_concept_hierarchy('not_int_str') + + hierarchy = test_db.get_concept_hierarchy(2) + print(f'hierarchy: {hierarchy}', flush=True) + expected = ({'details': {'concept_id': 2, 'concept_name': 'Type 1 Diabetes Mellitus', 'vocabulary_id': 'ICD10CM', + 'concept_code': 'E10'}, 'parents': [{'details': {'concept_id': 1, 'concept_name': + 'Diabetes Mellitus', 'vocabulary_id': 'ICD10CM', 'concept_code': 'E10-E14'}, 'parents': []}]}, + {'details': {'concept_id': 2, 'concept_name': 'Type 1 Diabetes Mellitus', 'vocabulary_id': 'ICD10CM', + 'concept_code': 'E10'}, 'children': [{'details': {'concept_id': 4, 'concept_name': + 'Diabetic Retinopathy', 'vocabulary_id': 'ICD10CM', 'concept_code': 'E10.3/E11.3'}, + 'children': []}]}) + assert hierarchy == expected From 2b61ae89518a84dde53246b2589a756c8832268d Mon Sep 17 00:00:00 2001 From: hyi Date: Fri, 6 Jun 2025 22:58:23 -0400 Subject: [PATCH 05/12] added more tests --- biasanalyzer/database.py | 50 +++++++++++------------ biasanalyzer/sql.py | 2 + tests/query_based/test_cohort_creation.py | 45 ++++++++++---------- tests/test_biasanalyzer_api.py | 14 ++++++- 4 files changed, 64 insertions(+), 47 deletions(-) diff --git a/biasanalyzer/database.py b/biasanalyzer/database.py index ccdfb72..5ed6749 100644 --- a/biasanalyzer/database.py +++ b/biasanalyzer/database.py @@ -8,7 +8,7 @@ from sqlalchemy import create_engine, text from biasanalyzer.models import Cohort, CohortDefinition from biasanalyzer.sql import * -from biasanalyzer.utils import build_concept_hierarchy, print_hierarchy, find_roots +from biasanalyzer.utils import build_concept_hierarchy, print_hierarchy, find_roots, notify_users class BiasDatabase: @@ -51,7 +51,7 @@ def _create_cohort_definition_table(self): self.conn.execute('CREATE SEQUENCE id_sequence START 1') except duckdb.Error as e: if "already exists" in str(e).lower(): - print("Sequence already exists, skipping creation.") + notify_users("Sequence already exists, skipping creation.") else: raise self.conn.execute(''' @@ -65,7 +65,7 @@ def _create_cohort_definition_table(self): PRIMARY KEY (id) ) ''') - print("Cohort Definition table created.") + notify_users("Cohort Definition table created.") def _create_cohort_table(self): self.conn.execute(''' @@ -83,10 +83,10 @@ def _create_cohort_table(self): ''') except duckdb.Error as e: if "already exists" in str(e).lower(): - print("Index already exists, skipping creation.") + notify_users("Index already exists, skipping creation.") else: raise - print("Cohort table created.") + notify_users("Cohort table created.") def load_postgres_extension(self): self.conn.execute("INSTALL postgres_scanner;") @@ -104,7 +104,7 @@ def create_cohort_definition(self, cohort_definition: CohortDefinition, progress cohort_definition.created_by )) if progress_obj is None: - print("Cohort definition inserted successfully.") + notify_users("Cohort definition inserted successfully.") else: progress_obj.write("Cohort definition inserted successfully.") self.conn.execute("SELECT id from cohort_definition ORDER BY id DESC LIMIT 1") @@ -194,7 +194,7 @@ def get_cohort_basic_stats(self, cohort_definition_id: int, variable=''): f"Valid variables are {self.__class__.stats_queries.keys()}") stats_query = query_str.format(cohort_definition_id) else: - print("Cannot connect to the OMOP database to query person table") + notify_users("Cannot connect to the OMOP database to query person table") return None else: # Query the cohort data to get basic statistics @@ -225,7 +225,7 @@ def get_cohort_basic_stats(self, cohort_definition_id: int, variable=''): return self._execute_query(stats_query) except Exception as e: - print(f"Error computing cohort basic statistics: {e}") + notify_users(f"Error computing cohort basic statistics: {e}", level='error') return None @property @@ -239,16 +239,16 @@ def get_cohort_distributions(self, cohort_definition_id: int, variable: str): try: if self._create_omop_table('person'): query_str = self.__class__.distribution_queries.get(variable) - if query_str is None: + if not query_str: raise ValueError(f"Distribution for variable '{variable}' is not available. " f"Valid variables are {self.__class__.distribution_queries.keys()}") query = query_str.format(cohort_definition_id) return self._execute_query(query) else: - print("Cannot connect to the OMOP database to query person table") + notify_users("Cannot connect to the OMOP database to query person table") return None except Exception as e: - print(f"Error computing cohort {variable} distributions: {e}") + notify_users(f"Error computing cohort {variable} distributions: {e}", level='error') return None def get_cohort_concept_stats(self, cohort_definition_id: int, @@ -259,8 +259,8 @@ def get_cohort_concept_stats(self, cohort_definition_id: int, """ concept_stats = {} if concept_type not in self.__class__.cohort_concept_queries: - print(f"input {concept_type} is not a valid concept type. " - f"Supported concept types are: {self.__class__.cohort_concept_queries.keys()}") + notify_users(f"input {concept_type} is not a valid concept type. " + f"Supported concept types are: {self.__class__.cohort_concept_queries.keys()}", level='error') return concept_stats try: if self._create_omop_table('concept') and self._create_omop_table('concept_ancestor'): @@ -279,26 +279,26 @@ def get_cohort_concept_stats(self, cohort_definition_id: int, filtered_cs_df = cs_df[cs_df['ancestor_concept_id'] != cs_df['descendant_concept_id']] roots = find_roots(filtered_cs_df) hierarchy = build_concept_hierarchy(filtered_cs_df) - print(f'cohort concept hierarchy for {concept_type} with root concept ids {roots}:') + notify_users(f'cohort concept hierarchy for {concept_type} with root concept ids {roots}:') for root in roots: root_detail = cs_df[(cs_df['ancestor_concept_id'] == root) & (cs_df['descendant_concept_id'] == root)]['details'].iloc[0] print_hierarchy(hierarchy, parent=root, level=0, parent_details=root_detail) return concept_stats else: - print(f"Cannot connect to the OMOP database to query {concept_type} table") + notify_users(f"Cannot connect to the OMOP database to query {concept_type} table") return concept_stats else: - print("Cannot connect to the OMOP database to query concept table") + notify_users("Cannot connect to the OMOP database to query concept table") return concept_stats except Exception as e: - print(f"Error computing cohort concept stats: {e}") + notify_users(f"Error computing cohort concept stats: {e}", level='error') return concept_stats def close(self): self.conn.close() BiasDatabase._instance = None - print("Connection to BiasDatabase closed.") + notify_users("Connection to BiasDatabase closed.") class OMOPCDMDatabase: @@ -315,9 +315,9 @@ def _initialize(self, db_url): # Handle DuckDB connection try: self.engine = duckdb.connect(db_url) - print(f"Connected to the DuckDB database: {db_url}.") + notify_users(f"Connected to the DuckDB database: {db_url}.") except duckdb.Error as e: - print(f"Failed to connect to DuckDB: {e}") + notify_users(f"Failed to connect to DuckDB: {e}", level='error') self.Session = self.engine # Use engine directly for DuckDB self._database_type = 'duckdb' try: @@ -327,10 +327,10 @@ def _initialize(self, db_url): connect_args={'options': '-c default_transaction_read_only=on'} # Enforce read-only transactions ) self.Session = sessionmaker(bind=self.engine) - print("Connected to the OMOP CDM database (read-only).") + notify_users("Connected to the OMOP CDM database (read-only).") self._database_type = 'postgresql' except SQLAlchemyError as e: - print(f"Failed to connect to the database: {e}") + notify_users(f"Failed to connect to the database: {e}", level='error') def get_session(self): if self._database_type == 'duckdb': @@ -357,10 +357,10 @@ def execute_query(self, query, params=None): return [dict(zip(headers, row)) for row in results] except duckdb.Error as e: - print(f"Error executing query: {e}") + notify_users(f"Error executing query: {e}", level='error') return [] except SQLAlchemyError as e: - print(f"Error executing query: {e}") + notify_users(f"Error executing query: {e}", level='error') omop_session.close() return [] @@ -534,4 +534,4 @@ def close(self): else: self.engine.dispose() OMOPCDMDatabase._instance = None - print("Connection to the OMOP CDM database closed.") + notify_users("Connection to the OMOP CDM database closed.") diff --git a/biasanalyzer/sql.py b/biasanalyzer/sql.py index 8aa39c4..a87c665 100644 --- a/biasanalyzer/sql.py +++ b/biasanalyzer/sql.py @@ -5,6 +5,7 @@ SELECT p.person_id, EXTRACT(YEAR FROM COALESCE( + c.cohort_start_date, c.cohort_end_date, CURRENT_DATE ) @@ -82,6 +83,7 @@ SELECT p.person_id, EXTRACT(YEAR FROM COALESCE( + c.cohort_start_date, c.cohort_end_date, CURRENT_DATE ) diff --git a/tests/query_based/test_cohort_creation.py b/tests/query_based/test_cohort_creation.py index 12a9d11..1334402 100644 --- a/tests/query_based/test_cohort_creation.py +++ b/tests/query_based/test_cohort_creation.py @@ -1,12 +1,13 @@ import pytest import os import datetime +import logging from numpy.ma.testutils import assert_equal -@pytest.mark.usefixtures -def test_cohort_creation_baseline(test_db): +def test_cohort_creation_baseline(caplog, test_db): bias = test_db + cohort = bias.create_cohort( "COVID-19 patient", "Cohort of young female patients", @@ -16,11 +17,18 @@ def test_cohort_creation_baseline(test_db): ) # Test cohort object and methods assert cohort is not None, "Cohort creation failed" - print(f'metadata: {cohort.metadata}') assert cohort.metadata is not None, "Cohort creation wrongly returned None metadata" assert 'creation_info' in cohort.metadata, "Cohort creation does not contain 'creation_info' key" assert cohort.data is not None, "Cohort creation wrongly returned None data" - print(f'baseline cohort data: {cohort.data}', flush=True) + caplog.clear() + with caplog.at_level(logging.ERROR): + cohort.get_distributions('ethnicity') + assert "Distribution for variable 'ethnicity' is not available" in caplog.text + + assert len(cohort.get_distributions('age')) == 10, "Cohort get_distribution('age') does not return 10 age_bin items" + assert len(cohort.get_distributions('gender')) == 3, ("Cohort get_distribution('gender') does not return " + "3 gender_bin items") + patient_ids = set([item['subject_id'] for item in cohort.data]) assert_equal(len(patient_ids), 5) assert_equal(patient_ids, {106, 108, 110, 111, 112}) @@ -39,7 +47,6 @@ def test_cohort_creation_baseline(test_db): "Incorrect cohort_end_date for patient 108") -@pytest.mark.usefixtures def test_cohort_creation_study(test_db): bias = test_db cohort = bias.create_cohort( @@ -51,8 +58,6 @@ def test_cohort_creation_study(test_db): ) # Test cohort object and methods assert cohort is not None, "Cohort creation failed" - print(f'metadata: {cohort.metadata}') - print(f'data: {cohort.data}') assert cohort.metadata is not None, "Cohort creation wrongly returned None metadata" assert 'creation_info' in cohort.metadata, "Cohort creation does not contain 'creation_info' key" assert cohort.data is not None, "Cohort creation wrongly returned None data" @@ -60,20 +65,21 @@ def test_cohort_creation_study(test_db): assert_equal(len(patient_ids), 4) assert_equal(patient_ids, {108, 110, 111, 112}) -@pytest.mark.usefixtures -def test_cohort_creation_study2(test_db): +def test_cohort_creation_study2(caplog, test_db): bias = test_db - cohort = bias.create_cohort( - "COVID-19 patient", - "Cohort of young female patients with no COVID-19", - os.path.join(os.path.dirname(__file__), '..', 'assets', 'cohort_creation', - 'test_cohort_creation_condition_occurrence_config_study2.yaml'), - "test_user" - ) + caplog.clear() + with caplog.at_level(logging.INFO): + cohort = bias.create_cohort( + "COVID-19 patient", + "Cohort of young female patients with no COVID-19", + os.path.join(os.path.dirname(__file__), '..', 'assets', 'cohort_creation', + 'test_cohort_creation_condition_occurrence_config_study2.yaml'), + "test_user", + delay=1 + ) + assert 'Simulating long-running task' in caplog.text # Test cohort object and methods assert cohort is not None, "Cohort creation failed" - print(f'metadata: {cohort.metadata}') - print(f'data: {cohort.data}') assert cohort.metadata is not None, "Cohort creation wrongly returned None metadata" assert 'creation_info' in cohort.metadata, "Cohort creation does not contain 'creation_info' key" assert cohort.data is not None, "Cohort creation wrongly returned None data" @@ -81,7 +87,6 @@ def test_cohort_creation_study2(test_db): assert_equal(len(patient_ids), 1) assert_equal(patient_ids, {106}) -@pytest.mark.usefixtures def test_cohort_creation_all(test_db): bias = test_db cohort = bias.create_cohort( @@ -95,7 +100,6 @@ def test_cohort_creation_all(test_db): ) # Test cohort object and methods assert cohort is not None, "Cohort creation failed" - print(f'metadata: {cohort.metadata}') assert cohort.metadata is not None, "Cohort creation wrongly returned None metadata" assert 'creation_info' in cohort.metadata, "Cohort creation does not contain 'creation_info' key" stats = cohort.get_stats() @@ -106,7 +110,6 @@ def test_cohort_creation_all(test_db): assert_equal(len(patient_ids), 2) assert_equal(patient_ids, {108, 110}) -@pytest.mark.usefixtures def test_cohort_creation_mixed_domains(test_db): """ Test cohort creation with mixed domains (condition, drug, visit, procedure). diff --git a/tests/test_biasanalyzer_api.py b/tests/test_biasanalyzer_api.py index fc594f7..382ee5b 100644 --- a/tests/test_biasanalyzer_api.py +++ b/tests/test_biasanalyzer_api.py @@ -93,6 +93,18 @@ def test_set_cohort_action(caplog, fresh_bias_obj): fresh_bias_obj._set_cohort_action() assert 'valid OMOP CDM must be set' in caplog.text +def test_create_cohort_with_no_action(caplog, fresh_bias_obj): + caplog.clear() + with caplog.at_level(logging.INFO): + fresh_bias_obj.create_cohort('test', 'test', 'test.yaml', 'test') + assert 'failed to create a valid cohort action object' in caplog.text + +def test_compare_cohort_with_no_action(caplog, fresh_bias_obj): + caplog.clear() + with caplog.at_level(logging.INFO): + fresh_bias_obj.compare_cohorts(1, 2) + assert 'failed to create a valid cohort action object' in caplog.text + def test_get_domains_and_vocabularies_invalid(caplog, fresh_bias_obj): caplog.clear() with caplog.at_level(logging.INFO): @@ -135,7 +147,7 @@ def test_get_concept_hierarchy_no_omop_cdm(caplog, fresh_bias_obj): def test_get_concept_hierarchy(test_db): with pytest.raises(ValueError): test_db.get_concept_hierarchy('not_int_str') - + hierarchy = test_db.get_concept_hierarchy(2) print(f'hierarchy: {hierarchy}', flush=True) expected = ({'details': {'concept_id': 2, 'concept_name': 'Type 1 Diabetes Mellitus', 'vocabulary_id': 'ICD10CM', From 782e4614985c55a4def7f129b0f9deead9b0f922 Mon Sep 17 00:00:00 2001 From: hyi Date: Sat, 7 Jun 2025 12:09:58 -0400 Subject: [PATCH 06/12] added more tests --- .coveragerc | 12 ++++++ .github/workflows/test.yml | 2 +- biasanalyzer/api.py | 29 ++----------- biasanalyzer/utils.py | 23 ++++++++++ tests/query_based/test_cohort_creation.py | 23 ++++++++++ tests/test_biasanalyzer_api.py | 52 +++++++++++++++++++++++ 6 files changed, 115 insertions(+), 26 deletions(-) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..384b44e --- /dev/null +++ b/.coveragerc @@ -0,0 +1,12 @@ +[run] +omit = + */module_test.py + +[report] +exclude_lines = + pragma: no cover + if __name__ == .__main__.: + +[html] +directory = coverage_html_report + diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8f19c7e..24c2fc5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -39,4 +39,4 @@ jobs: # Step 5: Run Tests - name: Run tests run: | - poetry run pytest -s --cov=biasanalyzer + poetry run pytest -s --cov=biasanalyzer --cov-config=.coveragerc diff --git a/biasanalyzer/api.py b/biasanalyzer/api.py index ade0e32..50fca55 100644 --- a/biasanalyzer/api.py +++ b/biasanalyzer/api.py @@ -4,9 +4,9 @@ from biasanalyzer.cohort import CohortAction from biasanalyzer.config import load_config from ipywidgets import VBox, Label -from ipytree import Tree, Node +from ipytree import Tree from IPython.display import display -from biasanalyzer.utils import get_direction_arrow, notify_users +from biasanalyzer.utils import get_direction_arrow, notify_users, build_concept_tree class BIAS: @@ -96,27 +96,6 @@ def get_concept_hierarchy(self, concept_id): return None return self.omop_cdm_db.get_concept_hierarchy(concept_id) - def _build_concept_tree(self, concept_tree: dict, tree_type: str) -> Node: - """ - Recursively builds an ipytree Node for a given concept tree. - """ - # Extract concept details - details = concept_tree.get("details", {}) - concept_name = details.get("concept_name", "Unknown Concept") - concept_id = details.get("concept_id", "") - concept_code = details.get("concept_code", "") - direction_arrow = get_direction_arrow(tree_type) - # Create a label for the current concept - label_text = f"{direction_arrow} {concept_name} (ID: {concept_id}, Code: {concept_code})" - node = Node(label_text) - - # Recursively add child nodes - for child in concept_tree.get(tree_type, []): - child_node = self._build_concept_tree(child, tree_type) - node.add_node(child_node) - - return node - def display_concept_tree(self, concept_tree: dict, level: int = 0, show_in_text_format=True, tree_type=None): """ Recursively prints the concept hierarchy tree in an indented format for display. @@ -146,12 +125,12 @@ def display_concept_tree(self, concept_tree: dict, level: int = 0, show_in_text_ else: # Extract concept details # Build the root tree node - root_node = self._build_concept_tree(concept_tree, tree_type) + root_node = build_concept_tree(concept_tree, tree_type) tree = Tree() tree.add_node(root_node) tree.opened = True display(VBox([Label("Concept Hierarchy"), tree])) - return None + return root_node def create_cohort(self, cohort_name: str, cohort_desc: str, query_or_yaml_file: str, created_by: str, diff --git a/biasanalyzer/utils.py b/biasanalyzer/utils.py index faa9953..c2a2e2b 100644 --- a/biasanalyzer/utils.py +++ b/biasanalyzer/utils.py @@ -1,5 +1,6 @@ import numpy as np import re +from ipytree import Node import logging @@ -67,6 +68,28 @@ def build_concept_hierarchy(df, parent_col="ancestor_concept_id", child_col="des return hierarchy +def build_concept_tree(concept_tree: dict, tree_type: str) -> Node: + """ + Recursively builds an ipytree Node for a given concept tree. + """ + # Extract concept details + details = concept_tree.get("details", {}) + concept_name = details.get("concept_name", "Unknown Concept") + concept_id = details.get("concept_id", "") + concept_code = details.get("concept_code", "") + direction_arrow = get_direction_arrow(tree_type) + # Create a label for the current concept + label_text = f"{direction_arrow} {concept_name} (ID: {concept_id}, Code: {concept_code})" + node = Node(label_text) + + # Recursively add child nodes + for child in concept_tree.get(tree_type, []): + child_node = build_concept_tree(child, tree_type) + node.add_node(child_node) + + return node + + def find_roots(df, parent_col="ancestor_concept_id", child_col="descendant_concept_id"): """ Finds root nodes in the hierarchy. Roots are nodes that are parents diff --git a/tests/query_based/test_cohort_creation.py b/tests/query_based/test_cohort_creation.py index 1334402..7878b65 100644 --- a/tests/query_based/test_cohort_creation.py +++ b/tests/query_based/test_cohort_creation.py @@ -143,3 +143,26 @@ def test_cohort_creation_mixed_domains(test_db): end_dates = [item['cohort_end_date'] for item in cohort.data] assert_equal(len(end_dates), 2) assert_equal(end_dates, [datetime.date(2020, 6, 20), datetime.date(2020, 6, 20)]) + +def test_cohort_comparison(test_db): + bias = test_db + cohort_base = bias.create_cohort( + "COVID-19 patient", + "Cohort of young female patients", + os.path.join(os.path.dirname(__file__), '..', 'assets', 'cohort_creation', + 'test_cohort_creation_condition_occurrence_config_baseline.yaml'), + "test_user" + ) + cohort_study = bias.create_cohort( + "Female diabetes patients born between 1970 and 2000", + "Cohort of female patients with diabetes who had insulin prescribed 0-30 days after diagnosis " + "and have at least one outpatient or emergency visit and underwent a blood test before 12/31/2020, " + "with patients born after 1995 and with cardiac surgery excluded", + os.path.join(os.path.dirname(__file__), '..', 'assets', 'cohort_creation', + 'test_cohort_creation_config.yaml'), + "test_user" + ) + results = bias.compare_cohorts(cohort_base.cohort_id, cohort_study.cohort_id) + print(f'results: {results}', flush=True) + assert {'gender_hellinger_distance': 0.0} in results + assert any('age_hellinger_distance' in r for r in results) diff --git a/tests/test_biasanalyzer_api.py b/tests/test_biasanalyzer_api.py index 382ee5b..3d1c241 100644 --- a/tests/test_biasanalyzer_api.py +++ b/tests/test_biasanalyzer_api.py @@ -2,6 +2,7 @@ import datetime import logging import pytest +from ipytree import Node from biasanalyzer import __version__ @@ -158,3 +159,54 @@ def test_get_concept_hierarchy(test_db): 'Diabetic Retinopathy', 'vocabulary_id': 'ICD10CM', 'concept_code': 'E10.3/E11.3'}, 'children': []}]}) assert hierarchy == expected + +def test_display_concept_tree_text_format(capsys, test_db): + sample_tree = { + "details": { + "concept_id": 123, + "concept_name": "Hypertension", + "concept_code": "I10" + } + } + test_db.display_concept_tree(sample_tree) + captured = capsys.readouterr() + assert "concept tree must contain parents or children key" in captured.out + + sample_tree['children'] = [{ + "details": { + "concept_id": 456, + "concept_name": "Essential Hypertension", + "concept_code": "I10.0" + }, + "children": [] + }] + test_db.display_concept_tree(sample_tree, show_in_text_format=True) + captured = capsys.readouterr() + assert "Hypertension (ID: 123" in captured.out + assert "Essential Hypertension (ID: 456" in captured.out + +def test_display_concept_tree_widget(test_db): + sample_tree = { + "details": { + "concept_id": 456, + "concept_name": "Essential Hypertension", + "concept_code": "I10.0" + }, + "parents": [{ + "details": { + "concept_id": 123, + "concept_name": "Hypertension", + "concept_code": "I10" + }, + "parents": [] + }] + } + + tree_output = test_db.display_concept_tree(sample_tree, show_in_text_format=False) + assert tree_output is not None + print(tree_output) + assert isinstance(tree_output, Node) + assert "Essential Hypertension" in tree_output.name + assert len(tree_output.nodes) == 1 + parent_node = tree_output.nodes[0] + assert "Hypertension" in parent_node.name From 6ee464de0054e7f2935c4161be5fcd40774f39a1 Mon Sep 17 00:00:00 2001 From: hyi Date: Sat, 7 Jun 2025 14:34:33 -0400 Subject: [PATCH 07/12] added more tests --- .coveragerc | 2 +- biasanalyzer/cohort.py | 18 ++++---- tests/query_based/test_cohort_creation.py | 53 +++++++++++++++++++++-- 3 files changed, 60 insertions(+), 13 deletions(-) diff --git a/.coveragerc b/.coveragerc index 384b44e..b1be6ff 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,7 +1,7 @@ [run] omit = */module_test.py - + biasanalyzer/background/threading_utils.py [report] exclude_lines = pragma: no cover diff --git a/biasanalyzer/cohort.py b/biasanalyzer/cohort.py index a36853d..6ca1d75 100644 --- a/biasanalyzer/cohort.py +++ b/biasanalyzer/cohort.py @@ -4,10 +4,10 @@ from datetime import datetime from tqdm.auto import tqdm from pydantic import ValidationError -from biasanalyzer.models import CohortDefinition, Cohort +from biasanalyzer.models import CohortDefinition from biasanalyzer.config import load_cohort_creation_config from biasanalyzer.database import OMOPCDMDatabase, BiasDatabase -from biasanalyzer.utils import hellinger_distance, clean_string +from biasanalyzer.utils import hellinger_distance, clean_string, notify_users from biasanalyzer.cohort_query_builder import CohortQueryBuilder @@ -99,12 +99,11 @@ def create_cohort(self, cohort_name: str, description: str, query_or_yaml_file: cohort_config = load_cohort_creation_config(query_or_yaml_file) tqdm.write(f'configuration specified in {query_or_yaml_file} loaded successfully') except FileNotFoundError: - print('specified cohort creation configuration file does not exist. Make sure ' - 'the configuration file name with path is specified correctly.') + notify_users('specified cohort creation configuration file does not exist. Make sure ' + 'the configuration file name with path is specified correctly.') return None except ValidationError as ex: - print(f'cohort creation configuration yaml file is not valid with ' - f'validation error: {ex}') + notify_users(f'cohort creation configuration yaml file is not valid with validation error: {ex}') return None query = self._query_builder.build_query(cohort_config) @@ -139,11 +138,12 @@ def create_cohort(self, cohort_name: str, description: str, query_or_yaml_file: tqdm.write(f"Cohort {cohort_name} successfully created.") return CohortData(cohort_id=cohort_def_id, bias_db=self.bias_db, omop_db=self.omop_db) except duckdb.Error as e: - print(f"Error executing query: {e}") + notify_users(f"Error executing query: {e}") return None except SQLAlchemyError as e: - print(f"Error executing query: {e}") - omop_session.close() + notify_users(f"Error executing query: {e}") + if omop_session is not None: + omop_session.close() return None def compare_cohorts(self, cohort_id_1: int, cohort_id_2: int): diff --git a/tests/query_based/test_cohort_creation.py b/tests/query_based/test_cohort_creation.py index 7878b65..6c6acbb 100644 --- a/tests/query_based/test_cohort_creation.py +++ b/tests/query_based/test_cohort_creation.py @@ -1,13 +1,12 @@ -import pytest import os import datetime import logging +from sqlalchemy.exc import SQLAlchemyError from numpy.ma.testutils import assert_equal def test_cohort_creation_baseline(caplog, test_db): bias = test_db - cohort = bias.create_cohort( "COVID-19 patient", "Cohort of young female patients", @@ -163,6 +162,54 @@ def test_cohort_comparison(test_db): "test_user" ) results = bias.compare_cohorts(cohort_base.cohort_id, cohort_study.cohort_id) - print(f'results: {results}', flush=True) assert {'gender_hellinger_distance': 0.0} in results assert any('age_hellinger_distance' in r for r in results) + +def test_cohort_invalid(caplog, test_db): + caplog.clear() + with caplog.at_level(logging.INFO): + invalid_cohort = test_db.create_cohort('invalid_cohort', 'invalid_cohort', + 'invalid_yaml_file.yml', + 'invalid_created_by') + assert 'cohort creation configuration file does not exist' in caplog.text + assert invalid_cohort is None + + caplog.clear() + with caplog.at_level(logging.INFO): + invalid_cohort = test_db.create_cohort('invalid_cohort', 'invalid_cohort', + os.path.join(os.path.dirname(__file__), '..', 'assets', 'config', + 'test_config.yaml'), 'invalid_created_by') + assert 'configuration yaml file is not valid' in caplog.text + assert invalid_cohort is None + + with caplog.at_level(logging.INFO): + invalid_cohort = test_db.create_cohort('invalid_cohort', 'invalid_cohort', + 'INVALID SQL QUERY STRING', + 'invalid_created_by') + assert 'Error executing query:' in caplog.text + assert invalid_cohort is None + +def test_create_cohort_sqlalchemy_error(monkeypatch, fresh_bias_obj): + # Mock omop_db methods + class MockOmopDB: + def get_session(self): + return self # not used after error + def execute_query(self, query): + raise SQLAlchemyError("Mocked SQLAlchemy error") + def close(self): + pass + + class MockBiasDB: + def create_cohort_definition(self, *args, **kwargs): + pass + def create_cohort_in_bulk(self, *args, **kwargs): + pass + def close(self): + pass + + fresh_bias_obj.omop_cdm_db = MockOmopDB() + fresh_bias_obj.bias_db = MockBiasDB() + + result = fresh_bias_obj.create_cohort("test", "desc", "SELECT * FROM person", "test_user") + + assert result is None From ca6733bcc0cf447c71222197421400f7a1a7e781 Mon Sep 17 00:00:00 2001 From: hyi Date: Sat, 7 Jun 2025 18:53:06 -0400 Subject: [PATCH 08/12] added more tests --- biasanalyzer/cohort_query_builder.py | 38 ++++++++----- ...iple_temporal_groups_without_operator.yaml | 57 +++++++++++++++++++ tests/query_based/test_cohort_creation.py | 17 ++++++ 3 files changed, 97 insertions(+), 15 deletions(-) create mode 100644 tests/assets/cohort_creation/test_cohort_creation_multiple_temporal_groups_without_operator.yaml diff --git a/biasanalyzer/cohort_query_builder.py b/biasanalyzer/cohort_query_builder.py index 95f411e..860970a 100644 --- a/biasanalyzer/cohort_query_builder.py +++ b/biasanalyzer/cohort_query_builder.py @@ -9,14 +9,14 @@ class CohortQueryBuilder: def __init__(self): """Get the path to SQL templates, whether running from source or installed.""" try: - if sys.version_info >= (3, 9): + if sys.version_info >= (3, 9): # pragma: no cover # Python 3.9+: Use importlib.resources.files() template_path = importlib.resources.files("biasanalyzer").joinpath("sql_templates") else: # Python 3.8: Use importlib.resources.path() (context manager) with importlib.resources.path("biasanalyzer", "sql_templates") as p: template_path = str(p) - except ModuleNotFoundError: + except ModuleNotFoundError: # pragma: no cover template_path = os.path.join(os.path.dirname(__file__), "sql_templates") print(f'template_path: {template_path}') @@ -117,7 +117,7 @@ def render_event_group(event_group, alias_prefix="evt"): event_sql = CohortQueryBuilder.render_event_group(event, f"{alias_prefix}_{i}") if event_sql: queries.append(event_sql) - if not queries: + if not queries: # pragma: no cover return "" if event_group["operator"] == "AND": @@ -150,9 +150,6 @@ def render_event_group(event_group, alias_prefix="evt"): elif event_group["operator"] == "OR": return f"SELECT person_id, event_start_date, event_end_date FROM ({' UNION '.join(queries)}) AS {alias_prefix}_or" elif event_group["operator"] == "NOT": - if len(queries) != 1: - raise ValueError("NOT operator expects exactly one event subquery") - # Keep the full subquery with dates for consistency, but use it as a filter not_query = queries[0] # Return a query that selects all persons from a base table (e.g., person), # excluding those in the NOT subquery, while allowing dates from other criteria @@ -187,10 +184,6 @@ def render_event_group(event_group, alias_prefix="evt"): FROM ({queries[0]}) AS {alias_prefix}_0 WHERE event_start_date < DATE '{timestamp}' """ - else: - print(f"Error: event_group: {event_group} with BEFORE operator only " - f"has one query event {queries}") - return '' elif len(queries) == 2: event_group = TemporalEventGroup(**event_group) e1_alias = f"e1_{alias_prefix}" @@ -213,7 +206,7 @@ def render_event_group(event_group, alias_prefix="evt"): AND {e1_alias}.event_start_date < {e2_alias}.event_start_date {interval_sql} """ - return "" + return "" # pragma: no cover def temporal_event_filter(self, event_groups, alias='c'): """ @@ -236,15 +229,30 @@ def temporal_event_filter(self, event_groups, alias='c'): filters.append(f"AND {alias}.person_id IN (SELECT person_id FROM ({group_sql}) AS ex_subquery_{i})") else: filters.append(f"({group_sql})") - if not filters: + if not filters: # pragma: no cover return "" if alias == 'ex': # For exclusion, combine with AND as filters return " ".join(filters) else: - # For inclusion, combine as a single subquery (assuming one event group for simplicity) - # If multiple groups, may need UNION or further logic + # For inclusion, handle both single event group case with operator defined and multiple event group + # case with no operator defined if len(filters) > 1: + # For multiple temporal event group case with no operator defined, use "OR" operator by default + # An example YAML block for multiple temporal event group is shown below for reference, in which + # case, patients who satisfy either group (condition 37311061 or drug 67890) will be included: + # inclusion_criteria: + # temporal_events: + # - operator: AND + # events: + # - event_type: condition_occurrence + # event_concept_id: 37311061 + # - operator: AND + # events: + # - event_type: drug_exposure + # event_concept_id: 67890 return (f"SELECT person_id, event_start_date, event_end_date FROM " f"({' UNION ALL '.join(filters)}) AS combined_events") - return filters[0] # Single event group case + + # Single event group case with operator defined + return filters[0] diff --git a/tests/assets/cohort_creation/test_cohort_creation_multiple_temporal_groups_without_operator.yaml b/tests/assets/cohort_creation/test_cohort_creation_multiple_temporal_groups_without_operator.yaml new file mode 100644 index 0000000..a4df661 --- /dev/null +++ b/tests/assets/cohort_creation/test_cohort_creation_multiple_temporal_groups_without_operator.yaml @@ -0,0 +1,57 @@ +inclusion_criteria: + demographics: # Optional + gender: 'female' # accepted values: female or male, optional field + min_birth_year: 2000 # Born at the year of 2000 or after, optional field + max_birth_year: 2020 # Born at the year of 2020 or before, optional field + temporal_events: + # Since no top operator is defined for the OR operator nested event and the BEFORE nested event, + # the default OR operator will be applied by default since a convenience feature is supported + # for multiple temporal event groups with no explicit operator defined. Therefore, the temporal events + # below captures a cohort of patients who have the condition with + # difficulty breathing 2 to 5 days before a COVID diagnosis 3/15/20-12/11/20 or have at + # least one emergency room visit or at least two inpatient visits + - operator: 'OR' + events: + - event_type: 'visit_occurrence' + event_concept_id: 9201 # inpatient visit + event_instance: 2 # Optional, minimum number of occurrences the event has happened + - event_type: 'visit_occurrence' + event_concept_id: 9203 # Emergency Room visit + # The operator BEFORE event below captures the condition that difficulty breathing happens + # 2 to 5 days before a COVID diagnosis between 3/15/20 and 12/11/20 + - operator: 'BEFORE' + # interval is an inclusive list, allowing lower or higher number being null indicating no lower or higher + # bound, respectively + interval: [2, 5] # 2 to 5 days between difficulty breathing and COVID diagnosis 3/15/20 - 12/11/20 + events: + - event_type: 'condition_occurrence' + event_concept_id: 4041664 # difficulty breathing + # The operator AND event below captures a COVID diagnosis between 3/15/20 and 12/11/20 + - operator: 'AND' + events: + - operator: 'BEFORE' + events: + - event_type: date + timestamp: '2020-03-15' + - event_type: 'condition_occurrence' + event_concept_id: 37311061 # COVID condition + - operator: 'BEFORE' + events: + - event_type: 'condition_occurrence' + event_concept_id: 37311061 # COVID condition + - event_type: date + timestamp: '2020-12-11' # when the first COVID vaccine was given + +exclusion_criteria: + # Among those patients meeting the inclusion criteria, patients born after 2010 and with + # heart failure conditions will be excluded from the cohort + demographics: # Optional + min_birth_year: 2010 + temporal_events: + # AND and OR operators allow one or more defined events, but NOT operator only allows + # one event; if NOT operator were used here, patients with no heart failure conditions + # will be excluded + - operator: 'AND' + events: + - event_type: 'condition_occurrence' + event_concept_id: 316139 # Exclude patients with heart failure diff --git a/tests/query_based/test_cohort_creation.py b/tests/query_based/test_cohort_creation.py index 6c6acbb..7a39090 100644 --- a/tests/query_based/test_cohort_creation.py +++ b/tests/query_based/test_cohort_creation.py @@ -109,6 +109,23 @@ def test_cohort_creation_all(test_db): assert_equal(len(patient_ids), 2) assert_equal(patient_ids, {108, 110}) +def test_cohort_creation_multiple_temporary_groups_with_no_operator(test_db): + bias = test_db + cohort = bias.create_cohort( + "Patients with COVID or other emergency conditions", + "Cohort of young female patients who either have COVID-19 with difficulty breathing 2 to 5 days " + "before a COVID diagnosis 3/15/20-12/11/20 OR have at least one emergency room visit or at least " + "two inpatient visits", + os.path.join(os.path.dirname(__file__), '..', 'assets', 'cohort_creation', + 'test_cohort_creation_multiple_temporal_groups_without_operator.yaml'), + "test_user" + ) + # Test cohort object and methods + patient_ids = set([item['subject_id'] for item in cohort.data]) + print(f'patient_ids: {patient_ids}', flush=True) + assert_equal(len(patient_ids), 2) + assert_equal(patient_ids, {108, 110}) + def test_cohort_creation_mixed_domains(test_db): """ Test cohort creation with mixed domains (condition, drug, visit, procedure). From b125b6b626359b97a249f196b1554315bd8769e6 Mon Sep 17 00:00:00 2001 From: hyi Date: Sat, 7 Jun 2025 20:44:16 -0400 Subject: [PATCH 09/12] added more tests --- biasanalyzer/models.py | 27 +++++-------- tests/query_based/test_cohort_creation.py | 49 +++++++++++++++++++++++ 2 files changed, 59 insertions(+), 17 deletions(-) diff --git a/biasanalyzer/models.py b/biasanalyzer/models.py index 9c114cc..2ec6140 100644 --- a/biasanalyzer/models.py +++ b/biasanalyzer/models.py @@ -125,26 +125,19 @@ class TemporalEventGroup(BaseModel): events: List[Union[TemporalEvent, "TemporalEventGroup"]] # A list of events or nested operators interval: Optional[List[Union[int, None]]] = None # [start, end] interval only applying for BEFORE operator - @field_validator("interval", mode="before") - def validate_interval_structure(cls, value): - """Ensure interval is a list with exactly two elements, or None.""" - if value is None: - return value - if not isinstance(value, list) or len(value) != 2: - raise ValueError("Interval must be a list with exactly two elements: [start, end].") - return value - @model_validator(mode="before") def validate_interval_logic(cls, values): - operator = values.get("operator") + """ + Validate interval structure and logic for all operators, though only used for BEFORE. + Ensures interval is None or a list of two elements [start, end], with start <= end if both are integers. + For AND, OR, NOT, interval is validated but ignored in SQL generation. + """ interval = values.get("interval") - """Ensure interval is logically consistent when operator is 'BEFORE'.""" - if operator == "BEFORE" and interval is not None: + """Ensure interval is logically consistent which is only used for operator 'BEFORE'.""" + if interval is not None: + if not isinstance(interval, list) or len(interval) != 2: + raise ValueError("Interval must be a list with exactly two elements: [start, end].") start, end = interval - if start is not None and not isinstance(start, int): - raise ValueError("Interval start must be an integer or None.") - if end is not None and not isinstance(end, int): - raise ValueError("Interval end must be an integer or None.") if start is not None and end is not None and start > end: raise ValueError("Interval start cannot be greater than interval end.") return values @@ -167,7 +160,7 @@ def validate_events_list(cls, values): def get_interval_sql(self, e1_alias='e1', e2_alias='e2') -> str: """Generate SQL for the interval.""" - if not self.interval: + if not self.interval: # pragma: no cover return "" start = self.interval[0] if self.interval[0] is not None else 0 end = self.interval[1] if self.interval[1] is not None else 99999 diff --git a/tests/query_based/test_cohort_creation.py b/tests/query_based/test_cohort_creation.py index 7a39090..10c7e93 100644 --- a/tests/query_based/test_cohort_creation.py +++ b/tests/query_based/test_cohort_creation.py @@ -1,10 +1,59 @@ import os import datetime import logging +import pytest from sqlalchemy.exc import SQLAlchemyError from numpy.ma.testutils import assert_equal +from biasanalyzer.models import DemographicsCriteria, TemporalEvent, TemporalEventGroup +def test_cohort_yaml_validation(test_db): + invalid_data = { + "gender": "female", + "min_birth_year": 2000, + "max_birth_year": 1999 # Invalid: less than min_birth_year + } + with pytest.raises(ValueError): + DemographicsCriteria(**invalid_data) + + invalid_data = { + "event_type": "date", + "event_concept_id": "dummy" + } + # validate date event_type must have a timestamp field + with pytest.raises(ValueError): + TemporalEvent(**invalid_data) + + invalid_data = { + "operator": "BEFORE", + "events": [ + {'event_type': 'condition_occurrence', + 'event_concept_id': 201826}, + {'event_type': 'drug_exposure', + 'event_concept_id': 4285892}, + ], + "interval": [100, 50] + } + # validate interval start must be smaller than interval end + with pytest.raises(ValueError): + TemporalEventGroup(**invalid_data) + + # validate interval must be either a list of 2 integers or a None + invalid_data["interval"] = [123] + with pytest.raises(ValueError): + TemporalEventGroup(**invalid_data) + + # validate NOT operator cannot have more than one event + invalid_data["operator"] = "NOT" + with pytest.raises(ValueError): + TemporalEventGroup(**invalid_data) + + # validate BEFORE operator must have two events + invalid_data["operator"] = "BEFORE" + del invalid_data["events"][1] + with pytest.raises(ValueError): + TemporalEventGroup(**invalid_data) + def test_cohort_creation_baseline(caplog, test_db): bias = test_db cohort = bias.create_cohort( From e036f8f5efea00710def8f5a231126dd9506b0b8 Mon Sep 17 00:00:00 2001 From: hyi Date: Sat, 7 Jun 2025 22:33:11 -0400 Subject: [PATCH 10/12] added more database tests --- biasanalyzer/database.py | 7 +- tests/test_biasanalyzer_api.py | 6 +- tests/test_database.py | 122 +++++++++++++++++++++++++++++++++ 3 files changed, 131 insertions(+), 4 deletions(-) create mode 100644 tests/test_database.py diff --git a/biasanalyzer/database.py b/biasanalyzer/database.py index 5ed6749..1398f73 100644 --- a/biasanalyzer/database.py +++ b/biasanalyzer/database.py @@ -296,7 +296,8 @@ def get_cohort_concept_stats(self, cohort_definition_id: int, return concept_stats def close(self): - self.conn.close() + if self.conn: + self.conn.close() BiasDatabase._instance = None notify_users("Connection to BiasDatabase closed.") @@ -316,7 +317,7 @@ def _initialize(self, db_url): try: self.engine = duckdb.connect(db_url) notify_users(f"Connected to the DuckDB database: {db_url}.") - except duckdb.Error as e: + except duckdb.Error as e: # pragma: no cover notify_users(f"Failed to connect to DuckDB: {e}", level='error') self.Session = self.engine # Use engine directly for DuckDB self._database_type = 'duckdb' @@ -532,6 +533,6 @@ def close(self): if isinstance(self.engine, duckdb.DuckDBPyConnection): self.engine.close() else: - self.engine.dispose() + self.engine.dispose() # pragma: no cover OMOPCDMDatabase._instance = None notify_users("Connection to the OMOP CDM database closed.") diff --git a/tests/test_biasanalyzer_api.py b/tests/test_biasanalyzer_api.py index 3d1c241..0ed8ae3 100644 --- a/tests/test_biasanalyzer_api.py +++ b/tests/test_biasanalyzer_api.py @@ -132,12 +132,16 @@ def test_get_concepts_no_domain_and_vocab(caplog, test_db): def test_get_concepts(test_db): concepts = test_db.get_concepts('Heart failure', domain='Condition', vocabulary='SNOMED') - print(f'concepts: {concepts}', flush=True) expected = [{'concept_id': 316139, 'concept_name': 'Heart failure', 'valid_start_date': datetime.date(2012, 4, 1), 'valid_end_date': datetime.date(2020, 4, 1), 'domain_id': 'Condition', 'vocabulary_id': 'SNOMED'}] assert concepts == expected + concepts = test_db.get_concepts('Heart failure', vocabulary='SNOMED') + assert concepts == expected + concepts = test_db.get_concepts('Heart failure', domain='Condition') + print(f'concepts: {concepts}', flush=True) + assert concepts == expected def test_get_concept_hierarchy_no_omop_cdm(caplog, fresh_bias_obj): caplog.clear() diff --git a/tests/test_database.py b/tests/test_database.py new file mode 100644 index 0000000..1a9b074 --- /dev/null +++ b/tests/test_database.py @@ -0,0 +1,122 @@ +import duckdb +import pytest +from biasanalyzer.database import BiasDatabase + + +def test_create_cohort_definition_table_error_on_sequence(): + db = BiasDatabase(":memory:") + class MockConn: + def __init__(self): + self.calls = [] + + def execute(self, sql): + self.calls.append(sql) + if "CREATE SEQUENCE" in sql: + raise duckdb.Error("random error") # simulate failure + return None + + def close(self): + pass + + db.conn = MockConn() + + with pytest.raises(duckdb.Error, match="random error"): + db._create_cohort_definition_table() + +def test_create_cohort_definition_table_sequence_exists(): + db = BiasDatabase(":memory:") + class MockConn: + def __init__(self): + self.call_count = 0 + self.executed_sql = [] + + def execute(self, sql): + self.call_count += 1 + self.executed_sql.append(sql) + if "CREATE SEQUENCE" in sql: + raise duckdb.Error("Sequence already exists") + + return None + + def close(self): + pass + + db.conn = MockConn() + + # Should handle "Index already exists" without raising + db._create_cohort_definition_table() + + # Optional assertions + assert db.conn.call_count >= 2 + assert any("CREATE SEQUENCE" in sql for sql in db.conn.executed_sql) + +def test_create_cohort_index_error(): + db = BiasDatabase(":memory:") + class MockConn: + def __init__(self): + self.calls = [] + + def execute(self, sql): + self.calls.append(sql) + if "CREATE INDEX" in sql: + raise duckdb.Error("random error") # simulate failure + return None + + def close(self): + pass + + db.conn = MockConn() + + with pytest.raises(duckdb.Error, match="random error"): + db._create_cohort_table() + +def test_create_cohort_index_exists(): + db = BiasDatabase(":memory:") + class MockConn: + def __init__(self): + self.call_count = 0 + self.executed_sql = [] + + def execute(self, sql): + self.call_count += 1 + self.executed_sql.append(sql) + if "CREATE INDEX" in sql: + raise duckdb.Error("Index already exists") + + return None + + def close(self): + pass + + db.conn = MockConn() + + # Should handle "Index already exists" without raising + db._create_cohort_table() + + # Optional assertions + assert db.conn.call_count >= 2 + assert any("CREATE INDEX" in sql for sql in db.conn.executed_sql) + +def test_create_omop_table_postgres(monkeypatch): + # Set up tracking dict + called = {"executed": False, "query": None} + + # Patch before BiasDatabase instance is created + def mock_execute(self, query): + called["executed"] = True + called["query"] = query + return None + + # Monkeypatch at class level first + monkeypatch.setattr(duckdb.DuckDBPyConnection, "execute", mock_execute) + + # Now create the instance (so it uses the patched class method) + BiasDatabase._instance = None + db = BiasDatabase(":memory:") + db.omop_cdm_db_url = "postgresql://user:pass@localhost:5432/mydb" + + result = db._create_omop_table("person") + + assert result is True + assert called["executed"] is True + assert "postgres_scan" in called["query"] From 9ceb9a88703f98caa65024ecf94fb5a2b017a5be Mon Sep 17 00:00:00 2001 From: hyi Date: Sun, 8 Jun 2025 14:27:39 -0400 Subject: [PATCH 11/12] added more tests bringing coverage to 93% --- tests/query_based/test_cohort_creation.py | 4 ++ tests/test_database.py | 76 +++++++++++++++-------- 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/tests/query_based/test_cohort_creation.py b/tests/query_based/test_cohort_creation.py index 10c7e93..f497df1 100644 --- a/tests/query_based/test_cohort_creation.py +++ b/tests/query_based/test_cohort_creation.py @@ -63,8 +63,12 @@ def test_cohort_creation_baseline(caplog, test_db): 'test_cohort_creation_condition_occurrence_config_baseline.yaml'), "test_user" ) + # Test cohort object and methods assert cohort is not None, "Cohort creation failed" + cohort_id = cohort.cohort_id + assert bias.bias_db.get_cohort_definition(cohort_id)['name'] == "COVID-19 patient" + assert bias.bias_db.get_cohort_definition(cohort_id + 1) == {} assert cohort.metadata is not None, "Cohort creation wrongly returned None metadata" assert 'creation_info' in cohort.metadata, "Cohort creation does not contain 'creation_info' key" assert cohort.data is not None, "Cohort creation wrongly returned None data" diff --git a/tests/test_database.py b/tests/test_database.py index 1a9b074..2631c3a 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -3,7 +3,54 @@ from biasanalyzer.database import BiasDatabase +def test_create_omop_table_postgres(monkeypatch): + # Set up tracking dict + called = {"executed": False, "query": None} + + # Patch before BiasDatabase instance is created + def mock_execute(self, query): + called["executed"] = True + called["query"] = query + return None + + # Monkeypatch at class level first + monkeypatch.setattr(duckdb.DuckDBPyConnection, "execute", mock_execute) + + # Now create the instance (so it uses the patched class method) + BiasDatabase._instance = None + db = BiasDatabase(":memory:") + db.omop_cdm_db_url = "postgresql://user:pass@localhost:5432/mydb" + + result = db._create_omop_table("person") + + assert result is True + assert called["executed"] is True + assert "postgres_scan" in called["query"] + +def test_load_postgres_extension_executes_twice(monkeypatch): + # Reset singleton to get a clean instance + BiasDatabase._instance = None + db = BiasDatabase(":memory:") + + calls = [] + + class MockConn: + def execute(self, query): + calls.append(query) + return None + + db.conn = MockConn() + + # Run the method under test + db.load_postgres_extension() + + # Assert that execute() was called twice + assert len(calls) == 2 + assert "INSTALL postgres_scanner" in calls[0] + assert "LOAD postgres_scanner" in calls[1] + def test_create_cohort_definition_table_error_on_sequence(): + BiasDatabase._instance = None db = BiasDatabase(":memory:") class MockConn: def __init__(self): @@ -24,6 +71,7 @@ def close(self): db._create_cohort_definition_table() def test_create_cohort_definition_table_sequence_exists(): + BiasDatabase._instance = None db = BiasDatabase(":memory:") class MockConn: def __init__(self): @@ -46,11 +94,11 @@ def close(self): # Should handle "Index already exists" without raising db._create_cohort_definition_table() - # Optional assertions assert db.conn.call_count >= 2 assert any("CREATE SEQUENCE" in sql for sql in db.conn.executed_sql) def test_create_cohort_index_error(): + BiasDatabase._instance = None db = BiasDatabase(":memory:") class MockConn: def __init__(self): @@ -71,6 +119,7 @@ def close(self): db._create_cohort_table() def test_create_cohort_index_exists(): + BiasDatabase._instance = None db = BiasDatabase(":memory:") class MockConn: def __init__(self): @@ -93,30 +142,5 @@ def close(self): # Should handle "Index already exists" without raising db._create_cohort_table() - # Optional assertions assert db.conn.call_count >= 2 assert any("CREATE INDEX" in sql for sql in db.conn.executed_sql) - -def test_create_omop_table_postgres(monkeypatch): - # Set up tracking dict - called = {"executed": False, "query": None} - - # Patch before BiasDatabase instance is created - def mock_execute(self, query): - called["executed"] = True - called["query"] = query - return None - - # Monkeypatch at class level first - monkeypatch.setattr(duckdb.DuckDBPyConnection, "execute", mock_execute) - - # Now create the instance (so it uses the patched class method) - BiasDatabase._instance = None - db = BiasDatabase(":memory:") - db.omop_cdm_db_url = "postgresql://user:pass@localhost:5432/mydb" - - result = db._create_omop_table("person") - - assert result is True - assert called["executed"] is True - assert "postgres_scan" in called["query"] From 845f2db5ec4727e33b456924a030af6ec912143f Mon Sep 17 00:00:00 2001 From: hyi Date: Sun, 8 Jun 2025 17:31:08 -0400 Subject: [PATCH 12/12] bug fixes and added tests with 100% test coverage now --- biasanalyzer/database.py | 108 ++++++++---------- tests/query_based/test_cohort_creation.py | 9 +- .../test_hierarchical_prevalence.py | 7 +- tests/test_database.py | 28 +++++ 4 files changed, 87 insertions(+), 65 deletions(-) diff --git a/biasanalyzer/database.py b/biasanalyzer/database.py index 1398f73..af951bb 100644 --- a/biasanalyzer/database.py +++ b/biasanalyzer/database.py @@ -104,25 +104,13 @@ def create_cohort_definition(self, cohort_definition: CohortDefinition, progress cohort_definition.created_by )) if progress_obj is None: - notify_users("Cohort definition inserted successfully.") + notify_users("Cohort definition inserted successfully.") # pragma: no cover else: progress_obj.write("Cohort definition inserted successfully.") self.conn.execute("SELECT id from cohort_definition ORDER BY id DESC LIMIT 1") created_cohort_id = self.conn.fetchone()[0] return created_cohort_id - # Method to insert cohort data - def create_cohort(self, cohort: Cohort): - self.conn.execute(''' - INSERT INTO cohort (subject_id, cohort_definition_id, cohort_start_date, cohort_end_date) - VALUES (?, ?, ?, ?) - ''', ( - cohort.subject_id, - cohort.cohort_definition_id, - cohort.cohort_start_date, - cohort.cohort_end_date - )) - # Method to insert cohort data in bulk from a dataframe def create_cohort_in_bulk(self, cohort_df: pd.DataFrame): # make duckdb to treat cohort_df dataframe as a virtual table named "cohort_df" @@ -161,20 +149,18 @@ def _create_omop_table(self, table_name): SELECT * from postgres_scan('{self.omop_cdm_db_url}', 'public', {table_name}) """) return True # success - elif self.omop_cdm_db_url.endswith('.duckdb'): + elif self.omop_cdm_db_url is None: + return False + else: # omop table is already included in duckdb return True - else: - return False # failure + def _execute_query(self, query_str): results = self.conn.execute(query_str) headers = [desc[0] for desc in results.description] rows = results.fetchall() - if len(rows) == 0: - return [] - else: - return [dict(zip(headers, row)) for row in rows] + return [dict(zip(headers, row)) for row in rows] def get_cohort_basic_stats(self, cohort_definition_id: int, variable=''): """ @@ -263,31 +249,28 @@ def get_cohort_concept_stats(self, cohort_definition_id: int, f"Supported concept types are: {self.__class__.cohort_concept_queries.keys()}", level='error') return concept_stats try: - if self._create_omop_table('concept') and self._create_omop_table('concept_ancestor'): + if (self._create_omop_table('concept') and self._create_omop_table('concept_ancestor') + and self._create_omop_table(concept_type)): query_str = self.__class__.cohort_concept_queries[concept_type]['query'] - if self._create_omop_table(concept_type): - if not vocab: - vocab = self.__class__.cohort_concept_queries[concept_type]['default_vocab'] - query = query_str.format(cid=cohort_definition_id, filter_count=filter_count, - vocab=vocab, include_hierarchy=include_hierarchy) - concept_stats[concept_type] = self._execute_query(query) - cs_df = pd.DataFrame(concept_stats[concept_type]) - # Combine concept_name and prevalence into a "details" column - cs_df["details"] = cs_df.apply( - lambda row: f"{row['concept_name']} (Code: {row['concept_code']}, " - f"Count: {row['count_in_cohort']}, Prevalence: {row['prevalence']:.3%})", axis=1) - filtered_cs_df = cs_df[cs_df['ancestor_concept_id'] != cs_df['descendant_concept_id']] - roots = find_roots(filtered_cs_df) - hierarchy = build_concept_hierarchy(filtered_cs_df) - notify_users(f'cohort concept hierarchy for {concept_type} with root concept ids {roots}:') - for root in roots: - root_detail = cs_df[(cs_df['ancestor_concept_id'] == root) - & (cs_df['descendant_concept_id'] == root)]['details'].iloc[0] - print_hierarchy(hierarchy, parent=root, level=0, parent_details=root_detail) - return concept_stats - else: - notify_users(f"Cannot connect to the OMOP database to query {concept_type} table") - return concept_stats + if not vocab: + vocab = self.__class__.cohort_concept_queries[concept_type]['default_vocab'] + query = query_str.format(cid=cohort_definition_id, filter_count=filter_count, + vocab=vocab, include_hierarchy=include_hierarchy) + concept_stats[concept_type] = self._execute_query(query) + cs_df = pd.DataFrame(concept_stats[concept_type]) + # Combine concept_name and prevalence into a "details" column + cs_df["details"] = cs_df.apply( + lambda row: f"{row['concept_name']} (Code: {row['concept_code']}, " + f"Count: {row['count_in_cohort']}, Prevalence: {row['prevalence']:.3%})", axis=1) + filtered_cs_df = cs_df[cs_df['ancestor_concept_id'] != cs_df['descendant_concept_id']] + roots = find_roots(filtered_cs_df) + hierarchy = build_concept_hierarchy(filtered_cs_df) + notify_users(f'cohort concept hierarchy for {concept_type} with root concept ids {roots}:') + for root in roots: + root_detail = cs_df[(cs_df['ancestor_concept_id'] == root) + & (cs_df['descendant_concept_id'] == root)]['details'].iloc[0] + print_hierarchy(hierarchy, parent=root, level=0, parent_details=root_detail) + return concept_stats else: notify_users("Cannot connect to the OMOP database to query concept table") return concept_stats @@ -321,23 +304,25 @@ def _initialize(self, db_url): notify_users(f"Failed to connect to DuckDB: {e}", level='error') self.Session = self.engine # Use engine directly for DuckDB self._database_type = 'duckdb' - try: - self.engine = create_engine( - db_url, - echo=False, - connect_args={'options': '-c default_transaction_read_only=on'} # Enforce read-only transactions - ) - self.Session = sessionmaker(bind=self.engine) - notify_users("Connected to the OMOP CDM database (read-only).") - self._database_type = 'postgresql' - except SQLAlchemyError as e: - notify_users(f"Failed to connect to the database: {e}", level='error') + else: # pragma: no cover + # Handle PostgreSQL connection + try: + self.engine = create_engine( + db_url, + echo=False, + connect_args={'options': '-c default_transaction_read_only=on'} # Enforce read-only transactions + ) + self.Session = sessionmaker(bind=self.engine) + notify_users("Connected to the OMOP CDM database (read-only).") + self._database_type = 'postgresql' + except SQLAlchemyError as e: + notify_users(f"Failed to connect to the database: {e}", level='error') def get_session(self): if self._database_type == 'duckdb': return self.engine - else: # postgresql connection - # Provide a new session for read-only queries + else: # pragma: no cover + # postgresql connection: provide a new session for read-only queries return self.Session() def execute_query(self, query, params=None): @@ -346,7 +331,7 @@ def execute_query(self, query, params=None): # DuckDB query execution results = self.engine.execute(query, params).fetchall() headers = [desc[0] for desc in self.engine.execute(query, params).description] - else: + else: # pragma: no cover # PostgreSQL query execution omop_session = self.get_session() query = text(query) @@ -360,9 +345,10 @@ def execute_query(self, query, params=None): except duckdb.Error as e: notify_users(f"Error executing query: {e}", level='error') return [] - except SQLAlchemyError as e: + except SQLAlchemyError as e: # pragma: no cover notify_users(f"Error executing query: {e}", level='error') - omop_session.close() + if omop_session: + omop_session.close() return [] def get_domains_and_vocabularies(self) -> list: @@ -409,7 +395,7 @@ def get_concepts(self, search_term: str, domain: Optional[str], vocab: Optional[ params = [domain, search_term_exact, search_term_prefix, search_term_suffix, search_term_prefix_suffix] - else: + else: # pragma: no cover # Use named parameters with :param_name syntax for SQLAlchemy/PostgreSQL base_query = """ SELECT concept_id, concept_name, valid_start_date, valid_end_date, domain_id, vocabulary_id \ diff --git a/tests/query_based/test_cohort_creation.py b/tests/query_based/test_cohort_creation.py index f497df1..8211cad 100644 --- a/tests/query_based/test_cohort_creation.py +++ b/tests/query_based/test_cohort_creation.py @@ -139,7 +139,7 @@ def test_cohort_creation_study2(caplog, test_db): assert_equal(len(patient_ids), 1) assert_equal(patient_ids, {106}) -def test_cohort_creation_all(test_db): +def test_cohort_creation_all(caplog, test_db): bias = test_db cohort = bias.create_cohort( "COVID-19 patient", @@ -156,6 +156,13 @@ def test_cohort_creation_all(test_db): assert 'creation_info' in cohort.metadata, "Cohort creation does not contain 'creation_info' key" stats = cohort.get_stats() assert stats is not None, "Created cohort's stats is None" + gender_stats = cohort.get_stats(variable='gender') + assert gender_stats is not None, "Created cohort's gender stats is None" + caplog.clear() + with caplog.at_level(logging.ERROR): + cohort.get_stats(variable='address') + assert 'is not available' in caplog.text + assert gender_stats is not None, "Created cohort's gender stats is None" assert cohort.data is not None, "Cohort creation wrongly returned None data" patient_ids = set([item['subject_id'] for item in cohort.data]) print(f'patient_ids: {patient_ids}', flush=True) diff --git a/tests/query_based/test_hierarchical_prevalence.py b/tests/query_based/test_hierarchical_prevalence.py index 85ea594..ba849fa 100644 --- a/tests/query_based/test_hierarchical_prevalence.py +++ b/tests/query_based/test_hierarchical_prevalence.py @@ -1,6 +1,3 @@ -import pytest - -@pytest.mark.usefixtures def test_cohort_concept_hierarchical_prevalence(test_db): bias = test_db cohort_query = """ @@ -18,6 +15,10 @@ def test_cohort_concept_hierarchical_prevalence(test_db): ) # Test cohort object and methods assert cohort is not None, "Cohort creation failed" + # test cohort.get_concept_stats only supports concept stats for condition_occurrence and drug_exposures currently + concept_stats = cohort.get_concept_stats(concept_type='procedure_occurrence') + assert concept_stats == {} + include_hierarchy_flags = [True, False] for flag in include_hierarchy_flags: concept_stats = cohort.get_concept_stats(include_hierarchy=flag) diff --git a/tests/test_database.py b/tests/test_database.py index 2631c3a..89652ed 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -1,5 +1,6 @@ import duckdb import pytest +import logging from biasanalyzer.database import BiasDatabase @@ -19,6 +20,10 @@ def mock_execute(self, query): # Now create the instance (so it uses the patched class method) BiasDatabase._instance = None db = BiasDatabase(":memory:") + db.omop_cdm_db_url = None + result = db._create_omop_table("person") + assert result is False + db.omop_cdm_db_url = "postgresql://user:pass@localhost:5432/mydb" result = db._create_omop_table("person") @@ -144,3 +149,26 @@ def close(self): assert db.conn.call_count >= 2 assert any("CREATE INDEX" in sql for sql in db.conn.executed_sql) + +def test_get_cohort_concept_stats_handles_exception(caplog): + BiasDatabase._instance = None + db = BiasDatabase(":memory:") + db.omop_cdm_db_url = 'duckdb' + caplog.clear() + with caplog.at_level(logging.ERROR): + result = db.get_cohort_concept_stats(123) + assert 'Error computing cohort concept stats' in caplog.text + assert result == {} + +def test_get_cohort_attributes_handles_exception(): + BiasDatabase._instance = None + db = BiasDatabase(":memory:") + + db.omop_cdm_db_url = None + result_stats = db.get_cohort_basic_stats(123, variable='age') + assert result_stats is None + result = db.get_cohort_distributions(123, 'age') + assert result is None + result = db.get_cohort_concept_stats(123) + assert result == {} +