Skip to content

Commit 2b18d3f

Browse files
authored
Merge pull request #15 from VACLab/add-more-tests
Add more tests to increase test coverage to 100% with a couple of code improvements and bug fixes along the way. Also updated code to use cohort_start_date to compute cohort age in cohort stats and distribution to be consistent with OHDSI ATLAS implementation.
2 parents 4694e52 + 845f2db commit 2b18d3f

25 files changed

Lines changed: 994 additions & 313 deletions

.coveragerc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[run]
2+
omit =
3+
*/module_test.py
4+
biasanalyzer/background/threading_utils.py
5+
[report]
6+
exclude_lines =
7+
pragma: no cover
8+
if __name__ == .__main__.:
9+
10+
[html]
11+
directory = coverage_html_report
12+

.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,4 @@ jobs:
3939
# Step 5: Run Tests
4040
- name: Run tests
4141
run: |
42-
poetry run pytest -s --cov=biasanalyzer
42+
poetry run pytest -s --cov=biasanalyzer --cov-config=.coveragerc

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ to install the python package from this github repo.
2222
- Run `bias = BIAS()` to create an object of the imported BIAS class.
2323
- Create a config.yaml file for specifying OMOP database connection configuration information.
2424
The config.yaml file must include root_omop_cdm_database key.
25-
- [A test OMOP database configuration yaml file](https://github.com/VACLab/BiasAnalyzer/blob/main/tests/assets/test_config.yaml)
25+
- [A test OMOP database configuration yaml file](https://github.com/VACLab/BiasAnalyzer/blob/main/tests/assets/config/test_config.yaml)
2626
can serve as an example. Another config.yaml example for connecting to a OMOP postgreSQL database
2727
is also copied below for reference.
2828
```angular2html

biasanalyzer/api.py

Lines changed: 42 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -4,46 +4,41 @@
44
from biasanalyzer.cohort import CohortAction
55
from biasanalyzer.config import load_config
66
from ipywidgets import VBox, Label
7-
from ipytree import Tree, Node
7+
from ipytree import Tree
88
from IPython.display import display
9-
from biasanalyzer.utils import get_direction_arrow
9+
from biasanalyzer.utils import get_direction_arrow, notify_users, build_concept_tree
1010

1111

1212
class BIAS:
13-
_instance = None
14-
15-
def __init__(self):
16-
self.config = {}
13+
def __init__(self, config_file_path=None):
1714
self.bias_db = None
1815
self.omop_cdm_db = None
1916
self.cohort_action = None
20-
21-
def __new__(cls, config_file_path=None):
22-
if cls._instance is None:
23-
cls._instance = super(BIAS, cls).__new__(cls)
24-
cls._instance.set_config(config_file_path)
25-
return cls._instance
17+
if config_file_path is None:
18+
self.config = {}
19+
else:
20+
self.set_config(config_file_path)
2621

2722
def set_config(self, config_file_path: str):
28-
if config_file_path is None:
29-
print('no configuration file specified. '
30-
'Call set_config(config_file_path) next to specify configurations')
23+
if not config_file_path:
24+
notify_users('no configuration file specified. '
25+
'Call set_config(config_file_path) next to specify configurations')
3126
else:
3227
try:
3328
self.config = load_config(config_file_path)
34-
print(f'configuration specified in {config_file_path} loaded successfully')
29+
notify_users(f'configuration specified in {config_file_path} loaded successfully')
3530
except FileNotFoundError:
36-
print('specified configuration file does not exist. '
37-
'Call set_config(config_file_path) next to specify a valid '
38-
'configuration file')
31+
notify_users('specified configuration file does not exist. '
32+
'Call set_config(config_file_path) next to specify a valid configuration file',
33+
level='error')
3934
except ValidationError as ex:
40-
print(f'configuration yaml file is not valid with validation error: {ex}')
35+
notify_users(f'configuration yaml file is not valid with validation error: {ex}', level='error')
4136

4237
def set_root_omop(self):
4338
if not self.config:
44-
print('no valid configuration to set root OMOP CDM data. '
45-
'Call set_config(config_file_path) to specify configurations first.')
46-
elif 'root_omop_cdm_database' in self.config:
39+
notify_users('no valid configuration to set root OMOP CDM data. '
40+
'Call set_config(config_file_path) to specify configurations first.')
41+
else:
4742
db_type = self.config['root_omop_cdm_database']['database_type']
4843
if db_type == 'postgresql':
4944
user = self.config['root_omop_cdm_database']['username']
@@ -65,64 +60,42 @@ def set_root_omop(self):
6560
self.bias_db = BiasDatabase(db_path)
6661
self.bias_db.omop_cdm_db_url = db_path
6762
else:
68-
print(f"Unsupported database type: {db_type}")
69-
else:
70-
print('Configuration file must include configuration values for root_omop_cdm_database key.')
63+
notify_users(f"Unsupported database type: {db_type}")
7164

7265
def _set_cohort_action(self):
7366
if self.omop_cdm_db is None:
74-
print('A valid OMOP CDM must be set before creating a cohort. '
75-
'Call set_root_omop first to set a valid root OMOP CDM')
67+
notify_users('A valid OMOP CDM must be set before creating a cohort. '
68+
'Call set_root_omop first to set a valid root OMOP CDM')
7669
return None
7770
if self.cohort_action is None:
7871
self.cohort_action = CohortAction(self.omop_cdm_db, self.bias_db)
7972
return self.cohort_action
8073

8174
def get_domains_and_vocabularies(self):
75+
print(f'self.omop_cdm_db: {self.omop_cdm_db}')
8276
if self.omop_cdm_db is None:
83-
print('A valid OMOP CDM must be set before getting domains. '
84-
'Call set_root_omop first to set a valid root OMOP CDM')
77+
notify_users('A valid OMOP CDM must be set before getting domains. '
78+
'Call set_root_omop first to set a valid root OMOP CDM')
8579
return None
8680
return self.omop_cdm_db.get_domains_and_vocabularies()
8781

8882
def get_concepts(self, search_term, domain=None, vocabulary=None):
8983
if self.omop_cdm_db is None:
90-
print('A valid OMOP CDM must be set before getting concepts. '
91-
'Call set_root_omop first to set a valid root OMOP CDM')
84+
notify_users('A valid OMOP CDM must be set before getting concepts. '
85+
'Call set_root_omop first to set a valid root OMOP CDM')
9286
return None
9387
if domain is None and vocabulary is None:
94-
print('either domain or vocabulary must be set to constrain the number of returned concepts')
88+
notify_users('either domain or vocabulary must be set to constrain the number of returned concepts')
9589
return None
9690
return self.omop_cdm_db.get_concepts(search_term, domain, vocabulary)
9791

9892
def get_concept_hierarchy(self, concept_id):
9993
if self.omop_cdm_db is None:
100-
print('A valid OMOP CDM must be set before getting concepts. '
101-
'Call set_root_omop first to set a valid root OMOP CDM')
94+
notify_users('A valid OMOP CDM must be set before getting concepts. '
95+
'Call set_root_omop first to set a valid root OMOP CDM')
10296
return None
10397
return self.omop_cdm_db.get_concept_hierarchy(concept_id)
10498

105-
def _build_concept_tree(self, concept_tree: dict, tree_type: str) -> Node:
106-
"""
107-
Recursively builds an ipytree Node for a given concept tree.
108-
"""
109-
# Extract concept details
110-
details = concept_tree.get("details", {})
111-
concept_name = details.get("concept_name", "Unknown Concept")
112-
concept_id = details.get("concept_id", "")
113-
concept_code = details.get("concept_code", "")
114-
direction_arrow = get_direction_arrow(tree_type)
115-
# Create a label for the current concept
116-
label_text = f"{direction_arrow} {concept_name} (ID: {concept_id}, Code: {concept_code})"
117-
node = Node(label_text)
118-
119-
# Recursively add child nodes
120-
for child in concept_tree.get(tree_type, []):
121-
child_node = self._build_concept_tree(child, tree_type)
122-
node.add_node(child_node)
123-
124-
return node
125-
12699
def display_concept_tree(self, concept_tree: dict, level: int = 0, show_in_text_format=True, tree_type=None):
127100
"""
128101
Recursively prints the concept hierarchy tree in an indented format for display.
@@ -134,7 +107,7 @@ def display_concept_tree(self, concept_tree: dict, level: int = 0, show_in_text_
134107
elif 'children' in concept_tree:
135108
tree_type = 'children'
136109
else:
137-
print('The input concept tree must contain parents or children key as the type of the tree.')
110+
notify_users('The input concept tree must contain parents or children key as the type of the tree.')
138111
return ''
139112

140113
if show_in_text_format:
@@ -152,12 +125,12 @@ def display_concept_tree(self, concept_tree: dict, level: int = 0, show_in_text_
152125
else:
153126
# Extract concept details
154127
# Build the root tree node
155-
root_node = self._build_concept_tree(concept_tree, tree_type)
128+
root_node = build_concept_tree(concept_tree, tree_type)
156129
tree = Tree()
157130
tree.add_node(root_node)
158131
tree.opened = True
159132
display(VBox([Label("Concept Hierarchy"), tree]))
160-
return None
133+
return root_node
161134

162135

163136
def create_cohort(self, cohort_name: str, cohort_desc: str, query_or_yaml_file: str, created_by: str,
@@ -178,12 +151,12 @@ def create_cohort(self, cohort_name: str, cohort_desc: str, query_or_yaml_file:
178151
created_cohort = c_action.create_cohort(cohort_name, cohort_desc, query_or_yaml_file, created_by)
179152
if created_cohort is not None:
180153
if delay > 0:
181-
print(f"[DEBUG] Simulating long-running task with {delay} seconds delay...")
154+
notify_users(f"[DEBUG] Simulating long-running task with {delay} seconds delay...")
182155
time.sleep(delay)
183-
print('cohort created successfully')
156+
notify_users('cohort created successfully')
184157
return created_cohort
185158
else:
186-
print('failed to create a valid cohort action object')
159+
notify_users('failed to create a valid cohort action object')
187160
return None
188161

189162

@@ -192,11 +165,14 @@ def compare_cohorts(self, cohort_id1, cohort_id2):
192165
if c_action:
193166
return c_action.compare_cohorts(cohort_id1, cohort_id2)
194167
else:
195-
print('failed to create a valid cohort action object')
168+
notify_users('failed to create a valid cohort action object')
196169
return None
197170

198171

199172
def cleanup(self):
200-
self.bias_db.close()
201-
self.omop_cdm_db.close()
202-
del self.cohort_action
173+
if self.bias_db:
174+
self.bias_db.close()
175+
if self.omop_cdm_db:
176+
self.omop_cdm_db.close()
177+
if self.cohort_action:
178+
del self.cohort_action

biasanalyzer/cohort.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
from datetime import datetime
55
from tqdm.auto import tqdm
66
from pydantic import ValidationError
7-
from biasanalyzer.models import CohortDefinition, Cohort
7+
from biasanalyzer.models import CohortDefinition
88
from biasanalyzer.config import load_cohort_creation_config
99
from biasanalyzer.database import OMOPCDMDatabase, BiasDatabase
10-
from biasanalyzer.utils import hellinger_distance, clean_string
10+
from biasanalyzer.utils import hellinger_distance, clean_string, notify_users
1111
from biasanalyzer.cohort_query_builder import CohortQueryBuilder
1212

1313

@@ -99,12 +99,11 @@ def create_cohort(self, cohort_name: str, description: str, query_or_yaml_file:
9999
cohort_config = load_cohort_creation_config(query_or_yaml_file)
100100
tqdm.write(f'configuration specified in {query_or_yaml_file} loaded successfully')
101101
except FileNotFoundError:
102-
print('specified cohort creation configuration file does not exist. Make sure '
103-
'the configuration file name with path is specified correctly.')
102+
notify_users('specified cohort creation configuration file does not exist. Make sure '
103+
'the configuration file name with path is specified correctly.')
104104
return None
105105
except ValidationError as ex:
106-
print(f'cohort creation configuration yaml file is not valid with '
107-
f'validation error: {ex}')
106+
notify_users(f'cohort creation configuration yaml file is not valid with validation error: {ex}')
108107
return None
109108

110109
query = self._query_builder.build_query(cohort_config)
@@ -139,11 +138,12 @@ def create_cohort(self, cohort_name: str, description: str, query_or_yaml_file:
139138
tqdm.write(f"Cohort {cohort_name} successfully created.")
140139
return CohortData(cohort_id=cohort_def_id, bias_db=self.bias_db, omop_db=self.omop_db)
141140
except duckdb.Error as e:
142-
print(f"Error executing query: {e}")
141+
notify_users(f"Error executing query: {e}")
143142
return None
144143
except SQLAlchemyError as e:
145-
print(f"Error executing query: {e}")
146-
omop_session.close()
144+
notify_users(f"Error executing query: {e}")
145+
if omop_session is not None:
146+
omop_session.close()
147147
return None
148148

149149
def compare_cohorts(self, cohort_id_1: int, cohort_id_2: int):

biasanalyzer/cohort_query_builder.py

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@ class CohortQueryBuilder:
99
def __init__(self):
1010
"""Get the path to SQL templates, whether running from source or installed."""
1111
try:
12-
if sys.version_info >= (3, 9):
12+
if sys.version_info >= (3, 9): # pragma: no cover
1313
# Python 3.9+: Use importlib.resources.files()
1414
template_path = importlib.resources.files("biasanalyzer").joinpath("sql_templates")
1515
else:
1616
# Python 3.8: Use importlib.resources.path() (context manager)
1717
with importlib.resources.path("biasanalyzer", "sql_templates") as p:
1818
template_path = str(p)
19-
except ModuleNotFoundError:
19+
except ModuleNotFoundError: # pragma: no cover
2020
template_path = os.path.join(os.path.dirname(__file__), "sql_templates")
2121

2222
print(f'template_path: {template_path}')
@@ -117,7 +117,7 @@ def render_event_group(event_group, alias_prefix="evt"):
117117
event_sql = CohortQueryBuilder.render_event_group(event, f"{alias_prefix}_{i}")
118118
if event_sql:
119119
queries.append(event_sql)
120-
if not queries:
120+
if not queries: # pragma: no cover
121121
return ""
122122

123123
if event_group["operator"] == "AND":
@@ -150,9 +150,6 @@ def render_event_group(event_group, alias_prefix="evt"):
150150
elif event_group["operator"] == "OR":
151151
return f"SELECT person_id, event_start_date, event_end_date FROM ({' UNION '.join(queries)}) AS {alias_prefix}_or"
152152
elif event_group["operator"] == "NOT":
153-
if len(queries) != 1:
154-
raise ValueError("NOT operator expects exactly one event subquery")
155-
# Keep the full subquery with dates for consistency, but use it as a filter
156153
not_query = queries[0]
157154
# Return a query that selects all persons from a base table (e.g., person),
158155
# excluding those in the NOT subquery, while allowing dates from other criteria
@@ -187,10 +184,6 @@ def render_event_group(event_group, alias_prefix="evt"):
187184
FROM ({queries[0]}) AS {alias_prefix}_0
188185
WHERE event_start_date < DATE '{timestamp}'
189186
"""
190-
else:
191-
print(f"Error: event_group: {event_group} with BEFORE operator only "
192-
f"has one query event {queries}")
193-
return ''
194187
elif len(queries) == 2:
195188
event_group = TemporalEventGroup(**event_group)
196189
e1_alias = f"e1_{alias_prefix}"
@@ -213,7 +206,7 @@ def render_event_group(event_group, alias_prefix="evt"):
213206
AND {e1_alias}.event_start_date < {e2_alias}.event_start_date
214207
{interval_sql}
215208
"""
216-
return ""
209+
return "" # pragma: no cover
217210

218211
def temporal_event_filter(self, event_groups, alias='c'):
219212
"""
@@ -236,15 +229,30 @@ def temporal_event_filter(self, event_groups, alias='c'):
236229
filters.append(f"AND {alias}.person_id IN (SELECT person_id FROM ({group_sql}) AS ex_subquery_{i})")
237230
else:
238231
filters.append(f"({group_sql})")
239-
if not filters:
232+
if not filters: # pragma: no cover
240233
return ""
241234
if alias == 'ex':
242235
# For exclusion, combine with AND as filters
243236
return " ".join(filters)
244237
else:
245-
# For inclusion, combine as a single subquery (assuming one event group for simplicity)
246-
# If multiple groups, may need UNION or further logic
238+
# For inclusion, handle both single event group case with operator defined and multiple event group
239+
# case with no operator defined
247240
if len(filters) > 1:
241+
# For multiple temporal event group case with no operator defined, use "OR" operator by default
242+
# An example YAML block for multiple temporal event group is shown below for reference, in which
243+
# case, patients who satisfy either group (condition 37311061 or drug 67890) will be included:
244+
# inclusion_criteria:
245+
# temporal_events:
246+
# - operator: AND
247+
# events:
248+
# - event_type: condition_occurrence
249+
# event_concept_id: 37311061
250+
# - operator: AND
251+
# events:
252+
# - event_type: drug_exposure
253+
# event_concept_id: 67890
248254
return (f"SELECT person_id, event_start_date, event_end_date FROM "
249255
f"({' UNION ALL '.join(filters)}) AS combined_events")
250-
return filters[0] # Single event group case
256+
257+
# Single event group case with operator defined
258+
return filters[0]

0 commit comments

Comments
 (0)