From 9079c7b12137f84f14b87d3646c9668adea06d5a Mon Sep 17 00:00:00 2001 From: Tamara Slosarek Date: Thu, 10 Oct 2024 12:22:49 +0200 Subject: [PATCH] refactor(scripts): move checks and corrections to files --- pharme.code-workspace | 1 + scripts/README.md | 6 +- scripts/analyze.py | 315 ------------------ .../analyze/checks/brand_name_whitespace.py | 8 + scripts/analyze/checks/constants.py | 1 + scripts/analyze/checks/consult.py | 4 + .../analyze/checks/metabolization_severity.py | 43 +++ scripts/analyze/checks/warning_levels.py | 73 ++++ .../corrections/brand_name_whitespace.py | 4 + scripts/analyze/corrections/consult.py | 14 + scripts/analyze/data_helpers.py | 53 +++ scripts/run_analysis.py | 131 ++++++++ 12 files changed, 335 insertions(+), 318 deletions(-) delete mode 100644 scripts/analyze.py create mode 100644 scripts/analyze/checks/brand_name_whitespace.py create mode 100644 scripts/analyze/checks/constants.py create mode 100644 scripts/analyze/checks/consult.py create mode 100644 scripts/analyze/checks/metabolization_severity.py create mode 100644 scripts/analyze/checks/warning_levels.py create mode 100644 scripts/analyze/corrections/brand_name_whitespace.py create mode 100644 scripts/analyze/corrections/consult.py create mode 100644 scripts/analyze/data_helpers.py create mode 100644 scripts/run_analysis.py diff --git a/pharme.code-workspace b/pharme.code-workspace index fd4b4154..b08ff853 100644 --- a/pharme.code-workspace +++ b/pharme.code-workspace @@ -81,6 +81,7 @@ "LTRB", "maxdepth", "MedlinePlus", + "metabolization", "Metabolizer", "metabolizers", "mirabegron", diff --git a/scripts/README.md b/scripts/README.md index d3ce2bdc..4a7689a3 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -85,14 +85,14 @@ Run `python clean.py` to remove the `scripts/temp` directory and all files in ## Analyze (and correct) annotations -Run `python analyze.py [--correct]` to analyze annotations and -optionally correct what can be corrected easily in +Run `python run_analysis.py [--correct]` to analyze annotations +and optionally correct what can be corrected easily in `_corrected_.base64.json`. | Check | Description | `--correct`ed | Only for single-gene results* | | ----- | ----------- | ------------- | ----------------------------- | | `has_consult` | Is "consult your pharmacist..." included in recommendation? | ✅ | ❌ | -| `implication_severity` | "Much" keyword, should only be used if reflected by guideline implication. | ❌ | ✅ | +| `check_metabolization_severity` | "Much" keyword, should only be used if reflected by guideline implication. | ❌ | ✅ | | `red_warning` | Red warning level should be present with recommendation containing "may not be the right medication". | ❌ | ❌ | | `yellow_warning` | Yellow warning level should be present when the red warning level does not apply but the implication contains "may not work" or "side effects" or the recommendation contains non-standard dose. | ❌ | ❌ | | `green_warning` | Green warning level should be applied in all non-red and non-yellow cases and when the recommendation states "at standard dose" or similar formulations. | ❌ | ❌ | diff --git a/scripts/analyze.py b/scripts/analyze.py deleted file mode 100644 index a5fca73c..00000000 --- a/scripts/analyze.py +++ /dev/null @@ -1,315 +0,0 @@ -import sys -from common.constants import DRUG_COLLECTION_NAME, SCRIPT_POSTFIXES, \ - BRICK_COLLECTION_NAME -from common.get_data import get_data, get_guideline_by_id, get_phenotype_key -from common.write_data import write_data, write_log - -CONSULT_TEXT = 'consult your pharmacist or doctor' -WHOLE_CONSULT_TEXT = '{} for more information.'.format(CONSULT_TEXT) -RED_TEXT = 'not be the right medication' -NOT_RED_TEXTS = [ - 'if more than this dose is needed', - "if #drug-name isn't working for you", -] -ADJUST_TEXT = 'adjusted' -YELLOW_RECOMMENDATION_TEXTS = NOT_RED_TEXTS + [ - ADJUST_TEXT, - 'increased', - 'decreased', - 'lower dose', - 'higher dose', - 'up to a certain dose', - 'dose increases should be done cautiously and slowly', - 'further testing is recommended', -] -YELLOW_IMPLICATION_TEXTS = [ - 'increased risk', - 'may not work', -] -GREEN_TEXTS = ['at standard dose', 'there is no reason to avoid'] - -def ensure_unique_item(item_filter, field_name, value): - item = list(item_filter) - if len(item) != 1: - message = f'[ERROR] Items are not unique for {field_name} == ' \ - f'{value}: {item}' - raise Exception(message) - return item[0] - -def get_unique_item(items, field_name, value): - item_filter = filter(lambda item: item[field_name] == value, items) - return ensure_unique_item(item_filter, field_name, value) - -def get_english_text(brick): - translation = get_unique_item(brick['translations'], 'language', 'English') - return translation['text'].lower() - -def get_brick_meaning(data, brick_id): - bricks = data[BRICK_COLLECTION_NAME] - brick = get_unique_item(bricks, '_id', brick_id) - return get_english_text(brick) - -def get_bricks_meaning(data, brick_ids): - return ' '.join(map( - lambda brick_id: get_brick_meaning(data, brick_id), - brick_ids)) - -def get_annotation(data, item, key, resolve=True): - if not key in item['annotations']: return None - annotation = item['annotations'][key] - if resolve: annotation = get_bricks_meaning(data, annotation) - return annotation - -def get_guideline_annotations(data, guideline): - return { - 'implication': get_annotation(data, guideline, 'implication'), - 'recommendation': get_annotation(data, guideline, 'recommendation'), - 'warning_level': get_annotation(data, guideline, 'warningLevel', - resolve=False) - } - -def get_drug_annotations(data, drug): - return { - 'drugclass': get_annotation(data, drug, 'drugclass'), - 'indication': get_annotation(data, drug, 'indication'), - 'brand_names': get_annotation(data, drug, 'brandNames', resolve=False) - } - -def has_annotations(annotations): - return all(list(map( - lambda value: value != None, - annotations.values()))) - -def has_consult(_, annotations): - return CONSULT_TEXT in annotations['recommendation'] - -def check_implication_severity(guideline, annotations): - ignored_phenotypes = ['no result', 'indeterminate', 'normal metabolizer'] - multiple_relevant_phenotypes = False - relevant_gene = None - for current_gene, current_phenotypes in guideline['phenotypes'].items(): - if not current_phenotypes[0].lower() in ignored_phenotypes: - if relevant_gene != None: - multiple_relevant_phenotypes = True - break - relevant_gene = current_gene - if multiple_relevant_phenotypes or relevant_gene == None: - return None - implication = \ - guideline['externalData'][0]['implications'][relevant_gene].lower() - much_implying_formulations = [ - 'greatly decreased', - 'greatly reduced', - 'significantly reduced', - 'extremely high concentrations', - 'when compared to cyp2c19 rapid and normal metabolizers', - 'as compared to non-poor metabolizers', - 'when compared to cyp2c19 normal and intermediate metabolizers', - 'as compared to normal and intermediate metabolizer', - 'complete dpd deficiency', - ] - much_formulations = [ - 'much faster', - 'much slower' - ] - much_is_implied = any( - map( - lambda much_implying_formulation: - much_implying_formulation in implication, - much_implying_formulations, - ) - ) - implication_has_much = any( - map( - lambda much_formulation: much_formulation in annotations['implication'], - much_formulations, - ) - ) - return much_is_implied == implication_has_much - -def should_be_red(annotations): - return RED_TEXT in annotations['recommendation'] and all(map( - lambda not_red_text: not_red_text not in annotations['recommendation'], - NOT_RED_TEXTS, - )) - -def should_be_yellow(annotations): - return any(map( - lambda yellow_text: yellow_text in annotations['recommendation'], - YELLOW_RECOMMENDATION_TEXTS, - )) or any(map( - lambda yellow_text: yellow_text in annotations['implication'], - YELLOW_IMPLICATION_TEXTS, - )) or ( - # Special case: no other recommendation given - annotations['recommendation'] == WHOLE_CONSULT_TEXT - ) - -def should_be_green(annotations): - return any(map( - lambda green_text: green_text in annotations['recommendation'], - GREEN_TEXTS, - )) - -def check_red_warning_level(_, annotations): - has_warning_level = annotations['warning_level'] == 'red' - should_have_warning_level = should_be_red(annotations) - return has_warning_level == should_have_warning_level - -def check_yellow_warning_level(_, annotations): - has_warning_level = annotations['warning_level'] == 'yellow' - should_have_warning_level = not should_be_red(annotations) and \ - should_be_yellow(annotations) - return has_warning_level if should_have_warning_level else True - -def check_green_warning_level(_, annotations): - has_warning_level = annotations['warning_level'] == 'green' - should_have_warning_level = not should_be_red(annotations) and \ - not should_be_yellow(annotations) and \ - should_be_green(annotations) - return has_warning_level == should_have_warning_level - -def check_none_warning_level(_, annotations): - has_warning_level = annotations['warning_level'] == 'none' - should_have_warning_level = not should_be_red(annotations) and \ - not should_be_yellow(annotations) and \ - not should_be_green(annotations) - return has_warning_level == should_have_warning_level - -def analyze_annotations(item, annotations, checks): - results = {} - for check_name, check_function in checks.items(): - results[check_name] = check_function(item, annotations) - return results - -def get_consult_brick(data): - brick_filter = filter( - lambda brick: get_english_text(brick).startswith(CONSULT_TEXT), - data[BRICK_COLLECTION_NAME]) - return ensure_unique_item(brick_filter, 'brick meaning', CONSULT_TEXT) - -def add_consult(data, guideline): - guideline['annotations']['recommendation'].append( - get_consult_brick(data)['_id']) - -def check_brand_name_whitespace(_, annotations): - check_applies = True - for brand_name in annotations['brand_names']: - trimmed_name = brand_name.strip() - if trimmed_name != brand_name: - check_applies = False - break - return check_applies - -def correct_brand_name_whitespace(_, drug): - drug['annotations']['brandNames'] = list(map( - lambda brand_name: brand_name.strip(), - drug['annotations']['brandNames'])) - -def correct_inconsistency(data, item, check_name, corrections): - if check_name in corrections: - corrections[check_name](data, item) - return check_name in corrections - -def log_not_annotated(log_content): - log_content.append(' – _not annotated_\n') - -def log_all_passed(log_content, postfix=''): - log_content.append(f' – _all checks passed_{postfix}\n') - -def log_annotations(log_content, annotations): - for key, value in annotations.items(): - pretty_key = key.capitalize().replace('_', ' ') - log_content.append(f' {pretty_key}: {value}\n') - -def handle_failed_checks( - data, item, result, corrections, should_correct, annotations, log_content): - failed_checks = [] - skipped_checks = [] - for check_name, check_result in result.items(): - if check_result == False: - corrected = should_correct and \ - correct_inconsistency(data, item, - check_name, corrections) - check_name = f'{check_name} (corrected)' if corrected \ - else check_name - failed_checks.append(check_name) - if check_result == None: - skipped_checks.append(check_name) - skipped_checks_string = '' - if len(skipped_checks) > 0: - skipped_checks_string = (' (skipped checks: ' \ - f'{", ".join(skipped_checks)})') - if len(failed_checks) > 0: - log_content.append(' - _some checks failed_: ' \ - f'{", ".join(failed_checks)}{skipped_checks_string}\n') - log_annotations(log_content, annotations) - else: - log_all_passed(log_content, postfix=skipped_checks_string) - -DRUG_CHECKS = { - 'brand_whitespace': check_brand_name_whitespace, -} - -DRUG_CORRECTIONS = { - 'brand_whitespace': correct_brand_name_whitespace, -} - -GUIDELINE_CHECKS = { - 'has_consult': has_consult, - 'implication_severity': check_implication_severity, - 'red_warning_level': check_red_warning_level, - 'yellow_warning_level': check_yellow_warning_level, - 'green_warning_level': check_green_warning_level, - 'none_warning_level': check_none_warning_level, -} - -GUIDELINE_CORRECTIONS = { - 'has_consult': add_consult, -} - -def main(): - correct_inconsistencies = '--correct' in sys.argv - data = get_data() - log_content = [ - '# Analyze annotation data\n\n', - f'_Correct if possible: {correct_inconsistencies}_\n\n' - ] - for drug in data[DRUG_COLLECTION_NAME]: - drug_name = drug['name'] - log_content.append(f'* {drug_name}') - drug_annotations = get_drug_annotations(data, drug) - if not has_annotations(drug_annotations): log_not_annotated(log_content) - else: - drug_result = analyze_annotations( - drug, drug_annotations, DRUG_CHECKS) - if not all(drug_result.values()): - handle_failed_checks(data, drug, drug_result, - DRUG_CORRECTIONS, correct_inconsistencies, - drug_annotations, log_content) - else: - log_all_passed(log_content) - for guideline_id in drug['guidelines']: - guideline = get_guideline_by_id(data, guideline_id) - phenotype = get_phenotype_key(guideline) - log_content.append(f' * {phenotype}') - guideline_annotations = get_guideline_annotations(data, guideline) - if not has_annotations(guideline_annotations): - log_not_annotated(log_content) - continue - guideline_result = analyze_annotations( - guideline, guideline_annotations, GUIDELINE_CHECKS) - if guideline_result == None: continue - if not all(guideline_result.values()): - handle_failed_checks(data, guideline, guideline_result, - GUIDELINE_CORRECTIONS, correct_inconsistencies, - guideline_annotations, log_content) - else: - log_all_passed(log_content) - - write_log(log_content, postfix=SCRIPT_POSTFIXES['correct']) - if correct_inconsistencies: - write_data(data, postfix=SCRIPT_POSTFIXES['correct']) - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/scripts/analyze/checks/brand_name_whitespace.py b/scripts/analyze/checks/brand_name_whitespace.py new file mode 100644 index 00000000..3c1e41a8 --- /dev/null +++ b/scripts/analyze/checks/brand_name_whitespace.py @@ -0,0 +1,8 @@ +def check_brand_name_whitespace(_, annotations): + check_applies = True + for brand_name in annotations['brand_names']: + trimmed_name = brand_name.strip() + if trimmed_name != brand_name: + check_applies = False + break + return check_applies \ No newline at end of file diff --git a/scripts/analyze/checks/constants.py b/scripts/analyze/checks/constants.py new file mode 100644 index 00000000..f8aa339a --- /dev/null +++ b/scripts/analyze/checks/constants.py @@ -0,0 +1 @@ +CONSULT_TEXT = 'consult your pharmacist or doctor' \ No newline at end of file diff --git a/scripts/analyze/checks/consult.py b/scripts/analyze/checks/consult.py new file mode 100644 index 00000000..b2c35364 --- /dev/null +++ b/scripts/analyze/checks/consult.py @@ -0,0 +1,4 @@ +from analyze.checks.constants import CONSULT_TEXT + +def has_consult(_, annotations): + return CONSULT_TEXT in annotations['recommendation'] \ No newline at end of file diff --git a/scripts/analyze/checks/metabolization_severity.py b/scripts/analyze/checks/metabolization_severity.py new file mode 100644 index 00000000..7b35a190 --- /dev/null +++ b/scripts/analyze/checks/metabolization_severity.py @@ -0,0 +1,43 @@ +def check_metabolization_severity(guideline, annotations): + ignored_phenotypes = ['no result', 'indeterminate', 'normal metabolizer'] + multiple_relevant_phenotypes = False + relevant_gene = None + for current_gene, current_phenotypes in guideline['phenotypes'].items(): + if not current_phenotypes[0].lower() in ignored_phenotypes: + if relevant_gene != None: + multiple_relevant_phenotypes = True + break + relevant_gene = current_gene + if multiple_relevant_phenotypes or relevant_gene == None: + return None + implication = \ + guideline['externalData'][0]['implications'][relevant_gene].lower() + much_implying_formulations = [ + 'greatly decreased', + 'greatly reduced', + 'significantly reduced', + 'extremely high concentrations', + 'when compared to cyp2c19 rapid and normal metabolizers', + 'as compared to non-poor metabolizers', + 'when compared to cyp2c19 normal and intermediate metabolizers', + 'as compared to normal and intermediate metabolizer', + 'complete dpd deficiency', + ] + much_formulations = [ + 'much faster', + 'much slower' + ] + much_is_implied = any( + map( + lambda much_implying_formulation: + much_implying_formulation in implication, + much_implying_formulations, + ) + ) + implication_has_much = any( + map( + lambda much_formulation: much_formulation in annotations['implication'], + much_formulations, + ) + ) + return much_is_implied == implication_has_much \ No newline at end of file diff --git a/scripts/analyze/checks/warning_levels.py b/scripts/analyze/checks/warning_levels.py new file mode 100644 index 00000000..40f7db44 --- /dev/null +++ b/scripts/analyze/checks/warning_levels.py @@ -0,0 +1,73 @@ +from analyze.checks.constants import CONSULT_TEXT + +WHOLE_CONSULT_TEXT = '{} for more information.'.format(CONSULT_TEXT) +RED_TEXT = 'not be the right medication' +NOT_RED_TEXTS = [ + 'if more than this dose is needed', + "if #drug-name isn't working for you", +] +ADJUST_TEXT = 'adjusted' +YELLOW_RECOMMENDATION_TEXTS = NOT_RED_TEXTS + [ + ADJUST_TEXT, + 'increased', + 'decreased', + 'lower dose', + 'higher dose', + 'up to a certain dose', + 'dose increases should be done cautiously and slowly', + 'further testing is recommended', +] +YELLOW_IMPLICATION_TEXTS = [ + 'increased risk', + 'may not work', +] +GREEN_TEXTS = ['at standard dose', 'there is no reason to avoid'] + +def should_be_red(annotations): + return RED_TEXT in annotations['recommendation'] and all(map( + lambda not_red_text: not_red_text not in annotations['recommendation'], + NOT_RED_TEXTS, + )) + +def should_be_yellow(annotations): + return any(map( + lambda yellow_text: yellow_text in annotations['recommendation'], + YELLOW_RECOMMENDATION_TEXTS, + )) or any(map( + lambda yellow_text: yellow_text in annotations['implication'], + YELLOW_IMPLICATION_TEXTS, + )) or ( + # Special case: no other recommendation given + annotations['recommendation'] == WHOLE_CONSULT_TEXT + ) + +def should_be_green(annotations): + return any(map( + lambda green_text: green_text in annotations['recommendation'], + GREEN_TEXTS, + )) + +def check_red_warning_level(_, annotations): + has_warning_level = annotations['warning_level'] == 'red' + should_have_warning_level = should_be_red(annotations) + return has_warning_level == should_have_warning_level + +def check_yellow_warning_level(_, annotations): + has_warning_level = annotations['warning_level'] == 'yellow' + should_have_warning_level = not should_be_red(annotations) and \ + should_be_yellow(annotations) + return has_warning_level if should_have_warning_level else True + +def check_green_warning_level(_, annotations): + has_warning_level = annotations['warning_level'] == 'green' + should_have_warning_level = not should_be_red(annotations) and \ + not should_be_yellow(annotations) and \ + should_be_green(annotations) + return has_warning_level == should_have_warning_level + +def check_none_warning_level(_, annotations): + has_warning_level = annotations['warning_level'] == 'none' + should_have_warning_level = not should_be_red(annotations) and \ + not should_be_yellow(annotations) and \ + not should_be_green(annotations) + return has_warning_level == should_have_warning_level \ No newline at end of file diff --git a/scripts/analyze/corrections/brand_name_whitespace.py b/scripts/analyze/corrections/brand_name_whitespace.py new file mode 100644 index 00000000..30e9b9f5 --- /dev/null +++ b/scripts/analyze/corrections/brand_name_whitespace.py @@ -0,0 +1,4 @@ +def correct_brand_name_whitespace(_, drug): + drug['annotations']['brandNames'] = list(map( + lambda brand_name: brand_name.strip(), + drug['annotations']['brandNames'])) \ No newline at end of file diff --git a/scripts/analyze/corrections/consult.py b/scripts/analyze/corrections/consult.py new file mode 100644 index 00000000..a0d9976f --- /dev/null +++ b/scripts/analyze/corrections/consult.py @@ -0,0 +1,14 @@ +from analyze.data_helpers import ensure_unique_item, get_english_text +from common.constants import BRICK_COLLECTION_NAME + +from analyze.checks.constants import CONSULT_TEXT + +def get_consult_brick(data): + brick_filter = filter( + lambda brick: get_english_text(brick).startswith(CONSULT_TEXT), + data[BRICK_COLLECTION_NAME]) + return ensure_unique_item(brick_filter, 'brick meaning', CONSULT_TEXT) + +def add_consult(data, guideline): + guideline['annotations']['recommendation'].append( + get_consult_brick(data)['_id']) \ No newline at end of file diff --git a/scripts/analyze/data_helpers.py b/scripts/analyze/data_helpers.py new file mode 100644 index 00000000..c6edf6ed --- /dev/null +++ b/scripts/analyze/data_helpers.py @@ -0,0 +1,53 @@ +from common.constants import BRICK_COLLECTION_NAME + +def ensure_unique_item(item_filter, field_name, value): + item = list(item_filter) + if len(item) != 1: + message = f'[ERROR] Items are not unique for {field_name} == ' \ + f'{value}: {item}' + raise Exception(message) + return item[0] + +def get_unique_item(items, field_name, value): + item_filter = filter(lambda item: item[field_name] == value, items) + return ensure_unique_item(item_filter, field_name, value) + +def get_english_text(brick): + translation = get_unique_item(brick['translations'], 'language', 'English') + return translation['text'].lower() + +def get_brick_meaning(data, brick_id): + bricks = data[BRICK_COLLECTION_NAME] + brick = get_unique_item(bricks, '_id', brick_id) + return get_english_text(brick) + +def get_bricks_meaning(data, brick_ids): + return ' '.join(map( + lambda brick_id: get_brick_meaning(data, brick_id), + brick_ids)) + +def get_annotation(data, item, key, resolve=True): + if not key in item['annotations']: return None + annotation = item['annotations'][key] + if resolve: annotation = get_bricks_meaning(data, annotation) + return annotation + +def get_guideline_annotations(data, guideline): + return { + 'implication': get_annotation(data, guideline, 'implication'), + 'recommendation': get_annotation(data, guideline, 'recommendation'), + 'warning_level': get_annotation(data, guideline, 'warningLevel', + resolve=False) + } + +def get_drug_annotations(data, drug): + return { + 'drugclass': get_annotation(data, drug, 'drugclass'), + 'indication': get_annotation(data, drug, 'indication'), + 'brand_names': get_annotation(data, drug, 'brandNames', resolve=False) + } + +def has_annotations(annotations): + return all(list(map( + lambda value: value != None, + annotations.values()))) \ No newline at end of file diff --git a/scripts/run_analysis.py b/scripts/run_analysis.py new file mode 100644 index 00000000..4e4126cd --- /dev/null +++ b/scripts/run_analysis.py @@ -0,0 +1,131 @@ +import sys + +from analyze.checks.brand_name_whitespace import check_brand_name_whitespace +from analyze.checks.warning_levels import check_green_warning_level, \ + check_none_warning_level, check_red_warning_level, \ + check_yellow_warning_level +from analyze.checks.consult import has_consult +from analyze.checks.metabolization_severity import check_metabolization_severity + +from analyze.corrections.consult import add_consult +from analyze.corrections.brand_name_whitespace import correct_brand_name_whitespace + +from analyze.data_helpers import get_drug_annotations, get_guideline_annotations, has_annotations +from common.constants import DRUG_COLLECTION_NAME, SCRIPT_POSTFIXES +from common.get_data import get_data, get_guideline_by_id, get_phenotype_key +from common.write_data import write_data, write_log + +DRUG_CHECKS = { + 'brand_whitespace': check_brand_name_whitespace, +} + +DRUG_CORRECTIONS = { + 'brand_whitespace': correct_brand_name_whitespace, +} + +GUIDELINE_CHECKS = { + 'has_consult': has_consult, + 'check_metabolization_severity': check_metabolization_severity, + 'red_warning_level': check_red_warning_level, + 'yellow_warning_level': check_yellow_warning_level, + 'green_warning_level': check_green_warning_level, + 'none_warning_level': check_none_warning_level, +} + +GUIDELINE_CORRECTIONS = { + 'has_consult': add_consult, +} + + +def analyze_annotations(item, annotations, checks): + results = {} + for check_name, check_function in checks.items(): + results[check_name] = check_function(item, annotations) + return results + +def correct_inconsistency(data, item, check_name, corrections): + if check_name in corrections: + corrections[check_name](data, item) + return check_name in corrections + +def log_not_annotated(log_content): + log_content.append(' – _not annotated_\n') + +def log_all_passed(log_content, postfix=''): + log_content.append(f' – _all checks passed_{postfix}\n') + +def log_annotations(log_content, annotations): + for key, value in annotations.items(): + pretty_key = key.capitalize().replace('_', ' ') + log_content.append(f' {pretty_key}: {value}\n') + +def handle_failed_checks( + data, item, result, corrections, should_correct, annotations, log_content): + failed_checks = [] + skipped_checks = [] + for check_name, check_result in result.items(): + if check_result == False: + corrected = should_correct and \ + correct_inconsistency(data, item, + check_name, corrections) + check_name = f'{check_name} (corrected)' if corrected \ + else check_name + failed_checks.append(check_name) + if check_result == None: + skipped_checks.append(check_name) + skipped_checks_string = '' + if len(skipped_checks) > 0: + skipped_checks_string = (' (skipped checks: ' \ + f'{", ".join(skipped_checks)})') + if len(failed_checks) > 0: + log_content.append(' - _some checks failed_: ' \ + f'{", ".join(failed_checks)}{skipped_checks_string}\n') + log_annotations(log_content, annotations) + else: + log_all_passed(log_content, postfix=skipped_checks_string) + +def main(): + correct_inconsistencies = '--correct' in sys.argv + data = get_data() + log_content = [ + '# Analyze annotation data\n\n', + f'_Correct if possible: {correct_inconsistencies}_\n\n' + ] + for drug in data[DRUG_COLLECTION_NAME]: + drug_name = drug['name'] + log_content.append(f'* {drug_name}') + drug_annotations = get_drug_annotations(data, drug) + if not has_annotations(drug_annotations): log_not_annotated(log_content) + else: + drug_result = analyze_annotations( + drug, drug_annotations, DRUG_CHECKS) + if not all(drug_result.values()): + handle_failed_checks(data, drug, drug_result, + DRUG_CORRECTIONS, correct_inconsistencies, + drug_annotations, log_content) + else: + log_all_passed(log_content) + for guideline_id in drug['guidelines']: + guideline = get_guideline_by_id(data, guideline_id) + phenotype = get_phenotype_key(guideline) + log_content.append(f' * {phenotype}') + guideline_annotations = get_guideline_annotations(data, guideline) + if not has_annotations(guideline_annotations): + log_not_annotated(log_content) + continue + guideline_result = analyze_annotations( + guideline, guideline_annotations, GUIDELINE_CHECKS) + if guideline_result == None: continue + if not all(guideline_result.values()): + handle_failed_checks(data, guideline, guideline_result, + GUIDELINE_CORRECTIONS, correct_inconsistencies, + guideline_annotations, log_content) + else: + log_all_passed(log_content) + + write_log(log_content, postfix=SCRIPT_POSTFIXES['correct']) + if correct_inconsistencies: + write_data(data, postfix=SCRIPT_POSTFIXES['correct']) + +if __name__ == '__main__': + main() \ No newline at end of file