From 9079c7b12137f84f14b87d3646c9668adea06d5a Mon Sep 17 00:00:00 2001
From: Tamara Slosarek <tamara.slosarek@hpi.de>
Date: Thu, 10 Oct 2024 12:22:49 +0200
Subject: [PATCH] refactor(scripts): move checks and corrections to files

---
 pharme.code-workspace                         |   1 +
 scripts/README.md                             |   6 +-
 scripts/analyze.py                            | 315 ------------------
 .../analyze/checks/brand_name_whitespace.py   |   8 +
 scripts/analyze/checks/constants.py           |   1 +
 scripts/analyze/checks/consult.py             |   4 +
 .../analyze/checks/metabolization_severity.py |  43 +++
 scripts/analyze/checks/warning_levels.py      |  73 ++++
 .../corrections/brand_name_whitespace.py      |   4 +
 scripts/analyze/corrections/consult.py        |  14 +
 scripts/analyze/data_helpers.py               |  53 +++
 scripts/run_analysis.py                       | 131 ++++++++
 12 files changed, 335 insertions(+), 318 deletions(-)
 delete mode 100644 scripts/analyze.py
 create mode 100644 scripts/analyze/checks/brand_name_whitespace.py
 create mode 100644 scripts/analyze/checks/constants.py
 create mode 100644 scripts/analyze/checks/consult.py
 create mode 100644 scripts/analyze/checks/metabolization_severity.py
 create mode 100644 scripts/analyze/checks/warning_levels.py
 create mode 100644 scripts/analyze/corrections/brand_name_whitespace.py
 create mode 100644 scripts/analyze/corrections/consult.py
 create mode 100644 scripts/analyze/data_helpers.py
 create mode 100644 scripts/run_analysis.py
diff --git a/pharme.code-workspace b/pharme.code-workspace
index fd4b4154..b08ff853 100644
--- a/pharme.code-workspace
+++ b/pharme.code-workspace
@@ -81,6 +81,7 @@
       "LTRB",
       "maxdepth",
       "MedlinePlus",
+      "metabolization",
       "Metabolizer",
       "metabolizers",
       "mirabegron",
diff --git a/scripts/README.md b/scripts/README.md
index d3ce2bdc..4a7689a3 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -85,14 +85,14 @@ Run `python clean.py` to remove the `scripts/temp` directory and all files in
 
 ## Analyze (and correct) annotations
 
-Run `python analyze.py <PATH_TO_BACKUP> [--correct]` to analyze annotations and
-optionally correct what can be corrected easily in
+Run `python run_analysis.py <PATH_TO_BACKUP> [--correct]` to analyze annotations
+and optionally correct what can be corrected easily in
 `<PATH_TO_BACKUP>_corrected_<TIMESTAMP>.base64.json`.
 
 | Check | Description | `--correct`ed | Only for single-gene results* |
 | ----- | ----------- | ------------- | ----------------------------- |
 | `has_consult` | Is "consult your pharmacist..." included in recommendation? | ✅ | ❌ |
-| `implication_severity` | "Much" keyword, should only be used if reflected by guideline implication. | ❌ | ✅ |
+| `check_metabolization_severity` | "Much" keyword, should only be used if reflected by guideline implication. | ❌ | ✅ |
 | `red_warning` | Red warning level should be present with recommendation containing "may not be the right medication". | ❌ | ❌ |
 | `yellow_warning` | Yellow warning level should be present when the red warning level does not apply but the implication contains "may not work" or "side effects" or the recommendation contains non-standard dose. | ❌ | ❌ |
 | `green_warning` | Green warning level should be applied in all non-red and non-yellow cases and when the recommendation states "at standard dose" or similar formulations. | ❌ | ❌ |
diff --git a/scripts/analyze.py b/scripts/analyze.py
deleted file mode 100644
index a5fca73c..00000000
--- a/scripts/analyze.py
+++ /dev/null
@@ -1,315 +0,0 @@
-import sys
-from common.constants import DRUG_COLLECTION_NAME, SCRIPT_POSTFIXES, \
-    BRICK_COLLECTION_NAME
-from common.get_data import get_data, get_guideline_by_id, get_phenotype_key
-from common.write_data import write_data, write_log
-
-CONSULT_TEXT = 'consult your pharmacist or doctor'
-WHOLE_CONSULT_TEXT = '{} for more information.'.format(CONSULT_TEXT)
-RED_TEXT = 'not be the right medication'
-NOT_RED_TEXTS = [
-    'if more than this dose is needed',
-    "if #drug-name isn't working for you",
-]
-ADJUST_TEXT = 'adjusted'
-YELLOW_RECOMMENDATION_TEXTS = NOT_RED_TEXTS + [
-    ADJUST_TEXT,
-    'increased',
-    'decreased',
-    'lower dose',
-    'higher dose',
-    'up to a certain dose',
-    'dose increases should be done cautiously and slowly',
-    'further testing is recommended',
-]
-YELLOW_IMPLICATION_TEXTS = [
-    'increased risk',
-    'may not work',
-]
-GREEN_TEXTS = ['at standard dose', 'there is no reason to avoid']
-
-def ensure_unique_item(item_filter, field_name, value):
-    item = list(item_filter)
-    if len(item) != 1:
-        message = f'[ERROR] Items are not unique for {field_name} == ' \
-            f'{value}: {item}'
-        raise Exception(message)
-    return item[0]
-
-def get_unique_item(items, field_name, value):
-    item_filter = filter(lambda item: item[field_name] == value, items)
-    return ensure_unique_item(item_filter, field_name, value)
-
-def get_english_text(brick):
-    translation = get_unique_item(brick['translations'], 'language', 'English')
-    return translation['text'].lower()
-
-def get_brick_meaning(data, brick_id):
-    bricks = data[BRICK_COLLECTION_NAME]
-    brick = get_unique_item(bricks, '_id', brick_id)
-    return get_english_text(brick)
-
-def get_bricks_meaning(data, brick_ids):
-    return ' '.join(map(
-        lambda brick_id: get_brick_meaning(data, brick_id),
-        brick_ids))
-
-def get_annotation(data, item, key, resolve=True):
-    if not key in item['annotations']: return None
-    annotation = item['annotations'][key]
-    if resolve: annotation = get_bricks_meaning(data, annotation)
-    return annotation
-
-def get_guideline_annotations(data, guideline):
-    return {
-        'implication': get_annotation(data, guideline, 'implication'),
-        'recommendation': get_annotation(data, guideline, 'recommendation'),
-        'warning_level': get_annotation(data, guideline, 'warningLevel',
-            resolve=False)
-    }
-
-def get_drug_annotations(data, drug):
-    return {
-        'drugclass': get_annotation(data, drug, 'drugclass'),
-        'indication': get_annotation(data, drug, 'indication'),
-        'brand_names': get_annotation(data, drug, 'brandNames', resolve=False)
-    }
-
-def has_annotations(annotations):
-    return all(list(map(
-        lambda value: value != None,
-        annotations.values())))
-
-def has_consult(_, annotations):
-    return CONSULT_TEXT in annotations['recommendation']
-
-def check_implication_severity(guideline, annotations):
-    ignored_phenotypes = ['no result', 'indeterminate', 'normal metabolizer']
-    multiple_relevant_phenotypes = False
-    relevant_gene = None
-    for current_gene, current_phenotypes in guideline['phenotypes'].items():
-        if not current_phenotypes[0].lower() in ignored_phenotypes:
-            if relevant_gene != None:
-                multiple_relevant_phenotypes = True
-                break
-            relevant_gene = current_gene
-    if multiple_relevant_phenotypes or relevant_gene == None:
-        return None
-    implication = \
-        guideline['externalData'][0]['implications'][relevant_gene].lower()
-    much_implying_formulations = [
-        'greatly decreased',
-        'greatly reduced',
-        'significantly reduced',
-        'extremely high concentrations',
-        'when compared to cyp2c19 rapid and normal metabolizers',
-        'as compared to non-poor metabolizers',
-        'when compared to cyp2c19 normal and intermediate metabolizers',
-        'as compared to normal and intermediate metabolizer',
-        'complete dpd deficiency',
-    ]
-    much_formulations = [
-        'much faster',
-        'much slower'
-    ]
-    much_is_implied = any(
-        map(
-            lambda much_implying_formulation:
-                much_implying_formulation in implication,
-            much_implying_formulations,
-        )
-    )
-    implication_has_much = any(
-        map(
-            lambda much_formulation: much_formulation in annotations['implication'],
-            much_formulations,
-        )
-    )
-    return much_is_implied == implication_has_much
-
-def should_be_red(annotations):
-    return RED_TEXT in annotations['recommendation'] and all(map(
-        lambda not_red_text: not_red_text not in annotations['recommendation'],
-        NOT_RED_TEXTS,
-    ))
-
-def should_be_yellow(annotations):
-    return any(map(
-        lambda yellow_text: yellow_text in annotations['recommendation'],
-        YELLOW_RECOMMENDATION_TEXTS,
-    )) or any(map(
-        lambda yellow_text: yellow_text in annotations['implication'],
-        YELLOW_IMPLICATION_TEXTS,
-    )) or (
-        # Special case: no other recommendation given
-        annotations['recommendation'] == WHOLE_CONSULT_TEXT
-    )
-
-def should_be_green(annotations):
-    return any(map(
-        lambda green_text: green_text in annotations['recommendation'],
-        GREEN_TEXTS,
-    ))
-
-def check_red_warning_level(_, annotations):
-    has_warning_level = annotations['warning_level'] == 'red'
-    should_have_warning_level = should_be_red(annotations)
-    return has_warning_level == should_have_warning_level
-
-def check_yellow_warning_level(_, annotations):
-    has_warning_level = annotations['warning_level'] == 'yellow'
-    should_have_warning_level = not should_be_red(annotations) and \
-        should_be_yellow(annotations)
-    return has_warning_level if should_have_warning_level else True
-
-def check_green_warning_level(_, annotations):
-    has_warning_level = annotations['warning_level'] == 'green'
-    should_have_warning_level = not should_be_red(annotations) and \
-        not should_be_yellow(annotations) and \
-        should_be_green(annotations)
-    return has_warning_level == should_have_warning_level
-
-def check_none_warning_level(_, annotations):
-    has_warning_level = annotations['warning_level'] == 'none'
-    should_have_warning_level = not should_be_red(annotations) and \
-        not should_be_yellow(annotations) and \
-        not should_be_green(annotations)
-    return has_warning_level == should_have_warning_level
-
-def analyze_annotations(item, annotations, checks):
-    results = {}
-    for check_name, check_function in checks.items():
-        results[check_name] = check_function(item, annotations)
-    return results
-
-def get_consult_brick(data):
-    brick_filter = filter(
-        lambda brick: get_english_text(brick).startswith(CONSULT_TEXT),
-        data[BRICK_COLLECTION_NAME])
-    return ensure_unique_item(brick_filter, 'brick meaning', CONSULT_TEXT)
-
-def add_consult(data, guideline):
-    guideline['annotations']['recommendation'].append(
-        get_consult_brick(data)['_id'])
-    
-def check_brand_name_whitespace(_, annotations):
-    check_applies = True
-    for brand_name in annotations['brand_names']:
-        trimmed_name = brand_name.strip()
-        if trimmed_name != brand_name:
-            check_applies = False
-            break
-    return check_applies
-
-def correct_brand_name_whitespace(_, drug):
-    drug['annotations']['brandNames'] = list(map(
-        lambda brand_name: brand_name.strip(),
-        drug['annotations']['brandNames']))
-
-def correct_inconsistency(data, item, check_name, corrections):
-    if check_name in corrections:
-        corrections[check_name](data, item)
-    return check_name in corrections
-
-def log_not_annotated(log_content):
-    log_content.append(' – _not annotated_\n')
-
-def log_all_passed(log_content, postfix=''):
-    log_content.append(f' – _all checks passed_{postfix}\n')
-
-def log_annotations(log_content, annotations):
-    for key, value in annotations.items():
-        pretty_key = key.capitalize().replace('_', ' ')
-        log_content.append(f'   {pretty_key}: {value}\n')
-
-def handle_failed_checks(
-    data, item, result, corrections, should_correct, annotations, log_content):
-    failed_checks = []
-    skipped_checks = []
-    for check_name, check_result in result.items():
-        if check_result == False:
-            corrected = should_correct and \
-                correct_inconsistency(data, item,
-                    check_name, corrections)
-            check_name = f'{check_name} (corrected)' if corrected \
-                else check_name
-            failed_checks.append(check_name)
-        if check_result == None:
-            skipped_checks.append(check_name)
-    skipped_checks_string = ''
-    if len(skipped_checks) > 0:
-        skipped_checks_string = (' (skipped checks: ' \
-        f'{", ".join(skipped_checks)})')
-    if len(failed_checks) > 0:
-        log_content.append(' - _some checks failed_: ' \
-            f'{", ".join(failed_checks)}{skipped_checks_string}\n')
-        log_annotations(log_content, annotations)
-    else:
-        log_all_passed(log_content, postfix=skipped_checks_string)
-
-DRUG_CHECKS = {
-    'brand_whitespace': check_brand_name_whitespace,
-}
-
-DRUG_CORRECTIONS = {
-    'brand_whitespace': correct_brand_name_whitespace,
-}
-
-GUIDELINE_CHECKS = {
-    'has_consult': has_consult,
-    'implication_severity': check_implication_severity,
-    'red_warning_level': check_red_warning_level,
-    'yellow_warning_level': check_yellow_warning_level,
-    'green_warning_level': check_green_warning_level,
-    'none_warning_level': check_none_warning_level,
-}
-
-GUIDELINE_CORRECTIONS = {
-    'has_consult': add_consult,
-}
-
-def main():
-    correct_inconsistencies = '--correct' in sys.argv
-    data = get_data()
-    log_content = [
-        '# Analyze annotation data\n\n',
-        f'_Correct if possible: {correct_inconsistencies}_\n\n'
-    ]
-    for drug in data[DRUG_COLLECTION_NAME]:
-        drug_name = drug['name']
-        log_content.append(f'* {drug_name}')
-        drug_annotations = get_drug_annotations(data, drug)
-        if not has_annotations(drug_annotations): log_not_annotated(log_content)
-        else:
-            drug_result = analyze_annotations(
-                drug, drug_annotations, DRUG_CHECKS)
-            if not all(drug_result.values()):
-                handle_failed_checks(data, drug, drug_result,
-                    DRUG_CORRECTIONS, correct_inconsistencies,
-                    drug_annotations, log_content)
-            else:
-                log_all_passed(log_content)
-        for guideline_id in drug['guidelines']:
-            guideline = get_guideline_by_id(data, guideline_id)
-            phenotype = get_phenotype_key(guideline)
-            log_content.append(f'  * {phenotype}')
-            guideline_annotations = get_guideline_annotations(data, guideline)
-            if not has_annotations(guideline_annotations):
-                log_not_annotated(log_content)
-                continue
-            guideline_result = analyze_annotations(
-                guideline, guideline_annotations, GUIDELINE_CHECKS)
-            if guideline_result == None: continue
-            if not all(guideline_result.values()):
-                handle_failed_checks(data, guideline, guideline_result,
-                    GUIDELINE_CORRECTIONS, correct_inconsistencies,
-                    guideline_annotations, log_content)
-            else:
-                log_all_passed(log_content)
-
-    write_log(log_content, postfix=SCRIPT_POSTFIXES['correct'])
-    if correct_inconsistencies:
-        write_data(data, postfix=SCRIPT_POSTFIXES['correct'])
-
-if __name__ == '__main__':
-    main()
\ No newline at end of file
diff --git a/scripts/analyze/checks/brand_name_whitespace.py b/scripts/analyze/checks/brand_name_whitespace.py
new file mode 100644
index 00000000..3c1e41a8
--- /dev/null
+++ b/scripts/analyze/checks/brand_name_whitespace.py
@@ -0,0 +1,8 @@
+def check_brand_name_whitespace(_, annotations):
+    """Check that no brand name has leading or trailing whitespace.
+
+    Returns True if every entry of annotations['brand_names'] equals its
+    stripped form (also for an empty list), False otherwise.
+    """
+    return all(
+        name == name.strip() for name in annotations['brand_names'])
\ No newline at end of file
diff --git a/scripts/analyze/checks/constants.py b/scripts/analyze/checks/constants.py
new file mode 100644
index 00000000..f8aa339a
--- /dev/null
+++ b/scripts/analyze/checks/constants.py
@@ -0,0 +1 @@
+CONSULT_TEXT = 'consult your pharmacist or doctor'  # lower case; matched against lower-cased texts
\ No newline at end of file
diff --git a/scripts/analyze/checks/consult.py b/scripts/analyze/checks/consult.py
new file mode 100644
index 00000000..b2c35364
--- /dev/null
+++ b/scripts/analyze/checks/consult.py
@@ -0,0 +1,4 @@
+from analyze.checks.constants import CONSULT_TEXT
+
+def has_consult(_, annotations):  # first argument (the checked item) is unused
+    return CONSULT_TEXT in annotations['recommendation']  # substring match
\ No newline at end of file
diff --git a/scripts/analyze/checks/metabolization_severity.py b/scripts/analyze/checks/metabolization_severity.py
new file mode 100644
index 00000000..7b35a190
--- /dev/null
+++ b/scripts/analyze/checks/metabolization_severity.py
@@ -0,0 +1,43 @@
+def check_metabolization_severity(guideline, annotations):
+    """Check that "much" wording is backed by the guideline implication.
+
+    Returns None (check skipped) unless exactly one gene has a first
+    phenotype outside the ignored ones. Otherwise returns True if the
+    annotated implication uses "much faster"/"much slower" exactly when
+    the external implication for that gene implies it, else False.
+    """
+    ignored_phenotypes = ['no result', 'indeterminate', 'normal metabolizer']
+    relevant_gene = None
+    for current_gene, current_phenotypes in guideline['phenotypes'].items():
+        if current_phenotypes[0].lower() in ignored_phenotypes:
+            continue
+        if relevant_gene is not None:
+            # Second relevant gene found: not a single-gene result
+            return None
+        relevant_gene = current_gene
+    if relevant_gene is None:
+        return None
+    # Only the first external data entry is consulted
+    implication = \
+        guideline['externalData'][0]['implications'][relevant_gene].lower()
+    # Formulations of the external implication that justify "much"
+    much_implying_formulations = [
+        'greatly decreased',
+        'greatly reduced',
+        'significantly reduced',
+        'extremely high concentrations',
+        'when compared to cyp2c19 rapid and normal metabolizers',
+        'as compared to non-poor metabolizers',
+        'when compared to cyp2c19 normal and intermediate metabolizers',
+        'as compared to normal and intermediate metabolizer',
+        'complete dpd deficiency',
+    ]
+    # Wording looked for in the annotated implication
+    much_formulations = ['much faster', 'much slower']
+    much_is_implied = any(
+        formulation in implication
+        for formulation in much_implying_formulations)
+    implication_has_much = any(
+        formulation in annotations['implication']
+        for formulation in much_formulations)
+    return much_is_implied == implication_has_much
\ No newline at end of file
diff --git a/scripts/analyze/checks/warning_levels.py b/scripts/analyze/checks/warning_levels.py
new file mode 100644
index 00000000..40f7db44
--- /dev/null
+++ b/scripts/analyze/checks/warning_levels.py
@@ -0,0 +1,73 @@
+from analyze.checks.constants import CONSULT_TEXT
+
+# Recommendation that consists of the consult sentence only
+WHOLE_CONSULT_TEXT = f'{CONSULT_TEXT} for more information.'
+# Phrase calling for red, unless a NOT_RED_TEXTS phrase is present as well
+RED_TEXT = 'not be the right medication'
+NOT_RED_TEXTS = [
+    'if more than this dose is needed',
+    "if #drug-name isn't working for you",
+]
+ADJUST_TEXT = 'adjusted'
+# Recommendation phrases calling for yellow
+YELLOW_RECOMMENDATION_TEXTS = NOT_RED_TEXTS + [
+    ADJUST_TEXT,
+    'increased',
+    'decreased',
+    'lower dose',
+    'higher dose',
+    'up to a certain dose',
+    'dose increases should be done cautiously and slowly',
+    'further testing is recommended',
+]
+YELLOW_IMPLICATION_TEXTS = ['increased risk', 'may not work']
+GREEN_TEXTS = ['at standard dose', 'there is no reason to avoid']
+
+def should_be_red(annotations):
+    """Return True if the recommendation calls for a red warning level."""
+    recommendation = annotations['recommendation']
+    return RED_TEXT in recommendation and not any(
+        text in recommendation for text in NOT_RED_TEXTS)
+
+def should_be_yellow(annotations):
+    """Return True if the annotation texts suggest a yellow warning level.
+
+    Red is not ruled out here; the warning level checks do that themselves.
+    """
+    recommendation = annotations['recommendation']
+    return any(
+        text in recommendation for text in YELLOW_RECOMMENDATION_TEXTS
+    ) or any(
+        text in annotations['implication'] for text in YELLOW_IMPLICATION_TEXTS
+    ) or recommendation == WHOLE_CONSULT_TEXT  # no other recommendation given
+
+def should_be_green(annotations):
+    """Return True if the recommendation contains one of the green phrases."""
+    recommendation = annotations['recommendation']
+    return any(
+        green_text in recommendation for green_text in GREEN_TEXTS)
+
+def check_red_warning_level(_, annotations):
+    """Check that the warning level is red exactly when red is called for."""
+    is_red = annotations['warning_level'] == 'red'
+    return is_red == should_be_red(annotations)
+
+def check_yellow_warning_level(_, annotations):
+    """Check that yellow is set when called for; all other cases pass."""
+    is_yellow = annotations['warning_level'] == 'yellow'
+    return is_yellow or should_be_red(annotations) or \
+        not should_be_yellow(annotations)
+
+def check_green_warning_level(_, annotations):
+    """Check that the level is green exactly when only green applies."""
+    is_green = annotations['warning_level'] == 'green'
+    expect_green = should_be_green(annotations) and not (
+        should_be_red(annotations) or should_be_yellow(annotations))
+    return is_green == expect_green
+
+def check_none_warning_level(_, annotations):
+    """Check that the level is none exactly when no other level applies."""
+    is_none = annotations['warning_level'] == 'none'
+    other_level_applies = should_be_red(annotations) or \
+        should_be_yellow(annotations) or should_be_green(annotations)
+    return is_none == (not other_level_applies)
\ No newline at end of file
diff --git a/scripts/analyze/corrections/brand_name_whitespace.py b/scripts/analyze/corrections/brand_name_whitespace.py
new file mode 100644
index 00000000..30e9b9f5
--- /dev/null
+++ b/scripts/analyze/corrections/brand_name_whitespace.py
@@ -0,0 +1,4 @@
+def correct_brand_name_whitespace(_, drug):
+    """Replace the drug's brand names with whitespace-stripped copies."""
+    names = drug['annotations']['brandNames']
+    drug['annotations']['brandNames'] = [name.strip() for name in names]
\ No newline at end of file
diff --git a/scripts/analyze/corrections/consult.py b/scripts/analyze/corrections/consult.py
new file mode 100644
index 00000000..a0d9976f
--- /dev/null
+++ b/scripts/analyze/corrections/consult.py
@@ -0,0 +1,14 @@
+from analyze.data_helpers import ensure_unique_item, get_english_text
+from common.constants import BRICK_COLLECTION_NAME
+
+from analyze.checks.constants import CONSULT_TEXT
+
+def get_consult_brick(data):
+    # Exactly one brick is expected to start with the consult text
+    consult_bricks = [brick for brick in data[BRICK_COLLECTION_NAME]
+        if get_english_text(brick).startswith(CONSULT_TEXT)]
+    return ensure_unique_item(consult_bricks, 'brick meaning', CONSULT_TEXT)
+
+def add_consult(data, guideline):  # appends the consult brick ID in place
+    consult_brick_id = get_consult_brick(data)['_id']
+    guideline['annotations']['recommendation'].append(consult_brick_id)
\ No newline at end of file
diff --git a/scripts/analyze/data_helpers.py b/scripts/analyze/data_helpers.py
new file mode 100644
index 00000000..c6edf6ed
--- /dev/null
+++ b/scripts/analyze/data_helpers.py
@@ -0,0 +1,53 @@
+from common.constants import BRICK_COLLECTION_NAME
+
+def ensure_unique_item(item_filter, field_name, value):
+    """Return the only item of item_filter; raise if not exactly one."""
+    items = list(item_filter)
+    if len(items) == 1:
+        return items[0]
+    raise Exception(
+        f'[ERROR] Items are not unique for {field_name} == {value}: {items}')
+
+def get_unique_item(items, field_name, value):  # exactly one match expected
+    matches = [item for item in items if item[field_name] == value]
+    return ensure_unique_item(matches, field_name, value)
+
+def get_english_text(brick):  # returns the English translation, lower-cased
+    translations = brick['translations']
+    return get_unique_item(translations, 'language', 'English')['text'].lower()
+
+def get_brick_meaning(data, brick_id):
+    """Return the lower-cased English text of the brick with brick_id."""
+    brick = get_unique_item(data[BRICK_COLLECTION_NAME], '_id', brick_id)
+    return get_english_text(brick)
+
+def get_bricks_meaning(data, brick_ids):
+    """Join the meanings of the given bricks with single spaces."""
+    meanings = [get_brick_meaning(data, brick_id) for brick_id in brick_ids]
+    return ' '.join(meanings)
+
+def get_annotation(data, item, key, resolve=True):  # resolve: IDs to text
+    if key not in item['annotations']:
+        return None  # item is not annotated for this key
+    annotation = item['annotations'][key]
+    return get_bricks_meaning(data, annotation) if resolve else annotation
+
+def get_guideline_annotations(data, guideline):
+    # Texts are resolved from brick IDs; the warning level is kept as stored
+    texts = {key: get_annotation(data, guideline, key)
+        for key in ('implication', 'recommendation')}
+    warning_level = get_annotation(
+        data, guideline, 'warningLevel', resolve=False)
+    return {**texts, 'warning_level': warning_level}
+
+def get_drug_annotations(data, drug):
+    # Texts are resolved from brick IDs; brand names are kept as stored
+    texts = {key: get_annotation(data, drug, key)
+        for key in ('drugclass', 'indication')}
+    brand_names = get_annotation(data, drug, 'brandNames', resolve=False)
+    return {**texts, 'brand_names': brand_names}
+
+def has_annotations(annotations):
+    """Return True if none of the annotation values is None."""
+    values = annotations.values()
+    return all(value is not None for value in values)
\ No newline at end of file
diff --git a/scripts/run_analysis.py b/scripts/run_analysis.py
new file mode 100644
index 00000000..4e4126cd
--- /dev/null
+++ b/scripts/run_analysis.py
@@ -0,0 +1,131 @@
+import sys
+
+from analyze.checks.brand_name_whitespace import check_brand_name_whitespace
+from analyze.checks.warning_levels import check_green_warning_level, \
+    check_none_warning_level, check_red_warning_level, \
+        check_yellow_warning_level
+from analyze.checks.consult import has_consult
+from analyze.checks.metabolization_severity import check_metabolization_severity
+
+from analyze.corrections.consult import add_consult
+from analyze.corrections.brand_name_whitespace import correct_brand_name_whitespace
+
+from analyze.data_helpers import get_drug_annotations, get_guideline_annotations, has_annotations
+from common.constants import DRUG_COLLECTION_NAME, SCRIPT_POSTFIXES
+from common.get_data import get_data, get_guideline_by_id, get_phenotype_key
+from common.write_data import write_data, write_log
+
+# Checks map the name used in the log to a function taking the item and
+# its annotations; it returns True (passed), False (failed) or None
+# (skipped). Corrections map the name of a check to a function taking the
+# data and the item, which is applied to items failing that check.
+DRUG_CHECKS = {'brand_whitespace': check_brand_name_whitespace}
+
+DRUG_CORRECTIONS = {'brand_whitespace': correct_brand_name_whitespace}
+
+# Guideline checks run in this order, which is also the order in the log
+GUIDELINE_CHECKS = {
+    'has_consult': has_consult,
+    'check_metabolization_severity': check_metabolization_severity,
+    'red_warning_level': check_red_warning_level,
+    'yellow_warning_level': check_yellow_warning_level,
+    'green_warning_level': check_green_warning_level,
+    'none_warning_level': check_none_warning_level,
+}
+
+# Only a missing consult text can be corrected automatically for guidelines
+GUIDELINE_CORRECTIONS = {'has_consult': add_consult}
+
+
+def analyze_annotations(item, annotations, checks):
+    """Run every check on the item and return the results by check name."""
+    return {
+        check_name: check_function(item, annotations)
+        for check_name, check_function in checks.items()}
+
+def correct_inconsistency(data, item, check_name, corrections):
+    can_correct = check_name in corrections  # also the return value
+    if can_correct: corrections[check_name](data, item)
+    return can_correct
+
+def log_not_annotated(log_content):  # appends the remark and ends the log line
+    log_content.append(' – _not annotated_\n')
+
+def log_all_passed(log_content, postfix=''):  # postfix goes before the newline
+    log_content.append(f' – _all checks passed_{postfix}\n')
+
+def log_annotations(log_content, annotations):
+    for name, text in annotations.items():  # e.g. '   Warning level: red'
+        label = name.capitalize().replace('_', ' ')
+        log_content.append(f'   {label}: {text}\n')
+
+def handle_failed_checks(
+    data, item, result, corrections, should_correct, annotations, log_content):
+    """Log failed (False) and skipped (None) checks, correcting if wanted."""
+    failed_checks = []
+    skipped_checks = []
+    for check_name, check_result in result.items():
+        if check_result is None:
+            skipped_checks.append(check_name)
+        elif check_result is False:
+            corrected = should_correct and \
+                correct_inconsistency(data, item, check_name, corrections)
+            failed_checks.append(
+                f'{check_name} (corrected)' if corrected else check_name)
+    skipped_checks_string = ''
+    if skipped_checks:
+        skipped_checks_string = \
+            f' (skipped checks: {", ".join(skipped_checks)})'
+    if not failed_checks:
+        # Nothing failed, but some checks may have been skipped
+        log_all_passed(log_content, postfix=skipped_checks_string)
+        return
+    log_content.append(' - _some checks failed_: '
+        f'{", ".join(failed_checks)}{skipped_checks_string}\n')
+    log_annotations(log_content, annotations)
+
+def main():
+    """Analyze annotations; write the log and, with --correct, the data."""
+    correct_inconsistencies = '--correct' in sys.argv
+    data = get_data()
+    log_content = [
+        '# Analyze annotation data\n\n',
+        f'_Correct if possible: {correct_inconsistencies}_\n\n'
+    ]
+    for drug in data[DRUG_COLLECTION_NAME]:
+        log_content.append(f'* {drug["name"]}')
+        drug_annotations = get_drug_annotations(data, drug)
+        if not has_annotations(drug_annotations):
+            log_not_annotated(log_content)
+        else:
+            drug_result = analyze_annotations(
+                drug, drug_annotations, DRUG_CHECKS)
+            if all(drug_result.values()):
+                log_all_passed(log_content)
+            else:
+                handle_failed_checks(data, drug, drug_result,
+                    DRUG_CORRECTIONS, correct_inconsistencies,
+                    drug_annotations, log_content)
+        # Guidelines are analyzed even if the drug itself is not annotated
+        for guideline_id in drug['guidelines']:
+            guideline = get_guideline_by_id(data, guideline_id)
+            log_content.append(f'  * {get_phenotype_key(guideline)}')
+            guideline_annotations = get_guideline_annotations(data, guideline)
+            if not has_annotations(guideline_annotations):
+                log_not_annotated(log_content)
+                continue
+            guideline_result = analyze_annotations(
+                guideline, guideline_annotations, GUIDELINE_CHECKS)
+            if all(guideline_result.values()):
+                log_all_passed(log_content)
+            else:
+                handle_failed_checks(data, guideline, guideline_result,
+                    GUIDELINE_CORRECTIONS, correct_inconsistencies,
+                    guideline_annotations, log_content)
+
+    write_log(log_content, postfix=SCRIPT_POSTFIXES['correct'])
+    if correct_inconsistencies:
+        write_data(data, postfix=SCRIPT_POSTFIXES['correct'])
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file