From 04e7746effe28d965e2606b8d3f098219079917b Mon Sep 17 00:00:00 2001
From: Tamara Slosarek <tamara.slosarek@hpi.de>
Date: Thu, 10 Oct 2024 15:10:24 +0200
Subject: [PATCH] feat(scripts): add implications annotation order check

---
 scripts/README.md                             |  1 +
 scripts/analyze/checks/constants.py           |  1 -
 scripts/analyze/checks/consult.py             |  2 +-
 .../metabolization_before_consequence.py      | 26 ++++++++++
 .../analyze/checks/metabolization_severity.py | 21 ++-------
 scripts/analyze/checks/warning_levels.py      | 37 +++------------
 scripts/analyze/constants.py                  | 47 +++++++++++++++++++
 scripts/analyze/corrections/consult.py        |  2 +-
 scripts/run_analysis.py                       |  2 +
 9 files changed, 89 insertions(+), 50 deletions(-)
 delete mode 100644 scripts/analyze/checks/constants.py
 create mode 100644 scripts/analyze/checks/metabolization_before_consequence.py
 create mode 100644 scripts/analyze/constants.py

diff --git a/scripts/README.md b/scripts/README.md
index 4a7689a3..7b7254e5 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -98,6 +98,7 @@ and optionally correct what can be corrected easily in
 | `green_warning` | Green warning level should be applied in all non-red and non-yellow cases and when the recommendation states "at standard dose" or similar formulations. | ❌ | ❌ |
 | `none_warning` | None warning level should be applied in all not handled warning level cases. | ❌ | ❌ |
 | `brand_whitespace` | Drug brand names should not have leading or trailing white space. | ✅ | ❌ |
+| `metabolization_before_consequence` | In implication texts, the metabolization statement (e.g., "break down" or "activate") should come before its consequence (e.g., "risk" or "may not work"). | ❌ | ❌ |
 
 \* Skips guidelines with multiple genes unless all results but one are missing
 or indeterminate.
diff --git a/scripts/analyze/checks/constants.py b/scripts/analyze/checks/constants.py
deleted file mode 100644
index f8aa339a..00000000
--- a/scripts/analyze/checks/constants.py
+++ /dev/null
@@ -1 +0,0 @@
-CONSULT_TEXT = 'consult your pharmacist or doctor'
\ No newline at end of file
diff --git a/scripts/analyze/checks/consult.py b/scripts/analyze/checks/consult.py
index b2c35364..259d904f 100644
--- a/scripts/analyze/checks/consult.py
+++ b/scripts/analyze/checks/consult.py
@@ -1,4 +1,4 @@
-from analyze.checks.constants import CONSULT_TEXT
+from analyze.constants import CONSULT_TEXT
 
 def has_consult(_, annotations):
     return CONSULT_TEXT in annotations['recommendation']
\ No newline at end of file
diff --git a/scripts/analyze/checks/metabolization_before_consequence.py b/scripts/analyze/checks/metabolization_before_consequence.py
new file mode 100644
index 00000000..3c84ec19
--- /dev/null
+++ b/scripts/analyze/checks/metabolization_before_consequence.py
@@ -0,0 +1,26 @@
+import analyze.constants as constants
+
+def _get_first_substring_position(string, substrings):
+    """Return the lowest index of any of substrings in string, or None."""
+    # str.find returns -1 when a substring is absent; index 0 is a valid
+    # match at the very start of the string, so only negatives are dropped.
+    positions = [
+        position
+        for position in map(string.find, substrings)
+        if position >= 0
+    ]
+    return min(positions) if positions else None
+
+def check_metabolization_before_consequence(_, annotations):
+    """Check that a metabolization phrase precedes any consequence phrase.
+
+    Passes (True) when either kind of phrase is missing in the implication.
+    """
+    text = annotations['implication']
+    first_metabolization = _get_first_substring_position(
+        text, constants.METABOLIZATION_FORMULATIONS)
+    first_consequence = _get_first_substring_position(
+        text, constants.CONSEQUENCE_FORMULATIONS)
+    if first_metabolization is None or first_consequence is None:
+        return True
+    return first_metabolization < first_consequence
\ No newline at end of file
diff --git a/scripts/analyze/checks/metabolization_severity.py b/scripts/analyze/checks/metabolization_severity.py
index 7b35a190..2f26cacd 100644
--- a/scripts/analyze/checks/metabolization_severity.py
+++ b/scripts/analyze/checks/metabolization_severity.py
@@ -1,3 +1,5 @@
+import analyze.constants as constants
+
 def check_metabolization_severity(guideline, annotations):
     ignored_phenotypes = ['no result', 'indeterminate', 'normal metabolizer']
     multiple_relevant_phenotypes = False
@@ -12,32 +14,17 @@ def check_metabolization_severity(guideline, annotations):
         return None
     implication = \
         guideline['externalData'][0]['implications'][relevant_gene].lower()
-    much_implying_formulations = [
-        'greatly decreased',
-        'greatly reduced',
-        'significantly reduced',
-        'extremely high concentrations',
-        'when compared to cyp2c19 rapid and normal metabolizers',
-        'as compared to non-poor metabolizers',
-        'when compared to cyp2c19 normal and intermediate metabolizers',
-        'as compared to normal and intermediate metabolizer',
-        'complete dpd deficiency',
-    ]
-    much_formulations = [
-        'much faster',
-        'much slower'
-    ]
     much_is_implied = any(
         map(
             lambda much_implying_formulation:
                 much_implying_formulation in implication,
-            much_implying_formulations,
+            constants.MUCH_IMPLYING_METABOLIZATION_FORMULATIONS,
         )
     )
     implication_has_much = any(
         map(
             lambda much_formulation: much_formulation in annotations['implication'],
-            much_formulations,
+            constants.MUCH_METABOLIZATION_FORMULATIONS,
         )
     )
     return much_is_implied == implication_has_much
\ No newline at end of file
diff --git a/scripts/analyze/checks/warning_levels.py b/scripts/analyze/checks/warning_levels.py
index 40f7db44..19f790a1 100644
--- a/scripts/analyze/checks/warning_levels.py
+++ b/scripts/analyze/checks/warning_levels.py
@@ -1,50 +1,27 @@
-from analyze.checks.constants import CONSULT_TEXT
-
-WHOLE_CONSULT_TEXT = '{} for more information.'.format(CONSULT_TEXT)
-RED_TEXT = 'not be the right medication'
-NOT_RED_TEXTS = [
-    'if more than this dose is needed',
-    "if #drug-name isn't working for you",
-]
-ADJUST_TEXT = 'adjusted'
-YELLOW_RECOMMENDATION_TEXTS = NOT_RED_TEXTS + [
-    ADJUST_TEXT,
-    'increased',
-    'decreased',
-    'lower dose',
-    'higher dose',
-    'up to a certain dose',
-    'dose increases should be done cautiously and slowly',
-    'further testing is recommended',
-]
-YELLOW_IMPLICATION_TEXTS = [
-    'increased risk',
-    'may not work',
-]
-GREEN_TEXTS = ['at standard dose', 'there is no reason to avoid']
+import analyze.constants as constants
 
 def should_be_red(annotations):
-    return RED_TEXT in annotations['recommendation'] and all(map(
+    return constants.RED_TEXT in annotations['recommendation'] and all(map(
         lambda not_red_text: not_red_text not in annotations['recommendation'],
-        NOT_RED_TEXTS,
+        constants.NOT_RED_TEXTS,
     ))
 
 def should_be_yellow(annotations):
     return any(map(
         lambda yellow_text: yellow_text in annotations['recommendation'],
-        YELLOW_RECOMMENDATION_TEXTS,
+        constants.YELLOW_RECOMMENDATION_TEXTS,
     )) or any(map(
         lambda yellow_text: yellow_text in annotations['implication'],
-        YELLOW_IMPLICATION_TEXTS,
+        constants.YELLOW_IMPLICATION_TEXTS,
     )) or (
         # Special case: no other recommendation given
-        annotations['recommendation'] == WHOLE_CONSULT_TEXT
+        annotations['recommendation'] == constants.WHOLE_CONSULT_TEXT
     )
 
 def should_be_green(annotations):
     return any(map(
         lambda green_text: green_text in annotations['recommendation'],
-        GREEN_TEXTS,
+        constants.GREEN_TEXTS,
     ))
 
 def check_red_warning_level(_, annotations):
diff --git a/scripts/analyze/constants.py b/scripts/analyze/constants.py
new file mode 100644
index 00000000..59b58207
--- /dev/null
+++ b/scripts/analyze/constants.py
@@ -0,0 +1,47 @@
+# Phrases the analysis checks and corrections look for in annotation texts.
+CONSULT_TEXT = 'consult your pharmacist or doctor'
+# Recommendation that consists of the consult sentence only.
+WHOLE_CONSULT_TEXT = '{} for more information.'.format(CONSULT_TEXT)
+# Warning level phrases, matched in analyze/checks/warning_levels.py.
+RED_TEXT = 'not be the right medication'
+NOT_RED_TEXTS = [
+    'if more than this dose is needed',
+    "if #drug-name isn't working for you",
+]
+ADJUST_TEXT = 'adjusted'
+YELLOW_RECOMMENDATION_TEXTS = NOT_RED_TEXTS + [
+    ADJUST_TEXT,
+    'increased',
+    'decreased',
+    'lower dose',
+    'higher dose',
+    'up to a certain dose',
+    'dose increases should be done cautiously and slowly',
+    'further testing is recommended',
+]
+MAY_NOT_WORK_TEXT = 'may not work'
+YELLOW_IMPLICATION_TEXTS = ['increased risk', MAY_NOT_WORK_TEXT]
+GREEN_TEXTS = ['at standard dose', 'there is no reason to avoid']
+# Matched against the lowercased guideline implication text.
+MUCH_IMPLYING_METABOLIZATION_FORMULATIONS = [
+    'greatly decreased',
+    'greatly reduced',
+    'significantly reduced',
+    'extremely high concentrations',
+    'when compared to cyp2c19 rapid and normal metabolizers',
+    'as compared to non-poor metabolizers',
+    'when compared to cyp2c19 normal and intermediate metabolizers',
+    'as compared to normal and intermediate metabolizer',
+    'complete dpd deficiency',
+]
+# Matched against the annotated implication text.
+MUCH_METABOLIZATION_FORMULATIONS = ['much faster', 'much slower']
+# Phrase groups compared by the metabolization_before_consequence check.
+METABOLIZATION_FORMULATIONS = [
+    'activate',
+    'break down',
+]
+CONSEQUENCE_FORMULATIONS = [
+    'risk',
+    MAY_NOT_WORK_TEXT,
+]
\ No newline at end of file
diff --git a/scripts/analyze/corrections/consult.py b/scripts/analyze/corrections/consult.py
index a0d9976f..c2deb542 100644
--- a/scripts/analyze/corrections/consult.py
+++ b/scripts/analyze/corrections/consult.py
@@ -1,7 +1,7 @@
 from analyze.data_helpers import ensure_unique_item, get_english_text
 from common.constants import BRICK_COLLECTION_NAME
 
-from analyze.checks.constants import CONSULT_TEXT
+from analyze.constants import CONSULT_TEXT
 
 def get_consult_brick(data):
     brick_filter = filter(
diff --git a/scripts/run_analysis.py b/scripts/run_analysis.py
index 1bbb0ea1..9a94fd1c 100644
--- a/scripts/run_analysis.py
+++ b/scripts/run_analysis.py
@@ -1,6 +1,7 @@
 import sys
 
 from analyze.checks.brand_name_whitespace import check_brand_name_whitespace
+from analyze.checks.metabolization_before_consequence import check_metabolization_before_consequence
 from analyze.checks.warning_levels import check_green_warning_level, \
     check_none_warning_level, check_red_warning_level, \
         check_yellow_warning_level
@@ -30,6 +31,7 @@
     'yellow_warning_level': check_yellow_warning_level,
     'green_warning_level': check_green_warning_level,
     'none_warning_level': check_none_warning_level,
+    'metabolization_before_consequence': check_metabolization_before_consequence,
 }
 
 GUIDELINE_CORRECTIONS = {