Skip to content

Commit

Permalink
feat(scripts): adapt update script to Anni changes
Browse files Browse the repository at this point in the history
  • Loading branch information
tamslo committed Aug 5, 2024
1 parent 9efb01f commit c246218
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 16 deletions.
20 changes: 15 additions & 5 deletions scripts/common/get_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import base64
import json
import os
Expand Down Expand Up @@ -89,7 +90,7 @@ def get_phenotype_value_lengths(guideline, expect_same_length = False):
if expect_same_length:
if len(phenotype_values_lengths) != 1:
raise Exception('[ERROR] Expecting lookupkey and phenotypes per ' \
'gene to have same lenghts but lengths differ ' \
'gene to have same lengths but lengths differ ' \
'for guideline {}'.format(guideline['_id']))
return phenotype_values_lengths[0]
return phenotype_values_lengths
Expand All @@ -105,16 +106,25 @@ def dict_to_key(dictionary, format_value=lambda value: value):
lambda key: f'{key} {format_value(dictionary[key])}',
dict(sorted(dictionary.items())).keys()))

def get_phenotype_description_key(dictionary):
    """Return a key string describing a gene-to-values dictionary.

    The dictionary is passed to ``dict_to_key``; each value is rendered as
    its items sorted and joined with ``', '`` so that the order of the
    values does not influence the resulting key.

    Args:
        dictionary: mapping whose values are iterables of strings (e.g. the
            ``phenotypes`` or ``lookupkey`` entry of a guideline).

    Returns:
        Whatever ``dict_to_key`` builds from the dictionary and formatter.
    """
    return dict_to_key(
        dictionary,
        lambda phenotype_value: ', '.join(sorted(phenotype_value)))

def get_lookupkey_key(guideline):
    """Return the description key built from ``guideline['lookupkey']``."""
    # get_phenotype_description_key takes the dictionary itself, not the
    # guideline plus a property name.
    return get_phenotype_description_key(guideline['lookupkey'])

def get_phenotype(guideline):
    """Return the description key built from ``guideline['phenotypes']`` only.

    Unlike ``get_phenotype_key``, the lookupkey values are not merged in.
    """
    return get_phenotype_description_key(guideline['phenotypes'])

def get_phenotype_key(guideline):
    """Return a key combining a guideline's phenotypes and lookupkeys.

    For every gene in ``guideline['phenotypes']`` the phenotype values are
    copied and each value of ``guideline['lookupkey'][gene]`` that is not
    already present is appended. The merged dictionary is then turned into
    a key with ``get_phenotype_description_key``.

    The guideline itself is not modified; the phenotype lists are deep
    copied before lookupkeys are appended.

    Raises:
        KeyError: if a gene in ``guideline['phenotypes']`` has no entry in
            ``guideline['lookupkey']``.
    """
    phenotypes = {}
    for gene, phenotype_values in guideline['phenotypes'].items():
        # Copy so appending lookupkeys does not mutate the guideline
        merged_values = copy.deepcopy(phenotype_values)
        for lookupkey in guideline['lookupkey'][gene]:
            if lookupkey not in merged_values:
                merged_values.append(lookupkey)
        phenotypes[gene] = merged_values
    return get_phenotype_description_key(phenotypes)

def get_information_key(external_data):
information_key = external_data['comments'] \
Expand Down
44 changes: 33 additions & 11 deletions scripts/update.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import copy

from common.get_data import get_data, get_drug_by_name
from common.get_data import get_data, get_drug_by_name, get_phenotype
from common.get_data import get_guidelines_by_ids
from common.get_data import get_phenotype_key
from common.get_data import get_lookupkey_key
Expand Down Expand Up @@ -188,9 +188,10 @@ def update_guidelines(data, guidelines, updated_guidelines):
updated_guideline = next(
updated_guideline for updated_guideline in updated_guidelines \
if get_phenotype_key(updated_guideline) == phenotype_key)
# Test if lookupkey changed; only the list for each key can change,
# everything else will be covered by removing or adding phenotype
# guidelines
# Test if lookupkey changed; this is legacy code that removes multiples
# of one lookupkey, as now the phenotype key also includes the
# lookupkey; everything else will be covered by removing or adding
# phenotype guidelines
guideline_updates += update_guideline_information(data, guideline, \
updated_guideline, 'lookupkey', get_lookupkey_key)
# Test if external data changed
Expand Down Expand Up @@ -230,7 +231,14 @@ def get_new_genes(stale_guideline, updated_guideline):
lambda gene: gene not in stale_guideline['phenotypes'],
updated_guideline['phenotypes']))

def should_transfer_guideline(stale_guideline, updated_guideline):
def lookups_for_phenotype_changed(stale_guideline, updated_guideline):
    """Tell whether only the lookupkeys differ between two guidelines.

    Returns True when both guidelines yield the same ``get_phenotype``
    value (phenotypes only) but different ``get_phenotype_key`` values
    (phenotypes merged with lookupkeys).
    """
    same_phenotype = get_phenotype(stale_guideline) == \
        get_phenotype(updated_guideline)
    lookups_changed = get_phenotype_key(stale_guideline) != \
        get_phenotype_key(updated_guideline)
    return same_phenotype and lookups_changed

def new_genes_are_non_results(stale_guideline, updated_guideline):
stale_phenotype = get_phenotype_key(stale_guideline)
updated_phenotype = get_phenotype_key(updated_guideline)
if stale_phenotype in updated_phenotype:
Expand All @@ -244,11 +252,11 @@ def should_transfer_guideline(stale_guideline, updated_guideline):
))
return len(new_genes) == len(non_results)

def get_annotation_transfer_text(stale_guideline, updated_guideline):
def get_annotation_transfer_text(stale_guideline, updated_guideline, reason):
stale_phenotype = get_phenotype_key(stale_guideline)
updated_phenotype = get_phenotype_key(updated_guideline)
update_text = f'Transferred annotations from {stale_phenotype} to ' \
f'{updated_phenotype}'
f'{updated_phenotype} because of {reason}'
external_data_changed = len(stale_guideline['externalData']) != \
len(updated_guideline['externalData'])
if not external_data_changed:
Expand All @@ -268,16 +276,30 @@ def get_annotation_transfer_text(stale_guideline, updated_guideline):
return log_item(update_text, level=1)

# Changes updated_guidelines in-place
def transfer_annotations(guidelines, updated_guidelines):
    """Carry annotations over from stale guidelines to their successors.

    Every stale guideline (as returned by ``get_stale_guidelines``) is
    compared with every updated guideline. Annotations are transferred when
    ``new_genes_are_non_results`` holds for the pair or, failing that, when
    ``lookups_for_phenotype_changed`` holds.

    Args:
        guidelines: the current guidelines.
        updated_guidelines: the new guidelines; matching entries get their
            ``'annotations'`` overwritten in place.

    Returns:
        A list with one log entry (from ``get_annotation_transfer_text``)
        per transfer.
    """
    update_log = []
    stale_guidelines = get_stale_guidelines(guidelines, updated_guidelines)
    for stale_guideline in stale_guidelines:
        for updated_guideline in updated_guidelines:
            # The lookupkey check only runs when the gene check did not
            # already match, so at most one reason applies per pair.
            if new_genes_are_non_results(stale_guideline, updated_guideline):
                reason = 'added genes'
            elif lookups_for_phenotype_changed(
                    stale_guideline, updated_guideline):
                reason = 'changed lookupkey'
            else:
                continue
            # NOTE: the annotations object is shared with the stale
            # guideline, not copied.
            updated_guideline['annotations'] = \
                stale_guideline['annotations']
            update_text = get_annotation_transfer_text(
                stale_guideline, updated_guideline, reason)
            update_log.append(update_text)
    return update_log

Expand Down Expand Up @@ -323,7 +345,7 @@ def update_drugs(data, updated_external_data):
data, current_drug['guidelines'])
updated_guidelines = get_guidelines_by_ids(
updated_external_data, updated_drug['guidelines'])
drug_updates += transfer_annotations_for_added_phenotypes(
drug_updates += transfer_annotations(
current_guidelines, updated_guidelines
)
drug_updates += remove_outdated_guidelines(
Expand Down

0 comments on commit c246218

Please sign in to comment.