From 3126ce77ec51430cfba827c3de65785f846695ef Mon Sep 17 00:00:00 2001 From: Tamara Slosarek Date: Mon, 5 Aug 2024 18:47:08 +0200 Subject: [PATCH] feat(anni): split up activity scores again --- .../src/database/helpers/contraction-utils.ts | 104 ++++++++++++++++++ .../src/database/helpers/cpic-constructors.ts | 101 ++--------------- anni/src/pages/api/init-external.ts | 6 +- 3 files changed, 116 insertions(+), 95 deletions(-) create mode 100644 anni/src/database/helpers/contraction-utils.ts diff --git a/anni/src/database/helpers/contraction-utils.ts b/anni/src/database/helpers/contraction-utils.ts new file mode 100644 index 00000000..6f6644e5 --- /dev/null +++ b/anni/src/database/helpers/contraction-utils.ts @@ -0,0 +1,104 @@ +import { DrugWithGuidelines } from './cpic-constructors'; +import { IGuideline_Any, IExternalData } from '../models/Guideline'; + +function externalDataInformationKey(externalData: IExternalData): string { + return [ + externalData.comments ?? '', + externalData.recommendation, + ...Object.entries(externalData.implications).map( + ([gene, implication]) => `${gene}${implication}`, + ), + ].join(''); +} + +// used to merge guidelines with equal information (e.g. when the same +// guideline is used for multiple phenotypes) +function phenotypeInformationKey(guideline: IGuideline_Any): string { + return [ + ...Object.keys(guideline.phenotypes).map( + (gene) => `${gene}${guideline.phenotypes[gene]}`, + ), + ...guideline.externalData + .map((externalData) => externalDataInformationKey(externalData)) + .sort((a, b) => (a > b ? 1 : -1)), + ].join(''); +} + +// merge same-information guidelines per phenotype if information matches (e.g., +// different activity scores with same guideline) +// never merge FDA guidelines +function contractByPhenotypeAndInformation( + drugsWithGuidelines: Array, + source: string, +): Array { + return drugsWithGuidelines.map(({ drug, guidelines }) => { + // Do not concat FDA guidelines + if (source == 'FDA') return { drug, guidelines }; + const phenotypeInformationMap = new Map(); + guidelines.forEach((guideline) => { + const key = phenotypeInformationKey(guideline); + if (phenotypeInformationMap.has(key)) { + const existingGuideline = phenotypeInformationMap.get(key)!; + // ensure that we don't miss information when + // getting only first index from externalData + const oneLookupPresent = Object.values( + guideline.lookupkey, + ).every((value) => value.length == 1); + if (!oneLookupPresent) { + throw Error('Expected only one lookup entry (per gene)'); + } + Object.keys(existingGuideline.lookupkey).forEach((gene) => { + existingGuideline.lookupkey[gene].push( + guideline.lookupkey[gene][0], + ); + }); + } else { + phenotypeInformationMap.set(key, guideline); + } + }); + return { + drug, + guidelines: Array.from(phenotypeInformationMap.values()), + }; + }); +} + +// merge same-lookupkey guidelines (per activity score, if present; e.g. when a +// drug-lookupkey-pair has multiple different guidelines because the drug is +// used for different applications such as clopidogrel or has a pediatric +// guideline) +function contractByLookupkey( + drugsWithGuidelines: Array, +): Array { + return drugsWithGuidelines.map(({ drug, guidelines }) => { + const lookupMap = new Map(); + guidelines.forEach((guideline) => { + const key = Object.keys(guideline.lookupkey) + .map((gene) => `${gene}${guideline.lookupkey[gene]}`) + .join(''); + if (lookupMap.has(key)) { + const existingGuideline = lookupMap.get(key)!; + // ensure that we don't miss information when + // getting only first index from externalData + const oneGuidelinePresent = guideline.externalData.length == 1; + if (!oneGuidelinePresent) { + throw Error('Expected only one externalData entry'); + } + existingGuideline.externalData.push(guideline.externalData[0]); + } else { + lookupMap.set(key, guideline); + } + }); + return { drug, guidelines: Array.from(lookupMap.values()) }; + }); +} + +export function contractGuidelines( + drugsWithGuidelines: Array, + source: string, +): Array { + return contractByPhenotypeAndInformation( + contractByLookupkey(drugsWithGuidelines), + source, + ); +} diff --git a/anni/src/database/helpers/cpic-constructors.ts b/anni/src/database/helpers/cpic-constructors.ts index 58c7e2dd..fe85ff77 100644 --- a/anni/src/database/helpers/cpic-constructors.ts +++ b/anni/src/database/helpers/cpic-constructors.ts @@ -1,6 +1,12 @@ +import { contractGuidelines } from './contraction-utils'; import { CpicRecommendation } from '../../common/cpic-api'; import { IDrug_Any } from '../models/Drug'; -import { IGuideline_Any, IExternalData } from '../models/Guideline'; +import { IGuideline_Any } from '../models/Guideline'; + +export interface DrugWithGuidelines { + drug: IDrug_Any; + guidelines: Array; +} function guidelineFromRecommendation( recommendation: CpicRecommendation, @@ -41,29 +47,6 @@ function guidelineFromRecommendation( }; } -// used to merge guidelines with equal information (e.g. when the same -// guideline is used for multiple phenotypes) -function informationKey(externalData: IExternalData): string { - return [ - externalData.comments ?? '', - externalData.recommendation, - ...Object.entries(externalData.implications).map( - ([gene, implication]) => `${gene}${implication}`, - ), - ].join(''); -} - -// used to merge guidelines with same lookupkeys/phenotypes (e.g. when a -// drug-phenotype-pair has multiple different guideline because the drug is -// used for different applications such as clopidogrel or when multiple -// lookupkeys match to the same phenotye, as it often is the case for -// activity scores) -function phenotypeKey(guideline: IGuideline_Any): string { - return Object.keys(guideline.phenotypes) - .map((gene) => `${gene}${guideline.phenotypes[gene]}`) - .join(''); -} - function drugFromRecommendation(recommendation: CpicRecommendation): IDrug_Any { return { name: recommendation.drug.name, @@ -77,71 +60,7 @@ function drugFromRecommendation(recommendation: CpicRecommendation): IDrug_Any { }; } -// initially (before contracting) guideline.externalData and values -// (phenotype descriptions) of guideline.lookupkey should be of length -// 1, as we set it this way ourselves in guidelineFromRecommendation -function ensureInitialGuidelineStructure(guideline: IGuideline_Any) { - if ( - guideline.externalData.length != 1 || - !Object.values(guideline.lookupkey).every((value) => value.length == 1) - ) { - throw Error('Expected different initial guideline data structure'); - } -} - -// merge same-phenotype guidelines -function contractByPhenotype( - drugsWithGuidelines: Array, -): Array { - return drugsWithGuidelines.map(({ drug, guidelines }) => { - const phenotypeMap = new Map(); - guidelines.forEach((guideline) => { - const key = phenotypeKey(guideline); - const existingGuideline = phenotypeMap.get(key); - if (existingGuideline) { - // ensure that we don't miss information when - // getting only first index from externalData - // and lookupkey[gene] - ensureInitialGuidelineStructure(guideline); - existingGuideline.externalData.push(guideline.externalData[0]); - Object.keys(existingGuideline.lookupkey).forEach((gene) => { - existingGuideline.lookupkey[gene].push( - guideline.lookupkey[gene][0], - ); - }); - } else { - phenotypeMap.set(key, guideline); - } - }); - return { drug, guidelines: Array.from(phenotypeMap.values()) }; - }); -} - -// merge same-information guidelines -function contractByInformation( - drugsWithGuidelines: Array, -): Array { - return drugsWithGuidelines.map(({ drug, guidelines }) => { - guidelines.forEach((guideline) => { - const informationMap = new Map(); - guideline.externalData.forEach((externalData) => { - const key = informationKey(externalData); - if (!informationMap.has(key)) { - informationMap.set(key, externalData); - } - }); - guideline.externalData = Array.from(informationMap.values()); - }); - return { drug, guidelines }; - }); -} - -export interface DrugWithGuidelines { - drug: IDrug_Any; - guidelines: Array; -} - -export function getDrugsWithContractedGuidelines( +export function getDrugsWithGuidelines( recommendations: Array, source: string, ): Array { @@ -159,9 +78,7 @@ export function getDrugsWithContractedGuidelines( } }); - return contractByInformation( - contractByPhenotype(Array.from(drugIdMap.values())), - ); + return contractGuidelines(Array.from(drugIdMap.values()), source); } export function getAdditionalDrugs( diff --git a/anni/src/pages/api/init-external.ts b/anni/src/pages/api/init-external.ts index df84aabb..03d9ab1c 100644 --- a/anni/src/pages/api/init-external.ts +++ b/anni/src/pages/api/init-external.ts @@ -12,7 +12,7 @@ import dbConnect from '../../database/helpers/connect'; import { DrugWithGuidelines, getAdditionalDrugs, - getDrugsWithContractedGuidelines, + getDrugsWithGuidelines, } from '../../database/helpers/cpic-constructors'; import Drug from '../../database/models/Drug'; import Guideline from '../../database/models/Guideline'; @@ -25,7 +25,7 @@ const getCpicData = async (): Promise => { }, ); const recommendations = response.data; - return getDrugsWithContractedGuidelines(recommendations, 'CPIC'); + return getDrugsWithGuidelines(recommendations, 'CPIC'); }; type GHContentResponse = { @@ -57,7 +57,7 @@ const getAdditionalData = async (): Promise => { if (source == 'additional_drugs') { return getAdditionalDrugs(response.data); } - return getDrugsWithContractedGuidelines(response.data, source); + return getDrugsWithGuidelines(response.data, source); }), ); };