From 3f0027f29516e94d6ec7b6c6b0bfe04027f5369c Mon Sep 17 00:00:00 2001 From: Florian Borchert Date: Thu, 22 Dec 2022 12:54:06 +0100 Subject: [PATCH 1/2] Some other ideas of LFs --- WeakSupervision.ipynb | 3627 ++++++++++++++++++++++++++++++++++++++--- proteins.txt | 36 + 2 files changed, 3403 insertions(+), 260 deletions(-) create mode 100644 proteins.txt diff --git a/WeakSupervision.ipynb b/WeakSupervision.ipynb index c8ecc75..912fb3e 100644 --- a/WeakSupervision.ipynb +++ b/WeakSupervision.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 1, "id": "bb5a326a-91b3-4057-af63-82389654e86c", "metadata": {}, "outputs": [], @@ -82,7 +82,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -273,6 +273,79 @@ { "cell_type": "code", "execution_count": 12, + "id": "d9454936-02a2-4d19-900f-b06e0d183c6f", + "metadata": {}, + "outputs": [], + "source": [ + "entrez_df = pd.read_csv('Homo_sapiens.gene_info', sep='\\t')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "96921b47-84ab-4f09-8864-4e33059bcde0", + "metadata": {}, + "outputs": [], + "source": [ + "symbols = set()\n", + "# Get all synonyms for CIViC genes, remove short ones and German stopwords\n", + "for _, r in entrez_df.set_index('GeneID').loc[df.entrez_id].iterrows():\n", + " symbols.add(r.Symbol)\n", + " for s in r.Synonyms.split('|'):\n", + " if not s in ['R1', 'R2', 'eN', 'HNPCC'] and len(s) > 1 and not s.lower() in stops:\n", + " symbols.add(s.lower())" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "8cb9c469-a8b9-47b0-8f8d-ab7581a40044", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████████████████████| 2028/2028 [00:00<00:00, 12322.11it/s]\n" + ] + } + ], + "source": [ + "from spacy.matcher import Matcher\n", + "\n", + "entrez_matcher = Matcher(nlp.vocab)\n", + "pattern = 
[]\n", + "for s in nlp.pipe(tqdm(symbols), disable=[\"ner\", \"tok2vec\"]):\n", + " for pos in ['NOUN', 'PROPN', 'X']: # Consider only if first POS is one of these\n", + " p = [{'LOWER' : spl.text.lower() } for spl in s]\n", + " p[0]['POS'] = pos\n", + " pattern.append(p)\n", + " p2 = p + [{'LOWER' : '-'}, {'LOWER' : 'gen'}] #also consider if followed by -Gen\n", + " pattern.append(p2)\n", + "entrez_matcher.add(\"entrez\", pattern)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "ce0939e0-0515-4921-af01-3ede88f7a043", + "metadata": {}, + "outputs": [], + "source": [ + "def lf_entrez(doc):\n", + " matches = entrez_matcher(doc)\n", + " if matches:\n", + " # Keep longest matches only\n", + " spans = [doc[start:end] for _, start, end in matches]\n", + " spans = spacy.util.filter_spans(spans)\n", + " for s in spans:\n", + " yield s.start, s.end, 'Gene or Protein'\n", + "entrez = heuristics.FunctionAnnotator(\"entrez\", lf_entrez) " + ] + }, + { + "cell_type": "code", + "execution_count": 16, "id": "3b2354cf", "metadata": {}, "outputs": [ @@ -309,7 +382,91 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 17, + "id": "ee7c77c8-907d-4078-aea1-b0194e20e8cf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'protein_gazetteer': [PD-L1]}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from skweak.gazetteers import Trie, GazetteerAnnotator\n", + "\n", + "terms = [t.strip() for t in open('proteins.txt', 'r').readlines()]\n", + "\n", + "trie = Trie()\n", + "for term in terms:\n", + " trie.add([t.text for t in nlp(term)])\n", + "\n", + "protein_gazetteer = GazetteerAnnotator('protein_gazetteer', tries = {'Gene or Protein' : trie })\n", + "\n", + "doc = nlp(\"PD-L1\")\n", + "protein_gazetteer(doc)\n", + "doc.spans" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "53b47dda-c62d-4d51-bbb3-ead523313686", + "metadata": {}, + 
"outputs": [], + "source": [ + "protein_matcher = Matcher(nlp.vocab)\n", + "patterns = []\n", + "\n", + "for suffix in ['[A-Z]*[Kk]inase[n]?$', '[A-Z]+[rR]ezeptor(en|s)?$', '^(RAS|ras)$']:\n", + " p = [{'TEXT' : { 'REGEX' : suffix}}]\n", + " patterns.append(p)\n", + " for _ in range(0, 3): # Consider also combinations like Rezepter-Tyrosinkinasen\n", + " p = [{'IS_ALPHA' : True}, {'lower' : '-'}] + p\n", + " patterns.append(p)\n", + "protein_matcher.add('protein', patterns[-1::-1])\n", + "\n", + "def lf_protein_families(doc):\n", + " matches = protein_matcher(doc)\n", + " if matches:\n", + " # Keep longest matches only\n", + " spans = [doc[start:end] for _, start, end in matches]\n", + " spans = spacy.util.filter_spans(spans)\n", + " for s in spans:\n", + " yield s.start, s.end, 'Gene or Protein'\n", + "\n", + "protein_families = heuristics.FunctionAnnotator(\"protein_families\", lf_protein_families) " + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "3bc24870-761e-4b31-8792-49ff70dedeab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(0, 1, 'Gene or Protein'), (1, 4, 'Gene or Protein')]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(lf_protein_families(nlp(\"RAS k-RAS krass\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, "id": "e6813e7a-8d36-4da2-9749-9c9b6ffc6d0f", "metadata": {}, "outputs": [], @@ -331,7 +488,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 21, "id": "faecaa97", "metadata": {}, "outputs": [], @@ -351,7 +508,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 22, "id": "60fe7a41", "metadata": {}, "outputs": [], @@ -376,7 +533,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 23, "id": "653ed53d", "metadata": {}, "outputs": [], @@ -398,7 +555,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 24, "id": 
"c4aed24b-34e9-48b9-b4fa-927ceb9172a8", "metadata": {}, "outputs": [ @@ -430,7 +587,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 25, "id": "d9bc3aae-f3c3-4d76-93f1-ea2b44d8e9f9", "metadata": {}, "outputs": [ @@ -438,18 +595,18 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 85996/85996 [14:58<00:00, 95.70it/s]\n" + "100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 85996/85996 [20:17<00:00, 70.62it/s]\n" ] } ], "source": [ - "lfs = [construct, cue_civic, omim, cue_cosmic_census]\n", + "lfs = [construct, cue_civic, omim, cue_cosmic_census, entrez, protein_gazetteer, protein_families]\n", "\n", "#For Quick Run with Random Sentences!\n", "#random_files = files_df.sample(n = 10000)\n", "all_docs = []\n", "\n", - "for sentence_idx, doc in zip(tqdm(list(sentence_df.reset_index().iterrows())), nlp.pipe(sentence_df.text, batch_size=32, disable=[\"ner\"])):\n", + "for sentence_idx, doc in zip(tqdm(list(sentence_df.reset_index().iterrows())), nlp.pipe(sentence_df.text, disable=[\"ner\"])):\n", " i, row = sentence_idx\n", " for lf in lfs:\n", " doc = lf(doc)\n", @@ -458,37 +615,15 @@ }, { "cell_type": "markdown", - "id": "6ef09bac-468f-4b72-bad7-13c8878c55ed", + "id": "f6954ced-b4be-49cb-8416-bf95ce225033", "metadata": {}, "source": [ "Remove files which have been manually annotated from the training dataset" ] }, - { - "cell_type": "markdown", - "id": "447db4d7-1452-41ed-b0e8-802d8a7d8364", - "metadata": { - "tags": [] - }, - "source": [ - "## Training Set Evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "628aeab7-901d-42a8-ade1-80e2d8bb5df5", - "metadata": {}, - "outputs": [], - "source": [ - "# TODO: Turn into new LF\n", - "#for t in nlp(\"Wir behandeln die Mutation des mit-Gens mit Chemotherapie.\"):\n", - "# print(t, t.pos_)" - ] - }, { 
"cell_type": "code", - "execution_count": 40, + "execution_count": 26, "id": "34c1e291-67ce-449e-a2dc-4aaa37b16d24", "metadata": {}, "outputs": [ @@ -498,7 +633,7 @@ "2000" ] }, - "execution_count": 40, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -511,7 +646,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 27, "id": "9022b1d4-4b52-4a80-8c26-37a9fc6af7cb", "metadata": {}, "outputs": [ @@ -521,7 +656,7 @@ "(83624, 83624)" ] }, - "execution_count": 41, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -533,9 +668,17 @@ "len(docs), len(filtered_sentence_df)" ] }, + { + "cell_type": "markdown", + "id": "c9567f81-8883-404e-8156-63f625b99612", + "metadata": {}, + "source": [ + "## Training Set Evaluation" + ] + }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 28, "id": "6dbc1515-9a22-4c9a-8437-7c85bda222dd", "metadata": {}, "outputs": [ @@ -560,38 +703,51 @@ " \n", " \n", " \n", - " construct\n", - " cue_civic\n", " cue_cosmic_census\n", + " cue_civic\n", + " construct\n", " omim\n", + " entrez\n", + " protein_families\n", + " protein_gazetteer\n", " \n", " \n", " \n", " \n", " Coverage\n", - " 0.498998\n", - " 0.286573\n", - " 0.304609\n", - " 0.469739\n", + " 0.223103\n", + " 0.209893\n", + " 0.365478\n", + " 0.344048\n", + " 0.405695\n", + " 0.017907\n", + " 0.136651\n", " \n", " \n", " Overlaps\n", - " 0.236546\n", - " 0.949650\n", " 0.929605\n", - " 0.453925\n", + " 0.980420\n", + " 0.381526\n", + " 0.499573\n", + " 0.685962\n", + " 0.368852\n", + " 0.699248\n", " \n", " \n", "\n", "" ], "text/plain": [ - " construct cue_civic cue_cosmic_census omim\n", - "Coverage 0.498998 0.286573 0.304609 0.469739\n", - "Overlaps 0.236546 0.949650 0.929605 0.453925" + " cue_cosmic_census cue_civic construct omim entrez \\\n", + "Coverage 0.223103 0.209893 0.365478 0.344048 0.405695 \n", + "Overlaps 0.929605 0.980420 0.381526 0.499573 0.685962 \n", + "\n", + " 
protein_families protein_gazetteer \n", + "Coverage 0.017907 0.136651 \n", + "Overlaps 0.368852 0.699248 " ] }, - "execution_count": 43, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -615,7 +771,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 29, "id": "cf2dd2ea-50b3-4a17-8905-30545b57a78b", "metadata": {}, "outputs": [ @@ -627,7 +783,8 @@ "Number of processed documents: 1000\n", "Number of processed documents: 2000\n", "Number of processed documents: 3000\n", - "Finished E-step with 3900 documents\n", + "Number of processed documents: 4000\n", + "Finished E-step with 4624 documents\n", "Starting iteration 2\n" ] }, @@ -635,7 +792,7 @@ "name": "stderr", "output_type": "stream", "text": [ - " 1 -23893.4823 +nan\n" + " 1 -33073.4892 +nan\n" ] }, { @@ -645,7 +802,8 @@ "Number of processed documents: 1000\n", "Number of processed documents: 2000\n", "Number of processed documents: 3000\n", - "Finished E-step with 3900 documents\n", + "Number of processed documents: 4000\n", + "Finished E-step with 4624 documents\n", "Starting iteration 3\n" ] }, @@ -653,7 +811,7 @@ "name": "stderr", "output_type": "stream", "text": [ - " 2 -23327.7100 +565.7723\n" + " 2 -32400.1132 +673.3760\n" ] }, { @@ -663,7 +821,8 @@ "Number of processed documents: 1000\n", "Number of processed documents: 2000\n", "Number of processed documents: 3000\n", - "Finished E-step with 3900 documents\n", + "Number of processed documents: 4000\n", + "Finished E-step with 4624 documents\n", "Starting iteration 4\n" ] }, @@ -671,7 +830,7 @@ "name": "stderr", "output_type": "stream", "text": [ - " 3 -23319.1366 +8.5734\n" + " 3 -32386.9774 +13.1358\n" ] }, { @@ -681,14 +840,15 @@ "Number of processed documents: 1000\n", "Number of processed documents: 2000\n", "Number of processed documents: 3000\n", - "Finished E-step with 3900 documents\n" + "Number of processed documents: 4000\n", + "Finished E-step with 4624 documents\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ - " 4 -23317.6761 +1.4605\n" + " 4 -32383.9291 +3.0482\n" ] } ], @@ -702,7 +862,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 30, "id": "00c04eb3-5190-4b49-80a1-457e98e1fa70", "metadata": {}, "outputs": [], @@ -722,7 +882,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 31, "id": "d07ab611-b36a-4eb4-beb4-0002fda81327", "metadata": {}, "outputs": [], @@ -741,110 +901,2765 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 32, "id": "19236a84-2a05-4d61-aefd-789e804bd7da", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(83624, 31299, 3900)" + "(83624, 35501, 4624)" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(docs), len(filtered_docs), len(gene_docs)" + ] + }, + { + "cell_type": "markdown", + "id": "6e00ac4b-8333-44bf-9274-ab6734810bbb", + "metadata": { + "tags": [] + }, + "source": [ + "# Labeling Function Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "595183ba-63b3-4ca7-8870-2647745676d4", + "metadata": {}, + "outputs": [], + "source": [ + "gold_docs_dev = list(DocBin().from_disk('data/molecular/gold_dev.spacy').get_docs(nlp.vocab))" + ] + }, + { + "cell_type": "markdown", + "id": "8764f34a", + "metadata": {}, + "source": [ + "Our labeling functions must also be deployed onto the gold standard data to evaluate strong supervision against weak supervision." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "97cce16c-4d79-4dc8-83dd-c54f60795f67", + "metadata": {}, + "outputs": [], + "source": [ + "def apply_hmm(gold_docs):\n", + " for g in tqdm(gold_docs):\n", + " if 'Gene or Protein' in g.spans:\n", + " del g.spans['Gene or Protein']\n", + " for lf in lfs:\n", + " g = lf(g)\n", + " g = hmm(g)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "6c5ac964-a203-4c0e-a291-9ab837c7a18f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:15<00:00, 64.68it/s]\n" + ] + } + ], + "source": [ + "apply_hmm(gold_docs_dev)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "eb6e8ab3-ae42-4546-ac7b-f61df37784e5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Zu den entscheidenden Sekundärneoplasien zählen hämatologische Neoplasien wie die akute myeloische Leukämie (AML), die myelodysplastischen Syndrome (\n", + "\n", + " MDS\n", + " Gene or Protein\n", + "\n", + "), das Non-Hodgkin Lymphom (NHL) und solide Tumore wie das Bronchialkarzinom, das Mammakarzinom und das Kolonkarzinom [REF] [REF] [REF] [REF] [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Zu den entscheidenden Sekundärneoplasien zählen hämatologische Neoplasien wie die akute myeloische Leukämie (AML), die myelodysplastischen Syndrome (MDS), das Non-Hodgkin Lymphom (NHL) und solide Tumore wie das Bronchialkarzinom, das Mammakarzinom und das Kolonkarzinom [REF] [REF] [REF] [REF] [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
In der Metaanalyse von Glas et al wurde anhand der Daten von 1.160 Probanden eine Sensitivität für den BTA Stat von 70% (95% CI: 66–74%) und eine Spezifität von 75% (95% CI: 64–84%) ermittelt, für den BTA \n", + "\n", + " TRAK\n", + " Gene or Protein\n", + "\n", + " von 66% (95% CI: 62–71%) bzw. 65% (95% CI: 45–81%) [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
In der Metaanalyse von Glas et al wurde anhand der Daten von 1.160 Probanden eine Sensitivität für den BTA Stat von 70% (95% CI: 66–74%) und eine Spezifität von 75% (95% CI: 64–84%) ermittelt, für den BTA TRAK von 66% (95% CI: 62–71%) bzw. 65% (95% CI: 45–81%) [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Das für die \n", + "\n", + " FAP\n", + " Gene or Protein\n", + "\n", + " zugelassene Präparat mit dem Wirkstoff Celecoxib wurde im April 2011 vom Hersteller aufgrund mangelnder Rekrutierung einer von der europäischen Arzneimittelbehörde (EMA) geforderten Post-Zulassungsstudie vom Markt genommen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Das für die FAP zugelassene Präparat mit dem Wirkstoff Celecoxib wurde im April 2011 vom Hersteller aufgrund mangelnder Rekrutierung einer von der europäischen Arzneimittelbehörde (EMA) geforderten Post-Zulassungsstudie vom Markt genommen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Ovarialtumoren bei \n", + "\n", + " PJS\n", + " Gene or Protein\n", + "\n", + " sind in der Regel SCTAT und nicht-epithelialen Ursprunges und werden zum Teil bereits auch bei kleinen Mädchen diagnostiziert (mittleres Alter 28 Jahre, 4-57 Jahre).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Ovarialtumoren bei PJS sind in der Regel SCTAT und nicht-epithelialen Ursprunges und werden zum Teil bereits auch bei kleinen Mädchen diagnostiziert (mittleres Alter 28 Jahre, 4-57 Jahre).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Bemerkenswert ist, dass Patientinnen mit Keimbahnmutationen des \n", + "\n", + " BRCA1\n", + " Gene or Protein\n", + "\n", + "-Gens gehäuft Karzinome mit medullären Eigenschaften aufweisen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Bemerkenswert ist, dass Patientinnen mit Keimbahnmutationen des \n", + "\n", + " BRCA1-Gens\n", + " Gene or Protein\n", + "\n", + " gehäuft Karzinome mit medullären Eigenschaften aufweisen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Nach den Empfehlungen der S3/\n", + "\n", + " NVL\n", + " Gene or Protein\n", + "\n", + " Unipolare Depression ist erst am Ende dieser Erhaltungstherapiephase eine schrittweise Dosisreduktion sinnvoll.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Nach den Empfehlungen der S3/NVL Unipolare Depression ist erst am Ende dieser Erhaltungstherapiephase eine schrittweise Dosisreduktion sinnvoll.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Der Nachweis einer \n", + "\n", + " HER2\n", + " Gene or Protein\n", + "\n", + "-neu Amplifikation/Überexpression hat zum gegenwärtigen Zeitpunkt keinen gesicherten Stellenwert in der Wahl der Erstlinientherapie [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Der Nachweis einer \n", + "\n", + " HER2-neu\n", + " Gene or Protein\n", + "\n", + " Amplifikation/Überexpression hat zum gegenwärtigen Zeitpunkt keinen gesicherten Stellenwert in der Wahl der Erstlinientherapie [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
In einer großen prospektiven Studie wurde gezeigt, dass von den vier Komponenten des UroVision-Testes der prädiktive Wert des Verlustes von \n", + "\n", + " 9p21\n", + " Gene or Protein\n", + "\n", + " am geringsten ist [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
In einer großen prospektiven Studie wurde gezeigt, dass von den vier Komponenten des UroVision-Testes der prädiktive Wert des Verlustes von 9p21 am geringsten ist [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Der selektive COX2-Hemmer Celecoxib, der zu einer Reduktion rektaler Adenome führt [REF], wurde zur Chemoprävention bei \n", + "\n", + " FAP\n", + " Gene or Protein\n", + "\n", + " als Ergänzung zu chirurgischen Maßnahmen und weiteren endoskopischen Kontrollen zugelassen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Der selektive \n", + "\n", + " COX2\n", + " Gene or Protein\n", + "\n", + "-Hemmer Celecoxib, der zu einer Reduktion rektaler Adenome führt [REF], wurde zur Chemoprävention bei FAP als Ergänzung zu chirurgischen Maßnahmen und weiteren endoskopischen Kontrollen zugelassen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Geprüft wurde v.a. die Frage nach dem Effekt hinsichtlich des Endpunktes/der Häufigkeit einer Erkrankung (i.e. kumulative Inzidenz einer \n", + "\n", + " CIN3\n", + " Gene or Protein\n", + "\n", + " oder eines invasiven Zervixkarzinoms).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Geprüft wurde v.a. die Frage nach dem Effekt hinsichtlich des Endpunktes/der Häufigkeit einer Erkrankung (i.e. kumulative Inzidenz einer CIN3 oder eines invasiven Zervixkarzinoms).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
In der NASABP-Studie C-08 wurde das modifizierte \n", + "\n", + " FOLFOX6\n", + " Gene or Protein\n", + "\n", + "-Schema (12 Zyklen alle 2 Wochen) mit \n", + "\n", + " FOLFOX6\n", + " Gene or Protein\n", + "\n", + " + Bevacizumab verglichen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
In der NASABP-Studie C-08 wurde das modifizierte FOLFOX6-Schema (12 Zyklen alle 2 Wochen) mit FOLFOX6 + Bevacizumab verglichen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
In der NASABP-Studie C-08 wurde das modifizierte \n", + "\n", + " FOLFOX6\n", + " Gene or Protein\n", + "\n", + "-Schema (12 Zyklen alle 2 Wochen) mit \n", + "\n", + " FOLFOX6\n", + " Gene or Protein\n", + "\n", + " + Bevacizumab verglichen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
In der NASABP-Studie C-08 wurde das modifizierte FOLFOX6-Schema (12 Zyklen alle 2 Wochen) mit FOLFOX6 + Bevacizumab verglichen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
In der dreiarmigen \n", + "\n", + " CONTRALTO\n", + " Gene or Protein\n", + "\n", + " Studie (randomisierte Phase-II) wurden Patienten mit rezidiviertem follikulärem Lymphom entweder mit einer Standardtherapie mit Rituximab und Bendamustin, mit Rituximab und Venetoclax oder mit der Kombination aus allen drei Komponenten behandelt.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
In der dreiarmigen CONTRALTO Studie (randomisierte Phase-II) wurden Patienten mit rezidiviertem follikulärem Lymphom entweder mit einer Standardtherapie mit Rituximab und Bendamustin, mit Rituximab und Venetoclax oder mit der Kombination aus allen drei Komponenten behandelt.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Vor dem Hintergrund einer primären Imatinib-Resistenz bei Patienten mit primären \n", + "\n", + " KIT\n", + " Gene or Protein\n", + "\n", + "-Exon 17-Mutationen sowie bestimmten Mutationen des \n", + "\n", + " PDGFRA\n", + " Gene or Protein\n", + "\n", + "-Gens soll vor Einleitung einer Therapie das Ergebnis der Bestimmung des Genotyps vorliegen, um eine ineffiziente Behandlung zu vermeiden [REF], [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Vor dem Hintergrund einer primären Imatinib-Resistenz bei Patienten mit primären KIT-Exon 17-Mutationen sowie bestimmten Mutationen des \n", + "\n", + " PDGFRA\n", + " Gene or Protein\n", + "\n", + "-Gens soll vor Einleitung einer Therapie das Ergebnis der Bestimmung des Genotyps vorliegen, um eine ineffiziente Behandlung zu vermeiden [REF], [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
In Deutschland ist das Mammographie-Screening für Frauen ab dem Alter von 50 Jahren bis zum Ende des 70. Lebensjahres Bestandteil der Richtlinie des gemeinsamen Bundesausschusses über die Früherkennung von Krebserkrankungen (Quelle: Richtlinie des Gemeinsamen Bundesausschusses über die Früherkennung von Krebserkrankungen (Krebsfrüherkennungs-Richtlinie / KFE-RL) in der Fassung vom 18. Juni 2009 veröf-fentlicht im Bundesanzeiger 2009, Nr. 148a in Kraft getreten am 3. Oktober 2009 zu-letzt geändert am 21. April 2016, veröffentlicht im Bundesanzeiger \n", + "\n", + " AT\n", + " Gene or Protein\n", + "\n", + " 08.07.2016 B2, in Kraft getreten am 1. Januar 2017 [URL]), da für dieses Kollektiv durch regelmäßige Teilnahme am Mammographie-Screening eine Reduktion der Mortalität gegenüber Nicht-Teilnehmerinnen zu erwarten ist.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
In Deutschland ist das Mammographie-Screening für Frauen ab dem Alter von 50 Jahren bis zum Ende des 70. Lebensjahres Bestandteil der Richtlinie des gemeinsamen Bundesausschusses über die Früherkennung von Krebserkrankungen (Quelle: Richtlinie des Gemeinsamen Bundesausschusses über die Früherkennung von Krebserkrankungen (Krebsfrüherkennungs-Richtlinie / KFE-RL) in der Fassung vom 18. Juni 2009 veröf-fentlicht im Bundesanzeiger 2009, Nr. 148a in Kraft getreten am 3. Oktober 2009 zu-letzt geändert am 21. April 2016, veröffentlicht im Bundesanzeiger AT 08.07.2016 B2, in Kraft getreten am 1. Januar 2017 [URL]), da für dieses Kollektiv durch regelmäßige Teilnahme am Mammographie-Screening eine Reduktion der Mortalität gegenüber Nicht-Teilnehmerinnen zu erwarten ist.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Wesentliche, meist früh auftretende molekulare Veränderungen erfassen die Gene \n", + "\n", + " PTEN\n", + " Gene or Protein\n", + "\n", + ", \n", + "\n", + " K-\n", + " Gene or Protein\n", + "\n", + "RAS und ß-catenin sowie das Mismatch-Reparatur-System [REF], während \n", + "\n", + " TP53\n", + " Gene or Protein\n", + "\n", + "-Mutationen erst im Zuge der Karzinomprogression vorkommen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Wesentliche, meist früh auftretende molekulare Veränderungen erfassen die Gene \n", + "\n", + " PTEN\n", + " Gene or Protein\n", + "\n", + ", \n", + "\n", + " K-RAS\n", + " Gene or Protein\n", + "\n", + " und ß-catenin sowie das Mismatch-Reparatur-System [REF], während \n", + "\n", + " TP53\n", + " Gene or Protein\n", + "\n", + "-Mutationen erst im Zuge der Karzinomprogression vorkommen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Im Rahmen einer gemeinsamen Auswertung der beiden Studien (\n", + "\n", + " META\n", + " Gene or Protein\n", + "\n", + "-GIST-Analyse) zeigte sich ein medianes progressionsfreies Überleben von 1,6 bis 2,0 Jahren (p= 0,04) sowie ein medianes Überleben in beiden Dosisarmen von 4 Jahren.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Im Rahmen einer gemeinsamen Auswertung der beiden Studien (META-GIST-Analyse) zeigte sich ein medianes progressionsfreies Überleben von 1,6 bis 2,0 Jahren (p= 0,04) sowie ein medianes Überleben in beiden Dosisarmen von 4 Jahren.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
In einer randomisierten Multicenterstudie wurde der Nutzen des gegen den \n", + "\n", + " EGF\n", + " Gene or Protein\n", + "\n", + " Rezeptor gerichteten monoklonalen Antikörpers Cetuximab in Kombination mit einer radikalen Strahlentherapie bei fortgeschrittenen Kopf-Hals-Karzinomen, jedoch ohne Einschluss von Mundhöhlenkarzinomen, untersucht.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
In einer randomisierten Multicenterstudie wurde der Nutzen des gegen den \n", + "\n", + " EGF Rezeptor\n", + " Gene or Protein\n", + "\n", + " gerichteten monoklonalen Antikörpers Cetuximab in Kombination mit einer radikalen Strahlentherapie bei fortgeschrittenen Kopf-Hals-Karzinomen, jedoch ohne Einschluss von Mundhöhlenkarzinomen, untersucht.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Der Urinary Bladder Cancer Antigen (Rapid) Test (IDL Biotech, Borlange, Schweden) weist als immunchemischer Assay Fragmente der Zytokeratine 8 und 18 nach, quantitativ als kolorimetrischer Sandwich-Assay (cut-off 12 μg/l) oder qualitativ als Schnelltest mit Antikörper-Komplexbildung (\n", + "\n", + " UBC\n", + " Gene or Protein\n", + "\n", + " Rapid®).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Der Urinary Bladder Cancer Antigen (Rapid) Test (IDL Biotech, Borlange, Schweden) weist als immunchemischer Assay Fragmente der \n", + "\n", + " Zytokeratine 8\n", + " Gene or Protein\n", + "\n", + " und \n", + "\n", + " 18\n", + " Gene or Protein\n", + "\n", + " nach, quantitativ als kolorimetrischer Sandwich-Assay (cut-off 12 μg/l) oder qualitativ als Schnelltest mit Antikörper-Komplexbildung (UBC Rapid®).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Beispiele für molekulare Veränderungen, die therapeutisch genutzt werden können, sind neben \n", + "\n", + " FGFR2\n", + " Gene or Protein\n", + "\n", + " insbesondere die Untersuchung auf Mikrosatelliteninstabilität, NTRK-Fusionsgene, Amplifikationen von HER2, die \n", + "\n", + " BRAF\n", + " Gene or Protein\n", + "\n", + " V600E Mutation oder Mutationen im \n", + "\n", + " IDH1\n", + " Gene or Protein\n", + "\n", + "-Gen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Beispiele für molekulare Veränderungen, die therapeutisch genutzt werden können, sind neben \n", + "\n", + " FGFR2\n", + " Gene or Protein\n", + "\n", + " insbesondere die Untersuchung auf Mikrosatelliteninstabilität, \n", + "\n", + " NTRK-Fusionsgene\n", + " Gene or Protein\n", + "\n", + ", Amplifikationen von \n", + "\n", + " HER2\n", + " Gene or Protein\n", + "\n", + ", die \n", + "\n", + " BRAF\n", + " Gene or Protein\n", + "\n", + " V600E Mutation oder Mutationen im \n", + "\n", + " IDH1-Gen\n", + " Gene or Protein\n", + "\n", + ".
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
BB/diff, LDH, \n", + "\n", + " BSG\n", + " Gene or Protein\n", + "\n", + ", 17-OH-Progesteron, Testosteron, DHEA-S, Androstendion.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
BB/diff, LDH, BSG, 17-OH-Progesteron, Testosteron, DHEA-S, Androstendion.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
- Lokale Entzündung (gemessen am Schnitt als intratumorale chronische Zelldichte (\n", + "\n", + " CIC\n", + " Gene or Protein\n", + "\n", + "), Lymphozyten, Plasmazellen und Makrophagen) und systemischeEntzündung (gemessen im Blut als Neutrophilen-zu-Lymphozyten Verhältnis (NLR)) auf die Prognose bestimmt.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
- Lokale Entzündung (gemessen am Schnitt als intratumorale chronische Zelldichte (CIC), Lymphozyten, Plasmazellen und Makrophagen) und systemischeEntzündung (gemessen im Blut als Neutrophilen-zu-Lymphozyten Verhältnis (NLR)) auf die Prognose bestimmt.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Beim Vergleich der \n", + "\n", + " BRAF\n", + " Gene or Protein\n", + "\n", + " V600 Mutation mit den deutlich selteneren \n", + "\n", + " BRAF\n", + " Gene or Protein\n", + "\n", + " Mutationen in Kodons 594 und 596 fällt auf, dass die \n", + "\n", + " BRAF\n", + " Gene or Protein\n", + "\n", + " V600 Mutationen häufiger in rechtsseitigen und muzinösen Primärtumoren mit peritonealer Metastasierung gefunden werden.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Beim Vergleich der \n", + "\n", + " BRAF V600 Mutation\n", + " Gene or Protein\n", + "\n", + " mit den deutlich selteneren \n", + "\n", + " BRAF\n", + " Gene or Protein\n", + "\n", + " Mutationen in Kodons 594 und 596 fällt auf, dass die \n", + "\n", + " BRAF V600 Mutationen\n", + " Gene or Protein\n", + "\n", + " häufiger in rechtsseitigen und muzinösen Primärtumoren mit peritonealer Metastasierung gefunden werden.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Beim Vergleich der \n", + "\n", + " BRAF\n", + " Gene or Protein\n", + "\n", + " V600 Mutation mit den deutlich selteneren \n", + "\n", + " BRAF\n", + " Gene or Protein\n", + "\n", + " Mutationen in Kodons 594 und 596 fällt auf, dass die \n", + "\n", + " BRAF\n", + " Gene or Protein\n", + "\n", + " V600 Mutationen häufiger in rechtsseitigen und muzinösen Primärtumoren mit peritonealer Metastasierung gefunden werden.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Beim Vergleich der \n", + "\n", + " BRAF V600 Mutation\n", + " Gene or Protein\n", + "\n", + " mit den deutlich selteneren \n", + "\n", + " BRAF\n", + " Gene or Protein\n", + "\n", + " Mutationen in Kodons 594 und 596 fällt auf, dass die \n", + "\n", + " BRAF V600 Mutationen\n", + " Gene or Protein\n", + "\n", + " häufiger in rechtsseitigen und muzinösen Primärtumoren mit peritonealer Metastasierung gefunden werden.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Traditionelle serratierte Adenome sind im Gegensatz zu den \n", + "\n", + " SSA\n", + " Gene or Protein\n", + "\n", + " polypoid in das Darmlumen vorragende Läsionen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Traditionelle serratierte Adenome sind im Gegensatz zu den SSA polypoid in das Darmlumen vorragende Läsionen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Dieses Verfahren detektiert mit fluoreszenz-markierten Antikörpern die häufig auf malignen Urothelzellen vorkommenden Oberflächenantigene CEA und zwei Mucine (\n", + "\n", + " MO344\n", + " Gene or Protein\n", + "\n", + ", \n", + "\n", + " LDQ10\n", + " Gene or Protein\n", + "\n", + ").
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Dieses Verfahren detektiert mit fluoreszenz-markierten Antikörpern die häufig auf malignen Urothelzellen vorkommenden Oberflächenantigene \n", + "\n", + " CEA\n", + " Gene or Protein\n", + "\n", + " und zwei Mucine (MO344, LDQ10).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Dieses Verfahren detektiert mit fluoreszenz-markierten Antikörpern die häufig auf malignen Urothelzellen vorkommenden Oberflächenantigene CEA und zwei Mucine (\n", + "\n", + " MO344\n", + " Gene or Protein\n", + "\n", + ", \n", + "\n", + " LDQ10\n", + " Gene or Protein\n", + "\n", + ").
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Dieses Verfahren detektiert mit fluoreszenz-markierten Antikörpern die häufig auf malignen Urothelzellen vorkommenden Oberflächenantigene \n", + "\n", + " CEA\n", + " Gene or Protein\n", + "\n", + " und zwei Mucine (MO344, LDQ10).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Aktuell wurde als Vereinfachung des FLIPI-2 der PRIMA-\n", + "\n", + " PI\n", + " Gene or Protein\n", + "\n", + " vorgestellt [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Aktuell wurde als Vereinfachung des FLIPI-2 der PRIMA-PI vorgestellt [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Obwohl zahlenmäßig der geringste, weist der Polymerase  mutierte Subtyp (\n", + "\n", + " POLE\n", + " Gene or Protein\n", + "\n", + "-Mutation) eine sehr günstige Prognose auf [REF], gefolgt vom mikrosatelliteninstabilen hypermutierten Subtyp, der auch bei sporadischen Endometriumkarzinomen vorkommt [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Obwohl zahlenmäßig der geringste, weist der Polymerase  mutierte Subtyp (POLE-Mutation) eine sehr günstige Prognose auf [REF], gefolgt vom mikrosatelliteninstabilen hypermutierten Subtyp, der auch bei sporadischen Endometriumkarzinomen vorkommt [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Freies \n", + "\n", + " SN38\n", + " Gene or Protein\n", + "\n", + " ist enterotoxisch und gilt als Ursache der Irinotecan induzierten Diarrhoe.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Freies SN38 ist enterotoxisch und gilt als Ursache der Irinotecan induzierten Diarrhoe.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Patienten, bei denen sich keine Mutationen im \n", + "\n", + " KIT\n", + " Gene or Protein\n", + "\n", + "- oder \n", + "\n", + " PDGFRA-Gen\n", + " Gene or Protein\n", + "\n", + " nachweisen lassen, bedürfen einer intensiven molekularpathologischen Abklärung, da sich viele therapeutische Implikationen ergeben.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Patienten, bei denen sich keine Mutationen im \n", + "\n", + " KIT\n", + " Gene or Protein\n", + "\n", + "- oder \n", + "\n", + " PDGFRA\n", + " Gene or Protein\n", + "\n", + "-Gen nachweisen lassen, bedürfen einer intensiven molekularpathologischen Abklärung, da sich viele therapeutische Implikationen ergeben.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Die Ergebnisse zeigten eine Auftrennung der Wahrscheinlichkeiten für Gesamtüberleben und progressionsfreies Überleben der drei FLIPI-Prognosegruppen sowohl in der Gesamtgruppe als auch in den Patientengruppen, die therapiefrei beobachtet wurden, eine Rituximab-Monotherapie, R-CVP oder R-\n", + "\n", + " CHOP\n", + " Gene or Protein\n", + "\n", + " erhalten hatten.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Die Ergebnisse zeigten eine Auftrennung der Wahrscheinlichkeiten für Gesamtüberleben und progressionsfreies Überleben der drei FLIPI-Prognosegruppen sowohl in der Gesamtgruppe als auch in den Patientengruppen, die therapiefrei beobachtet wurden, eine Rituximab-Monotherapie, R-CVP oder R-CHOP erhalten hatten.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
In einer Studie mit >1500 Probanden einer Hamäturiesprechstunde wurde der immunfluorometrische \n", + "\n", + " MCM\n", + " Gene or Protein\n", + "\n", + "-5 Nachweis mit NMP22 und Urinzytologie verglichen; \n", + "\n", + " MCM\n", + " Gene or Protein\n", + "\n", + "-5 hatte einen hohen NPV von >90% bei einem niedrigen PPV von 20% [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
In einer Studie mit >1500 Probanden einer Hamäturiesprechstunde wurde der immunfluorometrische \n", + "\n", + " MCM-5\n", + " Gene or Protein\n", + "\n", + " Nachweis mit \n", + "\n", + " NMP22\n", + " Gene or Protein\n", + "\n", + " und Urinzytologie verglichen; \n", + "\n", + " MCM-5\n", + " Gene or Protein\n", + "\n", + " hatte einen hohen NPV von >90% bei einem niedrigen PPV von 20% [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
In einer Studie mit >1500 Probanden einer Hamäturiesprechstunde wurde der immunfluorometrische \n", + "\n", + " MCM\n", + " Gene or Protein\n", + "\n", + "-5 Nachweis mit NMP22 und Urinzytologie verglichen; \n", + "\n", + " MCM\n", + " Gene or Protein\n", + "\n", + "-5 hatte einen hohen NPV von >90% bei einem niedrigen PPV von 20% [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
In einer Studie mit >1500 Probanden einer Hamäturiesprechstunde wurde der immunfluorometrische \n", + "\n", + " MCM-5\n", + " Gene or Protein\n", + "\n", + " Nachweis mit \n", + "\n", + " NMP22\n", + " Gene or Protein\n", + "\n", + " und Urinzytologie verglichen; \n", + "\n", + " MCM-5\n", + " Gene or Protein\n", + "\n", + " hatte einen hohen NPV von >90% bei einem niedrigen PPV von 20% [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Falsch-positive Ergebnisse treten vermehrt auf bei Hämaturie (bis zu 80%), da das \n", + "\n", + " hCFHrp\n", + " Gene or Protein\n", + "\n", + "-Protein im Blut in hoher Konzentration vorliegt [REF] [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Falsch-positive Ergebnisse treten vermehrt auf bei Hämaturie (bis zu 80%), da das \n", + "\n", + " hCFHrp-Protein\n", + " Gene or Protein\n", + "\n", + " im Blut in hoher Konzentration vorliegt [REF] [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Die neuen Entitäten sessil serratiertes Adenom (\n", + "\n", + " SSA\n", + " Gene or Protein\n", + "\n", + ") und traditionell serratiertes Adenom (TSA) sind erst seit 2010 definiert [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Die neuen Entitäten sessil serratiertes Adenom (SSA) und traditionell serratiertes Adenom (TSA) sind erst seit 2010 definiert [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Jedoch konnte bislang die Prognose der sekundären AML und des \n", + "\n", + " MDS\n", + " Gene or Protein\n", + "\n", + " auch durch eine Behandlung mittels allogener Stammzelltransplantation nicht durchgreifend verbessert ben nach zwei Jahren bei 8% [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Jedoch konnte bislang die Prognose der sekundären AML und des MDS auch durch eine Behandlung mittels allogener Stammzelltransplantation nicht durchgreifend verbessert ben nach zwei Jahren bei 8% [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Obwohl eine Duodenalpolyposis bei MAP-Patienten seltener (17%) als bei \n", + "\n", + " FAP\n", + " Gene or Protein\n", + "\n", + "-Patienten beobachtet wird, erscheint das Risiko von etwa 4% für die Entwicklung eines Duodenalkarzinoms vergleichbar hoch zu sein [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Obwohl eine Duodenalpolyposis bei MAP-Patienten seltener (17%) als bei FAP-Patienten beobachtet wird, erscheint das Risiko von etwa 4% für die Entwicklung eines Duodenalkarzinoms vergleichbar hoch zu sein [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Die für die Typ-I-Karzinome charakteristischen genetischen Veränderungen in \n", + "\n", + " PTEN\n", + " Gene or Protein\n", + "\n", + ", \n", + "\n", + " K-\n", + " Gene or Protein\n", + "\n", + "RAS, ß-catenin sowie dem Mismatch-Reparatur-System sind sehr selten.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Die für die Typ-I-Karzinome charakteristischen genetischen Veränderungen in \n", + "\n", + " PTEN\n", + " Gene or Protein\n", + "\n", + ", \n", + "\n", + " K-RAS\n", + " Gene or Protein\n", + "\n", + ", \n", + "\n", + " ß-catenin\n", + " Gene or Protein\n", + "\n", + " sowie dem Mismatch-Reparatur-System sind sehr selten.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Kohorten-Studien, bei denen der Rauch-Status vor der FL-Diagnose bei ehemals Nicht-Erkrankten erhoben wurde, bestätigen das etwa doppelt so hohe Follikuläres Lymphom-Risiko weiblicher Raucherinnen (95% CI 1.20-3.77 [REF], [REF]. Auch häufige Passivrauch-Exposition in der Kindheit und als Erwachsener scheinen das FLRisiko zu erhöhen [REF]. Sechs und mehr Stunden Passivrauch-Exposition im Erwachsenenalter erhöhen das FL-Risiko signifikant um das 2,4-fache. Kombinierte Betrachtungen epidemiologischer und genetischer Faktoren deuten auf ein mögliches Zusammenwirken von bestimmten Varianten des HLA-\n", + "\n", + " DRB1\n", + " Gene or Protein\n", + "\n", + " Locus und Rauchen auf das FL-Risiko hin [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Kohorten-Studien, bei denen der Rauch-Status vor der FL-Diagnose bei ehemals Nicht-Erkrankten erhoben wurde, bestätigen das etwa doppelt so hohe Follikuläres Lymphom-Risiko weiblicher Raucherinnen (95% CI 1.20-3.77 [REF], [REF]. Auch häufige Passivrauch-Exposition in der Kindheit und als Erwachsener scheinen das FLRisiko zu erhöhen [REF]. Sechs und mehr Stunden Passivrauch-Exposition im Erwachsenenalter erhöhen das FL-Risiko signifikant um das 2,4-fache. Kombinierte Betrachtungen epidemiologischer und genetischer Faktoren deuten auf ein mögliches Zusammenwirken von bestimmten Varianten des \n", + "\n", + " HLA-DRB1 Locus\n", + " Gene or Protein\n", + "\n", + " und Rauchen auf das FL-Risiko hin [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Mögliche Wechselwirkungen von Granatapfel mit anderen Medikamenten und Substraten wurden bezüglich Cytochrom P4503A und \n", + "\n", + " CYP\n", + " Gene or Protein\n", + "\n", + " 2C9 sowie in Bezug auf Warfarin und Metformin untersucht [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Mögliche Wechselwirkungen von Granatapfel mit anderen Medikamenten und Substraten wurden bezüglich \n", + "\n", + " Cytochrom P4503A\n", + " Gene or Protein\n", + "\n", + " und \n", + "\n", + " CYP 2C9\n", + " Gene or Protein\n", + "\n", + " sowie in Bezug auf Warfarin und Metformin untersucht [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Soluble \n", + "\n", + " Fas\n", + " Gene or Protein\n", + "\n", + " (sFas) ist ein Produkt abnormer mRNA Splicevarianten des membrangebundenen \n", + "\n", + " Fas\n", + " Gene or Protein\n", + "\n", + "-Rezeptors, der für apoptotische Signalregulierung bedeutsam ist.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Soluble \n", + "\n", + " Fas\n", + " Gene or Protein\n", + "\n", + " (\n", + "\n", + " sFas\n", + " Gene or Protein\n", + "\n", + ") ist ein Produkt abnormer mRNA Splicevarianten des membrangebundenen \n", + "\n", + " Fas-Rezeptors\n", + " Gene or Protein\n", + "\n", + ", der für apoptotische Signalregulierung bedeutsam ist.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Der nach der Einführung der \n", + "\n", + " CD20\n", + " Gene or Protein\n", + "\n", + "-Antikörpertherapie entwickelte Follicular Lymphoma International Prognostic Index -2 (FLIPI-2) [REF] wurde deutlich seltener als der FLIPI auf seine prognostische Relevanz untersucht.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Der nach der Einführung der \n", + "\n", + " CD20-Antikörpertherapie\n", + " Gene or Protein\n", + "\n", + " entwickelte Follicular Lymphoma International Prognostic Index -2 (FLIPI-2) [REF] wurde deutlich seltener als der FLIPI auf seine prognostische Relevanz untersucht.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Im Gesamtüberleben wurde sogar ein negativer Einfluss der Bevacizumab-Therapie beobachtet (HR für Bevacizumab-\n", + "\n", + " FOLFOX4\n", + " Gene or Protein\n", + "\n", + " vs. \n", + "\n", + " FOLFOX4\n", + " Gene or Protein\n", + "\n", + " 1,27 (p=0,02), Bevacizumab-XELOX vs. \n", + "\n", + " FOLFOX4\n", + " Gene or Protein\n", + "\n", + " 1,15 (p=0,21)).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Im Gesamtüberleben wurde sogar ein negativer Einfluss der Bevacizumab-Therapie beobachtet (HR für Bevacizumab-FOLFOX4 vs. FOLFOX4 1,27 (p=0,02), Bevacizumab-XELOX vs. FOLFOX4 1,15 (p=0,21)).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Im Gesamtüberleben wurde sogar ein negativer Einfluss der Bevacizumab-Therapie beobachtet (HR für Bevacizumab-\n", + "\n", + " FOLFOX4\n", + " Gene or Protein\n", + "\n", + " vs. \n", + "\n", + " FOLFOX4\n", + " Gene or Protein\n", + "\n", + " 1,27 (p=0,02), Bevacizumab-XELOX vs. \n", + "\n", + " FOLFOX4\n", + " Gene or Protein\n", + "\n", + " 1,15 (p=0,21)).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Im Gesamtüberleben wurde sogar ein negativer Einfluss der Bevacizumab-Therapie beobachtet (HR für Bevacizumab-FOLFOX4 vs. FOLFOX4 1,27 (p=0,02), Bevacizumab-XELOX vs. FOLFOX4 1,15 (p=0,21)).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Im Gesamtüberleben wurde sogar ein negativer Einfluss der Bevacizumab-Therapie beobachtet (HR für Bevacizumab-\n", + "\n", + " FOLFOX4\n", + " Gene or Protein\n", + "\n", + " vs. \n", + "\n", + " FOLFOX4\n", + " Gene or Protein\n", + "\n", + " 1,27 (p=0,02), Bevacizumab-XELOX vs. \n", + "\n", + " FOLFOX4\n", + " Gene or Protein\n", + "\n", + " 1,15 (p=0,21)).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Im Gesamtüberleben wurde sogar ein negativer Einfluss der Bevacizumab-Therapie beobachtet (HR für Bevacizumab-FOLFOX4 vs. FOLFOX4 1,27 (p=0,02), Bevacizumab-XELOX vs. FOLFOX4 1,15 (p=0,21)).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Aktivierende Mutationen im \n", + "\n", + " BRAF-Gen\n", + " Gene or Protein\n", + "\n", + " werden bei etwa 8-12% der Patienten mit mKRK beschrieben [REF], [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Aktivierende Mutationen im \n", + "\n", + " BRAF\n", + " Gene or Protein\n", + "\n", + "-Gen werden bei etwa 8-12% der Patienten mit mKRK beschrieben [REF], [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Patienten mit einem \n", + "\n", + " SSA\n", + " Gene or Protein\n", + "\n", + " proximal der linken Flexur wiesen das größte Risiko für die Entstehung eines kolorektalen Karzinoms auf.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Patienten mit einem SSA proximal der linken Flexur wiesen das größte Risiko für die Entstehung eines kolorektalen Karzinoms auf.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Der mediane Anstieg des \n", + "\n", + " PSA\n", + " Gene or Protein\n", + "\n", + " in der Nahrungsergänzungsmittelgruppe (FSG) betrug 14,7% im Gegensatz zu 78,5% in der Placebogruppe (PG), Differenz 63,8% (p = 0,0008).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Der mediane Anstieg des PSA in der Nahrungsergänzungsmittelgruppe (FSG) betrug 14,7% im Gegensatz zu 78,5% in der Placebogruppe (PG), Differenz 63,8% (p = 0,0008).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + " p16-\n", + " Gene or Protein\n", + "\n", + "\n", + "\n", + " INK4a\n", + " Gene or Protein\n", + "\n", + " Nachweis, CINtec® \n", + "\n", + " p16\n", + " Gene or Protein\n", + "\n", + " (Roche mtm laboratories, Heidelberg):
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + " p16-INK4a\n", + " Gene or Protein\n", + "\n", + " Nachweis, CINtec® p16 (Roche mtm laboratories, Heidelberg):
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + " p16-\n", + " Gene or Protein\n", + "\n", + "\n", + "\n", + " INK4a\n", + " Gene or Protein\n", + "\n", + " Nachweis, CINtec® \n", + "\n", + " p16\n", + " Gene or Protein\n", + "\n", + " (Roche mtm laboratories, Heidelberg):
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + " p16-INK4a\n", + " Gene or Protein\n", + "\n", + " Nachweis, CINtec® p16 (Roche mtm laboratories, Heidelberg):
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + " p16-\n", + " Gene or Protein\n", + "\n", + "\n", + "\n", + " INK4a\n", + " Gene or Protein\n", + "\n", + " Nachweis, CINtec® \n", + "\n", + " p16\n", + " Gene or Protein\n", + "\n", + " (Roche mtm laboratories, Heidelberg):
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + " p16-INK4a\n", + " Gene or Protein\n", + "\n", + " Nachweis, CINtec® p16 (Roche mtm laboratories, Heidelberg):
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Eine auf morphologischen Faktoren beruhende Risikostratifizierung des Endometriumkarzinoms, basierend auf einem Konsens der European Society for Medical Oncology (\n", + "\n", + " ESMO\n", + " Gene or Protein\n", + "\n", + "), der European Society for Radiotherapy Oncology (ESTRO) und der European Society of Gynaecological Oncology (ESGO) ist in Abbildung 4 zusammengefasst [REF], [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Eine auf morphologischen Faktoren beruhende Risikostratifizierung des Endometriumkarzinoms, basierend auf einem Konsens der European Society for Medical Oncology (ESMO), der European Society for Radiotherapy Oncology (ESTRO) und der European Society of Gynaecological Oncology (ESGO) ist in Abbildung 4 zusammengefasst [REF], [REF].
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Ein Großteil (70 – 95%) der follikulären Lymphome ist durch die t(14;18) Translokation charakterisiert, die in einer Fusion des Gens für den Apoptose-Regulator \n", + "\n", + " BCL2\n", + " Gene or Protein\n", + "\n", + " mit dem Immunoglobulin H Locus resultiert, einhergehend mit einer gesteigerten Produktion des anti-apoptotischen \n", + "\n", + " BCL-2\n", + " Gene or Protein\n", + "\n", + " Proteins.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Ein Großteil (70 – 95%) der follikulären Lymphome ist durch die t(14;18) Translokation charakterisiert, die in einer Fusion des Gens für den Apoptose-Regulator \n", + "\n", + " BCL2\n", + " Gene or Protein\n", + "\n", + " mit dem \n", + "\n", + " Immunoglobulin H Locus\n", + " Gene or Protein\n", + "\n", + " resultiert, einhergehend mit einer gesteigerten Produktion des anti-apoptotischen \n", + "\n", + " BCL-2 Proteins\n", + " Gene or Protein\n", + "\n", + ".
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Die klinische Diagnose einer \n", + "\n", + " NF1\n", + " Gene or Protein\n", + "\n", + " wird gestellt, wenn zwei der nachfolgenden sieben Kriterien erfüllt sind:
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Die klinische Diagnose einer NF1 wird gestellt, wenn zwei der nachfolgenden sieben Kriterien erfüllt sind:
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Der primäre Zielparameter war die Veränderung der Subskala der Alltagsbeeinflussung des Brief Fatigue Inventory (BFI), sekundärer Parameter die Änderung der BFI-Subskala der allgemeinen Fatigue sowie die Werte der Medical Outcome Scale des Short Form-36 (\n", + "\n", + " SF\n", + " Gene or Protein\n", + "\n", + "-36) und des Pittsburgh Sleep Quality Index (PSQI).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Der primäre Zielparameter war die Veränderung der Subskala der Alltagsbeeinflussung des Brief Fatigue Inventory (BFI), sekundärer Parameter die Änderung der BFI-Subskala der allgemeinen Fatigue sowie die Werte der Medical Outcome Scale des Short Form-36 (SF-36) und des Pittsburgh Sleep Quality Index (PSQI).
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
(1) ≥ 6 Café au lait Spots der Haut (mindestens 5 mm Durchmesser in präpubertären und mindestens 15 mm Durchmesser in postpubertären Menschen), (2) ≥ 2 Neurofibrome oder ein plexiformes Neurofibrom, (3) Freckling in der Axilla oder Leiste, (4) ≥ 2 Lisch-Knötchen (\n", + "\n", + " Iris\n", + " Gene or Protein\n", + "\n", + "- Hamartome), (5) Diagnose eines Opticus-Glioms, (6) Dysplasien der langen Röhrenknochen mit und ohne Pseudarthrose, (7) Neurofibromatose bei einem Verwandten ersten Grades.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
(1) ≥ 6 Café au lait Spots der Haut (mindestens 5 mm Durchmesser in präpubertären und mindestens 15 mm Durchmesser in postpubertären Menschen), (2) ≥ 2 Neurofibrome oder ein plexiformes Neurofibrom, (3) Freckling in der Axilla oder Leiste, (4) ≥ 2 Lisch-Knötchen (Iris- Hamartome), (5) Diagnose eines Opticus-Glioms, (6) Dysplasien der langen Röhrenknochen mit und ohne Pseudarthrose, (7) Neurofibromatose bei einem Verwandten ersten Grades.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Die Bestimmung dieses SNPs erlaubt eine solide Aussage hinsichtlich des \n", + "\n", + " NAT2\n", + " Gene or Protein\n", + "\n", + "-Acetyliererstatus zumindest bei Mitteleuropäern, ohne Bestimmung der 7 SNPS im \n", + "\n", + " NAT2\n", + " Gene or Protein\n", + "\n", + " Gen.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Die Bestimmung dieses SNPs erlaubt eine solide Aussage hinsichtlich des \n", + "\n", + " NAT2\n", + " Gene or Protein\n", + "\n", + "-Acetyliererstatus zumindest bei Mitteleuropäern, ohne Bestimmung der 7 SNPS im \n", + "\n", + " NAT2 Gen\n", + " Gene or Protein\n", + "\n", + ".
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Mehrheitlich sind eine \n", + "\n", + " VEGF(\n", + " Gene or Protein\n", + "\n", + "-A) und eine \n", + "\n", + " VEGFR\n", + " Gene or Protein\n", + "\n", + "-Expression nachweisbar.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Mehrheitlich sind eine \n", + "\n", + " VEGF(-A)\n", + " Gene or Protein\n", + "\n", + " und eine \n", + "\n", + " VEGFR\n", + " Gene or Protein\n", + "\n", + "-Expression nachweisbar.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------\n" + ] + }, + { + "data": { + "text/html": [ + "
Das ASPL-\n", + "\n", + " TFE3\n", + " Gene or Protein\n", + "\n", + "-Fusionsprotein aktiviert eine \n", + "\n", + " MET\n", + " Gene or Protein\n", + "\n", + " Transkription u.a. die Transkription des c-\n", + "\n", + " MET\n", + " Gene or Protein\n", + "\n", + "-Gens.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Das \n", + "\n", + " ASPL\n", + " Gene or Protein\n", + "\n", + "-\n", + "\n", + " TFE3\n", + " Gene or Protein\n", + "\n", + "-Fusionsprotein aktiviert eine \n", + "\n", + " MET\n", + " Gene or Protein\n", + "\n", + " Transkription u.a. die Transkription des \n", + "\n", + " c-MET\n", + " Gene or Protein\n", + "\n", + "-Gens.
" + ], + "text/plain": [ + "" ] }, - "execution_count": 47, "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(docs), len(filtered_docs), len(gene_docs)" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "c1bd6907-4ed1-4f3e-8bfc-a032a69cfcca", - "metadata": {}, - "outputs": [ + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", "text": [ - "Write to output/weak_training_lg.spacy...done\n", - "Write to output/weak_training_md.spacy...done\n" + "------\n" ] - } - ], - "source": [ - "utils.docbin_writer(docs, f\"output/weak_training_lg.spacy\")\n", - "utils.docbin_writer(filtered_docs, f\"output/weak_training_md.spacy\")" - ] - }, - { - "cell_type": "markdown", - "id": "6e00ac4b-8333-44bf-9274-ab6734810bbb", - "metadata": { - "tags": [] - }, - "source": [ - "# Labeling Function Analysis" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "595183ba-63b3-4ca7-8870-2647745676d4", - "metadata": {}, - "outputs": [], - "source": [ - "gold_docs_dev = list(DocBin().from_disk('data/molecular/gold_dev.spacy').get_docs(nlp.vocab))" - ] - }, - { - "cell_type": "markdown", - "id": "8764f34a", - "metadata": {}, - "source": [ - "Our labeling functions must also be deployed onto the gold standard data to evaluate strong supervision against weak supervision." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "97cce16c-4d79-4dc8-83dd-c54f60795f67", - "metadata": {}, - "outputs": [], - "source": [ - "def apply_hmm(gold_docs):\n", - " for g in tqdm(gold_docs):\n", - " if 'Gene or Protein' in g.spans:\n", - " del g.spans['Gene or Protein']\n", - " for lf in lfs:\n", - " g = lf(g)\n", - " g = hmm(g)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "2113b209", - "metadata": {}, - "outputs": [ + }, { - "name": "stderr", + "data": { + "text/html": [ + "
In einer retrospektiven Analyse der klinischen Studie \n", + "\n", + " AB20\n", + " Gene or Protein\n", + "\n", + "/99 (n=102, in die Auswertung flossen 83 Patienten ein) zeigte sich mit Zunahme der ungünstigen Faktoren eine signifikante Verschlechterung des Gesamtüberlebens im Gesamtkollektiv:
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
In einer retrospektiven Analyse der klinischen Studie AB20/99 (n=102, in die Auswertung flossen 83 Patienten ein) zeigte sich mit Zunahme der ungünstigen Faktoren eine signifikante Verschlechterung des Gesamtüberlebens im Gesamtkollektiv:
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:11<00:00, 90.78it/s]\n" + "------\n" ] } ], "source": [ - "apply_hmm(gold_docs_dev)" + "from skweak.utils import display_entities\n", + "\n", + "for d in gold_docs_dev:\n", + " for g in d.spans['hmm']:\n", + " if not g in d.ents:\n", + " display_entities(d, layer='hmm')\n", + " display_entities(d)\n", + " print('------')\n", + " continue" ] }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 37, "id": "f79a91a8", "metadata": {}, "outputs": [ @@ -898,8 +3713,8 @@ " \n", " \n", " \n", - " Gene or Protein\n", - " 100.0 %\n", + " Gene or Protein\n", + " 100.0 %\n", " construct\n", " 0.879\n", " 0.244\n", @@ -936,16 +3751,28 @@ " 0.594\n", " \n", " \n", - " hmm\n", - " 0.916\n", - " 0.486\n", - " 0.636\n", + " entrez\n", + " 0.937\n", + " 0.450\n", + " 0.608\n", " \n", " \n", " \n", - " 0.870\n", - " 0.608\n", + " 0.902\n", + " 0.503\n", + " 0.646\n", + " \n", + " \n", + " hmm\n", + " 0.899\n", + " 0.596\n", " 0.716\n", + " \n", + " \n", + " \n", + " 0.841\n", + " 0.680\n", + " 0.752\n", " \n", " \n", " omim\n", @@ -960,8 +3787,32 @@ " 0.670\n", " \n", " \n", - " macro\n", - " \n", + " protein_families\n", + " 1.000\n", + " 0.067\n", + " 0.126\n", + " \n", + " \n", + " \n", + " 1.000\n", + " 0.076\n", + " 0.142\n", + " \n", + " \n", + " protein_gazetteer\n", + " 1.000\n", + " 0.117\n", + " 0.210\n", + " \n", + " \n", + " \n", + " 0.934\n", + " 0.120\n", + " 0.212\n", + " \n", + " \n", + " macro\n", + " \n", " construct\n", " 0.879\n", " 0.244\n", @@ -998,16 +3849,28 @@ " 0.594\n", " \n", " \n", - " hmm\n", - " 0.916\n", - " 0.486\n", - " 0.636\n", + " entrez\n", + " 0.937\n", + " 0.450\n", + " 0.608\n", " \n", " \n", " \n", - " 0.870\n", - " 0.608\n", + " 0.902\n", + " 0.503\n", 
+ " 0.646\n", + " \n", + " \n", + " hmm\n", + " 0.899\n", + " 0.596\n", " 0.716\n", + " \n", + " \n", + " \n", + " 0.841\n", + " 0.680\n", + " 0.752\n", " \n", " \n", " omim\n", @@ -1022,8 +3885,32 @@ " 0.670\n", " \n", " \n", - " micro\n", - " \n", + " protein_families\n", + " 1.000\n", + " 0.067\n", + " 0.126\n", + " \n", + " \n", + " \n", + " 1.000\n", + " 0.076\n", + " 0.142\n", + " \n", + " \n", + " protein_gazetteer\n", + " 1.000\n", + " 0.117\n", + " 0.210\n", + " \n", + " \n", + " \n", + " 0.934\n", + " 0.120\n", + " 0.212\n", + " \n", + " \n", + " micro\n", + " \n", " construct\n", " 0.879\n", " 0.244\n", @@ -1060,16 +3947,28 @@ " 0.594\n", " \n", " \n", - " hmm\n", - " 0.916\n", - " 0.486\n", - " 0.636\n", - " 0.42\n", - " 0.987\n", - " 0.53\n", - " 0.870\n", + " entrez\n", + " 0.937\n", + " 0.450\n", " 0.608\n", + " 0.819\n", + " 0.976\n", + " 0.481\n", + " 0.902\n", + " 0.503\n", + " 0.646\n", + " \n", + " \n", + " hmm\n", + " 0.899\n", + " 0.596\n", " 0.716\n", + " 0.326\n", + " 0.989\n", + " 0.663\n", + " 0.841\n", + " 0.680\n", + " 0.752\n", " \n", " \n", " omim\n", @@ -1084,8 +3983,32 @@ " 0.670\n", " \n", " \n", - " weighted\n", - " \n", + " protein_families\n", + " 1.000\n", + " 0.067\n", + " 0.126\n", + " 0.819\n", + " 0.976\n", + " 0.067\n", + " 1.000\n", + " 0.076\n", + " 0.142\n", + " \n", + " \n", + " protein_gazetteer\n", + " 1.000\n", + " 0.117\n", + " 0.210\n", + " 0.819\n", + " 0.976\n", + " 0.117\n", + " 0.934\n", + " 0.120\n", + " 0.212\n", + " \n", + " \n", + " weighted\n", + " \n", " construct\n", " 0.879\n", " 0.244\n", @@ -1122,16 +4045,28 @@ " 0.594\n", " \n", " \n", - " hmm\n", - " 0.916\n", - " 0.486\n", - " 0.636\n", + " entrez\n", + " 0.937\n", + " 0.450\n", + " 0.608\n", " \n", " \n", " \n", - " 0.870\n", - " 0.608\n", + " 0.902\n", + " 0.503\n", + " 0.646\n", + " \n", + " \n", + " hmm\n", + " 0.899\n", + " 0.596\n", " 0.716\n", + " \n", + " \n", + " \n", + " 0.841\n", + " 0.680\n", + " 0.752\n", " \n", " \n", " omim\n", @@ 
-1145,6 +4080,30 @@ " 0.524\n", " 0.670\n", " \n", + " \n", + " protein_families\n", + " 1.000\n", + " 0.067\n", + " 0.126\n", + " \n", + " \n", + " \n", + " 1.000\n", + " 0.076\n", + " 0.142\n", + " \n", + " \n", + " protein_gazetteer\n", + " 1.000\n", + " 0.117\n", + " 0.210\n", + " \n", + " \n", + " \n", + " 0.934\n", + " 0.120\n", + " 0.212\n", + " \n", " \n", "\n", "" @@ -1155,95 +4114,143 @@ "Gene or Protein 100.0 % construct 0.879 0.244 \n", " cue_civic 0.979 0.366 \n", " cue_cosmic_census 0.960 0.342 \n", - " hmm 0.916 0.486 \n", + " entrez 0.937 0.450 \n", + " hmm 0.899 0.596 \n", " omim 0.955 0.411 \n", + " protein_families 1.000 0.067 \n", + " protein_gazetteer 1.000 0.117 \n", "macro construct 0.879 0.244 \n", " cue_civic 0.979 0.366 \n", " cue_cosmic_census 0.960 0.342 \n", - " hmm 0.916 0.486 \n", + " entrez 0.937 0.450 \n", + " hmm 0.899 0.596 \n", " omim 0.955 0.411 \n", + " protein_families 1.000 0.067 \n", + " protein_gazetteer 1.000 0.117 \n", "micro construct 0.879 0.244 \n", " cue_civic 0.979 0.366 \n", " cue_cosmic_census 0.960 0.342 \n", - " hmm 0.916 0.486 \n", + " entrez 0.937 0.450 \n", + " hmm 0.899 0.596 \n", " omim 0.955 0.411 \n", + " protein_families 1.000 0.067 \n", + " protein_gazetteer 1.000 0.117 \n", "weighted construct 0.879 0.244 \n", " cue_civic 0.979 0.366 \n", " cue_cosmic_census 0.960 0.342 \n", - " hmm 0.916 0.486 \n", + " entrez 0.937 0.450 \n", + " hmm 0.899 0.596 \n", " omim 0.955 0.411 \n", + " protein_families 1.000 0.067 \n", + " protein_gazetteer 1.000 0.117 \n", "\n", " tok_f1 tok_cee tok_acc coverage \\\n", "label proportion model \n", "Gene or Protein 100.0 % construct 0.382 \n", " cue_civic 0.532 \n", " cue_cosmic_census 0.504 \n", - " hmm 0.636 \n", + " entrez 0.608 \n", + " hmm 0.716 \n", " omim 0.574 \n", + " protein_families 0.126 \n", + " protein_gazetteer 0.210 \n", "macro construct 0.382 \n", " cue_civic 0.532 \n", " cue_cosmic_census 0.504 \n", - " hmm 0.636 \n", + " entrez 0.608 \n", + " hmm 0.716 \n", 
" omim 0.574 \n", + " protein_families 0.126 \n", + " protein_gazetteer 0.210 \n", "micro construct 0.382 0.819 0.976 0.278 \n", " cue_civic 0.532 0.819 0.976 0.374 \n", " cue_cosmic_census 0.504 0.819 0.976 0.356 \n", - " hmm 0.636 0.42 0.987 0.53 \n", + " entrez 0.608 0.819 0.976 0.481 \n", + " hmm 0.716 0.326 0.989 0.663 \n", " omim 0.574 0.819 0.976 0.43 \n", + " protein_families 0.126 0.819 0.976 0.067 \n", + " protein_gazetteer 0.210 0.819 0.976 0.117 \n", "weighted construct 0.382 \n", " cue_civic 0.532 \n", " cue_cosmic_census 0.504 \n", - " hmm 0.636 \n", + " entrez 0.608 \n", + " hmm 0.716 \n", " omim 0.574 \n", + " protein_families 0.126 \n", + " protein_gazetteer 0.210 \n", "\n", " ent_precision ent_recall \\\n", "label proportion model \n", "Gene or Protein 100.0 % construct 0.833 0.305 \n", " cue_civic 0.944 0.465 \n", " cue_cosmic_census 0.928 0.436 \n", - " hmm 0.870 0.608 \n", + " entrez 0.902 0.503 \n", + " hmm 0.841 0.680 \n", " omim 0.926 0.524 \n", + " protein_families 1.000 0.076 \n", + " protein_gazetteer 0.934 0.120 \n", "macro construct 0.833 0.305 \n", " cue_civic 0.944 0.465 \n", " cue_cosmic_census 0.928 0.436 \n", - " hmm 0.870 0.608 \n", + " entrez 0.902 0.503 \n", + " hmm 0.841 0.680 \n", " omim 0.926 0.524 \n", + " protein_families 1.000 0.076 \n", + " protein_gazetteer 0.934 0.120 \n", "micro construct 0.833 0.305 \n", " cue_civic 0.944 0.465 \n", " cue_cosmic_census 0.928 0.436 \n", - " hmm 0.870 0.608 \n", + " entrez 0.902 0.503 \n", + " hmm 0.841 0.680 \n", " omim 0.926 0.524 \n", + " protein_families 1.000 0.076 \n", + " protein_gazetteer 0.934 0.120 \n", "weighted construct 0.833 0.305 \n", " cue_civic 0.944 0.465 \n", " cue_cosmic_census 0.928 0.436 \n", - " hmm 0.870 0.608 \n", + " entrez 0.902 0.503 \n", + " hmm 0.841 0.680 \n", " omim 0.926 0.524 \n", + " protein_families 1.000 0.076 \n", + " protein_gazetteer 0.934 0.120 \n", "\n", " ent_f1 \n", "label proportion model \n", "Gene or Protein 100.0 % construct 0.446 \n", " 
cue_civic 0.624 \n", " cue_cosmic_census 0.594 \n", - " hmm 0.716 \n", + " entrez 0.646 \n", + " hmm 0.752 \n", " omim 0.670 \n", + " protein_families 0.142 \n", + " protein_gazetteer 0.212 \n", "macro construct 0.446 \n", " cue_civic 0.624 \n", " cue_cosmic_census 0.594 \n", - " hmm 0.716 \n", + " entrez 0.646 \n", + " hmm 0.752 \n", " omim 0.670 \n", + " protein_families 0.142 \n", + " protein_gazetteer 0.212 \n", "micro construct 0.446 \n", " cue_civic 0.624 \n", " cue_cosmic_census 0.594 \n", - " hmm 0.716 \n", + " entrez 0.646 \n", + " hmm 0.752 \n", " omim 0.670 \n", + " protein_families 0.142 \n", + " protein_gazetteer 0.212 \n", "weighted construct 0.446 \n", " cue_civic 0.624 \n", " cue_cosmic_census 0.594 \n", - " hmm 0.716 \n", - " omim 0.670 " + " entrez 0.646 \n", + " hmm 0.752 \n", + " omim 0.670 \n", + " protein_families 0.142 \n", + " protein_gazetteer 0.212 " ] }, - "execution_count": 51, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -1262,6 +4269,17 @@ "# Training of Transformer-based NER Models" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "60f62095-f8ed-4fd6-b7a5-672e130c5282", + "metadata": {}, + "outputs": [], + "source": [ + "utils.docbin_writer(docs, f\"output/weak_training_lg.spacy\")\n", + "#utils.docbin_writer(filtered_docs, f\"output/weak_training_md.spacy\")" + ] + }, { "cell_type": "markdown", "id": "7fc7b292-c94a-45f9-8e06-a862a3932019", @@ -1272,19 +4290,10 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": null, "id": "5f713abd-6637-481b-8fea-39972eb129f4", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Write to output/strong_train.spacy...done\n", - "Write to output/strong_dev.spacy...done\n" - ] - } - ], + "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "gold_docs_strong = list(DocBin().from_disk('data/molecular/gold_dev.spacy').get_docs(nlp.vocab))\n", @@ -1337,7 
+4346,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 38, "id": "8842e777-aa91-485e-8e61-a4fc70790679", "metadata": {}, "outputs": [], @@ -1348,7 +4357,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 39, "id": "ef897e37-9bae-49ee-b8c1-267f19ceffe5", "metadata": {}, "outputs": [], @@ -1391,7 +4400,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 40, "id": "5be94760-3fa5-40e9-bb8f-d798cd452f6b", "metadata": { "tags": [] @@ -1413,7 +4422,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:11<00:00, 90.17it/s]\n" + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:15<00:00, 64.20it/s]\n" ] }, { @@ -1464,7 +4473,7 @@ " \n", " \n", " \n", - " 100.0 %\n", + " 100.0 %\n", " construct\n", " 0.879\n", " 0.244\n", @@ -1501,16 +4510,28 @@ " 0.594\n", " \n", " \n", - " hmm\n", - " 0.916\n", - " 0.486\n", - " 0.636\n", + " entrez\n", + " 0.937\n", + " 0.450\n", + " 0.608\n", " \n", " \n", " \n", - " 0.870\n", - " 0.608\n", + " 0.902\n", + " 0.503\n", + " 0.646\n", + " \n", + " \n", + " hmm\n", + " 0.899\n", + " 0.596\n", " 0.716\n", + " \n", + " \n", + " \n", + " 0.841\n", + " 0.680\n", + " 0.752\n", " \n", " \n", " omim\n", @@ -1524,6 +4545,30 @@ " 0.524\n", " 0.670\n", " \n", + " \n", + " protein_families\n", + " 1.000\n", + " 0.067\n", + " 0.126\n", + " \n", + " \n", + " \n", + " 1.000\n", + " 0.076\n", + " 0.142\n", + " \n", + " \n", + " protein_gazetteer\n", + " 1.000\n", + " 0.117\n", + " 0.210\n", + " \n", + " \n", + " \n", + " 0.934\n", + " 0.120\n", + " 0.212\n", + " \n", " \n", "\n", "" @@ -1534,24 +4579,33 @@ "100.0 % construct 0.879 0.244 0.382 \n", " cue_civic 0.979 0.366 0.532 \n", " cue_cosmic_census 0.960 0.342 0.504 \n", - " hmm 0.916 0.486 0.636 \n", + " entrez 0.937 0.450 0.608 
\n", + " hmm 0.899 0.596 0.716 \n", " omim 0.955 0.411 0.574 \n", + " protein_families 1.000 0.067 0.126 \n", + " protein_gazetteer 1.000 0.117 0.210 \n", "\n", " tok_acc coverage ent_precision ent_recall \\\n", "proportion model \n", "100.0 % construct 0.833 0.305 \n", " cue_civic 0.944 0.465 \n", " cue_cosmic_census 0.928 0.436 \n", - " hmm 0.870 0.608 \n", + " entrez 0.902 0.503 \n", + " hmm 0.841 0.680 \n", " omim 0.926 0.524 \n", + " protein_families 1.000 0.076 \n", + " protein_gazetteer 0.934 0.120 \n", "\n", " ent_f1 \n", "proportion model \n", "100.0 % construct 0.446 \n", " cue_civic 0.624 \n", " cue_cosmic_census 0.594 \n", - " hmm 0.716 \n", - " omim 0.670 " + " entrez 0.646 \n", + " hmm 0.752 \n", + " omim 0.670 \n", + " protein_families 0.142 \n", + " protein_gazetteer 0.212 " ] }, "metadata": {}, @@ -1573,7 +4627,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████▉| 999/1000 [00:39<00:00, 25.06it/s]\n" + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████▉| 999/1000 [00:34<00:00, 28.62it/s]\n" ] }, { @@ -1626,15 +4680,15 @@ " \n", " 100.0 %\n", " ner_model\n", - " 0.916\n", - " 0.505\n", - " 0.652\n", + " 0.903\n", + " 0.637\n", + " 0.748\n", " \n", " \n", " \n", - " 0.875\n", - " 0.636\n", - " 0.736\n", + " 0.855\n", + " 0.745\n", + " 0.796\n", " \n", " \n", "\n", @@ -1643,11 +4697,11 @@ "text/plain": [ " tok_precision tok_recall tok_f1 tok_cee tok_acc \\\n", "proportion model \n", - "100.0 % ner_model 0.916 0.505 0.652 \n", + "100.0 % ner_model 0.903 0.637 0.748 \n", "\n", " coverage ent_precision ent_recall ent_f1 \n", "proportion model \n", - "100.0 % ner_model 0.875 0.636 0.736 " + "100.0 % ner_model 0.855 0.745 0.796 " ] }, "metadata": {}, @@ -1670,7 +4724,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 41, "id": 
"631cbb84-3c39-4281-9160-5029493a23a7", "metadata": {}, "outputs": [ @@ -1690,7 +4744,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:09<00:00, 101.28it/s]\n" + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:13<00:00, 72.69it/s]\n" ] }, { @@ -1741,7 +4795,7 @@ " \n", " \n", " \n", - " 100.0 %\n", + " 100.0 %\n", " construct\n", " 0.853\n", " 0.190\n", @@ -1778,16 +4832,28 @@ " 0.608\n", " \n", " \n", + " entrez\n", + " 0.951\n", + " 0.525\n", + " 0.676\n", + " \n", + " \n", + " \n", + " 0.890\n", + " 0.608\n", + " 0.722\n", + " \n", + " \n", " hmm\n", - " 0.855\n", - " 0.398\n", - " 0.544\n", + " 0.864\n", + " 0.596\n", + " 0.706\n", " \n", " \n", " \n", - " 0.781\n", - " 0.545\n", - " 0.642\n", + " 0.789\n", + " 0.689\n", + " 0.736\n", " \n", " \n", " omim\n", @@ -1801,6 +4867,30 @@ " 0.493\n", " 0.616\n", " \n", + " \n", + " protein_families\n", + " 0.538\n", + " 0.027\n", + " 0.052\n", + " \n", + " \n", + " \n", + " 0.250\n", + " 0.012\n", + " 0.022\n", + " \n", + " \n", + " protein_gazetteer\n", + " 1.000\n", + " 0.131\n", + " 0.232\n", + " \n", + " \n", + " \n", + " 0.975\n", + " 0.112\n", + " 0.200\n", + " \n", " \n", "\n", "" @@ -1811,24 +4901,33 @@ "100.0 % construct 0.853 0.190 0.310 \n", " cue_civic 0.933 0.350 0.510 \n", " cue_cosmic_census 0.927 0.342 0.500 \n", - " hmm 0.855 0.398 0.544 \n", + " entrez 0.951 0.525 0.676 \n", + " hmm 0.864 0.596 0.706 \n", " omim 0.904 0.363 0.518 \n", + " protein_families 0.538 0.027 0.052 \n", + " protein_gazetteer 1.000 0.131 0.232 \n", "\n", " tok_acc coverage ent_precision ent_recall \\\n", "proportion model \n", "100.0 % construct 0.836 0.280 \n", " cue_civic 0.841 0.473 \n", " cue_cosmic_census 0.854 0.473 \n", - " hmm 0.781 0.545 \n", + " entrez 0.890 0.608 \n", + " hmm 0.789 0.689 \n", " omim 0.818 0.493 \n", 
+ " protein_families 0.250 0.012 \n", + " protein_gazetteer 0.975 0.112 \n", "\n", " ent_f1 \n", "proportion model \n", "100.0 % construct 0.420 \n", " cue_civic 0.606 \n", " cue_cosmic_census 0.608 \n", - " hmm 0.642 \n", - " omim 0.616 " + " entrez 0.722 \n", + " hmm 0.736 \n", + " omim 0.616 \n", + " protein_families 0.022 \n", + " protein_gazetteer 0.200 " ] }, "metadata": {}, @@ -1850,7 +4949,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████▉| 999/1000 [00:35<00:00, 28.01it/s]\n" + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████▉| 999/1000 [00:33<00:00, 30.08it/s]\n" ] }, { @@ -1903,15 +5002,15 @@ " \n", " 100.0 %\n", " ner_model\n", - " 0.883\n", - " 0.437\n", - " 0.584\n", + " 0.887\n", + " 0.621\n", + " 0.73\n", " \n", " \n", " \n", - " 0.794\n", - " 0.588\n", - " 0.676\n", + " 0.799\n", + " 0.723\n", + " 0.76\n", " \n", " \n", "\n", @@ -1920,11 +5019,11 @@ "text/plain": [ " tok_precision tok_recall tok_f1 tok_cee tok_acc \\\n", "proportion model \n", - "100.0 % ner_model 0.883 0.437 0.584 \n", + "100.0 % ner_model 0.887 0.621 0.73 \n", "\n", " coverage ent_precision ent_recall ent_f1 \n", "proportion model \n", - "100.0 % ner_model 0.794 0.588 0.676 " + "100.0 % ner_model 0.799 0.723 0.76 " ] }, "metadata": {}, @@ -1946,7 +5045,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████▉| 999/1000 [00:37<00:00, 26.69it/s]\n" + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████▉| 999/1000 [00:33<00:00, 29.84it/s]\n" ] }, { @@ -2043,7 +5142,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 42, "id": "a313589f-d854-41c8-8df6-bccc2b0bf2d4", "metadata": {}, "outputs": [], @@ -2054,17 +5153,17 @@ }, { 
"cell_type": "code", - "execution_count": 67, + "execution_count": 43, "id": "a655dd6c-92c2-4582-84ea-fb8af767856b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(83624, 4717)" + "(83624, 5617)" ] }, - "execution_count": 67, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -2075,7 +5174,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 44, "id": "1dff773c-af59-4896-87d2-0a65bff9a6cf", "metadata": {}, "outputs": [ @@ -2085,7 +5184,7 @@ "(1000, 475)" ] }, - "execution_count": 66, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -2096,7 +5195,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 45, "id": "7bad482c-d324-4052-b965-26d27b9e686e", "metadata": {}, "outputs": [ @@ -2106,7 +5205,7 @@ "(1000, 347)" ] }, - "execution_count": 64, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -2114,6 +5213,14 @@ "source": [ "len(gold_docs_test_eval), len(get_genes(gold_docs_test_eval))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63e43db1-f326-4372-91bd-f2e413e089ae", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/proteins.txt b/proteins.txt new file mode 100644 index 0000000..7f0cca4 --- /dev/null +++ b/proteins.txt @@ -0,0 +1,36 @@ +PD-L1 +PD-1 +RAS +CYP +CYP3A4 +MEK +CYP3A +Transaminase +CYP2D6 +NTRK +CYP450 +Cyclooxygenase +COX-2 +a-Reduktase +a-Fetoprotein +Phosphodiesterase +CYP1A2 +CYP2C9 +a-Glutamyltransferase +n-Dehydrogenase +Glukose-6-Phosphat-Dehydrogenase +Uridin-5’-Diphospho-Glucuronosyltransferase +Glutamat-Oxalacetat-Transaminase +F-MEK +CYP2C19 +CYP2B6 +CYP19 +m-Laktatdehydrogenase +CYP2C19A +α-Reduktase +t-Dehydrogenase +CYP17 +CYP2C8 +l-RAS +d-Dehydrogenase +Tyrosin-Kinase \ No newline at end of file From 8444686e05f15bacdfc6f8a1eaf089d9d49d16cc Mon Sep 17 00:00:00 2001 From: Florian Borchert Date: Fri, 20 Jan 2023 20:33:59 +0100 Subject: [PATCH 
2/2] Final resutls --- WeakSupervision.ipynb | 3790 ++++++----------------------------------- 1 file changed, 554 insertions(+), 3236 deletions(-) diff --git a/WeakSupervision.ipynb b/WeakSupervision.ipynb index 912fb3e..9efc808 100644 --- a/WeakSupervision.ipynb +++ b/WeakSupervision.ipynb @@ -82,7 +82,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -191,6 +191,14 @@ "# Labeling Functions" ] }, + { + "cell_type": "markdown", + "id": "2b8f5257-d74b-4e85-a0c1-f15879a2bcb3", + "metadata": {}, + "source": [ + "## Gazetteer-based LFs" + ] + }, { "cell_type": "markdown", "id": "0c52fdfc", @@ -206,15 +214,15 @@ "metadata": {}, "outputs": [], "source": [ - "df = pd.read_csv('data/molecular/nightly-GeneSummaries.tsv', sep='\\t')\n", - "CIVIC_genes = df['name'].tolist()\n", + "civic_genes_df = pd.read_csv('data/molecular/nightly-GeneSummaries.tsv', sep='\\t')\n", + "CIVIC_genes = civic_genes_df['name'].tolist()\n", "CIVIC_genes_lower = [c.lower() for c in CIVIC_genes]" ] }, { "cell_type": "code", "execution_count": 10, - "id": "34c3d555", + "id": "14ff4e2e-1545-4f11-96a5-e0a481e78017", "metadata": {}, "outputs": [ { @@ -232,8 +240,8 @@ } ], "source": [ - "df = pd.read_csv('data/molecular/nightly-VariantSummaries.tsv', sep='\\t', error_bad_lines=False )\n", - "CIVIC_variants = df['variant'].tolist()\n", + "civic_variant_df = pd.read_csv('data/molecular/nightly-VariantSummaries.tsv', sep='\\t', error_bad_lines=False )\n", + "CIVIC_variants = civic_variant_df['variant'].tolist()\n", "CIVIC_variants_lower = [c.lower() for c in CIVIC_variants]" ] }, @@ -252,22 +260,22 @@ "metadata": {}, "outputs": [], "source": [ - "def civic(doc):\n", + "def civic_fn(doc):\n", " for tok in doc:\n", " for cue in CIVIC_genes:\n", " if tok.text.find(cue) == -1:\n", " continue\n", " else:\n", " yield tok.i, tok.i+1, \"Gene or Protein\"\n", - "cue_civic = heuristics.FunctionAnnotator(\"cue_civic\", civic)" + "lf_civic = heuristics.FunctionAnnotator(\"CIViC\", civic_fn)" ] }, 
{ "cell_type": "markdown", - "id": "a3d4ba0a", + "id": "7d750a40-b60a-47a1-8988-69a3d2ed81d2", "metadata": {}, "source": [ - "The Online Mendelian Inheritance in Man (OMIM) database is the encyclopedic collection of the human medical branch of genetics." + "Get all synonyms in Entrez for CIViC genes, remove short ones and German stopwords" ] }, { @@ -277,7 +285,7 @@ "metadata": {}, "outputs": [], "source": [ - "entrez_df = pd.read_csv('Homo_sapiens.gene_info', sep='\\t')" + "entrez_df = pd.read_csv('data/Homo_sapiens.gene_info', sep='\\t')" ] }, { @@ -288,8 +296,7 @@ "outputs": [], "source": [ "symbols = set()\n", - "# Get all synonyms for CIViC genes, remove short ones and German stopwords\n", - "for _, r in entrez_df.set_index('GeneID').loc[df.entrez_id].iterrows():\n", + "for _, r in entrez_df.set_index('GeneID').loc[civic_variant_df.entrez_id].iterrows():\n", " symbols.add(r.Symbol)\n", " for s in r.Synonyms.split('|'):\n", " if not s in ['R1', 'R2', 'eN', 'HNPCC'] and len(s) > 1 and not s.lower() in stops:\n", @@ -306,7 +313,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████| 2028/2028 [00:00<00:00, 12322.11it/s]\n" + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2028/2028 [00:00<00:00, 12862.40it/s]\n" ] } ], @@ -332,7 +339,7 @@ "metadata": {}, "outputs": [], "source": [ - "def lf_entrez(doc):\n", + "def entrez_fn(doc):\n", " matches = entrez_matcher(doc)\n", " if matches:\n", " # Keep longest matches only\n", @@ -340,7 +347,15 @@ " spans = spacy.util.filter_spans(spans)\n", " for s in spans:\n", " yield s.start, s.end, 'Gene or Protein'\n", - "entrez = heuristics.FunctionAnnotator(\"entrez\", lf_entrez) " + "lf_entrez = heuristics.FunctionAnnotator(\"Entrez\", entrez_fn) " + ] + }, + { + "cell_type": "markdown", + "id": 
"54562797-295f-4920-863e-8d141a49ea80", + "metadata": {}, + "source": [ + "The Online Mendelian Inheritance in Man (OMIM) database is the encyclopedic collection of the human medical branch of genetics. \"omim\" is based on the OMIM database and checks whether tokens are present in its list of 16,767 approved gene symbols in lowercase as the diversity of genes often shows in volatile capitalization. To increase precision, genes with a length shorter than three characters are matched only correctly cased." ] }, { @@ -372,27 +387,78 @@ "print(len(omim_list))" ] }, + { + "cell_type": "code", + "execution_count": 17, + "id": "dab88a11-60d2-4075-9ae0-4f22d1e5874d", + "metadata": {}, + "outputs": [], + "source": [ + "def omim_fn(doc):\n", + " for tok in doc:\n", + " if tok.text.lower() in omim_list_lower and tok.text.lower() not in stops and len(tok.text.lower())>=3:\n", + " yield tok.i, tok.i+1, \"Gene or Protein\"\n", + "lf_omim = heuristics.FunctionAnnotator(\"OMIM\", omim_fn) " + ] + }, { "cell_type": "markdown", - "id": "35a2989a", + "id": "02dd22df-8fc6-44eb-9915-4e80ee9ef7d9", "metadata": {}, "source": [ - "\"omim\" is based on the OMIM database and checks whether tokens are present in its list of 16,767 approved gene symbols in lowercase as the diversity of genes often shows in volatile capitalization. To increase precision, genes with a length shorter than three characters are matched only correctly cased." + "The Catalogue of Somatic Mutations in Cancer (COSMIC) database harbors somatic cell mutations and additional information associated with cancer in humans." 
] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, + "id": "2890317a-11f0-4cd2-bc22-5f583ca95c7e", + "metadata": {}, + "outputs": [], + "source": [ + "cosmic_census = pd.read_csv(\"data/molecular/cancer_gene_census.csv\")\n", + "cosmic_census = cosmic_census['Gene Symbol'].tolist()\n", + "cosmic_census_lower = [c.lower() for c in cosmic_census]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "11655485-9d45-443a-a736-3e9301e6be21", + "metadata": {}, + "outputs": [], + "source": [ + "def cosmic_fn(doc):\n", + " for tok in doc:\n", + " for cue in cosmic_census:\n", + " if tok.text.find(cue) == -1:\n", + " continue\n", + " else:\n", + " yield tok.i, tok.i+1, \"Gene or Protein\"\n", + "lf_cosmic = heuristics.FunctionAnnotator(\"COSMIC\", cosmic_fn) " + ] + }, + { + "cell_type": "markdown", + "id": "309be9e6-7453-4e00-a897-ef021f7c8bce", + "metadata": {}, + "source": [ + "Gazetteer based on common Protein names, sourced from Wikipedia and refined using the training part of the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 20, "id": "ee7c77c8-907d-4078-aea1-b0194e20e8cf", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'protein_gazetteer': [PD-L1]}" + "{'Proteins': [PD-L1]}" ] }, - "execution_count": 17, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -406,17 +472,63 @@ "for term in terms:\n", " trie.add([t.text for t in nlp(term)])\n", "\n", - "protein_gazetteer = GazetteerAnnotator('protein_gazetteer', tries = {'Gene or Protein' : trie })\n", + "lf_protein_gazetteer = GazetteerAnnotator('Proteins', tries = {'Gene or Protein' : trie })\n", "\n", "doc = nlp(\"PD-L1\")\n", - "protein_gazetteer(doc)\n", + "lf_protein_gazetteer(doc)\n", "doc.spans" ] }, + { + "cell_type": "markdown", + "id": "64987ef9-e82d-43c2-86b6-2d4fef3b05f0", + "metadata": {}, + "source": [ + "## Rule-based LFs" + ] + }, + { + "cell_type": "markdown", + "id": "68e52e0a", + "metadata": {}, + 
"source": [ + "The \"HGNC\" labeling function is based on the gene naming conventions of the Human Genome Organization (HUGO) Gene Nomenclature Committee (HGNC) and uses regular expressions to recognize tokens that follow them. The expressions cover various combinations of letters and digits, plus fixed prefixes for short symbols (such as CK, PD- and PSA/PSMA) so that short gene names are not missed. In addition, the CIViC variant list is included to improve recall." + ] + }, { "cell_type": "code", - "execution_count": 18, - "id": "53b47dda-c62d-4d51-bbb3-ead523313686", + "execution_count": 21, + "id": "653ed53d", + "metadata": {}, + "outputs": [], + "source": [ + "def hgnc_fn(doc):\n", + " for tok in doc:\n", + " if re.search(r\"[a-zA-Z]{4}\\d{2}\", tok.text) or re.search(r\"[a-zA-Z]{5}\\d{1}\", tok.text)\\\n", + " or re.search(r\"[a-zA-Z]{4}\\d{1}\", tok.text) or re.search(r\"[A-Z]{5}\\d{1}\", tok.text)\\\n", + " or re.search(r\"[A-Z]{5}\\d{2}\", tok.text) or re.search(r\"[A-Z]{3}\\d{2}\", tok.text)\\\n", + " or re.search(r\"[a-zA-Z]{2}\\d{3}[a-zA-Z]{2}\", tok.text) or re.search(r\"[a-zA-Z]{1}\\d{3}[a-zA-Z]{1}\", tok.text)\\\n", + " or re.search(r\"[A-Z]{3}\\d{2}\", tok.text) or re.search(r\"[A-Z]{6}\\d{1}\", tok.text)\\\n", + " or re.search(r\"[A-Z]{3}\\d{3}\", tok.text) or re.search(r\"[p]\\d{2}\", tok.text)\\\n", + " or re.search(r\"CYP[a-zA-Z0-9]{3}\", tok.text) or re.search(r\"CYP[a-zA-Z0-9]{2}\", tok.text)\\\n", + " or re.search(r\"[A-Z]{3}\\d{1}\", tok.text) or re.search(r\"[A-Z]{2}\\d{2}\", tok.text)\\\n", + " or re.search(r\"^CK.\", tok.text) or re.search(r\"^PD-..\", tok.text) or re.search(r\"^PS[MA|A]\", tok.text) or tok.text.lower in CIVIC_variants_lower:\n", + " yield tok.i, tok.i+1, \"Gene or Protein\"\n", + "lf_hgnc = heuristics.FunctionAnnotator(\"HGNC\", hgnc_fn)" + ] + }, + { + "cell_type": "markdown", + "id": "21803256-76bf-496e-87e5-7205333d3763", + "metadata": {}, + "source": [ + "Rule-based matcher based on protein families" + ] + }, + { + "cell_type": "code", + 
"execution_count": 22, + "id": "6a982767-0c6f-4ff7-839d-d52d72d1b5be", "metadata": {}, "outputs": [], "source": [ @@ -431,7 +543,7 @@ " patterns.append(p)\n", "protein_matcher.add('protein', patterns[-1::-1])\n", "\n", - "def lf_protein_families(doc):\n", + "def protein_families_fn(doc):\n", " matches = protein_matcher(doc)\n", " if matches:\n", " # Keep longest matches only\n", @@ -440,13 +552,13 @@ " for s in spans:\n", " yield s.start, s.end, 'Gene or Protein'\n", "\n", - "protein_families = heuristics.FunctionAnnotator(\"protein_families\", lf_protein_families) " + "lf_protein_families = heuristics.FunctionAnnotator(\"Protein Families\", protein_families_fn) " ] }, { "cell_type": "code", - "execution_count": 19, - "id": "3bc24870-761e-4b31-8792-49ff70dedeab", + "execution_count": 23, + "id": "59a961e6-ea23-416e-9549-c444a59a438e", "metadata": {}, "outputs": [ { @@ -455,102 +567,13 @@ "[(0, 1, 'Gene or Protein'), (1, 4, 'Gene or Protein')]" ] }, - "execution_count": 19, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "list(lf_protein_families(nlp(\"RAS k-RAS krass\")))" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "e6813e7a-8d36-4da2-9749-9c9b6ffc6d0f", - "metadata": {}, - "outputs": [], - "source": [ - "def omim(doc):\n", - " for tok in doc:\n", - " if tok.text.lower() in omim_list_lower and tok.text.lower() not in stops and len(tok.text.lower())>=3:\n", - " yield tok.i, tok.i+1, \"Gene or Protein\"\n", - "omim = heuristics.FunctionAnnotator(\"omim\", omim) " - ] - }, - { - "cell_type": "markdown", - "id": "4769cf3d", - "metadata": {}, - "source": [ - "The Catalogue of Somatic Mutations in Cancer (COSMIC) database harbors somatic cell mutations and additional information associated with cancer in humans." 
- ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "faecaa97", - "metadata": {}, - "outputs": [], - "source": [ - "cosmic_census = pd.read_csv(\"data/molecular/cancer_gene_census.csv\")\n", - "cosmic_census = cosmic_census['Gene Symbol'].tolist()\n", - "cosmic_census_lower = [c.lower() for c in cosmic_census]" - ] - }, - { - "cell_type": "markdown", - "id": "4f3983b3", - "metadata": {}, - "source": [ - "\"cue_cosmic_census\" is based on the COSMIC database. If a token contains a gene symbol which is listed here, this token and its successor are annotated as a gene." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "60fe7a41", - "metadata": {}, - "outputs": [], - "source": [ - "def cosmic(doc):\n", - " for tok in doc:\n", - " for cue in cosmic_census:\n", - " if tok.text.find(cue) == -1:\n", - " continue\n", - " else:\n", - " yield tok.i, tok.i+1, \"Gene or Protein\"\n", - "cue_cosmic_census = heuristics.FunctionAnnotator(\"cue_cosmic_census\", cosmic) " - ] - }, - { - "cell_type": "markdown", - "id": "68e52e0a", - "metadata": {}, - "source": [ - "\"construct\" is based on the Human Genome Organization (HUGO) Gene Nomenclature Committee (HGNC) naming conventions for genes and leverages regular expressions to let the annotator abide by them. Those expressions comprise various combinations of letters and numbers and certain fixed terms for shorter terms to avoid underfitting. In addition, the CIViC database for variants has also been included for a better recall." 
- ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "653ed53d", - "metadata": {}, - "outputs": [], - "source": [ - "def structure(doc):\n", - " for tok in doc:\n", - " if re.search(r\"[a-zA-Z]{4}\\d{2}\", tok.text) or re.search(r\"[a-zA-Z]{5}\\d{1}\", tok.text)\\\n", - " or re.search(r\"[a-zA-Z]{4}\\d{1}\", tok.text) or re.search(r\"[A-Z]{5}\\d{1}\", tok.text)\\\n", - " or re.search(r\"[A-Z]{5}\\d{2}\", tok.text) or re.search(r\"[A-Z]{3}\\d{2}\", tok.text)\\\n", - " or re.search(r\"[a-zA-Z]{2}\\d{3}[a-zA-Z]{2}\", tok.text) or re.search(r\"[a-zA-Z]{1}\\d{3}[a-zA-Z]{1}\", tok.text)\\\n", - " or re.search(r\"[A-Z]{3}\\d{2}\", tok.text) or re.search(r\"[A-Z]{6}\\d{1}\", tok.text)\\\n", - " or re.search(r\"[A-Z]{3}\\d{3}\", tok.text) or re.search(r\"[p]\\d{2}\", tok.text)\\\n", - " or re.search(r\"CYP[a-zA-Z0-9]{3}\", tok.text) or re.search(r\"CYP[a-zA-Z0-9]{2}\", tok.text)\\\n", - " or re.search(r\"[A-Z]{3}\\d{1}\", tok.text) or re.search(r\"[A-Z]{2}\\d{2}\", tok.text)\\\n", - " or re.search(r\"^CK.\", tok.text) or re.search(r\"^PD-..\", tok.text) or re.search(r\"^PS[MA|A]\", tok.text) or tok.text.lower in CIVIC_variants_lower:\n", - " yield tok.i, tok.i+1, \"Gene or Protein\"\n", - "construct = heuristics.FunctionAnnotator(\"construct\", structure)" + "list(protein_families_fn(nlp(\"RAS k-RAS krass\")))" ] }, { @@ -595,12 +618,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 85996/85996 [20:17<00:00, 70.62it/s]\n" + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85996/85996 [20:11<00:00, 70.96it/s]\n" ] } ], "source": [ - "lfs = [construct, cue_civic, omim, cue_cosmic_census, entrez, protein_gazetteer, protein_families]\n", + "lfs = [lf_civic, lf_entrez, lf_omim, lf_cosmic, lf_protein_gazetteer, lf_hgnc, lf_protein_families]\n", "\n", 
"#For Quick Run with Random Sentences!\n", "#random_files = files_df.sample(n = 10000)\n", @@ -678,7 +701,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 30, "id": "6dbc1515-9a22-4c9a-8437-7c85bda222dd", "metadata": {}, "outputs": [ @@ -703,51 +726,51 @@ " \n", " \n", " \n", - " cue_cosmic_census\n", - " cue_civic\n", - " construct\n", - " omim\n", - " entrez\n", - " protein_families\n", - " protein_gazetteer\n", + " CIViC\n", + " Entrez\n", + " OMIM\n", + " COSMIC\n", + " Proteins\n", + " HGNC\n", + " Protein Families\n", " \n", " \n", " \n", " \n", " Coverage\n", - " 0.223103\n", " 0.209893\n", - " 0.365478\n", - " 0.344048\n", " 0.405695\n", - " 0.017907\n", + " 0.344048\n", + " 0.223103\n", " 0.136651\n", + " 0.365478\n", + " 0.017907\n", " \n", " \n", " Overlaps\n", - " 0.929605\n", " 0.980420\n", - " 0.381526\n", - " 0.499573\n", " 0.685962\n", - " 0.368852\n", + " 0.499573\n", + " 0.929605\n", " 0.699248\n", + " 0.381526\n", + " 0.368852\n", " \n", " \n", "\n", "" ], "text/plain": [ - " cue_cosmic_census cue_civic construct omim entrez \\\n", - "Coverage 0.223103 0.209893 0.365478 0.344048 0.405695 \n", - "Overlaps 0.929605 0.980420 0.381526 0.499573 0.685962 \n", + " CIViC Entrez OMIM COSMIC Proteins HGNC \\\n", + "Coverage 0.209893 0.405695 0.344048 0.223103 0.136651 0.365478 \n", + "Overlaps 0.980420 0.685962 0.499573 0.929605 0.699248 0.381526 \n", "\n", - " protein_families protein_gazetteer \n", - "Coverage 0.017907 0.136651 \n", - "Overlaps 0.368852 0.699248 " + " Protein Families \n", + "Coverage 0.017907 \n", + "Overlaps 0.368852 " ] }, - "execution_count": 28, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -758,7 +781,7 @@ "lfa = LFAnalysis(docs, ['Gene or Protein'])\n", "cov = lfa.lf_coverages().rename(index={'Gene or Protein' : 'Coverage'})\n", "overlap = lfa.lf_overlaps().rename(index={'Gene or Protein' : 'Overlaps'})\n", - "pd.concat([cov, overlap])" + "pd.concat([cov, 
overlap])[[lf.name for lf in lfs]]" ] }, { @@ -771,7 +794,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 31, "id": "cf2dd2ea-50b3-4a17-8905-30545b57a78b", "metadata": {}, "outputs": [ @@ -862,7 +885,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 32, "id": "00c04eb3-5190-4b49-80a1-457e98e1fa70", "metadata": {}, "outputs": [], @@ -882,7 +905,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 33, "id": "d07ab611-b36a-4eb4-beb4-0002fda81327", "metadata": {}, "outputs": [], @@ -901,7 +924,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 34, "id": "19236a84-2a05-4d61-aefd-789e804bd7da", "metadata": {}, "outputs": [ @@ -911,7 +934,7 @@ "(83624, 35501, 4624)" ] }, - "execution_count": 32, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -932,7 +955,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 35, "id": "595183ba-63b3-4ca7-8870-2647745676d4", "metadata": {}, "outputs": [], @@ -950,7 +973,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 36, "id": "97cce16c-4d79-4dc8-83dd-c54f60795f67", "metadata": {}, "outputs": [], @@ -966,7 +989,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 37, "id": "6c5ac964-a203-4c0e-a291-9ab837c7a18f", "metadata": {}, "outputs": [ @@ -974,7 +997,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:15<00:00, 64.68it/s]\n" + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:15<00:00, 64.38it/s]\n" ] } ], @@ -984,2683 +1007,8 @@ }, { "cell_type": "code", - "execution_count": 36, - "id": "eb6e8ab3-ae42-4546-ac7b-f61df37784e5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": 
[ - "
Zu den entscheidenden Sekundärneoplasien zählen hämatologische Neoplasien wie die akute myeloische Leukämie (AML), die myelodysplastischen Syndrome (\n", - "\n", - " MDS\n", - " Gene or Protein\n", - "\n", - "), das Non-Hodgkin Lymphom (NHL) und solide Tumore wie das Bronchialkarzinom, das Mammakarzinom und das Kolonkarzinom [REF] [REF] [REF] [REF] [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Zu den entscheidenden Sekundärneoplasien zählen hämatologische Neoplasien wie die akute myeloische Leukämie (AML), die myelodysplastischen Syndrome (MDS), das Non-Hodgkin Lymphom (NHL) und solide Tumore wie das Bronchialkarzinom, das Mammakarzinom und das Kolonkarzinom [REF] [REF] [REF] [REF] [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
In der Metaanalyse von Glas et al wurde anhand der Daten von 1.160 Probanden eine Sensitivität für den BTA Stat von 70% (95% CI: 66–74%) und eine Spezifität von 75% (95% CI: 64–84%) ermittelt, für den BTA \n", - "\n", - " TRAK\n", - " Gene or Protein\n", - "\n", - " von 66% (95% CI: 62–71%) bzw. 65% (95% CI: 45–81%) [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
In der Metaanalyse von Glas et al wurde anhand der Daten von 1.160 Probanden eine Sensitivität für den BTA Stat von 70% (95% CI: 66–74%) und eine Spezifität von 75% (95% CI: 64–84%) ermittelt, für den BTA TRAK von 66% (95% CI: 62–71%) bzw. 65% (95% CI: 45–81%) [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Das für die \n", - "\n", - " FAP\n", - " Gene or Protein\n", - "\n", - " zugelassene Präparat mit dem Wirkstoff Celecoxib wurde im April 2011 vom Hersteller aufgrund mangelnder Rekrutierung einer von der europäischen Arzneimittelbehörde (EMA) geforderten Post-Zulassungsstudie vom Markt genommen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Das für die FAP zugelassene Präparat mit dem Wirkstoff Celecoxib wurde im April 2011 vom Hersteller aufgrund mangelnder Rekrutierung einer von der europäischen Arzneimittelbehörde (EMA) geforderten Post-Zulassungsstudie vom Markt genommen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Ovarialtumoren bei \n", - "\n", - " PJS\n", - " Gene or Protein\n", - "\n", - " sind in der Regel SCTAT und nicht-epithelialen Ursprunges und werden zum Teil bereits auch bei kleinen Mädchen diagnostiziert (mittleres Alter 28 Jahre, 4-57 Jahre).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Ovarialtumoren bei PJS sind in der Regel SCTAT und nicht-epithelialen Ursprunges und werden zum Teil bereits auch bei kleinen Mädchen diagnostiziert (mittleres Alter 28 Jahre, 4-57 Jahre).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Bemerkenswert ist, dass Patientinnen mit Keimbahnmutationen des \n", - "\n", - " BRCA1\n", - " Gene or Protein\n", - "\n", - "-Gens gehäuft Karzinome mit medullären Eigenschaften aufweisen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Bemerkenswert ist, dass Patientinnen mit Keimbahnmutationen des \n", - "\n", - " BRCA1-Gens\n", - " Gene or Protein\n", - "\n", - " gehäuft Karzinome mit medullären Eigenschaften aufweisen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Nach den Empfehlungen der S3/\n", - "\n", - " NVL\n", - " Gene or Protein\n", - "\n", - " Unipolare Depression ist erst am Ende dieser Erhaltungstherapiephase eine schrittweise Dosisreduktion sinnvoll.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Nach den Empfehlungen der S3/NVL Unipolare Depression ist erst am Ende dieser Erhaltungstherapiephase eine schrittweise Dosisreduktion sinnvoll.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Der Nachweis einer \n", - "\n", - " HER2\n", - " Gene or Protein\n", - "\n", - "-neu Amplifikation/Überexpression hat zum gegenwärtigen Zeitpunkt keinen gesicherten Stellenwert in der Wahl der Erstlinientherapie [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Der Nachweis einer \n", - "\n", - " HER2-neu\n", - " Gene or Protein\n", - "\n", - " Amplifikation/Überexpression hat zum gegenwärtigen Zeitpunkt keinen gesicherten Stellenwert in der Wahl der Erstlinientherapie [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
In einer großen prospektiven Studie wurde gezeigt, dass von den vier Komponenten des UroVision-Testes der prädiktive Wert des Verlustes von \n", - "\n", - " 9p21\n", - " Gene or Protein\n", - "\n", - " am geringsten ist [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
In einer großen prospektiven Studie wurde gezeigt, dass von den vier Komponenten des UroVision-Testes der prädiktive Wert des Verlustes von 9p21 am geringsten ist [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Der selektive COX2-Hemmer Celecoxib, der zu einer Reduktion rektaler Adenome führt [REF], wurde zur Chemoprävention bei \n", - "\n", - " FAP\n", - " Gene or Protein\n", - "\n", - " als Ergänzung zu chirurgischen Maßnahmen und weiteren endoskopischen Kontrollen zugelassen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Der selektive \n", - "\n", - " COX2\n", - " Gene or Protein\n", - "\n", - "-Hemmer Celecoxib, der zu einer Reduktion rektaler Adenome führt [REF], wurde zur Chemoprävention bei FAP als Ergänzung zu chirurgischen Maßnahmen und weiteren endoskopischen Kontrollen zugelassen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Geprüft wurde v.a. die Frage nach dem Effekt hinsichtlich des Endpunktes/der Häufigkeit einer Erkrankung (i.e. kumulative Inzidenz einer \n", - "\n", - " CIN3\n", - " Gene or Protein\n", - "\n", - " oder eines invasiven Zervixkarzinoms).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Geprüft wurde v.a. die Frage nach dem Effekt hinsichtlich des Endpunktes/der Häufigkeit einer Erkrankung (i.e. kumulative Inzidenz einer CIN3 oder eines invasiven Zervixkarzinoms).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
In der NASABP-Studie C-08 wurde das modifizierte \n", - "\n", - " FOLFOX6\n", - " Gene or Protein\n", - "\n", - "-Schema (12 Zyklen alle 2 Wochen) mit \n", - "\n", - " FOLFOX6\n", - " Gene or Protein\n", - "\n", - " + Bevacizumab verglichen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
In der NASABP-Studie C-08 wurde das modifizierte FOLFOX6-Schema (12 Zyklen alle 2 Wochen) mit FOLFOX6 + Bevacizumab verglichen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
In der NASABP-Studie C-08 wurde das modifizierte \n", - "\n", - " FOLFOX6\n", - " Gene or Protein\n", - "\n", - "-Schema (12 Zyklen alle 2 Wochen) mit \n", - "\n", - " FOLFOX6\n", - " Gene or Protein\n", - "\n", - " + Bevacizumab verglichen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
In der NASABP-Studie C-08 wurde das modifizierte FOLFOX6-Schema (12 Zyklen alle 2 Wochen) mit FOLFOX6 + Bevacizumab verglichen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
In der dreiarmigen \n", - "\n", - " CONTRALTO\n", - " Gene or Protein\n", - "\n", - " Studie (randomisierte Phase-II) wurden Patienten mit rezidiviertem follikulärem Lymphom entweder mit einer Standardtherapie mit Rituximab und Bendamustin, mit Rituximab und Venetoclax oder mit der Kombination aus allen drei Komponenten behandelt.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
In der dreiarmigen CONTRALTO Studie (randomisierte Phase-II) wurden Patienten mit rezidiviertem follikulärem Lymphom entweder mit einer Standardtherapie mit Rituximab und Bendamustin, mit Rituximab und Venetoclax oder mit der Kombination aus allen drei Komponenten behandelt.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Vor dem Hintergrund einer primären Imatinib-Resistenz bei Patienten mit primären \n", - "\n", - " KIT\n", - " Gene or Protein\n", - "\n", - "-Exon 17-Mutationen sowie bestimmten Mutationen des \n", - "\n", - " PDGFRA\n", - " Gene or Protein\n", - "\n", - "-Gens soll vor Einleitung einer Therapie das Ergebnis der Bestimmung des Genotyps vorliegen, um eine ineffiziente Behandlung zu vermeiden [REF], [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Vor dem Hintergrund einer primären Imatinib-Resistenz bei Patienten mit primären KIT-Exon 17-Mutationen sowie bestimmten Mutationen des \n", - "\n", - " PDGFRA\n", - " Gene or Protein\n", - "\n", - "-Gens soll vor Einleitung einer Therapie das Ergebnis der Bestimmung des Genotyps vorliegen, um eine ineffiziente Behandlung zu vermeiden [REF], [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
In Deutschland ist das Mammographie-Screening für Frauen ab dem Alter von 50 Jahren bis zum Ende des 70. Lebensjahres Bestandteil der Richtlinie des gemeinsamen Bundesausschusses über die Früherkennung von Krebserkrankungen (Quelle: Richtlinie des Gemeinsamen Bundesausschusses über die Früherkennung von Krebserkrankungen (Krebsfrüherkennungs-Richtlinie / KFE-RL) in der Fassung vom 18. Juni 2009 veröf-fentlicht im Bundesanzeiger 2009, Nr. 148a in Kraft getreten am 3. Oktober 2009 zu-letzt geändert am 21. April 2016, veröffentlicht im Bundesanzeiger \n", - "\n", - " AT\n", - " Gene or Protein\n", - "\n", - " 08.07.2016 B2, in Kraft getreten am 1. Januar 2017 [URL]), da für dieses Kollektiv durch regelmäßige Teilnahme am Mammographie-Screening eine Reduktion der Mortalität gegenüber Nicht-Teilnehmerinnen zu erwarten ist.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
In Deutschland ist das Mammographie-Screening für Frauen ab dem Alter von 50 Jahren bis zum Ende des 70. Lebensjahres Bestandteil der Richtlinie des gemeinsamen Bundesausschusses über die Früherkennung von Krebserkrankungen (Quelle: Richtlinie des Gemeinsamen Bundesausschusses über die Früherkennung von Krebserkrankungen (Krebsfrüherkennungs-Richtlinie / KFE-RL) in der Fassung vom 18. Juni 2009 veröf-fentlicht im Bundesanzeiger 2009, Nr. 148a in Kraft getreten am 3. Oktober 2009 zu-letzt geändert am 21. April 2016, veröffentlicht im Bundesanzeiger AT 08.07.2016 B2, in Kraft getreten am 1. Januar 2017 [URL]), da für dieses Kollektiv durch regelmäßige Teilnahme am Mammographie-Screening eine Reduktion der Mortalität gegenüber Nicht-Teilnehmerinnen zu erwarten ist.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Wesentliche, meist früh auftretende molekulare Veränderungen erfassen die Gene \n", - "\n", - " PTEN\n", - " Gene or Protein\n", - "\n", - ", \n", - "\n", - " K-\n", - " Gene or Protein\n", - "\n", - "RAS und ß-catenin sowie das Mismatch-Reparatur-System [REF], während \n", - "\n", - " TP53\n", - " Gene or Protein\n", - "\n", - "-Mutationen erst im Zuge der Karzinomprogression vorkommen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Wesentliche, meist früh auftretende molekulare Veränderungen erfassen die Gene \n", - "\n", - " PTEN\n", - " Gene or Protein\n", - "\n", - ", \n", - "\n", - " K-RAS\n", - " Gene or Protein\n", - "\n", - " und ß-catenin sowie das Mismatch-Reparatur-System [REF], während \n", - "\n", - " TP53\n", - " Gene or Protein\n", - "\n", - "-Mutationen erst im Zuge der Karzinomprogression vorkommen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Im Rahmen einer gemeinsamen Auswertung der beiden Studien (\n", - "\n", - " META\n", - " Gene or Protein\n", - "\n", - "-GIST-Analyse) zeigte sich ein medianes progressionsfreies Überleben von 1,6 bis 2,0 Jahren (p= 0,04) sowie ein medianes Überleben in beiden Dosisarmen von 4 Jahren.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Im Rahmen einer gemeinsamen Auswertung der beiden Studien (META-GIST-Analyse) zeigte sich ein medianes progressionsfreies Überleben von 1,6 bis 2,0 Jahren (p= 0,04) sowie ein medianes Überleben in beiden Dosisarmen von 4 Jahren.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
In einer randomisierten Multicenterstudie wurde der Nutzen des gegen den \n", - "\n", - " EGF\n", - " Gene or Protein\n", - "\n", - " Rezeptor gerichteten monoklonalen Antikörpers Cetuximab in Kombination mit einer radikalen Strahlentherapie bei fortgeschrittenen Kopf-Hals-Karzinomen, jedoch ohne Einschluss von Mundhöhlenkarzinomen, untersucht.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
In einer randomisierten Multicenterstudie wurde der Nutzen des gegen den \n", - "\n", - " EGF Rezeptor\n", - " Gene or Protein\n", - "\n", - " gerichteten monoklonalen Antikörpers Cetuximab in Kombination mit einer radikalen Strahlentherapie bei fortgeschrittenen Kopf-Hals-Karzinomen, jedoch ohne Einschluss von Mundhöhlenkarzinomen, untersucht.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Der Urinary Bladder Cancer Antigen (Rapid) Test (IDL Biotech, Borlange, Schweden) weist als immunchemischer Assay Fragmente der Zytokeratine 8 und 18 nach, quantitativ als kolorimetrischer Sandwich-Assay (cut-off 12 μg/l) oder qualitativ als Schnelltest mit Antikörper-Komplexbildung (\n", - "\n", - " UBC\n", - " Gene or Protein\n", - "\n", - " Rapid®).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Der Urinary Bladder Cancer Antigen (Rapid) Test (IDL Biotech, Borlange, Schweden) weist als immunchemischer Assay Fragmente der \n", - "\n", - " Zytokeratine 8\n", - " Gene or Protein\n", - "\n", - " und \n", - "\n", - " 18\n", - " Gene or Protein\n", - "\n", - " nach, quantitativ als kolorimetrischer Sandwich-Assay (cut-off 12 μg/l) oder qualitativ als Schnelltest mit Antikörper-Komplexbildung (UBC Rapid®).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Beispiele für molekulare Veränderungen, die therapeutisch genutzt werden können, sind neben \n", - "\n", - " FGFR2\n", - " Gene or Protein\n", - "\n", - " insbesondere die Untersuchung auf Mikrosatelliteninstabilität, NTRK-Fusionsgene, Amplifikationen von HER2, die \n", - "\n", - " BRAF\n", - " Gene or Protein\n", - "\n", - " V600E Mutation oder Mutationen im \n", - "\n", - " IDH1\n", - " Gene or Protein\n", - "\n", - "-Gen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Beispiele für molekulare Veränderungen, die therapeutisch genutzt werden können, sind neben \n", - "\n", - " FGFR2\n", - " Gene or Protein\n", - "\n", - " insbesondere die Untersuchung auf Mikrosatelliteninstabilität, \n", - "\n", - " NTRK-Fusionsgene\n", - " Gene or Protein\n", - "\n", - ", Amplifikationen von \n", - "\n", - " HER2\n", - " Gene or Protein\n", - "\n", - ", die \n", - "\n", - " BRAF\n", - " Gene or Protein\n", - "\n", - " V600E Mutation oder Mutationen im \n", - "\n", - " IDH1-Gen\n", - " Gene or Protein\n", - "\n", - ".
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
BB/diff, LDH, \n", - "\n", - " BSG\n", - " Gene or Protein\n", - "\n", - ", 17-OH-Progesteron, Testosteron, DHEA-S, Androstendion.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
BB/diff, LDH, BSG, 17-OH-Progesteron, Testosteron, DHEA-S, Androstendion.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
- Lokale Entzündung (gemessen am Schnitt als intratumorale chronische Zelldichte (\n", - "\n", - " CIC\n", - " Gene or Protein\n", - "\n", - "), Lymphozyten, Plasmazellen und Makrophagen) und systemischeEntzündung (gemessen im Blut als Neutrophilen-zu-Lymphozyten Verhältnis (NLR)) auf die Prognose bestimmt.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
- Lokale Entzündung (gemessen am Schnitt als intratumorale chronische Zelldichte (CIC), Lymphozyten, Plasmazellen und Makrophagen) und systemischeEntzündung (gemessen im Blut als Neutrophilen-zu-Lymphozyten Verhältnis (NLR)) auf die Prognose bestimmt.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Beim Vergleich der \n", - "\n", - " BRAF\n", - " Gene or Protein\n", - "\n", - " V600 Mutation mit den deutlich selteneren \n", - "\n", - " BRAF\n", - " Gene or Protein\n", - "\n", - " Mutationen in Kodons 594 und 596 fällt auf, dass die \n", - "\n", - " BRAF\n", - " Gene or Protein\n", - "\n", - " V600 Mutationen häufiger in rechtsseitigen und muzinösen Primärtumoren mit peritonealer Metastasierung gefunden werden.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Beim Vergleich der \n", - "\n", - " BRAF V600 Mutation\n", - " Gene or Protein\n", - "\n", - " mit den deutlich selteneren \n", - "\n", - " BRAF\n", - " Gene or Protein\n", - "\n", - " Mutationen in Kodons 594 und 596 fällt auf, dass die \n", - "\n", - " BRAF V600 Mutationen\n", - " Gene or Protein\n", - "\n", - " häufiger in rechtsseitigen und muzinösen Primärtumoren mit peritonealer Metastasierung gefunden werden.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Beim Vergleich der \n", - "\n", - " BRAF\n", - " Gene or Protein\n", - "\n", - " V600 Mutation mit den deutlich selteneren \n", - "\n", - " BRAF\n", - " Gene or Protein\n", - "\n", - " Mutationen in Kodons 594 und 596 fällt auf, dass die \n", - "\n", - " BRAF\n", - " Gene or Protein\n", - "\n", - " V600 Mutationen häufiger in rechtsseitigen und muzinösen Primärtumoren mit peritonealer Metastasierung gefunden werden.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Beim Vergleich der \n", - "\n", - " BRAF V600 Mutation\n", - " Gene or Protein\n", - "\n", - " mit den deutlich selteneren \n", - "\n", - " BRAF\n", - " Gene or Protein\n", - "\n", - " Mutationen in Kodons 594 und 596 fällt auf, dass die \n", - "\n", - " BRAF V600 Mutationen\n", - " Gene or Protein\n", - "\n", - " häufiger in rechtsseitigen und muzinösen Primärtumoren mit peritonealer Metastasierung gefunden werden.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Traditionelle serratierte Adenome sind im Gegensatz zu den \n", - "\n", - " SSA\n", - " Gene or Protein\n", - "\n", - " polypoid in das Darmlumen vorragende Läsionen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Traditionelle serratierte Adenome sind im Gegensatz zu den SSA polypoid in das Darmlumen vorragende Läsionen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Dieses Verfahren detektiert mit fluoreszenz-markierten Antikörpern die häufig auf malignen Urothelzellen vorkommenden Oberflächenantigene CEA und zwei Mucine (\n", - "\n", - " MO344\n", - " Gene or Protein\n", - "\n", - ", \n", - "\n", - " LDQ10\n", - " Gene or Protein\n", - "\n", - ").
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Dieses Verfahren detektiert mit fluoreszenz-markierten Antikörpern die häufig auf malignen Urothelzellen vorkommenden Oberflächenantigene \n", - "\n", - " CEA\n", - " Gene or Protein\n", - "\n", - " und zwei Mucine (MO344, LDQ10).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Dieses Verfahren detektiert mit fluoreszenz-markierten Antikörpern die häufig auf malignen Urothelzellen vorkommenden Oberflächenantigene CEA und zwei Mucine (\n", - "\n", - " MO344\n", - " Gene or Protein\n", - "\n", - ", \n", - "\n", - " LDQ10\n", - " Gene or Protein\n", - "\n", - ").
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Dieses Verfahren detektiert mit fluoreszenz-markierten Antikörpern die häufig auf malignen Urothelzellen vorkommenden Oberflächenantigene \n", - "\n", - " CEA\n", - " Gene or Protein\n", - "\n", - " und zwei Mucine (MO344, LDQ10).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Aktuell wurde als Vereinfachung des FLIPI-2 der PRIMA-\n", - "\n", - " PI\n", - " Gene or Protein\n", - "\n", - " vorgestellt [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Aktuell wurde als Vereinfachung des FLIPI-2 der PRIMA-PI vorgestellt [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Obwohl zahlenmäßig der geringste, weist der Polymerase  mutierte Subtyp (\n", - "\n", - " POLE\n", - " Gene or Protein\n", - "\n", - "-Mutation) eine sehr günstige Prognose auf [REF], gefolgt vom mikrosatelliteninstabilen hypermutierten Subtyp, der auch bei sporadischen Endometriumkarzinomen vorkommt [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Obwohl zahlenmäßig der geringste, weist der Polymerase  mutierte Subtyp (POLE-Mutation) eine sehr günstige Prognose auf [REF], gefolgt vom mikrosatelliteninstabilen hypermutierten Subtyp, der auch bei sporadischen Endometriumkarzinomen vorkommt [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Freies \n", - "\n", - " SN38\n", - " Gene or Protein\n", - "\n", - " ist enterotoxisch und gilt als Ursache der Irinotecan induzierten Diarrhoe.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Freies SN38 ist enterotoxisch und gilt als Ursache der Irinotecan induzierten Diarrhoe.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Patienten, bei denen sich keine Mutationen im \n", - "\n", - " KIT\n", - " Gene or Protein\n", - "\n", - "- oder \n", - "\n", - " PDGFRA-Gen\n", - " Gene or Protein\n", - "\n", - " nachweisen lassen, bedürfen einer intensiven molekularpathologischen Abklärung, da sich viele therapeutische Implikationen ergeben.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Patienten, bei denen sich keine Mutationen im \n", - "\n", - " KIT\n", - " Gene or Protein\n", - "\n", - "- oder \n", - "\n", - " PDGFRA\n", - " Gene or Protein\n", - "\n", - "-Gen nachweisen lassen, bedürfen einer intensiven molekularpathologischen Abklärung, da sich viele therapeutische Implikationen ergeben.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Die Ergebnisse zeigten eine Auftrennung der Wahrscheinlichkeiten für Gesamtüberleben und progressionsfreies Überleben der drei FLIPI-Prognosegruppen sowohl in der Gesamtgruppe als auch in den Patientengruppen, die therapiefrei beobachtet wurden, eine Rituximab-Monotherapie, R-CVP oder R-\n", - "\n", - " CHOP\n", - " Gene or Protein\n", - "\n", - " erhalten hatten.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Die Ergebnisse zeigten eine Auftrennung der Wahrscheinlichkeiten für Gesamtüberleben und progressionsfreies Überleben der drei FLIPI-Prognosegruppen sowohl in der Gesamtgruppe als auch in den Patientengruppen, die therapiefrei beobachtet wurden, eine Rituximab-Monotherapie, R-CVP oder R-CHOP erhalten hatten.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
In einer Studie mit >1500 Probanden einer Hamäturiesprechstunde wurde der immunfluorometrische \n", - "\n", - " MCM\n", - " Gene or Protein\n", - "\n", - "-5 Nachweis mit NMP22 und Urinzytologie verglichen; \n", - "\n", - " MCM\n", - " Gene or Protein\n", - "\n", - "-5 hatte einen hohen NPV von >90% bei einem niedrigen PPV von 20% [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
In einer Studie mit >1500 Probanden einer Hamäturiesprechstunde wurde der immunfluorometrische \n", - "\n", - " MCM-5\n", - " Gene or Protein\n", - "\n", - " Nachweis mit \n", - "\n", - " NMP22\n", - " Gene or Protein\n", - "\n", - " und Urinzytologie verglichen; \n", - "\n", - " MCM-5\n", - " Gene or Protein\n", - "\n", - " hatte einen hohen NPV von >90% bei einem niedrigen PPV von 20% [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
In einer Studie mit >1500 Probanden einer Hamäturiesprechstunde wurde der immunfluorometrische \n", - "\n", - " MCM\n", - " Gene or Protein\n", - "\n", - "-5 Nachweis mit NMP22 und Urinzytologie verglichen; \n", - "\n", - " MCM\n", - " Gene or Protein\n", - "\n", - "-5 hatte einen hohen NPV von >90% bei einem niedrigen PPV von 20% [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
In einer Studie mit >1500 Probanden einer Hamäturiesprechstunde wurde der immunfluorometrische \n", - "\n", - " MCM-5\n", - " Gene or Protein\n", - "\n", - " Nachweis mit \n", - "\n", - " NMP22\n", - " Gene or Protein\n", - "\n", - " und Urinzytologie verglichen; \n", - "\n", - " MCM-5\n", - " Gene or Protein\n", - "\n", - " hatte einen hohen NPV von >90% bei einem niedrigen PPV von 20% [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Falsch-positive Ergebnisse treten vermehrt auf bei Hämaturie (bis zu 80%), da das \n", - "\n", - " hCFHrp\n", - " Gene or Protein\n", - "\n", - "-Protein im Blut in hoher Konzentration vorliegt [REF] [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Falsch-positive Ergebnisse treten vermehrt auf bei Hämaturie (bis zu 80%), da das \n", - "\n", - " hCFHrp-Protein\n", - " Gene or Protein\n", - "\n", - " im Blut in hoher Konzentration vorliegt [REF] [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Die neuen Entitäten sessil serratiertes Adenom (\n", - "\n", - " SSA\n", - " Gene or Protein\n", - "\n", - ") und traditionell serratiertes Adenom (TSA) sind erst seit 2010 definiert [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Die neuen Entitäten sessil serratiertes Adenom (SSA) und traditionell serratiertes Adenom (TSA) sind erst seit 2010 definiert [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Jedoch konnte bislang die Prognose der sekundären AML und des \n", - "\n", - " MDS\n", - " Gene or Protein\n", - "\n", - " auch durch eine Behandlung mittels allogener Stammzelltransplantation nicht durchgreifend verbessert ben nach zwei Jahren bei 8% [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Jedoch konnte bislang die Prognose der sekundären AML und des MDS auch durch eine Behandlung mittels allogener Stammzelltransplantation nicht durchgreifend verbessert ben nach zwei Jahren bei 8% [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Obwohl eine Duodenalpolyposis bei MAP-Patienten seltener (17%) als bei \n", - "\n", - " FAP\n", - " Gene or Protein\n", - "\n", - "-Patienten beobachtet wird, erscheint das Risiko von etwa 4% für die Entwicklung eines Duodenalkarzinoms vergleichbar hoch zu sein [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Obwohl eine Duodenalpolyposis bei MAP-Patienten seltener (17%) als bei FAP-Patienten beobachtet wird, erscheint das Risiko von etwa 4% für die Entwicklung eines Duodenalkarzinoms vergleichbar hoch zu sein [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Die für die Typ-I-Karzinome charakteristischen genetischen Veränderungen in \n", - "\n", - " PTEN\n", - " Gene or Protein\n", - "\n", - ", \n", - "\n", - " K-\n", - " Gene or Protein\n", - "\n", - "RAS, ß-catenin sowie dem Mismatch-Reparatur-System sind sehr selten.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Die für die Typ-I-Karzinome charakteristischen genetischen Veränderungen in \n", - "\n", - " PTEN\n", - " Gene or Protein\n", - "\n", - ", \n", - "\n", - " K-RAS\n", - " Gene or Protein\n", - "\n", - ", \n", - "\n", - " ß-catenin\n", - " Gene or Protein\n", - "\n", - " sowie dem Mismatch-Reparatur-System sind sehr selten.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Kohorten-Studien, bei denen der Rauch-Status vor der FL-Diagnose bei ehemals Nicht-Erkrankten erhoben wurde, bestätigen das etwa doppelt so hohe Follikuläres Lymphom-Risiko weiblicher Raucherinnen (95% CI 1.20-3.77 [REF], [REF]. Auch häufige Passivrauch-Exposition in der Kindheit und als Erwachsener scheinen das FLRisiko zu erhöhen [REF]. Sechs und mehr Stunden Passivrauch-Exposition im Erwachsenenalter erhöhen das FL-Risiko signifikant um das 2,4-fache. Kombinierte Betrachtungen epidemiologischer und genetischer Faktoren deuten auf ein mögliches Zusammenwirken von bestimmten Varianten des HLA-\n", - "\n", - " DRB1\n", - " Gene or Protein\n", - "\n", - " Locus und Rauchen auf das FL-Risiko hin [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Kohorten-Studien, bei denen der Rauch-Status vor der FL-Diagnose bei ehemals Nicht-Erkrankten erhoben wurde, bestätigen das etwa doppelt so hohe Follikuläres Lymphom-Risiko weiblicher Raucherinnen (95% CI 1.20-3.77 [REF], [REF]. Auch häufige Passivrauch-Exposition in der Kindheit und als Erwachsener scheinen das FLRisiko zu erhöhen [REF]. Sechs und mehr Stunden Passivrauch-Exposition im Erwachsenenalter erhöhen das FL-Risiko signifikant um das 2,4-fache. Kombinierte Betrachtungen epidemiologischer und genetischer Faktoren deuten auf ein mögliches Zusammenwirken von bestimmten Varianten des \n", - "\n", - " HLA-DRB1 Locus\n", - " Gene or Protein\n", - "\n", - " und Rauchen auf das FL-Risiko hin [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Mögliche Wechselwirkungen von Granatapfel mit anderen Medikamenten und Substraten wurden bezüglich Cytochrom P4503A und \n", - "\n", - " CYP\n", - " Gene or Protein\n", - "\n", - " 2C9 sowie in Bezug auf Warfarin und Metformin untersucht [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Mögliche Wechselwirkungen von Granatapfel mit anderen Medikamenten und Substraten wurden bezüglich \n", - "\n", - " Cytochrom P4503A\n", - " Gene or Protein\n", - "\n", - " und \n", - "\n", - " CYP 2C9\n", - " Gene or Protein\n", - "\n", - " sowie in Bezug auf Warfarin und Metformin untersucht [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Soluble \n", - "\n", - " Fas\n", - " Gene or Protein\n", - "\n", - " (sFas) ist ein Produkt abnormer mRNA Splicevarianten des membrangebundenen \n", - "\n", - " Fas\n", - " Gene or Protein\n", - "\n", - "-Rezeptors, der für apoptotische Signalregulierung bedeutsam ist.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Soluble \n", - "\n", - " Fas\n", - " Gene or Protein\n", - "\n", - " (\n", - "\n", - " sFas\n", - " Gene or Protein\n", - "\n", - ") ist ein Produkt abnormer mRNA Splicevarianten des membrangebundenen \n", - "\n", - " Fas-Rezeptors\n", - " Gene or Protein\n", - "\n", - ", der für apoptotische Signalregulierung bedeutsam ist.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Der nach der Einführung der \n", - "\n", - " CD20\n", - " Gene or Protein\n", - "\n", - "-Antikörpertherapie entwickelte Follicular Lymphoma International Prognostic Index -2 (FLIPI-2) [REF] wurde deutlich seltener als der FLIPI auf seine prognostische Relevanz untersucht.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Der nach der Einführung der \n", - "\n", - " CD20-Antikörpertherapie\n", - " Gene or Protein\n", - "\n", - " entwickelte Follicular Lymphoma International Prognostic Index -2 (FLIPI-2) [REF] wurde deutlich seltener als der FLIPI auf seine prognostische Relevanz untersucht.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Im Gesamtüberleben wurde sogar ein negativer Einfluss der Bevacizumab-Therapie beobachtet (HR für Bevacizumab-\n", - "\n", - " FOLFOX4\n", - " Gene or Protein\n", - "\n", - " vs. \n", - "\n", - " FOLFOX4\n", - " Gene or Protein\n", - "\n", - " 1,27 (p=0,02), Bevacizumab-XELOX vs. \n", - "\n", - " FOLFOX4\n", - " Gene or Protein\n", - "\n", - " 1,15 (p=0,21)).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Im Gesamtüberleben wurde sogar ein negativer Einfluss der Bevacizumab-Therapie beobachtet (HR für Bevacizumab-FOLFOX4 vs. FOLFOX4 1,27 (p=0,02), Bevacizumab-XELOX vs. FOLFOX4 1,15 (p=0,21)).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Im Gesamtüberleben wurde sogar ein negativer Einfluss der Bevacizumab-Therapie beobachtet (HR für Bevacizumab-\n", - "\n", - " FOLFOX4\n", - " Gene or Protein\n", - "\n", - " vs. \n", - "\n", - " FOLFOX4\n", - " Gene or Protein\n", - "\n", - " 1,27 (p=0,02), Bevacizumab-XELOX vs. \n", - "\n", - " FOLFOX4\n", - " Gene or Protein\n", - "\n", - " 1,15 (p=0,21)).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Im Gesamtüberleben wurde sogar ein negativer Einfluss der Bevacizumab-Therapie beobachtet (HR für Bevacizumab-FOLFOX4 vs. FOLFOX4 1,27 (p=0,02), Bevacizumab-XELOX vs. FOLFOX4 1,15 (p=0,21)).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Im Gesamtüberleben wurde sogar ein negativer Einfluss der Bevacizumab-Therapie beobachtet (HR für Bevacizumab-\n", - "\n", - " FOLFOX4\n", - " Gene or Protein\n", - "\n", - " vs. \n", - "\n", - " FOLFOX4\n", - " Gene or Protein\n", - "\n", - " 1,27 (p=0,02), Bevacizumab-XELOX vs. \n", - "\n", - " FOLFOX4\n", - " Gene or Protein\n", - "\n", - " 1,15 (p=0,21)).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Im Gesamtüberleben wurde sogar ein negativer Einfluss der Bevacizumab-Therapie beobachtet (HR für Bevacizumab-FOLFOX4 vs. FOLFOX4 1,27 (p=0,02), Bevacizumab-XELOX vs. FOLFOX4 1,15 (p=0,21)).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Aktivierende Mutationen im \n", - "\n", - " BRAF-Gen\n", - " Gene or Protein\n", - "\n", - " werden bei etwa 8-12% der Patienten mit mKRK beschrieben [REF], [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Aktivierende Mutationen im \n", - "\n", - " BRAF\n", - " Gene or Protein\n", - "\n", - "-Gen werden bei etwa 8-12% der Patienten mit mKRK beschrieben [REF], [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Patienten mit einem \n", - "\n", - " SSA\n", - " Gene or Protein\n", - "\n", - " proximal der linken Flexur wiesen das größte Risiko für die Entstehung eines kolorektalen Karzinoms auf.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Patienten mit einem SSA proximal der linken Flexur wiesen das größte Risiko für die Entstehung eines kolorektalen Karzinoms auf.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Der mediane Anstieg des \n", - "\n", - " PSA\n", - " Gene or Protein\n", - "\n", - " in der Nahrungsergänzungsmittelgruppe (FSG) betrug 14,7% im Gegensatz zu 78,5% in der Placebogruppe (PG), Differenz 63,8% (p = 0,0008).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Der mediane Anstieg des PSA in der Nahrungsergänzungsmittelgruppe (FSG) betrug 14,7% im Gegensatz zu 78,5% in der Placebogruppe (PG), Differenz 63,8% (p = 0,0008).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - " p16-\n", - " Gene or Protein\n", - "\n", - "\n", - "\n", - " INK4a\n", - " Gene or Protein\n", - "\n", - " Nachweis, CINtec® \n", - "\n", - " p16\n", - " Gene or Protein\n", - "\n", - " (Roche mtm laboratories, Heidelberg):
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - " p16-INK4a\n", - " Gene or Protein\n", - "\n", - " Nachweis, CINtec® p16 (Roche mtm laboratories, Heidelberg):
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - " p16-\n", - " Gene or Protein\n", - "\n", - "\n", - "\n", - " INK4a\n", - " Gene or Protein\n", - "\n", - " Nachweis, CINtec® \n", - "\n", - " p16\n", - " Gene or Protein\n", - "\n", - " (Roche mtm laboratories, Heidelberg):
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - " p16-INK4a\n", - " Gene or Protein\n", - "\n", - " Nachweis, CINtec® p16 (Roche mtm laboratories, Heidelberg):
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - " p16-\n", - " Gene or Protein\n", - "\n", - "\n", - "\n", - " INK4a\n", - " Gene or Protein\n", - "\n", - " Nachweis, CINtec® \n", - "\n", - " p16\n", - " Gene or Protein\n", - "\n", - " (Roche mtm laboratories, Heidelberg):
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - " p16-INK4a\n", - " Gene or Protein\n", - "\n", - " Nachweis, CINtec® p16 (Roche mtm laboratories, Heidelberg):
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Eine auf morphologischen Faktoren beruhende Risikostratifizierung des Endometriumkarzinoms, basierend auf einem Konsens der European Society for Medical Oncology (\n", - "\n", - " ESMO\n", - " Gene or Protein\n", - "\n", - "), der European Society for Radiotherapy Oncology (ESTRO) und der European Society of Gynaecological Oncology (ESGO) ist in Abbildung 4 zusammengefasst [REF], [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Eine auf morphologischen Faktoren beruhende Risikostratifizierung des Endometriumkarzinoms, basierend auf einem Konsens der European Society for Medical Oncology (ESMO), der European Society for Radiotherapy Oncology (ESTRO) und der European Society of Gynaecological Oncology (ESGO) ist in Abbildung 4 zusammengefasst [REF], [REF].
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Ein Großteil (70 – 95%) der follikulären Lymphome ist durch die t(14;18) Translokation charakterisiert, die in einer Fusion des Gens für den Apoptose-Regulator \n", - "\n", - " BCL2\n", - " Gene or Protein\n", - "\n", - " mit dem Immunoglobulin H Locus resultiert, einhergehend mit einer gesteigerten Produktion des anti-apoptotischen \n", - "\n", - " BCL-2\n", - " Gene or Protein\n", - "\n", - " Proteins.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Ein Großteil (70 – 95%) der follikulären Lymphome ist durch die t(14;18) Translokation charakterisiert, die in einer Fusion des Gens für den Apoptose-Regulator \n", - "\n", - " BCL2\n", - " Gene or Protein\n", - "\n", - " mit dem \n", - "\n", - " Immunoglobulin H Locus\n", - " Gene or Protein\n", - "\n", - " resultiert, einhergehend mit einer gesteigerten Produktion des anti-apoptotischen \n", - "\n", - " BCL-2 Proteins\n", - " Gene or Protein\n", - "\n", - ".
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Die klinische Diagnose einer \n", - "\n", - " NF1\n", - " Gene or Protein\n", - "\n", - " wird gestellt, wenn zwei der nachfolgenden sieben Kriterien erfüllt sind:
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Die klinische Diagnose einer NF1 wird gestellt, wenn zwei der nachfolgenden sieben Kriterien erfüllt sind:
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Der primäre Zielparameter war die Veränderung der Subskala der Alltagsbeeinflussung des Brief Fatigue Inventory (BFI), sekundärer Parameter die Änderung der BFI-Subskala der allgemeinen Fatigue sowie die Werte der Medical Outcome Scale des Short Form-36 (\n", - "\n", - " SF\n", - " Gene or Protein\n", - "\n", - "-36) und des Pittsburgh Sleep Quality Index (PSQI).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Der primäre Zielparameter war die Veränderung der Subskala der Alltagsbeeinflussung des Brief Fatigue Inventory (BFI), sekundärer Parameter die Änderung der BFI-Subskala der allgemeinen Fatigue sowie die Werte der Medical Outcome Scale des Short Form-36 (SF-36) und des Pittsburgh Sleep Quality Index (PSQI).
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
(1) ≥ 6 Café au lait Spots der Haut (mindestens 5 mm Durchmesser in präpubertären und mindestens 15 mm Durchmesser in postpubertären Menschen), (2) ≥ 2 Neurofibrome oder ein plexiformes Neurofibrom, (3) Freckling in der Axilla oder Leiste, (4) ≥ 2 Lisch-Knötchen (\n", - "\n", - " Iris\n", - " Gene or Protein\n", - "\n", - "- Hamartome), (5) Diagnose eines Opticus-Glioms, (6) Dysplasien der langen Röhrenknochen mit und ohne Pseudarthrose, (7) Neurofibromatose bei einem Verwandten ersten Grades.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
(1) ≥ 6 Café au lait Spots der Haut (mindestens 5 mm Durchmesser in präpubertären und mindestens 15 mm Durchmesser in postpubertären Menschen), (2) ≥ 2 Neurofibrome oder ein plexiformes Neurofibrom, (3) Freckling in der Axilla oder Leiste, (4) ≥ 2 Lisch-Knötchen (Iris- Hamartome), (5) Diagnose eines Opticus-Glioms, (6) Dysplasien der langen Röhrenknochen mit und ohne Pseudarthrose, (7) Neurofibromatose bei einem Verwandten ersten Grades.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Die Bestimmung dieses SNPs erlaubt eine solide Aussage hinsichtlich des \n", - "\n", - " NAT2\n", - " Gene or Protein\n", - "\n", - "-Acetyliererstatus zumindest bei Mitteleuropäern, ohne Bestimmung der 7 SNPS im \n", - "\n", - " NAT2\n", - " Gene or Protein\n", - "\n", - " Gen.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Die Bestimmung dieses SNPs erlaubt eine solide Aussage hinsichtlich des \n", - "\n", - " NAT2\n", - " Gene or Protein\n", - "\n", - "-Acetyliererstatus zumindest bei Mitteleuropäern, ohne Bestimmung der 7 SNPS im \n", - "\n", - " NAT2 Gen\n", - " Gene or Protein\n", - "\n", - ".
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Mehrheitlich sind eine \n", - "\n", - " VEGF(\n", - " Gene or Protein\n", - "\n", - "-A) und eine \n", - "\n", - " VEGFR\n", - " Gene or Protein\n", - "\n", - "-Expression nachweisbar.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Mehrheitlich sind eine \n", - "\n", - " VEGF(-A)\n", - " Gene or Protein\n", - "\n", - " und eine \n", - "\n", - " VEGFR\n", - " Gene or Protein\n", - "\n", - "-Expression nachweisbar.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
Das ASPL-\n", - "\n", - " TFE3\n", - " Gene or Protein\n", - "\n", - "-Fusionsprotein aktiviert eine \n", - "\n", - " MET\n", - " Gene or Protein\n", - "\n", - " Transkription u.a. die Transkription des c-\n", - "\n", - " MET\n", - " Gene or Protein\n", - "\n", - "-Gens.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Das \n", - "\n", - " ASPL\n", - " Gene or Protein\n", - "\n", - "-\n", - "\n", - " TFE3\n", - " Gene or Protein\n", - "\n", - "-Fusionsprotein aktiviert eine \n", - "\n", - " MET\n", - " Gene or Protein\n", - "\n", - " Transkription u.a. die Transkription des \n", - "\n", - " c-MET\n", - " Gene or Protein\n", - "\n", - "-Gens.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - }, - { - "data": { - "text/html": [ - "
In einer retrospektiven Analyse der klinischen Studie \n", - "\n", - " AB20\n", - " Gene or Protein\n", - "\n", - "/99 (n=102, in die Auswertung flossen 83 Patienten ein) zeigte sich mit Zunahme der ungünstigen Faktoren eine signifikante Verschlechterung des Gesamtüberlebens im Gesamtkollektiv:
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
In einer retrospektiven Analyse der klinischen Studie AB20/99 (n=102, in die Auswertung flossen 83 Patienten ein) zeigte sich mit Zunahme der ungünstigen Faktoren eine signifikante Verschlechterung des Gesamtüberlebens im Gesamtkollektiv:
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------\n" - ] - } - ], - "source": [ - "from skweak.utils import display_entities\n", - "\n", - "for d in gold_docs_dev:\n", - " for g in d.spans['hmm']:\n", - " if not g in d.ents:\n", - " display_entities(d, layer='hmm')\n", - " display_entities(d)\n", - " print('------')\n", - " continue" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "f79a91a8", + "execution_count": 41, + "id": "f79a91a8", "metadata": {}, "outputs": [ { @@ -3706,28 +1054,16 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " Gene or Protein\n", - " 100.0 %\n", - " construct\n", - " 0.879\n", - " 0.244\n", - " 0.382\n", - " \n", - " \n", - " \n", - " 0.833\n", - " 0.305\n", - " 0.446\n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", - " cue_civic\n", + " Gene or Protein\n", + " 100.0 %\n", + " CIViC\n", " 0.979\n", " 0.366\n", " 0.532\n", @@ -3739,7 +1075,7 @@ " 0.624\n", " \n", " \n", - " cue_cosmic_census\n", + " COSMIC\n", " 0.960\n", " 0.342\n", " 0.504\n", @@ -3751,7 +1087,7 @@ " 0.594\n", " \n", " \n", - " entrez\n", + " Entrez\n", " 0.937\n", " 0.450\n", " 0.608\n", @@ -3763,19 +1099,19 @@ " 0.646\n", " \n", " \n", - " hmm\n", - " 0.899\n", - " 0.596\n", - " 0.716\n", + " HGNC\n", + " 0.879\n", + " 0.244\n", + " 0.382\n", " \n", " \n", " \n", - " 0.841\n", - " 0.680\n", - " 0.752\n", + " 0.833\n", + " 0.305\n", + " 0.446\n", " \n", " \n", - " omim\n", + " OMIM\n", " 0.955\n", " 0.411\n", " 0.574\n", @@ -3787,7 +1123,7 @@ " 0.670\n", " \n", " \n", - " protein_families\n", + " Protein Families\n", " 1.000\n", " 0.067\n", " 0.126\n", @@ -3799,7 +1135,7 @@ " 0.142\n", " \n", " \n", - " protein_gazetteer\n", + " Proteins\n", " 1.000\n", " 0.117\n", " 0.210\n", @@ -3811,21 +1147,21 @@ " 0.212\n", " \n", " \n", - " macro\n", - " \n", - " construct\n", - " 0.879\n", 
- " 0.244\n", - " 0.382\n", + " hmm\n", + " 0.899\n", + " 0.596\n", + " 0.716\n", " \n", " \n", " \n", - " 0.833\n", - " 0.305\n", - " 0.446\n", + " 0.841\n", + " 0.680\n", + " 0.752\n", " \n", " \n", - " cue_civic\n", + " macro\n", + " \n", + " CIViC\n", " 0.979\n", " 0.366\n", " 0.532\n", @@ -3837,7 +1173,7 @@ " 0.624\n", " \n", " \n", - " cue_cosmic_census\n", + " COSMIC\n", " 0.960\n", " 0.342\n", " 0.504\n", @@ -3849,7 +1185,7 @@ " 0.594\n", " \n", " \n", - " entrez\n", + " Entrez\n", " 0.937\n", " 0.450\n", " 0.608\n", @@ -3861,19 +1197,19 @@ " 0.646\n", " \n", " \n", - " hmm\n", - " 0.899\n", - " 0.596\n", - " 0.716\n", + " HGNC\n", + " 0.879\n", + " 0.244\n", + " 0.382\n", " \n", " \n", " \n", - " 0.841\n", - " 0.680\n", - " 0.752\n", + " 0.833\n", + " 0.305\n", + " 0.446\n", " \n", " \n", - " omim\n", + " OMIM\n", " 0.955\n", " 0.411\n", " 0.574\n", @@ -3885,7 +1221,7 @@ " 0.670\n", " \n", " \n", - " protein_families\n", + " Protein Families\n", " 1.000\n", " 0.067\n", " 0.126\n", @@ -3897,7 +1233,7 @@ " 0.142\n", " \n", " \n", - " protein_gazetteer\n", + " Proteins\n", " 1.000\n", " 0.117\n", " 0.210\n", @@ -3909,21 +1245,21 @@ " 0.212\n", " \n", " \n", - " micro\n", - " \n", - " construct\n", - " 0.879\n", - " 0.244\n", - " 0.382\n", - " 0.819\n", - " 0.976\n", - " 0.278\n", - " 0.833\n", - " 0.305\n", - " 0.446\n", + " hmm\n", + " 0.899\n", + " 0.596\n", + " 0.716\n", + " \n", + " \n", + " \n", + " 0.841\n", + " 0.680\n", + " 0.752\n", " \n", " \n", - " cue_civic\n", + " micro\n", + " \n", + " CIViC\n", " 0.979\n", " 0.366\n", " 0.532\n", @@ -3935,7 +1271,7 @@ " 0.624\n", " \n", " \n", - " cue_cosmic_census\n", + " COSMIC\n", " 0.960\n", " 0.342\n", " 0.504\n", @@ -3947,7 +1283,7 @@ " 0.594\n", " \n", " \n", - " entrez\n", + " Entrez\n", " 0.937\n", " 0.450\n", " 0.608\n", @@ -3959,19 +1295,19 @@ " 0.646\n", " \n", " \n", - " hmm\n", - " 0.899\n", - " 0.596\n", - " 0.716\n", - " 0.326\n", - " 0.989\n", - " 0.663\n", - " 0.841\n", - " 0.680\n", - " 
0.752\n", + " HGNC\n", + " 0.879\n", + " 0.244\n", + " 0.382\n", + " 0.819\n", + " 0.976\n", + " 0.278\n", + " 0.833\n", + " 0.305\n", + " 0.446\n", " \n", " \n", - " omim\n", + " OMIM\n", " 0.955\n", " 0.411\n", " 0.574\n", @@ -3983,7 +1319,7 @@ " 0.670\n", " \n", " \n", - " protein_families\n", + " Protein Families\n", " 1.000\n", " 0.067\n", " 0.126\n", @@ -3995,7 +1331,7 @@ " 0.142\n", " \n", " \n", - " protein_gazetteer\n", + " Proteins\n", " 1.000\n", " 0.117\n", " 0.210\n", @@ -4007,21 +1343,21 @@ " 0.212\n", " \n", " \n", - " weighted\n", - " \n", - " construct\n", - " 0.879\n", - " 0.244\n", - " 0.382\n", - " \n", - " \n", - " \n", - " 0.833\n", - " 0.305\n", - " 0.446\n", + " hmm\n", + " 0.899\n", + " 0.596\n", + " 0.716\n", + " 0.326\n", + " 0.989\n", + " 0.663\n", + " 0.841\n", + " 0.680\n", + " 0.752\n", " \n", " \n", - " cue_civic\n", + " weighted\n", + " \n", + " CIViC\n", " 0.979\n", " 0.366\n", " 0.532\n", @@ -4033,7 +1369,7 @@ " 0.624\n", " \n", " \n", - " cue_cosmic_census\n", + " COSMIC\n", " 0.960\n", " 0.342\n", " 0.504\n", @@ -4045,7 +1381,7 @@ " 0.594\n", " \n", " \n", - " entrez\n", + " Entrez\n", " 0.937\n", " 0.450\n", " 0.608\n", @@ -4057,19 +1393,19 @@ " 0.646\n", " \n", " \n", - " hmm\n", - " 0.899\n", - " 0.596\n", - " 0.716\n", + " HGNC\n", + " 0.879\n", + " 0.244\n", + " 0.382\n", " \n", " \n", " \n", - " 0.841\n", - " 0.680\n", - " 0.752\n", + " 0.833\n", + " 0.305\n", + " 0.446\n", " \n", " \n", - " omim\n", + " OMIM\n", " 0.955\n", " 0.411\n", " 0.574\n", @@ -4081,7 +1417,7 @@ " 0.670\n", " \n", " \n", - " protein_families\n", + " Protein Families\n", " 1.000\n", " 0.067\n", " 0.126\n", @@ -4093,7 +1429,7 @@ " 0.142\n", " \n", " \n", - " protein_gazetteer\n", + " Proteins\n", " 1.000\n", " 0.117\n", " 0.210\n", @@ -4104,153 +1440,130 @@ " 0.120\n", " 0.212\n", " \n", + " \n", + " hmm\n", + " 0.899\n", + " 0.596\n", + " 0.716\n", + " \n", + " \n", + " \n", + " 0.841\n", + " 0.680\n", + " 0.752\n", + " \n", " \n", "\n", "" ], 
"text/plain": [ - " tok_precision tok_recall \\\n", - "label proportion model \n", - "Gene or Protein 100.0 % construct 0.879 0.244 \n", - " cue_civic 0.979 0.366 \n", - " cue_cosmic_census 0.960 0.342 \n", - " entrez 0.937 0.450 \n", - " hmm 0.899 0.596 \n", - " omim 0.955 0.411 \n", - " protein_families 1.000 0.067 \n", - " protein_gazetteer 1.000 0.117 \n", - "macro construct 0.879 0.244 \n", - " cue_civic 0.979 0.366 \n", - " cue_cosmic_census 0.960 0.342 \n", - " entrez 0.937 0.450 \n", - " hmm 0.899 0.596 \n", - " omim 0.955 0.411 \n", - " protein_families 1.000 0.067 \n", - " protein_gazetteer 1.000 0.117 \n", - "micro construct 0.879 0.244 \n", - " cue_civic 0.979 0.366 \n", - " cue_cosmic_census 0.960 0.342 \n", - " entrez 0.937 0.450 \n", - " hmm 0.899 0.596 \n", - " omim 0.955 0.411 \n", - " protein_families 1.000 0.067 \n", - " protein_gazetteer 1.000 0.117 \n", - "weighted construct 0.879 0.244 \n", - " cue_civic 0.979 0.366 \n", - " cue_cosmic_census 0.960 0.342 \n", - " entrez 0.937 0.450 \n", - " hmm 0.899 0.596 \n", - " omim 0.955 0.411 \n", - " protein_families 1.000 0.067 \n", - " protein_gazetteer 1.000 0.117 \n", + " tok_precision tok_recall \\\n", + "label proportion model \n", + "Gene or Protein 100.0 % CIViC 0.979 0.366 \n", + " COSMIC 0.960 0.342 \n", + " Entrez 0.937 0.450 \n", + " HGNC 0.879 0.244 \n", + " OMIM 0.955 0.411 \n", + " Protein Families 1.000 0.067 \n", + " Proteins 1.000 0.117 \n", + " hmm 0.899 0.596 \n", + "macro CIViC 0.979 0.366 \n", + " COSMIC 0.960 0.342 \n", + " Entrez 0.937 0.450 \n", + " HGNC 0.879 0.244 \n", + " OMIM 0.955 0.411 \n", + " Protein Families 1.000 0.067 \n", + " Proteins 1.000 0.117 \n", + " hmm 0.899 0.596 \n", + "micro CIViC 0.979 0.366 \n", + " COSMIC 0.960 0.342 \n", + " Entrez 0.937 0.450 \n", + " HGNC 0.879 0.244 \n", + " OMIM 0.955 0.411 \n", + " Protein Families 1.000 0.067 \n", + " Proteins 1.000 0.117 \n", + " hmm 0.899 0.596 \n", + "weighted CIViC 0.979 0.366 \n", + " COSMIC 0.960 0.342 \n", 
+ " Entrez 0.937 0.450 \n", + " HGNC 0.879 0.244 \n", + " OMIM 0.955 0.411 \n", + " Protein Families 1.000 0.067 \n", + " Proteins 1.000 0.117 \n", + " hmm 0.899 0.596 \n", "\n", - " tok_f1 tok_cee tok_acc coverage \\\n", - "label proportion model \n", - "Gene or Protein 100.0 % construct 0.382 \n", - " cue_civic 0.532 \n", - " cue_cosmic_census 0.504 \n", - " entrez 0.608 \n", - " hmm 0.716 \n", - " omim 0.574 \n", - " protein_families 0.126 \n", - " protein_gazetteer 0.210 \n", - "macro construct 0.382 \n", - " cue_civic 0.532 \n", - " cue_cosmic_census 0.504 \n", - " entrez 0.608 \n", - " hmm 0.716 \n", - " omim 0.574 \n", - " protein_families 0.126 \n", - " protein_gazetteer 0.210 \n", - "micro construct 0.382 0.819 0.976 0.278 \n", - " cue_civic 0.532 0.819 0.976 0.374 \n", - " cue_cosmic_census 0.504 0.819 0.976 0.356 \n", - " entrez 0.608 0.819 0.976 0.481 \n", - " hmm 0.716 0.326 0.989 0.663 \n", - " omim 0.574 0.819 0.976 0.43 \n", - " protein_families 0.126 0.819 0.976 0.067 \n", - " protein_gazetteer 0.210 0.819 0.976 0.117 \n", - "weighted construct 0.382 \n", - " cue_civic 0.532 \n", - " cue_cosmic_census 0.504 \n", - " entrez 0.608 \n", - " hmm 0.716 \n", - " omim 0.574 \n", - " protein_families 0.126 \n", - " protein_gazetteer 0.210 \n", - "\n", - " ent_precision ent_recall \\\n", - "label proportion model \n", - "Gene or Protein 100.0 % construct 0.833 0.305 \n", - " cue_civic 0.944 0.465 \n", - " cue_cosmic_census 0.928 0.436 \n", - " entrez 0.902 0.503 \n", - " hmm 0.841 0.680 \n", - " omim 0.926 0.524 \n", - " protein_families 1.000 0.076 \n", - " protein_gazetteer 0.934 0.120 \n", - "macro construct 0.833 0.305 \n", - " cue_civic 0.944 0.465 \n", - " cue_cosmic_census 0.928 0.436 \n", - " entrez 0.902 0.503 \n", - " hmm 0.841 0.680 \n", - " omim 0.926 0.524 \n", - " protein_families 1.000 0.076 \n", - " protein_gazetteer 0.934 0.120 \n", - "micro construct 0.833 0.305 \n", - " cue_civic 0.944 0.465 \n", - " cue_cosmic_census 0.928 0.436 \n", - " 
entrez 0.902 0.503 \n", - " hmm 0.841 0.680 \n", - " omim 0.926 0.524 \n", - " protein_families 1.000 0.076 \n", - " protein_gazetteer 0.934 0.120 \n", - "weighted construct 0.833 0.305 \n", - " cue_civic 0.944 0.465 \n", - " cue_cosmic_census 0.928 0.436 \n", - " entrez 0.902 0.503 \n", - " hmm 0.841 0.680 \n", - " omim 0.926 0.524 \n", - " protein_families 1.000 0.076 \n", - " protein_gazetteer 0.934 0.120 \n", + " tok_f1 tok_cee tok_acc coverage \\\n", + "label proportion model \n", + "Gene or Protein 100.0 % CIViC 0.532 \n", + " COSMIC 0.504 \n", + " Entrez 0.608 \n", + " HGNC 0.382 \n", + " OMIM 0.574 \n", + " Protein Families 0.126 \n", + " Proteins 0.210 \n", + " hmm 0.716 \n", + "macro CIViC 0.532 \n", + " COSMIC 0.504 \n", + " Entrez 0.608 \n", + " HGNC 0.382 \n", + " OMIM 0.574 \n", + " Protein Families 0.126 \n", + " Proteins 0.210 \n", + " hmm 0.716 \n", + "micro CIViC 0.532 0.819 0.976 0.374 \n", + " COSMIC 0.504 0.819 0.976 0.356 \n", + " Entrez 0.608 0.819 0.976 0.481 \n", + " HGNC 0.382 0.819 0.976 0.278 \n", + " OMIM 0.574 0.819 0.976 0.43 \n", + " Protein Families 0.126 0.819 0.976 0.067 \n", + " Proteins 0.210 0.819 0.976 0.117 \n", + " hmm 0.716 0.326 0.989 0.663 \n", + "weighted CIViC 0.532 \n", + " COSMIC 0.504 \n", + " Entrez 0.608 \n", + " HGNC 0.382 \n", + " OMIM 0.574 \n", + " Protein Families 0.126 \n", + " Proteins 0.210 \n", + " hmm 0.716 \n", "\n", - " ent_f1 \n", - "label proportion model \n", - "Gene or Protein 100.0 % construct 0.446 \n", - " cue_civic 0.624 \n", - " cue_cosmic_census 0.594 \n", - " entrez 0.646 \n", - " hmm 0.752 \n", - " omim 0.670 \n", - " protein_families 0.142 \n", - " protein_gazetteer 0.212 \n", - "macro construct 0.446 \n", - " cue_civic 0.624 \n", - " cue_cosmic_census 0.594 \n", - " entrez 0.646 \n", - " hmm 0.752 \n", - " omim 0.670 \n", - " protein_families 0.142 \n", - " protein_gazetteer 0.212 \n", - "micro construct 0.446 \n", - " cue_civic 0.624 \n", - " cue_cosmic_census 0.594 \n", - " entrez 0.646 
\n", - " hmm 0.752 \n", - " omim 0.670 \n", - " protein_families 0.142 \n", - " protein_gazetteer 0.212 \n", - "weighted construct 0.446 \n", - " cue_civic 0.624 \n", - " cue_cosmic_census 0.594 \n", - " entrez 0.646 \n", - " hmm 0.752 \n", - " omim 0.670 \n", - " protein_families 0.142 \n", - " protein_gazetteer 0.212 " - ] - }, - "execution_count": 37, + " ent_precision ent_recall ent_f1 \n", + "label proportion model \n", + "Gene or Protein 100.0 % CIViC 0.944 0.465 0.624 \n", + " COSMIC 0.928 0.436 0.594 \n", + " Entrez 0.902 0.503 0.646 \n", + " HGNC 0.833 0.305 0.446 \n", + " OMIM 0.926 0.524 0.670 \n", + " Protein Families 1.000 0.076 0.142 \n", + " Proteins 0.934 0.120 0.212 \n", + " hmm 0.841 0.680 0.752 \n", + "macro CIViC 0.944 0.465 0.624 \n", + " COSMIC 0.928 0.436 0.594 \n", + " Entrez 0.902 0.503 0.646 \n", + " HGNC 0.833 0.305 0.446 \n", + " OMIM 0.926 0.524 0.670 \n", + " Protein Families 1.000 0.076 0.142 \n", + " Proteins 0.934 0.120 0.212 \n", + " hmm 0.841 0.680 0.752 \n", + "micro CIViC 0.944 0.465 0.624 \n", + " COSMIC 0.928 0.436 0.594 \n", + " Entrez 0.902 0.503 0.646 \n", + " HGNC 0.833 0.305 0.446 \n", + " OMIM 0.926 0.524 0.670 \n", + " Protein Families 1.000 0.076 0.142 \n", + " Proteins 0.934 0.120 0.212 \n", + " hmm 0.841 0.680 0.752 \n", + "weighted CIViC 0.944 0.465 0.624 \n", + " COSMIC 0.928 0.436 0.594 \n", + " Entrez 0.902 0.503 0.646 \n", + " HGNC 0.833 0.305 0.446 \n", + " OMIM 0.926 0.524 0.670 \n", + " Protein Families 1.000 0.076 0.142 \n", + " Proteins 0.934 0.120 0.212 \n", + " hmm 0.841 0.680 0.752 " + ] + }, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -4271,13 +1584,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "id": "60f62095-f8ed-4fd6-b7a5-672e130c5282", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Write to output/weak_training_lg.spacy...done\n" + ] + } + ], "source": [ - 
"utils.docbin_writer(docs, f\"output/weak_training_lg.spacy\")\n", - "#utils.docbin_writer(filtered_docs, f\"output/weak_training_md.spacy\")" + "utils.docbin_writer(docs, f\"output/weak_training_lg.spacy\")" ] }, { @@ -4290,10 +1610,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "id": "5f713abd-6637-481b-8fea-39972eb129f4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Write to output/strong_train.spacy...done\n", + "Write to output/strong_dev.spacy...done\n" + ] + } + ], "source": [ "from sklearn.model_selection import train_test_split\n", "gold_docs_strong = list(DocBin().from_disk('data/molecular/gold_dev.spacy').get_docs(nlp.vocab))\n", @@ -4314,17 +1643,6 @@ "!spacy train config.cfg --paths.train output/weak_training_lg.spacy --paths.dev data/molecular/gold_dev.spacy --output output/weak_ner_lg --gpu-id 0 --code training.py" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "9ecbde44-9646-4267-bc26-e5332d4dc202", - "metadata": {}, - "outputs": [], - "source": [ - "# Train NER model on smaller set of weak labels with spaCy\n", - "#!spacy train config.cfg --paths.train output/weak_training_md.spacy --paths.dev data/molecular/gold_dev.spacy --output output/weak_ner_md --gpu-id 0 --code training.py" - ] - }, { "cell_type": "code", "execution_count": null, @@ -4346,7 +1664,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 58, "id": "8842e777-aa91-485e-8e61-a4fc70790679", "metadata": {}, "outputs": [], @@ -4357,7 +1675,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 59, "id": "ef897e37-9bae-49ee-b8c1-267f19ceffe5", "metadata": {}, "outputs": [], @@ -4400,7 +1718,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 60, "id": "5be94760-3fa5-40e9-bb8f-d798cd452f6b", "metadata": { "tags": [] @@ -4422,7 +1740,7 @@ "name": "stderr", "output_type": "stream", "text": [ - 
"100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:15<00:00, 64.20it/s]\n" + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:15<00:00, 64.79it/s]\n" ] }, { @@ -4474,19 +1792,7 @@ " \n", " \n", " 100.0 %\n", - " construct\n", - " 0.879\n", - " 0.244\n", - " 0.382\n", - " \n", - " \n", - " \n", - " 0.833\n", - " 0.305\n", - " 0.446\n", - " \n", - " \n", - " cue_civic\n", + " CIViC\n", " 0.979\n", " 0.366\n", " 0.532\n", @@ -4498,7 +1804,7 @@ " 0.624\n", " \n", " \n", - " cue_cosmic_census\n", + " COSMIC\n", " 0.960\n", " 0.342\n", " 0.504\n", @@ -4510,7 +1816,7 @@ " 0.594\n", " \n", " \n", - " entrez\n", + " Entrez\n", " 0.937\n", " 0.450\n", " 0.608\n", @@ -4522,19 +1828,19 @@ " 0.646\n", " \n", " \n", - " hmm\n", - " 0.899\n", - " 0.596\n", - " 0.716\n", + " HGNC\n", + " 0.879\n", + " 0.244\n", + " 0.382\n", " \n", " \n", " \n", - " 0.841\n", - " 0.680\n", - " 0.752\n", + " 0.833\n", + " 0.305\n", + " 0.446\n", " \n", " \n", - " omim\n", + " OMIM\n", " 0.955\n", " 0.411\n", " 0.574\n", @@ -4546,7 +1852,7 @@ " 0.670\n", " \n", " \n", - " protein_families\n", + " Protein Families\n", " 1.000\n", " 0.067\n", " 0.126\n", @@ -4558,7 +1864,7 @@ " 0.142\n", " \n", " \n", - " protein_gazetteer\n", + " Proteins\n", " 1.000\n", " 0.117\n", " 0.210\n", @@ -4569,43 +1875,55 @@ " 0.120\n", " 0.212\n", " \n", + " \n", + " hmm\n", + " 0.899\n", + " 0.596\n", + " 0.716\n", + " \n", + " \n", + " \n", + " 0.841\n", + " 0.680\n", + " 0.752\n", + " \n", " \n", "\n", "" ], "text/plain": [ - " tok_precision tok_recall tok_f1 tok_cee \\\n", - "proportion model \n", - "100.0 % construct 0.879 0.244 0.382 \n", - " cue_civic 0.979 0.366 0.532 \n", - " cue_cosmic_census 0.960 0.342 0.504 \n", - " entrez 0.937 0.450 0.608 \n", - " hmm 0.899 0.596 0.716 \n", - " omim 0.955 0.411 0.574 \n", - " protein_families 
1.000 0.067 0.126 \n", - " protein_gazetteer 1.000 0.117 0.210 \n", + " tok_precision tok_recall tok_f1 tok_cee \\\n", + "proportion model \n", + "100.0 % CIViC 0.979 0.366 0.532 \n", + " COSMIC 0.960 0.342 0.504 \n", + " Entrez 0.937 0.450 0.608 \n", + " HGNC 0.879 0.244 0.382 \n", + " OMIM 0.955 0.411 0.574 \n", + " Protein Families 1.000 0.067 0.126 \n", + " Proteins 1.000 0.117 0.210 \n", + " hmm 0.899 0.596 0.716 \n", "\n", - " tok_acc coverage ent_precision ent_recall \\\n", - "proportion model \n", - "100.0 % construct 0.833 0.305 \n", - " cue_civic 0.944 0.465 \n", - " cue_cosmic_census 0.928 0.436 \n", - " entrez 0.902 0.503 \n", - " hmm 0.841 0.680 \n", - " omim 0.926 0.524 \n", - " protein_families 1.000 0.076 \n", - " protein_gazetteer 0.934 0.120 \n", + " tok_acc coverage ent_precision ent_recall \\\n", + "proportion model \n", + "100.0 % CIViC 0.944 0.465 \n", + " COSMIC 0.928 0.436 \n", + " Entrez 0.902 0.503 \n", + " HGNC 0.833 0.305 \n", + " OMIM 0.926 0.524 \n", + " Protein Families 1.000 0.076 \n", + " Proteins 0.934 0.120 \n", + " hmm 0.841 0.680 \n", "\n", - " ent_f1 \n", - "proportion model \n", - "100.0 % construct 0.446 \n", - " cue_civic 0.624 \n", - " cue_cosmic_census 0.594 \n", - " entrez 0.646 \n", - " hmm 0.752 \n", - " omim 0.670 \n", - " protein_families 0.142 \n", - " protein_gazetteer 0.212 " + " ent_f1 \n", + "proportion model \n", + "100.0 % CIViC 0.624 \n", + " COSMIC 0.594 \n", + " Entrez 0.646 \n", + " HGNC 0.446 \n", + " OMIM 0.670 \n", + " Protein Families 0.142 \n", + " Proteins 0.212 \n", + " hmm 0.752 " ] }, "metadata": {}, @@ -4627,7 +1945,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████▉| 999/1000 [00:34<00:00, 28.62it/s]\n" + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊| 999/1000 [00:39<00:00, 
24.99it/s]\n" ] }, { @@ -4680,15 +1998,15 @@ " \n", " 100.0 %\n", " ner_model\n", - " 0.903\n", - " 0.637\n", - " 0.748\n", + " 0.902\n", + " 0.617\n", + " 0.732\n", " \n", " \n", " \n", " 0.855\n", - " 0.745\n", - " 0.796\n", + " 0.72\n", + " 0.782\n", " \n", " \n", "\n", @@ -4697,11 +2015,11 @@ "text/plain": [ " tok_precision tok_recall tok_f1 tok_cee tok_acc \\\n", "proportion model \n", - "100.0 % ner_model 0.903 0.637 0.748 \n", + "100.0 % ner_model 0.902 0.617 0.732 \n", "\n", " coverage ent_precision ent_recall ent_f1 \n", "proportion model \n", - "100.0 % ner_model 0.855 0.745 0.796 " + "100.0 % ner_model 0.855 0.72 0.782 " ] }, "metadata": {}, @@ -4724,7 +2042,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 61, "id": "631cbb84-3c39-4281-9160-5029493a23a7", "metadata": {}, "outputs": [ @@ -4744,7 +2062,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:13<00:00, 72.69it/s]\n" + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:13<00:00, 73.50it/s]\n" ] }, { @@ -4796,19 +2114,7 @@ " \n", " \n", " 100.0 %\n", - " construct\n", - " 0.853\n", - " 0.190\n", - " 0.310\n", - " \n", - " \n", - " \n", - " 0.836\n", - " 0.280\n", - " 0.420\n", - " \n", - " \n", - " cue_civic\n", + " CIViC\n", " 0.933\n", " 0.350\n", " 0.510\n", @@ -4820,7 +2126,7 @@ " 0.606\n", " \n", " \n", - " cue_cosmic_census\n", + " COSMIC\n", " 0.927\n", " 0.342\n", " 0.500\n", @@ -4832,7 +2138,7 @@ " 0.608\n", " \n", " \n", - " entrez\n", + " Entrez\n", " 0.951\n", " 0.525\n", " 0.676\n", @@ -4844,19 +2150,19 @@ " 0.722\n", " \n", " \n", - " hmm\n", - " 0.864\n", - " 0.596\n", - " 0.706\n", + " HGNC\n", + " 0.853\n", + " 0.190\n", + " 0.310\n", " \n", " \n", " \n", - " 0.789\n", - " 0.689\n", - " 0.736\n", + " 0.836\n", + " 
0.280\n", + " 0.420\n", " \n", " \n", - " omim\n", + " OMIM\n", " 0.904\n", " 0.363\n", " 0.518\n", @@ -4868,7 +2174,7 @@ " 0.616\n", " \n", " \n", - " protein_families\n", + " Protein Families\n", " 0.538\n", " 0.027\n", " 0.052\n", @@ -4880,7 +2186,7 @@ " 0.022\n", " \n", " \n", - " protein_gazetteer\n", + " Proteins\n", " 1.000\n", " 0.131\n", " 0.232\n", @@ -4891,43 +2197,55 @@ " 0.112\n", " 0.200\n", " \n", + " \n", + " hmm\n", + " 0.864\n", + " 0.596\n", + " 0.706\n", + " \n", + " \n", + " \n", + " 0.789\n", + " 0.689\n", + " 0.736\n", + " \n", " \n", "\n", "" ], "text/plain": [ - " tok_precision tok_recall tok_f1 tok_cee \\\n", - "proportion model \n", - "100.0 % construct 0.853 0.190 0.310 \n", - " cue_civic 0.933 0.350 0.510 \n", - " cue_cosmic_census 0.927 0.342 0.500 \n", - " entrez 0.951 0.525 0.676 \n", - " hmm 0.864 0.596 0.706 \n", - " omim 0.904 0.363 0.518 \n", - " protein_families 0.538 0.027 0.052 \n", - " protein_gazetteer 1.000 0.131 0.232 \n", + " tok_precision tok_recall tok_f1 tok_cee \\\n", + "proportion model \n", + "100.0 % CIViC 0.933 0.350 0.510 \n", + " COSMIC 0.927 0.342 0.500 \n", + " Entrez 0.951 0.525 0.676 \n", + " HGNC 0.853 0.190 0.310 \n", + " OMIM 0.904 0.363 0.518 \n", + " Protein Families 0.538 0.027 0.052 \n", + " Proteins 1.000 0.131 0.232 \n", + " hmm 0.864 0.596 0.706 \n", "\n", - " tok_acc coverage ent_precision ent_recall \\\n", - "proportion model \n", - "100.0 % construct 0.836 0.280 \n", - " cue_civic 0.841 0.473 \n", - " cue_cosmic_census 0.854 0.473 \n", - " entrez 0.890 0.608 \n", - " hmm 0.789 0.689 \n", - " omim 0.818 0.493 \n", - " protein_families 0.250 0.012 \n", - " protein_gazetteer 0.975 0.112 \n", + " tok_acc coverage ent_precision ent_recall \\\n", + "proportion model \n", + "100.0 % CIViC 0.841 0.473 \n", + " COSMIC 0.854 0.473 \n", + " Entrez 0.890 0.608 \n", + " HGNC 0.836 0.280 \n", + " OMIM 0.818 0.493 \n", + " Protein Families 0.250 0.012 \n", + " Proteins 0.975 0.112 \n", + " hmm 0.789 0.689 \n", 
"\n", - " ent_f1 \n", - "proportion model \n", - "100.0 % construct 0.420 \n", - " cue_civic 0.606 \n", - " cue_cosmic_census 0.608 \n", - " entrez 0.722 \n", - " hmm 0.736 \n", - " omim 0.616 \n", - " protein_families 0.022 \n", - " protein_gazetteer 0.200 " + " ent_f1 \n", + "proportion model \n", + "100.0 % CIViC 0.606 \n", + " COSMIC 0.608 \n", + " Entrez 0.722 \n", + " HGNC 0.420 \n", + " OMIM 0.616 \n", + " Protein Families 0.022 \n", + " Proteins 0.200 \n", + " hmm 0.736 " ] }, "metadata": {}, @@ -4949,7 +2267,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████▉| 999/1000 [00:33<00:00, 30.08it/s]\n" + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊| 999/1000 [00:40<00:00, 24.53it/s]\n" ] }, { @@ -5002,15 +2320,15 @@ " \n", " 100.0 %\n", " ner_model\n", - " 0.887\n", - " 0.621\n", + " 0.901\n", + " 0.613\n", " 0.73\n", " \n", " \n", " \n", - " 0.799\n", - " 0.723\n", - " 0.76\n", + " 0.819\n", + " 0.718\n", + " 0.766\n", " \n", " \n", "\n", @@ -5019,11 +2337,11 @@ "text/plain": [ " tok_precision tok_recall tok_f1 tok_cee tok_acc \\\n", "proportion model \n", - "100.0 % ner_model 0.887 0.621 0.73 \n", + "100.0 % ner_model 0.901 0.613 0.73 \n", "\n", " coverage ent_precision ent_recall ent_f1 \n", "proportion model \n", - "100.0 % ner_model 0.799 0.723 0.76 " + "100.0 % ner_model 0.819 0.718 0.766 " ] }, "metadata": {}, @@ -5045,7 +2363,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████▉| 999/1000 [00:33<00:00, 29.84it/s]\n" + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊| 999/1000 [00:38<00:00, 25.86it/s]\n" ] }, { @@ -5142,7 +2460,7 
@@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 62, "id": "a313589f-d854-41c8-8df6-bccc2b0bf2d4", "metadata": {}, "outputs": [], @@ -5153,7 +2471,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 63, "id": "a655dd6c-92c2-4582-84ea-fb8af767856b", "metadata": {}, "outputs": [ @@ -5163,7 +2481,7 @@ "(83624, 5617)" ] }, - "execution_count": 43, + "execution_count": 63, "metadata": {}, "output_type": "execute_result" } @@ -5174,7 +2492,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 64, "id": "1dff773c-af59-4896-87d2-0a65bff9a6cf", "metadata": {}, "outputs": [ @@ -5184,7 +2502,7 @@ "(1000, 475)" ] }, - "execution_count": 44, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } @@ -5195,7 +2513,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 65, "id": "7bad482c-d324-4052-b965-26d27b9e686e", "metadata": {}, "outputs": [ @@ -5205,7 +2523,7 @@ "(1000, 347)" ] }, - "execution_count": 45, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" }