-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathexporter-diagnosis.py
More file actions
executable file
·123 lines (101 loc) · 3.62 KB
/
exporter-diagnosis.py
File metadata and controls
executable file
·123 lines (101 loc) · 3.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/python3
# vim:ts=4:sw=4:tw=0:sts=4:et
import pprint
import re
import logging as log
from builtins import str, isinstance, len, set, int
from typing import List
import pandas as pd
from cli_common import (
add_directory_schema_argument,
add_logging_arguments,
add_no_stdout_argument,
add_purge_cache_arguments,
add_withdrawn_scope_arguments,
add_xlsx_output_argument,
build_directory_kwargs,
build_parser,
configure_logging,
)
from directory import Directory
from orphacodes import OrphaCodes
from icd10codeshelper import ICD10CodesHelper
# Names of the caches that are offered to add_purge_cache_arguments() below.
cachesList = ['directory']
# Shared pretty-printer; it is handed to build_directory_kwargs() when the Directory is created.
pp = pprint.PrettyPrinter(indent=4)
# Command-line interface: start from the common parser provided by cli_common and
# register the option groups this script uses, plus its own -O option.
# NOTE(review): the add_* helpers live in cli_common; the exact flags they add are not visible here.
parser = build_parser()
add_logging_arguments(parser)
add_xlsx_output_argument(parser)
# nargs=1 makes argparse store the value as a one-element list (and None when the
# option is absent), so code reading args.orphacodesfile has to take element [0].
parser.add_argument('-O', '--orphacodes-mapfile', dest='orphacodesfile', nargs=1,
help='file name of Orpha code mappings from http://www.orphadata.org/cgi-bin/ORPHAnomenclature.html')
add_no_stdout_argument(parser)
# The directory schema defaults to "ERIC" unless overridden on the command line.
add_directory_schema_argument(parser, default="ERIC")
add_withdrawn_scope_arguments(parser)
add_purge_cache_arguments(parser, cachesList)
# Parser-level default: purgeCaches is an empty list unless an option sets it.
parser.set_defaults(purgeCaches=[])
# Parse sys.argv and hand the result to the shared logging configuration helper.
args = parser.parse_args()
configure_logging(args)
# Main code
# Build the Directory from the parsed command-line options and report its size.
dir = Directory(**build_directory_kwargs(args, pp=pp))
log.info('Total biobanks: %s', dir.getBiobanksCount())
log.info('Total collections: %s', dir.getCollectionsCount())

# -O is declared with nargs=1, so a supplied file name arrives wrapped in a
# one-element list; without the option, OrphaCodes is constructed with None.
orphacodes = OrphaCodes(None if args.orphacodesfile is None else args.orphacodesfile[0])

# Result accumulators, all starting empty / at zero.
# NOTE(review): presumably these are filled in by the collection loop that follows.

# Four independent lists (each name gets its own list object).
(
    cancerExistingDiagnosed,
    cancerOnlyExistingDiagnosed,
    cancerExistingControls,
    cancerProspective,
) = ([], [], [], [])

# Five independent sets (each name gets its own set object).
(
    cancerBiobanksExistingDiagnosed,
    cancerOnlyBiobanksExistingDiagnosed,
    cancerBiobanksExistingControls,
    cancerBiobanksProspective,
    cancerBiobanks,
) = (set(), set(), set(), set(), set())

# Integer counters for the "cancer" totals.
(
    cancerCollectionSamplesExplicit,
    cancerCollectionDonorsExplicit,
    cancerCollectionSamplesIncOoM,
) = (0, 0, 0)

# Integer counters for the "cancer only" totals.
(
    cancerOnlyCollectionSamplesExplicit,
    cancerOnlyCollectionDonorsExplicit,
    cancerOnlyCollectionSamplesIncOoM,
) = (0, 0, 0)
for collection in dir.getCollections():
log.debug("Analyzing collection " + collection['id'])
biobankId = dir.getCollectionBiobankId(collection['id'])
biobank = dir.getBiobankById(biobankId)
biobank_capabilities = []
if 'capabilities' in biobank:
for c in biobank['capabilities']:
biobank_capabilities.append(c['id'])
biobank_covid = []
if 'covid19biobank' in biobank:
for c in biobank['covid19biobank']:
biobank_covid.append(c['id'])
biobank_networks = []
if 'network' in biobank:
for n in biobank['network']:
biobank_networks.append(n['id'])
OoM = int(collection['order_of_magnitude'])
materials = []
if 'materials' in collection:
for m in collection['materials']:
materials.append(m)
data_categories = []
if 'data_categories' in collection:
for c in collection['data_categories']:
data_categories.append(c)
types = []
if 'type' in collection:
for t in collection['type']:
types.append(t)
log.debug("Types: " + str(types))
diags = []
diag_ranges = []
cancer_diag = False
cancer_control = False
cancer_prospective = False
non_cancer = False
if 'diagnosis_available' in collection:
for d in collection['diagnosis_available']:
if re.search('-', d['name']):
diag_ranges.append(d['name'])
else:
diags.append(d['name'])
log.debug(str(collection['diagnosis_available']))
if diag_ranges:
log.warning("There are diagnosis ranges provided for collection " + collection['id'] + ": " + str(diag_ranges))