Skip to content

Commit 38bdd50

Browse files
committed
Combine multiple tables into one
1 parent 7cf0116 commit 38bdd50

File tree

6 files changed

+374
-175
lines changed

6 files changed

+374
-175
lines changed

src/acquisition/rvdss/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@
5353
'prairies', 'pr', "british columbia",'bc',"territories",'terr',]
5454
NATION = ["canada","can",'ca',]
5555

56+
PROVINCES = ['nl','pe','ns','nb','qc','on','mb','sk','ab','bc','yt','nt','nu']
57+
5658
# Construct dashboard and data report URLs.
5759
DASHBOARD_BASE_URL = "https://health-infobase.canada.ca/src/data/respiratory-virus-detections/"
5860
DASHBOARD_W_DATE_URL = DASHBOARD_BASE_URL + "archive/{date}/"

src/acquisition/rvdss/database.py

Lines changed: 198 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -32,118 +32,168 @@
3232

3333

3434

35-
respiratory_detections_cols= (
36-
"epiweek",
37-
"time_value",
38-
"issue",
39-
"geo_type",
40-
"geo_value",
41-
"sarscov2_tests",
42-
"sarscov2_positive_tests",
43-
"flu_tests",
44-
"flu_positive_tests",
45-
"fluah1n1pdm09_positive_tests",
46-
"fluah3_positive_tests",
47-
"fluauns_positive_tests",
48-
"flua_positive_tests",
49-
"flub_positive_tests",
50-
"rsv_tests",
51-
"rsv_positive_tests",
52-
"hpiv_tests",
53-
"hpiv1_positive_tests",
54-
"hpiv2_positive_tests",
55-
"hpiv3_positive_tests",
56-
"hpiv4_positive_tests",
57-
"hpivother_positive_tests",
58-
"adv_tests",
59-
"adv_positive_tests",
60-
"hmpv_tests",
61-
"hmpv_positive_tests",
62-
"evrv_tests",
63-
"evrv_positive_tests",
64-
"hcov_tests",
65-
"hcov_positive_tests",
66-
"week",
67-
"weekorder",
68-
"year"
69-
)
35+
# respiratory_detections_cols= (
36+
# "epiweek",
37+
# "time_value",
38+
# "issue",
39+
# "geo_type",
40+
# "geo_value",
41+
# "sarscov2_tests",
42+
# "sarscov2_positive_tests",
43+
# "flu_tests",
44+
# "flu_positive_tests",
45+
# "fluah1n1pdm09_positive_tests",
46+
# "fluah3_positive_tests",
47+
# "fluauns_positive_tests",
48+
# "flua_positive_tests",
49+
# "flub_positive_tests",
50+
# "rsv_tests",
51+
# "rsv_positive_tests",
52+
# "hpiv_tests",
53+
# "hpiv1_positive_tests",
54+
# "hpiv2_positive_tests",
55+
# "hpiv3_positive_tests",
56+
# "hpiv4_positive_tests",
57+
# "hpivother_positive_tests",
58+
# "adv_tests",
59+
# "adv_positive_tests",
60+
# "hmpv_tests",
61+
# "hmpv_positive_tests",
62+
# "evrv_tests",
63+
# "evrv_positive_tests",
64+
# "hcov_tests",
65+
# "hcov_positive_tests",
66+
# "week",
67+
# "weekorder",
68+
# "year"
69+
# )
7070

71-
pct_positive_cols = (
72-
"epiweek",
73-
"time_value",
74-
"issue",
75-
"geo_type",
76-
"geo_value",
77-
"evrv_pct_positive",
78-
"evrv_tests",
79-
"evrv_positive_tests",
80-
"hpiv_pct_positive",
81-
"hpiv_tests",
82-
"hpiv_positive_tests",
83-
"adv_pct_positive",
84-
"adv_tests",
85-
"adv_positive_tests",
86-
"hcov_pct_positive",
87-
"hcov_tests",
88-
"hcov_positive_tests",
89-
"flua_pct_positive",
90-
"flub_pct_positive",
91-
"flu_tests",
92-
"flua_positive_tests",
93-
"flua_tests",
94-
"flub_tests",
95-
"flub_positive_tests",
96-
"flu_positive_tests",
97-
"flu_pct_positive",
98-
"hmpv_pct_positive",
99-
"hmpv_tests",
100-
"hmpv_positive_tests",
101-
"rsv_pct_positive",
102-
"rsv_tests",
103-
"rsv_positive_tests",
104-
"sarscov2_pct_positive",
105-
"sarscov2_tests",
106-
"sarscov2_positive_tests",
107-
"region",
108-
"week",
109-
"weekorder",
110-
"year"
111-
)
71+
# pct_positive_cols = (
72+
# "epiweek",
73+
# "time_value",
74+
# "issue",
75+
# "geo_type",
76+
# "geo_value",
77+
# "evrv_pct_positive",
78+
# "evrv_tests",
79+
# "evrv_positive_tests",
80+
# "hpiv_pct_positive",
81+
# "hpiv_tests",
82+
# "hpiv_positive_tests",
83+
# "adv_pct_positive",
84+
# "adv_tests",
85+
# "adv_positive_tests",
86+
# "hcov_pct_positive",
87+
# "hcov_tests",
88+
# "hcov_positive_tests",
89+
# "flua_pct_positive",
90+
# "flub_pct_positive",
91+
# "flu_tests",
92+
# "flua_positive_tests",
93+
# "flua_tests",
94+
# "flub_tests",
95+
# "flub_positive_tests",
96+
# "flu_positive_tests",
97+
# "flu_pct_positive",
98+
# "hmpv_pct_positive",
99+
# "hmpv_tests",
100+
# "hmpv_positive_tests",
101+
# "rsv_pct_positive",
102+
# "rsv_tests",
103+
# "rsv_positive_tests",
104+
# "sarscov2_pct_positive",
105+
# "sarscov2_tests",
106+
# "sarscov2_positive_tests",
107+
# "region",
108+
# "week",
109+
# "weekorder",
110+
# "year"
111+
# )
112112

113-
detections_counts_cols = (
114-
"epiweek",
115-
"time_value",
116-
"issue" ,
117-
"geo_type",
118-
"geo_value",
119-
"hpiv_positive_tests",
120-
"adv_positive_tests",
121-
"hmpv_positive_tests",
122-
"evrv_positive_tests",
123-
"hcov_positive_tests",
124-
"rsv_positive_tests",
125-
"flu_positive_tests"
126-
)
113+
# detections_counts_cols = (
114+
# "epiweek",
115+
# "time_value",
116+
# "issue" ,
117+
# "geo_type",
118+
# "geo_value",
119+
# "hpiv_positive_tests",
120+
# "adv_positive_tests",
121+
# "hmpv_positive_tests",
122+
# "evrv_positive_tests",
123+
# "hcov_positive_tests",
124+
# "rsv_positive_tests",
125+
# "flu_positive_tests"
126+
# )
127127

128-
expected_table_names = {
129-
"respiratory_detection":"rvdss_repiratory_detections",
130-
"positive":"rvdss_pct_positive" ,
131-
"count": "rvdss_detections_counts"
132-
}
128+
# expected_table_names = {
129+
# "respiratory_detection":"rvdss_repiratory_detections",
130+
# "positive":"rvdss_pct_positive" ,
131+
# "count": "rvdss_detections_counts"
132+
# }
133133

134-
expected_columns = {
135-
"respiratory_detection":respiratory_detections_cols,
136-
"positive": pct_positive_cols,
137-
"count":detections_counts_cols
138-
}
134+
# expected_columns = {
135+
# "respiratory_detection":respiratory_detections_cols,
136+
# "positive": pct_positive_cols,
137+
# "count":detections_counts_cols
138+
# }
139+
140+
# Column names of the combined `rvdss` table.
# `update` builds both the column list and the value placeholders of its
# INSERT statement from this tuple, in this order.
rvdss_cols = (
    # keys
    "epiweek",
    "time_value",
    "issue",
    "geo_type",
    "geo_value",
    # SARS-CoV-2
    "sarscov2_tests",
    "sarscov2_positive_tests",
    "sarscov2_pct_positive",
    # influenza (all types)
    "flu_tests",
    "flu_positive_tests",
    "flu_pct_positive",
    # influenza A
    "fluah1n1pdm09_positive_tests",
    "fluah3_positive_tests",
    "fluauns_positive_tests",
    "flua_positive_tests",
    "flua_tests",
    "flua_pct_positive",
    # influenza B
    "flub_positive_tests",
    "flub_tests",
    "flub_pct_positive",
    # RSV
    "rsv_tests",
    "rsv_positive_tests",
    "rsv_pct_positive",
    # HPIV
    "hpiv_tests",
    "hpiv1_positive_tests",
    "hpiv2_positive_tests",
    "hpiv3_positive_tests",
    "hpiv4_positive_tests",
    "hpivother_positive_tests",
    "hpiv_positive_tests",
    "hpiv_pct_positive",
    # ADV
    "adv_tests",
    "adv_positive_tests",
    "adv_pct_positive",
    # HMPV
    "hmpv_tests",
    "hmpv_positive_tests",
    "hmpv_pct_positive",
    # EV/RV
    "evrv_tests",
    "evrv_positive_tests",
    "evrv_pct_positive",
    # HCoV
    "hcov_tests",
    "hcov_positive_tests",
    "hcov_pct_positive",
    # calendar / region fields
    "week",
    "weekorder",
    "year",
    "region",
)
139189

140-
def get_num_rows(cursor, table_name):
141-
cursor.execute("SELECT count(1) `num` FROM `{table_name}`")
190+
def get_num_rows(cursor):
    """Return the current number of rows in the `rvdss` table.

    Args:
        cursor: An open database cursor. It is used to execute the count
            query and is then iterated to read the result.

    Returns:
        The value of the `num` column produced by the count query.

    Raises:
        RuntimeError: If the query yields no row at all.
    """
    cursor.execute("SELECT count(1) `num` FROM `rvdss`")
    num = None
    # Drain the cursor; the last (normally the only) row carries the count.
    for (num,) in cursor:
        pass
    if num is None:
        # Without this guard an empty result surfaced as UnboundLocalError.
        raise RuntimeError("count query on `rvdss` returned no rows")
    return num
145195

146-
def update(data_dict,logger):
196+
def update(data,logger):
147197
# connect to the database
148198
u, p = secrets.db.epi
149199
cnx = mysql.connector.connect(user=u, password=p, database="epidata")
@@ -157,36 +207,57 @@ def update(data_dict,logger):
157207
log_exceptions=True,
158208
)
159209

160-
for tt in data_dict.keys():
161-
data = data_dict[tt]
162-
data_tuples = list(data.itertuples(index=False,name=None))
163-
# loop though table types
164-
table_name = expected_table_names[tt]
165-
cols = expected_columns[tt]
166-
place_holders= ', '.join(["?" for _ in cols])
167-
field_names = ", ".join(
168-
f"`{name}`" for name in cols)
210+
# for tt in data_dict.keys():
211+
# data = data_dict[tt]
212+
# data_tuples = list(data.itertuples(index=False,name=None))
213+
# # loop through table types
214+
# table_name = expected_table_names[tt]
215+
# cols = expected_columns[tt]
216+
# place_holders= ', '.join(["?" for _ in cols])
217+
# field_names = ", ".join(
218+
# f"`{name}`" for name in cols)
169219

170-
field_values = ", ".join(
171-
f"%({name})s" for name in cols)
220+
# field_values = ", ".join(
221+
# f"%({name})s" for name in cols)
172222

173-
#check rvdss for new and/or revised data
174-
sql = f"""
175-
INSERT INTO {table_name} ({field_names})
176-
VALUES ({field_values})
177-
"""
223+
# #check rvdss for new and/or revised data
224+
# sql = f"""
225+
# INSERT INTO {table_name} ({field_names})
226+
# VALUES ({field_values})
227+
# """
178228

179-
# keep track of how many rows were added
180-
rows_before = get_num_rows(cur,table_name)
181-
total_rows = 0
229+
# # keep track of how many rows were added
230+
# rows_before = get_num_rows(cur,table_name)
231+
# total_rows = 0
182232

183-
#insert data
184-
cur.executemany(sql, data_tuples)
233+
# #insert data
234+
# cur.executemany(sql, data_tuples)
185235

186-
# keep track of how many rows were added
187-
rows_after = get_num_rows(cur,table_name)
188-
logger.info(f"Inserted {int(rows_after - rows_before)}/{int(total_rows)} row(s) into table {table_name}")
236+
# # keep track of how many rows were added
237+
# rows_after = get_num_rows(cur,table_name)
238+
# logger.info(f"Inserted {int(rows_after - rows_before)}/{int(total_rows)} row(s) into table {table_name}")
189239

240+
data_tuples = list(data.itertuples(index=False,name=None))
241+
field_names = ", ".join(f"`{name}`" for name in rvdss_cols)
242+
field_values = ", ".join(f"%({name})s" for name in rvdss_cols)
243+
244+
# build the INSERT statement for the combined rvdss table
245+
sql = f"""
246+
INSERT INTO rvdss ({field_names})
247+
VALUES ({field_values})
248+
"""
249+
250+
# keep track of how many rows were added
251+
rows_before = get_num_rows(cur)
252+
total_rows = 0
253+
254+
#insert data
255+
cur.executemany(sql, data_tuples)
256+
257+
# keep track of how many rows were added
258+
rows_after = get_num_rows(cur)
259+
logger.info(f"Inserted {int(rows_after - rows_before)}/{int(total_rows)} row(s) into table rvdss")
260+
190261
# cleanup
191262
cur.close()
192263
cnx.commit()

0 commit comments

Comments
 (0)