Skip to content

Commit 5035cf6

Browse files
authored
Merge pull request #746 from cmu-delphi/krivard/covid_hosp_column_update
Update columns in covid_hosp: facility, state daily, state timeseries
2 parents 5dc0027 + ed9ed49 commit 5035cf6

File tree

21 files changed

+1108
-590
lines changed

21 files changed

+1108
-590
lines changed

integrations/acquisition/covid_hosp/facility/test_scenarios.py

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# py3tester coverage target (equivalent to `import *`)
1818
__test_target__ = 'delphi.epidata.acquisition.covid_hosp.facility.update'
1919

20+
NEWLINE="\n"
2021

2122
class AcquisitionTests(unittest.TestCase):
2223

@@ -54,7 +55,7 @@ def test_acquire_dataset(self):
5455
with self.subTest(name='no data yet'):
5556
response = Epidata.covid_hosp_facility(
5657
'450822', Epidata.range(20200101, 20210101))
57-
self.assertEqual(response['result'], -2)
58+
self.assertEqual(response['result'], -2, response)
5859

5960
# acquire sample data into local database
6061
with self.subTest(name='first acquisition'):
@@ -63,21 +64,32 @@ def test_acquire_dataset(self):
6364

6465
# make sure the data now exists
6566
with self.subTest(name='initial data checks'):
67+
expected_spotchecks = {
68+
"hospital_pk": "450822",
69+
"collection_week": 20201030,
70+
"publication_date": 20210315,
71+
"previous_day_total_ed_visits_7_day_sum": 536,
72+
"total_personnel_covid_vaccinated_doses_all_7_day_sum": 18,
73+
"total_beds_7_day_avg": 69.3,
74+
"previous_day_admission_influenza_confirmed_7_day_sum": -999999
75+
}
6676
response = Epidata.covid_hosp_facility(
6777
'450822', Epidata.range(20200101, 20210101))
6878
self.assertEqual(response['result'], 1)
6979
self.assertEqual(len(response['epidata']), 1)
7080
row = response['epidata'][0]
71-
self.assertEqual(row['hospital_pk'], '450822')
72-
self.assertEqual(row['collection_week'], 20201030)
73-
self.assertEqual(row['publication_date'], 20210315)
74-
self.assertEqual(row['previous_day_total_ed_visits_7_day_sum'], 536)
75-
self.assertAlmostEqual(row['total_beds_7_day_avg'], 69.3)
76-
self.assertEqual(
77-
row['previous_day_admission_influenza_confirmed_7_day_sum'], -999999)
78-
79-
# expect 94 fields per row (95 database columns, except `id`)
80-
self.assertEqual(len(row), 94)
81+
for k,v in expected_spotchecks.items():
82+
self.assertTrue(
83+
k in row,
84+
f"no '{k}' in row:\n{NEWLINE.join(sorted(row.keys()))}"
85+
)
86+
if isinstance(v, float):
87+
self.assertAlmostEqual(row[k], v, f"row[{k}] is {row[k]} not {v}")
88+
else:
89+
self.assertEqual(row[k], v, f"row[{k}] is {row[k]} not {v}")
90+
91+
# expect 113 fields per row (114 database columns, except `id`)
92+
self.assertEqual(len(row), 113)
8193

8294
# re-acquisition of the same dataset should be a no-op
8395
with self.subTest(name='second acquisition'):
@@ -108,7 +120,7 @@ def test_facility_lookup(self):
108120
self.assertTrue(acquired)
109121

110122
# texas ground truth, sorted by `hospital_pk`
111-
# see sample data at testdata/acquisition/covid_hosp/facility/dataset.csv
123+
# see sample data at testdata/acquisition/covid_hosp/facility/dataset_old.csv
112124
texas_hospitals = [{
113125
'hospital_pk': '450771',
114126
'state': 'TX',

integrations/acquisition/covid_hosp/state_daily/test_scenarios.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def test_acquire_dataset(self):
5151
# make sure the data does not yet exist
5252
with self.subTest(name='no data yet'):
5353
response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101))
54-
self.assertEqual(response['result'], -2)
54+
self.assertEqual(response['result'], -2, response)
5555

5656
# acquire sample data into local database
5757
# mock out network calls to external hosts
@@ -75,13 +75,14 @@ def test_acquire_dataset(self):
7575
self.assertEqual(row['date'], 20201209)
7676
self.assertEqual(row['issue'], 20210315)
7777
self.assertEqual(row['critical_staffing_shortage_today_yes'], 8)
78+
self.assertEqual(row['total_patients_hospitalized_confirmed_influenza_covid_coverage'], 56)
7879
actual = row['inpatient_bed_covid_utilization']
7980
expected = 0.11729857819905214
8081
self.assertAlmostEqual(actual, expected)
8182
self.assertIsNone(row['critical_staffing_shortage_today_no'])
8283

8384
# expect 118 fields per row (120 database columns, except `id` and `record_type`)
84-
self.assertEqual(len(row), 61)
85+
self.assertEqual(len(row), 118)
8586

8687
with self.subTest(name='all date batches acquired'):
8788
response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101), issues=20210313)

integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,14 @@ def test_acquire_dataset(self):
7171
self.assertEqual(row['date'], 20200826)
7272
self.assertEqual(row['issue'], 20210315)
7373
self.assertEqual(row['critical_staffing_shortage_today_yes'], 2)
74+
self.assertEqual(row['total_patients_hospitalized_confirmed_influenza_covid_coverage'], 56)
7475
actual = row['inpatient_bed_covid_utilization']
7576
expected = 0.011946591707659873
7677
self.assertAlmostEqual(actual, expected)
7778
self.assertIsNone(row['critical_staffing_shortage_today_no'])
7879

7980
# expect 118 fields per row (120 database columns, except `id` and `record_type`)
80-
self.assertEqual(len(row), 61)
81+
self.assertEqual(len(row), 118)
8182

8283
# re-acquisition of the same dataset should be a no-op
8384
with self.subTest(name='second acquisition'):

integrations/server/test_covid_hosp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def setUp(self):
3232
def insert_issue(self, cur, issue, value, record_type):
3333
so_many_nulls = ', '.join(['null'] * 57)
3434
cur.execute(f'''insert into covid_hosp_state_timeseries values (
35-
0, {issue}, 'PA', 20201118, {value}, {so_many_nulls}, '{record_type}'
35+
0, {issue}, 'PA', 20201118, {value}, {so_many_nulls}, '{record_type}', {so_many_nulls}
3636
)''')
3737

3838
def test_query_by_issue(self):

src/acquisition/covid_hosp/common/database.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,17 +150,19 @@ def insert_dataset(self, publication_date, dataframe):
150150
dataframe : pandas.DataFrame
151151
The dataset.
152152
"""
153-
154-
num_columns = 2 + len(self.columns_and_types) + len(self.additional_fields)
153+
dataframe_columns_and_types = [
154+
x for x in self.columns_and_types if x[0] in dataframe.columns
155+
]
156+
num_columns = 2 + len(dataframe_columns_and_types) + len(self.additional_fields)
155157
value_placeholders = ', '.join(['%s'] * num_columns)
156-
columns = ', '.join(f'`{i[1]}`' for i in self.columns_and_types + self.additional_fields)
158+
columns = ', '.join(f'`{i[1]}`' for i in dataframe_columns_and_types + self.additional_fields)
157159
sql = f'INSERT INTO `{self.table_name}` (`id`, `{self.publication_col_name}`, {columns}) ' \
158160
f'VALUES ({value_placeholders})'
159161
id_and_publication_date = (0, publication_date)
160162
with self.new_cursor() as cursor:
161163
for _, row in dataframe.iterrows():
162164
values = []
163-
for name, _, dtype in self.columns_and_types:
165+
for name, _, dtype in dataframe_columns_and_types:
164166
if isinstance(row[name], float) and math.isnan(row[name]):
165167
values.append(None)
166168
else:

0 commit comments

Comments
 (0)