
Commit 1488884

Merge pull request #1349 from cmu-delphi/release/delphi-epidata-4.1.14

Release Delphi Epidata 4.1.14

2 parents 6da4b20 + 0424631

File tree

29 files changed: +287 -181 lines changed


.bumpversion.cfg (+1 -1)

@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 4.1.13
+current_version = 4.1.14
 commit = False
 tag = False

.env.example (+3)

@@ -4,3 +4,6 @@ FLASK_SECRET=abc
 #API_KEY_REQUIRED_STARTING_AT=2021-07-30
 API_KEY_ADMIN_PASSWORD=abc
 API_KEY_REGISTER_WEBHOOK_TOKEN=abc
+
+# Sentry
+# If setting a Sentry DSN, note that the URL should NOT be quoted!
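Note: for illustration, an unquoted entry would look like `SENTRY_DSN=https://<public_key>@<org>.ingest.sentry.io/<project_id>` — the variable name and DSN shape here are assumptions (`SENTRY_DSN` is the default name the Sentry SDK reads from the environment); the real DSN comes from the Sentry project settings.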

.github/workflows/performance-tests-one-time.yml (+5 -3)

@@ -1,9 +1,9 @@
-name: One-time performance testing - 26th October 2023
+name: One-time performance testing - 8th November 2023
 
-# Run "At every 30th minute on day-of-month 26 in October"
+# Run "At every 30th minute on day-of-month 8 in November"
 on:
   schedule:
-    - cron: '*/30 * 26 10 *'
+    - cron: '*/30 * 8 11 *'
 
 # Add some extra perms to comment on a PR
 permissions:
@@ -65,6 +65,8 @@ jobs:
         path: delphi-admin
     - name: Build & run Locust
       continue-on-error: true # sometimes ~2-5 queries fail, we shouldn't end the run if that's the case
+      env:
+        PERFTEST_API_KEY: ${{secrets.PERFTEST_API_KEY}}
      run: |
        cd delphi-admin/load-testing/locust
        docker build -t locust .

.github/workflows/performance-tests.yml (+2)

@@ -73,6 +73,8 @@ jobs:
         path: delphi-admin
     - name: Build & run Locust
       continue-on-error: true # sometimes ~2-5 queries fail, we shouldn't end the run if that's the case
+      env:
+        PERFTEST_API_KEY: ${{secrets.PERFTEST_API_KEY}}
      run: |
        cd delphi-admin/load-testing/locust
        docker build -t locust .
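Note: the `PERFTEST_API_KEY` secret is presumably consumed inside the Locust scripts that live in `delphi-admin`. A minimal sketch of how a locustfile might read it — the user class, endpoint, and query parameters below are illustrative assumptions, not the actual delphi-admin code:

```python
import os

from locust import HttpUser, task


class EpidataUser(HttpUser):
    """Hypothetical load-test user; the real task definitions live in delphi-admin."""

    @task
    def fetch_covidcast(self):
        # Authenticate with the key injected via the workflow's `env:` block.
        self.client.get(
            "/epidata/covidcast/",
            params={
                "signal": "fb-survey:smoothed_cli",  # illustrative query
                "time": "day:20200401",
                "geo": "county:*",
                "api_key": os.environ.get("PERFTEST_API_KEY", ""),
            },
        )
```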

dev/local/Makefile (+3)

@@ -77,6 +77,7 @@ LOG_REDIS:=delphi_redis_instance_$(NOW).log
 WEB_CONTAINER_ID:=$(shell docker ps -q --filter 'name=delphi_web_epidata')
 DATABASE_CONTAINER_ID:=$(shell docker ps -q --filter 'name=delphi_database_epidata')
 REDIS_CONTAINER_ID:=$(shell docker ps -q --filter 'name=delphi_redis')
+ENV_FILE:=repos/delphi/delphi-epidata/.env
 
 M1=
 ifeq ($(shell uname -smp), Darwin arm64 arm)
@@ -104,8 +105,10 @@ web:
 	@# Run the web server
 	@# MODULE_NAME specifies the location of the `app` variable, the actual WSGI application object to run.
 	@# see https://github.com/tiangolo/meinheld-gunicorn-docker#module_name
+	@touch $(ENV_FILE)
 	@docker run --rm -p 127.0.0.1:10080:80 \
 		$(M1) \
+		--env-file $(ENV_FILE) \
 		--env "MODULE_NAME=delphi.epidata.server.main" \
 		--env "SQLALCHEMY_DATABASE_URI=$(sqlalchemy_uri)" \
 		--env "FLASK_SECRET=abc" --env "FLASK_PREFIX=/epidata" --env "LOG_DEBUG" \

dev/local/setup.cfg (+1 -1)

@@ -1,6 +1,6 @@
 [metadata]
 name = Delphi Development
-version = 4.1.13
+version = 4.1.14
 
 [options]
 packages =

devops/Dockerfile (+1 -3)

@@ -7,7 +7,6 @@ FROM tiangolo/meinheld-gunicorn:python3.8
 LABEL org.opencontainers.image.source=https://github.com/cmu-delphi/delphi-epidata
 
 COPY ./devops/gunicorn_conf.py /app
-COPY ./devops/start_wrapper.sh /
 RUN mkdir -p /app/delphi/epidata
 COPY ./src/server /app/delphi/epidata/server
 COPY ./src/common /app/delphi/epidata/common
@@ -18,7 +17,6 @@ COPY requirements.api.txt /app/requirements_also.txt
 RUN ln -s -f /usr/share/zoneinfo/America/New_York /etc/localtime \
     && rm -rf /app/delphi/epidata/__pycache__ \
     && chmod -R o+r /app/delphi/epidata \
-    && chmod 755 /start_wrapper.sh \
     && pip install --no-cache-dir -r /tmp/requirements.txt -r requirements_also.txt
 # the file /tmp/requirements.txt is created in the parent docker definition. (see:
 # https://github.com/tiangolo/meinheld-gunicorn-docker/blob/master/docker-images/python3.8.dockerfile#L5 )
@@ -28,4 +26,4 @@ RUN ln -s -f /usr/share/zoneinfo/America/New_York /etc/localtime \
 ENV PYTHONUNBUFFERED 1
 
 ENTRYPOINT [ "/entrypoint.sh" ]
-CMD [ "/start_wrapper.sh" ]
+CMD [ "/start.sh" ]

devops/start_wrapper.sh (-10)

This file was deleted.

docs/epidata_development.md (+10)

@@ -388,3 +388,13 @@ The command above maps two local directories into the container:
 - `/repos/delphi/delphi-epidata/src`: Just the source code, which forms the
   container's `delphi.epidata` python package.
 
+## instrumentation with Sentry
+
+Delphi uses [Sentry](https://sentry.io/welcome/) in production for debugging, APM, and other observability purposes. You can instrument your local environment if you want to take advantage of Sentry's features during the development process. In most cases this option is available to internal Delphi team members only.
+
+The bare minimum to set up instrumentation is to supply the DSN for the [epidata-api](https://cmu-delphi.sentry.io/projects/epidata-api/?project=4506123377442816) Sentry project to the application environment.
+
+- You can get the DSN from the Sentry [project's keys config](https://cmu-delphi.sentry.io/settings/projects/epidata-api/keys/), or by asking someone in the prodsys, DevOps, or sysadmin space.
+- Once you have the DSN, add it to your local `.env` file and rebuild your containers to start sending telemetry to Sentry.
+
+Additional internal documentation for Sentry can be found [here](https://bookstack.delphi.cmu.edu/books/systems-handbook/page/sentry).
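Note: for context on what this instrumentation involves, here is a minimal sketch of Flask instrumentation with `sentry-sdk[flask]` (the dependency this release adds). The `SENTRY_DSN` variable name and the sample rate are assumptions for illustration, not necessarily how the server wires it up:

```python
import os

import sentry_sdk
from flask import Flask
from sentry_sdk.integrations.flask import FlaskIntegration

# Assumed: the DSN arrives via the environment (e.g. from the .env file).
# If it is unset, instrumentation is simply skipped and nothing is sent.
dsn = os.environ.get("SENTRY_DSN")
if dsn:
    sentry_sdk.init(
        dsn=dsn,  # remember: the DSN must be unquoted in the .env file
        integrations=[FlaskIntegration()],
        traces_sample_rate=0.1,  # fraction of requests traced for APM; illustrative value
    )

app = Flask(__name__)
```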

docs/symptom-survey/publications.md (+4 -1)

@@ -26,14 +26,17 @@ Pandemic"](https://www.pnas.org/topic/548) in *PNAS*:
 
 Research publications using the survey data include:
 
+- W. Dempsey (2023). [Addressing selection bias and measurement error in
+  COVID-19 case count data using auxiliary information](https://doi.org/10.1214/23-AOAS1744).
+  *Annals of Applied Statistics* 17 (4), 2903-2923.
 - Ma, M.Z., Chen, S.X. (2023). [Beyond the surface: accounting for confounders
   in understanding the link between collectivism and COVID-19 pandemic in the
   United States](https://doi.org/10.1186/s12889-023-16384-2). *BMC Public
   Health* 23, 1513.
 - C.K. Ettman, E. Badillo Goicoechea, and E.A. Stuart (2023). [Evolution of
   depression and anxiety over the COVID-19 pandemic and across demographic
   groups in a large sample of U.S. adults](https://doi.org/10.1016/j.focus.2023.100140).
-  *AJPM Focus*.
+  *AJPM Focus* 2 (4), 100140.
 - M. Rubinstein, Z. Branson, and E.H. Kennedy (2023). [Heterogeneous
   interventional effects with multiple mediators: Semiparametric and
   nonparametric approaches](https://doi.org/10.1515/jci-2022-0070). *Journal of

integrations/acquisition/covid_hosp/state_daily/test_scenarios.py (+114 -54)

@@ -47,62 +47,122 @@ def setUp(self):
     cur.execute('delete from api_user')
     cur.execute('insert into api_user(api_key, email) values("key", "email")')
 
-  @freeze_time("2021-03-16")
-  def test_acquire_dataset(self):
-    """Acquire a new dataset."""
+  def get_modified_dataset(self, critical_staffing_shortage_today_yes, reporting_cutoff_start):
+    """Get a simplified version of a test dataset.
 
-    # make sure the data does not yet exist
-    with self.subTest(name='no data yet'):
-      response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101))
-      self.assertEqual(response['result'], -2, response)
+    Only WY data is modified. The issue date is specified in the metadata file.
+    """
+    df = self.test_utils.load_sample_dataset()
+    df_new = pd.DataFrame(df[df["state"] == "WY"], columns=df.columns).reset_index(drop=True)
+    df_new["critical_staffing_shortage_today_yes"] = critical_staffing_shortage_today_yes
+    df_new["reporting_cutoff_start"] = reporting_cutoff_start
+    return df_new
 
-    # acquire sample data into local database
-    # mock out network calls to external hosts
-    with self.subTest(name='first acquisition'), \
-        patch.object(Network, 'fetch_metadata', return_value=self.test_utils.load_sample_metadata()) as mock_fetch_meta, \
-        patch.object(Network, 'fetch_dataset', side_effect=[self.test_utils.load_sample_dataset("dataset0.csv"), # dataset for 3/13
-                                                            self.test_utils.load_sample_dataset("dataset0.csv"), # first dataset for 3/15
-                                                            self.test_utils.load_sample_dataset()] # second dataset for 3/15
-        ) as mock_fetch:
-      acquired = Update.run()
-      self.assertTrue(acquired)
-      self.assertEqual(mock_fetch_meta.call_count, 1)
-
-    # make sure the data now exists
-    with self.subTest(name='initial data checks'):
-      response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101))
-      self.assertEqual(response['result'], 1)
-      self.assertEqual(len(response['epidata']), 1)
-      row = response['epidata'][0]
-      self.assertEqual(row['state'], 'WY')
-      self.assertEqual(row['date'], 20201209)
-      self.assertEqual(row['issue'], 20210315)
-      self.assertEqual(row['critical_staffing_shortage_today_yes'], 8)
-      self.assertEqual(row['total_patients_hospitalized_confirmed_influenza_covid_coverage'], 56)
-      actual = row['inpatient_bed_covid_utilization']
-      expected = 0.11729857819905214
-      self.assertAlmostEqual(actual, expected)
-      self.assertIsNone(row['critical_staffing_shortage_today_no'])
-
-      # expect 61 fields per row (63 database columns, except `id` and `record_type`)
-      self.assertEqual(len(row), 118)
-
-    with self.subTest(name='all date batches acquired'):
-      response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101), issues=20210313)
-      self.assertEqual(response['result'], 1)
-
-    # re-acquisition of the same dataset should be a no-op
-    with self.subTest(name='second acquisition'), \
-        patch.object(Network, 'fetch_metadata', return_value=self.test_utils.load_sample_metadata()) as mock_fetch_meta, \
-        patch.object(Network, 'fetch_dataset', return_value=self.test_utils.load_sample_dataset()) as mock_fetch:
-      acquired = Update.run()
-      self.assertFalse(acquired)
+  def test_acquire_dataset(self):
+    """Acquire a new dataset."""
 
-    # make sure the data still exists
-    with self.subTest(name='final data checks'):
-      response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101))
-      self.assertEqual(response['result'], 1)
-      self.assertEqual(len(response['epidata']), 1)
+    with freeze_time("2021-03-15"):
+      # make sure the data does not yet exist
+      with self.subTest(name='no data yet'):
+        response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101))
+        self.assertEqual(response['result'], -2, response)
+
+      # acquire sample data into local database
+      # mock out network calls to external hosts
+      # issues: 3/13, 3/15
+      with self.subTest(name='first acquisition'), \
+          patch.object(Network, 'fetch_metadata',
+                       return_value=self.test_utils.load_sample_metadata("metadata.csv")) as mock_fetch_meta, \
+          patch.object(Network, 'fetch_dataset', side_effect=[
+            self.test_utils.load_sample_dataset(),
+            self.test_utils.load_sample_dataset()
+          ]) as mock_fetch:
+        acquired = Update.run()
+        self.assertTrue(acquired)
+        self.assertEqual(mock_fetch_meta.call_count, 1)
+
+      # make sure the data now exists
+      with self.subTest(name='initial data checks'):
+        response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101))
+        self.assertEqual(response['result'], 1)
+        self.assertEqual(len(response['epidata']), 1)
+        row = response['epidata'][0]
+        self.assertEqual(row['state'], 'WY')
+        self.assertEqual(row['date'], 20201209)
+        self.assertEqual(row['issue'], 20210315) # include today's data by default
+        self.assertEqual(row['critical_staffing_shortage_today_yes'], 8)
+        self.assertEqual(row['total_patients_hospitalized_confirmed_influenza_covid_coverage'], 56)
+        self.assertIsNone(row['critical_staffing_shortage_today_no'])
+
+        # expect 61 fields per row (63 database columns, except `id` and `record_type`)
+        self.assertEqual(len(row), 118)
+
+      with self.subTest(name='all date batches acquired'):
+        response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101), issues=20210313)
+        self.assertEqual(response['result'], 1)
+
+      # re-acquisition of the same dataset should be a no-op
+      # issues: 3/13, 3/15
+      with self.subTest(name='second acquisition'), \
+          patch.object(Network, 'fetch_metadata',
+                       return_value=self.test_utils.load_sample_metadata("metadata.csv")) as mock_fetch_meta, \
+          patch.object(Network, 'fetch_dataset', side_effect=[
+            self.test_utils.load_sample_dataset(),
+            self.test_utils.load_sample_dataset()
+          ]) as mock_fetch:
+        acquired = Update.run()
+        self.assertFalse(acquired)
+
+        # make sure the data still exists
+        response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101))
+        self.assertEqual(response['result'], 1)
+        self.assertEqual(len(response['epidata']), 1)
+
+    with freeze_time("2021-03-16"):
+      # simulate issue posted after yesterday's run
+      with self.subTest(name='late issue posted'), \
+          patch.object(Network, 'fetch_metadata',
+                       return_value=self.test_utils.load_sample_metadata("metadata2.csv")) as mock_fetch_meta, \
+          patch.object(Network, 'fetch_dataset', side_effect=[
+            self.get_modified_dataset(critical_staffing_shortage_today_yes = 9, reporting_cutoff_start="2020-12-09"),
+            self.get_modified_dataset(critical_staffing_shortage_today_yes = 10, reporting_cutoff_start="2020-12-09"),
+            self.get_modified_dataset(critical_staffing_shortage_today_yes = 11, reporting_cutoff_start="2020-12-10"),
+            self.get_modified_dataset(critical_staffing_shortage_today_yes = 12, reporting_cutoff_start="2020-12-10"),
+          ]) as mock_fetch:
+        acquired = Update.run()
+        self.assertTrue(acquired)
+        self.assertEqual(mock_fetch_meta.call_count, 1)
+
+      # make sure everything was filed correctly
+      with self.subTest(name='late issue data checks'):
+        response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101))
+        self.assertEqual(response['result'], 1)
+        self.assertEqual(len(response['epidata']), 2)
+
+        # should have data from 03-15 00:00:01AM
+        row = response['epidata'][0]
+        self.assertEqual(row['state'], 'WY')
+        self.assertEqual(row['date'], 20201209)
+        self.assertEqual(row['issue'], 20210315) # include today's data by default
+        self.assertEqual(row['critical_staffing_shortage_today_yes'], 10)
+        self.assertEqual(row['total_patients_hospitalized_confirmed_influenza_covid_coverage'], 56)
+        self.assertIsNone(row['critical_staffing_shortage_today_no'])
+
+        # should have data from 03-16 00:00:01AM
+        row = response['epidata'][1]
+        self.assertEqual(row['state'], 'WY')
+        self.assertEqual(row['date'], 20201210)
+        self.assertEqual(row['issue'], 20210316) # include today's data by default
+        self.assertEqual(row['critical_staffing_shortage_today_yes'], 12)
+        self.assertEqual(row['total_patients_hospitalized_confirmed_influenza_covid_coverage'], 56)
+        self.assertIsNone(row['critical_staffing_shortage_today_no'])
+
+        # expect 61 fields per row (63 database columns, except `id` and `record_type`)
+        self.assertEqual(len(row), 118)
+
+      with self.subTest(name='all date batches acquired'):
+        response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101), issues=20210316)
+        self.assertEqual(response['result'], 1)
 
 
   @freeze_time("2021-03-16")
@@ -121,7 +181,7 @@ def test_acquire_specific_issue(self):
     self.assertEqual(pre_max_issue, pd.Timestamp('1900-01-01 00:00:00'))
     with self.subTest(name='first acquisition'), \
         patch.object(Network, 'fetch_metadata', return_value=self.test_utils.load_sample_metadata()) as mock_fetch_meta, \
-        patch.object(Network, 'fetch_dataset', side_effect=[self.test_utils.load_sample_dataset("dataset0.csv")]
+        patch.object(Network, 'fetch_dataset', side_effect=[self.test_utils.load_sample_dataset()]
        ) as mock_fetch:
      acquired = Utils.update_dataset(Database,
                                      Network,

requirements.api.txt (+2 -2)

@@ -5,16 +5,16 @@ Flask-Limiter==3.3.0
 jinja2==3.0.3
 more_itertools==8.4.0
 mysqlclient==2.1.1
-newrelic
 orjson==3.4.7
 pandas==1.2.3
 python-dotenv==0.15.0
 pyyaml
 redis==3.5.3
 requests==2.31.0
 scipy==1.10.0
+sentry-sdk[flask]
 SQLAlchemy==1.4.40
 structlog==22.1.0
 tenacity==7.0.0
 typing-extensions
-werkzeug==2.2.3
+werkzeug==2.3.8

requirements.dev.txt (+1 -1)

@@ -1,4 +1,4 @@
-aiohttp==3.8.5
+aiohttp==3.8.6
 black>=20.8b1
 bump2version==1.0.1
 covidcast==0.1.5

src/acquisition/covid_hosp/common/database.py (+6 -2)

@@ -186,9 +186,13 @@ def nan_safe_dtype(dtype, value):
 
     num_columns = 2 + len(dataframe_columns_and_types) + len(self.additional_fields)
     value_placeholders = ', '.join(['%s'] * num_columns)
-    columns = ', '.join(f'`{i.sql_name}`' for i in dataframe_columns_and_types + self.additional_fields)
+    col_names = [f'`{i.sql_name}`' for i in dataframe_columns_and_types + self.additional_fields]
+    columns = ', '.join(col_names)
+    updates = ', '.join(f'{c}=new_values.{c}' for c in col_names)
+    # NOTE: list in `updates` presumes `publication_col_name` is part of the unique key and thus not needed in UPDATE
     sql = f'INSERT INTO `{self.table_name}` (`id`, `{self.publication_col_name}`, {columns}) ' \
-          f'VALUES ({value_placeholders})'
+          f'VALUES ({value_placeholders}) AS new_values ' \
+          f'ON DUPLICATE KEY UPDATE {updates}'
    id_and_publication_date = (0, publication_date)
    if logger:
      logger.info('updating values', count=len(dataframe.index))
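Note: this turns the plain INSERT into an upsert. To make the constructed statement concrete, here is a runnable sketch with hypothetical table and column names (the real names come from `self.table_name` and the dataframe column specs). The `VALUES ... AS new_values` row alias requires MySQL 8.0.19 or newer:

```python
# Hypothetical names, mirroring the f-string construction in database.py.
table_name = "covid_hosp"
publication_col_name = "publication_date"
col_names = ["`critical_staffing_shortage_today_yes`", "`reporting_cutoff_start`"]

columns = ", ".join(col_names)
updates = ", ".join(f"{c}=new_values.{c}" for c in col_names)
value_placeholders = ", ".join(["%s"] * (2 + len(col_names)))

sql = f"INSERT INTO `{table_name}` (`id`, `{publication_col_name}`, {columns}) " \
      f"VALUES ({value_placeholders}) AS new_values " \
      f"ON DUPLICATE KEY UPDATE {updates}"
print(sql)
# INSERT INTO `covid_hosp` (`id`, `publication_date`,
#   `critical_staffing_shortage_today_yes`, `reporting_cutoff_start`)
# VALUES (%s, %s, %s, %s) AS new_values
# ON DUPLICATE KEY UPDATE
#   `critical_staffing_shortage_today_yes`=new_values.`critical_staffing_shortage_today_yes`,
#   `reporting_cutoff_start`=new_values.`reporting_cutoff_start`
```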
