From e37407bb653a18b641eab044b3b4b0178a557cf1 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 7 Apr 2023 14:58:25 +0300 Subject: [PATCH 01/42] [WIP] Split covid_hosp into daily & timeseries tables --- .../covid_hosp/state_daily/database.py | 3 +- src/ddl/covid_hosp.sql | 141 ++++++++++++++++- .../covid_hosp_state_split_tables.sql | 148 ++++++++++++++++++ 3 files changed, 287 insertions(+), 5 deletions(-) create mode 100644 src/ddl/migrations/covid_hosp_state_split_tables.sql diff --git a/src/acquisition/covid_hosp/state_daily/database.py b/src/acquisition/covid_hosp/state_daily/database.py index 6a8228994..27205e739 100644 --- a/src/acquisition/covid_hosp/state_daily/database.py +++ b/src/acquisition/covid_hosp/state_daily/database.py @@ -7,8 +7,7 @@ class Database(BaseDatabase): - # note we share a database with state_timeseries - TABLE_NAME = 'covid_hosp_state_timeseries' + TABLE_NAME = 'covid_hosp_state_daily' KEY_COLS = ['state', 'reporting_cutoff_start'] # These are 3-tuples of (CSV header name, SQL db column name, data type) for # all the columns in the CSV file. diff --git a/src/ddl/covid_hosp.sql b/src/ddl/covid_hosp.sql index 2ffe7c71a..6ad8e0b15 100644 --- a/src/ddl/covid_hosp.sql +++ b/src/ddl/covid_hosp.sql @@ -61,9 +61,7 @@ CREATE TABLE `covid_hosp_meta` ( /* -`covid_hosp_state_timeseries` stores the versioned "state timeseries" dataset, -which contains data from both the time series data and the daily snapshot files. - +`covid_hosp_state_timeseries` stores time series data from the versioned "state timeseries" dataset. Data is public under the Open Data Commons Open Database License (ODbL). 
+------------------------------------------------------------------+---------+------+-----+---------+----------------+ @@ -509,6 +507,143 @@ CREATE TABLE `covid_hosp_state_timeseries` ( ) ENGINE=InnoDB DEFAULT CHARSET=utf8; +/* +`covid_hosp_state_daily` stores the versioned "state daily" dataset, +which contains data from the daily snapshot files. +Schema is equivalent to `covid_hosp_state_timeseries`. +*/ +CREATE TABLE `covid_hosp_state_daily` ( + `id` INT NOT NULL AUTO_INCREMENT, + `issue` INT NOT NULL, + `state` CHAR(2) NOT NULL, + `date` INT NOT NULL, + `critical_staffing_shortage_today_yes` INT, + `critical_staffing_shortage_today_no` INT, + `critical_staffing_shortage_today_not_reported` INT, + `critical_staffing_shortage_anticipated_within_week_yes` INT, + `critical_staffing_shortage_anticipated_within_week_no` INT, + `critical_staffing_shortage_anticipated_within_week_not_reported` INT, + `hospital_onset_covid` INT, + `hospital_onset_covid_coverage` INT, + `inpatient_beds` INT, + `inpatient_beds_coverage` INT, + `inpatient_beds_used` INT, + `inpatient_beds_used_coverage` INT, + `inpatient_beds_used_covid` INT, + `inpatient_beds_used_covid_coverage` INT, + `previous_day_admission_adult_covid_confirmed` INT, + `previous_day_admission_adult_covid_confirmed_coverage` INT, + `previous_day_admission_adult_covid_suspected` INT, + `previous_day_admission_adult_covid_suspected_coverage` INT, + `previous_day_admission_pediatric_covid_confirmed` INT, + `previous_day_admission_pediatric_covid_confirmed_coverage` INT, + `previous_day_admission_pediatric_covid_suspected` INT, + `previous_day_admission_pediatric_covid_suspected_coverage` INT, + `staffed_adult_icu_bed_occupancy` INT, + `staffed_adult_icu_bed_occupancy_coverage` INT, + `staffed_icu_adult_patients_confirmed_suspected_covid` INT, + `staffed_icu_adult_patients_confirmed_suspected_covid_coverage` INT, + `staffed_icu_adult_patients_confirmed_covid` INT, + 
`staffed_icu_adult_patients_confirmed_covid_coverage` INT, + `total_adult_patients_hosp_confirmed_suspected_covid` INT, + `total_adult_patients_hosp_confirmed_suspected_covid_coverage` INT, + `total_adult_patients_hosp_confirmed_covid` INT, + `total_adult_patients_hosp_confirmed_covid_coverage` INT, + `total_pediatric_patients_hosp_confirmed_suspected_covid` INT, + `total_pediatric_patients_hosp_confirmed_suspected_covid_coverage` INT, + `total_pediatric_patients_hosp_confirmed_covid` INT, + `total_pediatric_patients_hosp_confirmed_covid_coverage` INT, + `total_staffed_adult_icu_beds` INT, + `total_staffed_adult_icu_beds_coverage` INT, + `inpatient_beds_utilization` DOUBLE, + `inpatient_beds_utilization_coverage` INT, + `inpatient_beds_utilization_numerator` INT, + `inpatient_beds_utilization_denominator` INT, + `percent_of_inpatients_with_covid` DOUBLE, + `percent_of_inpatients_with_covid_coverage` INT, + `percent_of_inpatients_with_covid_numerator` INT, + `percent_of_inpatients_with_covid_denominator` INT, + `inpatient_bed_covid_utilization` DOUBLE, + `inpatient_bed_covid_utilization_coverage` INT, + `inpatient_bed_covid_utilization_numerator` INT, + `inpatient_bed_covid_utilization_denominator` INT, + `adult_icu_bed_covid_utilization` DOUBLE, + `adult_icu_bed_covid_utilization_coverage` INT, + `adult_icu_bed_covid_utilization_numerator` INT, + `adult_icu_bed_covid_utilization_denominator` INT, + `adult_icu_bed_utilization` DOUBLE, + `adult_icu_bed_utilization_coverage` INT, + `adult_icu_bed_utilization_numerator` INT, + `adult_icu_bed_utilization_denominator` INT, + `record_type` CHAR(1) NOT NULL, + -- new columns added Oct 10 + `geocoded_state` VARCHAR(32), + `previous_day_admission_adult_covid_confirmed_18_19` INT, + `previous_day_admission_adult_covid_confirmed_18_19_coverage` INT, + `previous_day_admission_adult_covid_confirmed_20_29` INT, + `previous_day_admission_adult_covid_confirmed_20_29_coverage` INT, + 
`previous_day_admission_adult_covid_confirmed_30_39` INT, + `previous_day_admission_adult_covid_confirmed_30_39_coverage` INT, + `previous_day_admission_adult_covid_confirmed_40_49` INT, + `previous_day_admission_adult_covid_confirmed_40_49_coverage` INT, + `previous_day_admission_adult_covid_confirmed_50_59` INT, + `previous_day_admission_adult_covid_confirmed_50_59_coverage` INT, + `previous_day_admission_adult_covid_confirmed_60_69` INT, + `previous_day_admission_adult_covid_confirmed_60_69_coverage` INT, + `previous_day_admission_adult_covid_confirmed_70_79` INT, + `previous_day_admission_adult_covid_confirmed_70_79_coverage` INT, + `previous_day_admission_adult_covid_confirmed_80plus` INT, + `previous_day_admission_adult_covid_confirmed_80plus_coverage` INT, + `previous_day_admission_adult_covid_confirmed_unknown` INT, + `previous_day_admission_adult_covid_confirmed_unknown_coverage` INT, + `previous_day_admission_adult_covid_suspected_18_19` INT, + `previous_day_admission_adult_covid_suspected_18_19_coverage` INT, + `previous_day_admission_adult_covid_suspected_20_29` INT, + `previous_day_admission_adult_covid_suspected_20_29_coverage` INT, + `previous_day_admission_adult_covid_suspected_30_39` INT, + `previous_day_admission_adult_covid_suspected_30_39_coverage` INT, + `previous_day_admission_adult_covid_suspected_40_49` INT, + `previous_day_admission_adult_covid_suspected_40_49_coverage` INT, + `previous_day_admission_adult_covid_suspected_50_59` INT, + `previous_day_admission_adult_covid_suspected_50_59_coverage` INT, + `previous_day_admission_adult_covid_suspected_60_69` INT, + `previous_day_admission_adult_covid_suspected_60_69_coverage` INT, + `previous_day_admission_adult_covid_suspected_70_79` INT, + `previous_day_admission_adult_covid_suspected_70_79_coverage` INT, + `previous_day_admission_adult_covid_suspected_80plus` INT, + `previous_day_admission_adult_covid_suspected_80plus_coverage` INT, + `previous_day_admission_adult_covid_suspected_unknown` 
INT, + `previous_day_admission_adult_covid_suspected_unknown_coverage` INT, + `deaths_covid` INT, + `deaths_covid_coverage` INT, + `on_hand_supply_therapeutic_a_casirivimab_imdevimab_courses` INT, + `on_hand_supply_therapeutic_b_bamlanivimab_courses` INT, + `on_hand_supply_therapeutic_c_bamlanivimab_etesevimab_courses` INT, + `previous_week_therapeutic_a_casirivimab_imdevimab_courses_used` INT, + `previous_week_therapeutic_b_bamlanivimab_courses_used` INT, + `previous_week_therapeutic_c_bamlanivimab_etesevimab_courses_used` INT, + `icu_patients_confirmed_influenza` INT, + `icu_patients_confirmed_influenza_coverage` INT, + `previous_day_admission_influenza_confirmed` INT, + `previous_day_admission_influenza_confirmed_coverage` INT, + `previous_day_deaths_covid_and_influenza` INT, + `previous_day_deaths_covid_and_influenza_coverage` INT, + `previous_day_deaths_influenza` INT, + `previous_day_deaths_influenza_coverage` INT, + `total_patients_hospitalized_confirmed_influenza` INT, + `total_patients_hospitalized_confirmed_influenza_covid` INT, + `total_patients_hospitalized_confirmed_influenza_covid_coverage` INT, + `total_patients_hospitalized_confirmed_influenza_coverage` INT, + PRIMARY KEY (`id`), + -- for uniqueness + -- for fast lookup of most recent issue for a given state, date, and record type + UNIQUE KEY `issue_by_state_and_date` (`state`, `date`, `issue`, `record_type`), + -- for fast lookup of a time-series for a given state, issue, and record type + KEY `date_by_issue_and_state` (`issue`, `state`, `date`, `record_type`), + -- for fast lookup of all states for a given date, issue, and record_type + KEY `state_by_issue_and_date` (`issue`, `date`, `state`, `record_type`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; + /* `covid_hosp_facility` stores the versioned "facility" dataset. 
diff --git a/src/ddl/migrations/covid_hosp_state_split_tables.sql b/src/ddl/migrations/covid_hosp_state_split_tables.sql new file mode 100644 index 000000000..2f7d3027a --- /dev/null +++ b/src/ddl/migrations/covid_hosp_state_split_tables.sql @@ -0,0 +1,148 @@ +-- 1. Add new state_daily table mirroring state_timeseries table + +CREATE TABLE `covid_hosp_state_daily` ( + `id` INT NOT NULL AUTO_INCREMENT, + `issue` INT NOT NULL, + `state` CHAR(2) NOT NULL, + `date` INT NOT NULL, + `critical_staffing_shortage_today_yes` INT, + `critical_staffing_shortage_today_no` INT, + `critical_staffing_shortage_today_not_reported` INT, + `critical_staffing_shortage_anticipated_within_week_yes` INT, + `critical_staffing_shortage_anticipated_within_week_no` INT, + `critical_staffing_shortage_anticipated_within_week_not_reported` INT, + `hospital_onset_covid` INT, + `hospital_onset_covid_coverage` INT, + `inpatient_beds` INT, + `inpatient_beds_coverage` INT, + `inpatient_beds_used` INT, + `inpatient_beds_used_coverage` INT, + `inpatient_beds_used_covid` INT, + `inpatient_beds_used_covid_coverage` INT, + `previous_day_admission_adult_covid_confirmed` INT, + `previous_day_admission_adult_covid_confirmed_coverage` INT, + `previous_day_admission_adult_covid_suspected` INT, + `previous_day_admission_adult_covid_suspected_coverage` INT, + `previous_day_admission_pediatric_covid_confirmed` INT, + `previous_day_admission_pediatric_covid_confirmed_coverage` INT, + `previous_day_admission_pediatric_covid_suspected` INT, + `previous_day_admission_pediatric_covid_suspected_coverage` INT, + `staffed_adult_icu_bed_occupancy` INT, + `staffed_adult_icu_bed_occupancy_coverage` INT, + `staffed_icu_adult_patients_confirmed_suspected_covid` INT, + `staffed_icu_adult_patients_confirmed_suspected_covid_coverage` INT, + `staffed_icu_adult_patients_confirmed_covid` INT, + `staffed_icu_adult_patients_confirmed_covid_coverage` INT, + `total_adult_patients_hosp_confirmed_suspected_covid` INT, + 
`total_adult_patients_hosp_confirmed_suspected_covid_coverage` INT, + `total_adult_patients_hosp_confirmed_covid` INT, + `total_adult_patients_hosp_confirmed_covid_coverage` INT, + `total_pediatric_patients_hosp_confirmed_suspected_covid` INT, + `total_pediatric_patients_hosp_confirmed_suspected_covid_coverage` INT, + `total_pediatric_patients_hosp_confirmed_covid` INT, + `total_pediatric_patients_hosp_confirmed_covid_coverage` INT, + `total_staffed_adult_icu_beds` INT, + `total_staffed_adult_icu_beds_coverage` INT, + `inpatient_beds_utilization` DOUBLE, + `inpatient_beds_utilization_coverage` INT, + `inpatient_beds_utilization_numerator` INT, + `inpatient_beds_utilization_denominator` INT, + `percent_of_inpatients_with_covid` DOUBLE, + `percent_of_inpatients_with_covid_coverage` INT, + `percent_of_inpatients_with_covid_numerator` INT, + `percent_of_inpatients_with_covid_denominator` INT, + `inpatient_bed_covid_utilization` DOUBLE, + `inpatient_bed_covid_utilization_coverage` INT, + `inpatient_bed_covid_utilization_numerator` INT, + `inpatient_bed_covid_utilization_denominator` INT, + `adult_icu_bed_covid_utilization` DOUBLE, + `adult_icu_bed_covid_utilization_coverage` INT, + `adult_icu_bed_covid_utilization_numerator` INT, + `adult_icu_bed_covid_utilization_denominator` INT, + `adult_icu_bed_utilization` DOUBLE, + `adult_icu_bed_utilization_coverage` INT, + `adult_icu_bed_utilization_numerator` INT, + `adult_icu_bed_utilization_denominator` INT, + `record_type` CHAR(1) NOT NULL, + -- new columns added Oct 10 + `geocoded_state` VARCHAR(32), + `previous_day_admission_adult_covid_confirmed_18_19` INT, + `previous_day_admission_adult_covid_confirmed_18_19_coverage` INT, + `previous_day_admission_adult_covid_confirmed_20_29` INT, + `previous_day_admission_adult_covid_confirmed_20_29_coverage` INT, + `previous_day_admission_adult_covid_confirmed_30_39` INT, + `previous_day_admission_adult_covid_confirmed_30_39_coverage` INT, + 
`previous_day_admission_adult_covid_confirmed_40_49` INT, + `previous_day_admission_adult_covid_confirmed_40_49_coverage` INT, + `previous_day_admission_adult_covid_confirmed_50_59` INT, + `previous_day_admission_adult_covid_confirmed_50_59_coverage` INT, + `previous_day_admission_adult_covid_confirmed_60_69` INT, + `previous_day_admission_adult_covid_confirmed_60_69_coverage` INT, + `previous_day_admission_adult_covid_confirmed_70_79` INT, + `previous_day_admission_adult_covid_confirmed_70_79_coverage` INT, + `previous_day_admission_adult_covid_confirmed_80plus` INT, + `previous_day_admission_adult_covid_confirmed_80plus_coverage` INT, + `previous_day_admission_adult_covid_confirmed_unknown` INT, + `previous_day_admission_adult_covid_confirmed_unknown_coverage` INT, + `previous_day_admission_adult_covid_suspected_18_19` INT, + `previous_day_admission_adult_covid_suspected_18_19_coverage` INT, + `previous_day_admission_adult_covid_suspected_20_29` INT, + `previous_day_admission_adult_covid_suspected_20_29_coverage` INT, + `previous_day_admission_adult_covid_suspected_30_39` INT, + `previous_day_admission_adult_covid_suspected_30_39_coverage` INT, + `previous_day_admission_adult_covid_suspected_40_49` INT, + `previous_day_admission_adult_covid_suspected_40_49_coverage` INT, + `previous_day_admission_adult_covid_suspected_50_59` INT, + `previous_day_admission_adult_covid_suspected_50_59_coverage` INT, + `previous_day_admission_adult_covid_suspected_60_69` INT, + `previous_day_admission_adult_covid_suspected_60_69_coverage` INT, + `previous_day_admission_adult_covid_suspected_70_79` INT, + `previous_day_admission_adult_covid_suspected_70_79_coverage` INT, + `previous_day_admission_adult_covid_suspected_80plus` INT, + `previous_day_admission_adult_covid_suspected_80plus_coverage` INT, + `previous_day_admission_adult_covid_suspected_unknown` INT, + `previous_day_admission_adult_covid_suspected_unknown_coverage` INT, + `deaths_covid` INT, + `deaths_covid_coverage` INT, + 
`on_hand_supply_therapeutic_a_casirivimab_imdevimab_courses` INT, + `on_hand_supply_therapeutic_b_bamlanivimab_courses` INT, + `on_hand_supply_therapeutic_c_bamlanivimab_etesevimab_courses` INT, + `previous_week_therapeutic_a_casirivimab_imdevimab_courses_used` INT, + `previous_week_therapeutic_b_bamlanivimab_courses_used` INT, + `previous_week_therapeutic_c_bamlanivimab_etesevimab_courses_used` INT, + `icu_patients_confirmed_influenza` INT, + `icu_patients_confirmed_influenza_coverage` INT, + `previous_day_admission_influenza_confirmed` INT, + `previous_day_admission_influenza_confirmed_coverage` INT, + `previous_day_deaths_covid_and_influenza` INT, + `previous_day_deaths_covid_and_influenza_coverage` INT, + `previous_day_deaths_influenza` INT, + `previous_day_deaths_influenza_coverage` INT, + `total_patients_hospitalized_confirmed_influenza` INT, + `total_patients_hospitalized_confirmed_influenza_covid` INT, + `total_patients_hospitalized_confirmed_influenza_covid_coverage` INT, + `total_patients_hospitalized_confirmed_influenza_coverage` INT, + PRIMARY KEY (`id`), + -- for uniqueness + -- for fast lookup of most recent issue for a given state, date, and record type + UNIQUE KEY `issue_by_state_and_date` (`state`, `date`, `issue`, `record_type`), + -- for fast lookup of a time-series for a given state, issue, and record type + KEY `date_by_issue_and_state` (`issue`, `state`, `date`, `record_type`), + -- for fast lookup of all states for a given date, issue, and record_type + KEY `state_by_issue_and_date` (`issue`, `date`, `state`, `record_type`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; + +-- 2. Move data with record_type=D into new table + +INSERT INTO `covid_hosp_state_daily` +SELECT * FROM `covid_hosp_state_timeseries` +WHERE record_type='D'; + +-- 3. Remove that data from the old table + +DELETE FROM `covid_hosp_state_timeseries` WHERE record_type='D'; + +-- 4. 
Remove the record_type column from both tables + +ALTER TABLE `covid_hosp_state_daily` DROP COLUMN record_type; +ALTER TABLE `covid_hosp_state_timeseries` DROP COLUMN record_type; \ No newline at end of file From 0647157603cf3175065cda43f1f0e1338204e545 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 7 Apr 2023 15:08:57 +0300 Subject: [PATCH 02/42] Fix API endpoint --- src/ddl/migrations/covid_hosp_state_split_tables.sql | 2 +- src/server/endpoints/covid_hosp_state_timeseries.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/ddl/migrations/covid_hosp_state_split_tables.sql b/src/ddl/migrations/covid_hosp_state_split_tables.sql index 2f7d3027a..54f6909d8 100644 --- a/src/ddl/migrations/covid_hosp_state_split_tables.sql +++ b/src/ddl/migrations/covid_hosp_state_split_tables.sql @@ -145,4 +145,4 @@ DELETE FROM `covid_hosp_state_timeseries` WHERE record_type='D'; -- 4. Remove the record_type column from both tables ALTER TABLE `covid_hosp_state_daily` DROP COLUMN record_type; -ALTER TABLE `covid_hosp_state_timeseries` DROP COLUMN record_type; \ No newline at end of file +ALTER TABLE `covid_hosp_state_timeseries` DROP COLUMN record_type; diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 78931ee68..ef5941d37 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -152,19 +152,20 @@ def handle(): q.where_integers("date", dates) q.where_strings("state", states) + merge_tables = f"(SELECT *, 'D' as record_type FROM `covid_hosp_state_daily`) UNION ALL (SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries`)" if issues is not None: q.where_integers("issue", issues) # final query using specific issues - query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM {q.table} WHERE {q.conditions_clause}) SELECT 
{q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM {merge_tables} WHERE {q.conditions_clause}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" elif as_of is not None: sub_condition_asof = "(issue <= :as_of)" q.params["as_of"] = as_of - query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` FROM {q.table} WHERE {q.conditions_clause} AND {sub_condition_asof}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` FROM {merge_tables} WHERE {q.conditions_clause} AND {sub_condition_asof}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" else: # final query using most recent issues - subquery = f"(SELECT max(`issue`) `max_issue`, `date`, `state` FROM {q.table} WHERE {q.conditions_clause} GROUP BY `date`, `state`) x" + subquery = f"(SELECT max(`issue`) `max_issue`, `date`, `state` FROM {merge_tables} WHERE {q.conditions_clause} GROUP BY `date`, `state`) x" condition = f"x.`max_issue` = {q.alias}.`issue` AND x.`date` = {q.alias}.`date` AND x.`state` = {q.alias}.`state`" - query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM {q.table} JOIN {subquery} ON {condition}) select {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM {merge_tables} JOIN {subquery} ON {condition}) select {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" # send query return 
execute_query(query, q.params, fields_string, fields_int, fields_float) From e2680e6d306f1be95433017d7cf3e61588cbbc74 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 7 Apr 2023 15:19:30 +0300 Subject: [PATCH 03/42] Alias --- src/server/endpoints/covid_hosp_state_timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index ef5941d37..8f889ec1b 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -152,7 +152,7 @@ def handle(): q.where_integers("date", dates) q.where_strings("state", states) - merge_tables = f"(SELECT *, 'D' as record_type FROM `covid_hosp_state_daily`) UNION ALL (SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries`)" + merge_tables = f"(SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` UNION ALL SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries`) AS `merged`" if issues is not None: q.where_integers("issue", issues) # final query using specific issues From beb6b6fb0174c4542d5de0a2d6ae511dc0a2e9e0 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 7 Apr 2023 15:30:00 +0300 Subject: [PATCH 04/42] Test changes --- .../covid_hosp/state_daily/test_scenarios.py | 2 +- integrations/server/test_covid_hosp.py | 28 +++++++++------ src/acquisition/covid_hosp/common/database.py | 2 +- src/ddl/covid_hosp.sql | 34 +++++++------------ .../covid_hosp_state_split_tables.sql | 12 +++---- .../endpoints/covid_hosp_state_timeseries.py | 2 +- 6 files changed, 38 insertions(+), 42 deletions(-) diff --git a/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py b/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py index e55bc8ca6..ae4d05d42 100644 --- a/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py +++ 
b/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py @@ -41,7 +41,7 @@ def setUp(self): # clear relevant tables with Database.connect() as db: with db.new_cursor() as cur: - cur.execute('truncate table covid_hosp_state_timeseries') + cur.execute('truncate table covid_hosp_state_daily') cur.execute('truncate table covid_hosp_meta') @freeze_time("2021-03-16") diff --git a/integrations/server/test_covid_hosp.py b/integrations/server/test_covid_hosp.py index 16538b82d..0b2f98334 100644 --- a/integrations/server/test_covid_hosp.py +++ b/integrations/server/test_covid_hosp.py @@ -29,10 +29,16 @@ def setUp(self): cur.execute('truncate table covid_hosp_meta') - def insert_issue(self, cur, issue, value, record_type): + def insert_timeseries(self, cur, issue, value): so_many_nulls = ', '.join(['null'] * 57) cur.execute(f'''insert into covid_hosp_state_timeseries values ( - 0, {issue}, 'PA', 20201118, {value}, {so_many_nulls}, '{record_type}', {so_many_nulls} + 0, {issue}, 'PA', 20201118, {value}, {so_many_nulls}, {so_many_nulls} + )''') + + def insert_daily(self, cur, issue, value): + so_many_nulls = ', '.join(['null'] * 57) + cur.execute(f'''insert into covid_hosp_state_daily values ( + 0, {issue}, 'PA', 20201118, {value}, {so_many_nulls}, {so_many_nulls} )''') def test_query_by_issue(self): @@ -42,10 +48,10 @@ def test_query_by_issue(self): with db.new_cursor() as cur: # inserting out of order to test server-side order by # also inserting two for 20201201 to test tiebreaker. 
- self.insert_issue(cur, 20201201, 123, 'T') - self.insert_issue(cur, 20201201, 321, 'D') - self.insert_issue(cur, 20201203, 789, 'T') - self.insert_issue(cur, 20201202, 456, 'T') + self.insert_timeseries(cur, 20201201, 123) + self.insert_daily(cur, 20201201, 321) + self.insert_timeseries(cur, 20201203, 789) + self.insert_timeseries(cur, 20201202, 456) # request without issue (defaulting to latest issue) with self.subTest(name='no issue (latest)'): @@ -86,11 +92,11 @@ def test_query_by_issue(self): def test_query_by_as_of(self): with Database.connect() as db: with db.new_cursor() as cur: - self.insert_issue(cur, 20201101, 0, 'T') - self.insert_issue(cur, 20201102, 1, 'D') - self.insert_issue(cur, 20201103, 2, 'D') - self.insert_issue(cur, 20201103, 3, 'T') - self.insert_issue(cur, 20201104, 4, 'T') + self.insert_timeseries(cur, 20201101, 0) + self.insert_daily(cur, 20201102, 1) + self.insert_daily(cur, 20201103, 2) + self.insert_timeseries(cur, 20201103, 3) + self.insert_timeseries(cur, 20201104, 4) with self.subTest(name='as_of with multiple issues'): response = Epidata.covid_hosp('PA', 20201118, as_of=20201103) diff --git a/src/acquisition/covid_hosp/common/database.py b/src/acquisition/covid_hosp/common/database.py index 4fd0981a1..f69dceefc 100644 --- a/src/acquisition/covid_hosp/common/database.py +++ b/src/acquisition/covid_hosp/common/database.py @@ -45,7 +45,7 @@ def __init__(self, self.connection = connection self.table_name = table_name self.hhs_dataset_id = hhs_dataset_id - self.publication_col_name = "issue" if table_name == 'covid_hosp_state_timeseries' else \ + self.publication_col_name = "issue" if table_name == 'covid_hosp_state_timeseries' or table_name == "covid_hosp_state_daily" else \ 'publication_date' self.columns_and_types = { c.csv_name: c diff --git a/src/ddl/covid_hosp.sql b/src/ddl/covid_hosp.sql index 6ad8e0b15..6f3070d7a 100644 --- a/src/ddl/covid_hosp.sql +++ b/src/ddl/covid_hosp.sql @@ -364,14 +364,6 @@ For daily snapshot files, there 
is a `reporting_cutoff_start` value, defined as "Look back date start - The latest reports from each hospital is summed for this report starting with this date." We place this value into the `date` column. - -We also add a column `record_type` that specifies if a row came from a -time series file or a daily snapshot file. "T" = time series and -"D" = daily snapshot. When both a time series and a daily snapshot row -have the same issue/date/state but different values, we tiebreak by -taking the daily snapshot value. This is done with a window function that -sorts by the record_type field, ascending, and so it is important that "D" -comes before "T". */ CREATE TABLE `covid_hosp_state_timeseries` ( @@ -437,7 +429,6 @@ CREATE TABLE `covid_hosp_state_timeseries` ( `adult_icu_bed_utilization_coverage` INT, `adult_icu_bed_utilization_numerator` INT, `adult_icu_bed_utilization_denominator` INT, - `record_type` CHAR(1) NOT NULL, -- new columns added Oct 10 `geocoded_state` VARCHAR(32), `previous_day_admission_adult_covid_confirmed_18_19` INT, @@ -498,12 +489,12 @@ CREATE TABLE `covid_hosp_state_timeseries` ( `total_patients_hospitalized_confirmed_influenza_coverage` INT, PRIMARY KEY (`id`), -- for uniqueness - -- for fast lookup of most recent issue for a given state, date, and record type - UNIQUE KEY `issue_by_state_and_date` (`state`, `date`, `issue`, `record_type`), - -- for fast lookup of a time-series for a given state, issue, and record type - KEY `date_by_issue_and_state` (`issue`, `state`, `date`, `record_type`), - -- for fast lookup of all states for a given date, issue, and record_type - KEY `state_by_issue_and_date` (`issue`, `date`, `state`, `record_type`) + -- for fast lookup of most recent issue for a given state and date + UNIQUE KEY `issue_by_state_and_date` (`state`, `date`, `issue`), + -- for fast lookup of a time-series for a given state and issue + KEY `date_by_issue_and_state` (`issue`, `state`, `date`), + -- for fast lookup of all states for a given 
date and issue + KEY `state_by_issue_and_date` (`issue`, `date`, `state`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; @@ -575,7 +566,6 @@ CREATE TABLE `covid_hosp_state_daily` ( `adult_icu_bed_utilization_coverage` INT, `adult_icu_bed_utilization_numerator` INT, `adult_icu_bed_utilization_denominator` INT, - `record_type` CHAR(1) NOT NULL, -- new columns added Oct 10 `geocoded_state` VARCHAR(32), `previous_day_admission_adult_covid_confirmed_18_19` INT, @@ -636,12 +626,12 @@ CREATE TABLE `covid_hosp_state_daily` ( `total_patients_hospitalized_confirmed_influenza_coverage` INT, PRIMARY KEY (`id`), -- for uniqueness - -- for fast lookup of most recent issue for a given state, date, and record type - UNIQUE KEY `issue_by_state_and_date` (`state`, `date`, `issue`, `record_type`), - -- for fast lookup of a time-series for a given state, issue, and record type - KEY `date_by_issue_and_state` (`issue`, `state`, `date`, `record_type`), - -- for fast lookup of all states for a given date, issue, and record_type - KEY `state_by_issue_and_date` (`issue`, `date`, `state`, `record_type`) + -- for fast lookup of most recent issue for a given state and date + UNIQUE KEY `issue_by_state_and_date` (`state`, `date`, `issue`), + -- for fast lookup of a time-series for a given state and issue + KEY `date_by_issue_and_state` (`issue`, `state`, `date`), + -- for fast lookup of all states for a given date and issue + KEY `state_by_issue_and_date` (`issue`, `date`, `state`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; /* diff --git a/src/ddl/migrations/covid_hosp_state_split_tables.sql b/src/ddl/migrations/covid_hosp_state_split_tables.sql index 54f6909d8..f052d47cd 100644 --- a/src/ddl/migrations/covid_hosp_state_split_tables.sql +++ b/src/ddl/migrations/covid_hosp_state_split_tables.sql @@ -124,12 +124,12 @@ CREATE TABLE `covid_hosp_state_daily` ( `total_patients_hospitalized_confirmed_influenza_coverage` INT, PRIMARY KEY (`id`), -- for uniqueness - -- for fast lookup of most recent issue for a 
given state, date, and record type - UNIQUE KEY `issue_by_state_and_date` (`state`, `date`, `issue`, `record_type`), - -- for fast lookup of a time-series for a given state, issue, and record type - KEY `date_by_issue_and_state` (`issue`, `state`, `date`, `record_type`), - -- for fast lookup of all states for a given date, issue, and record_type - KEY `state_by_issue_and_date` (`issue`, `date`, `state`, `record_type`) + -- for fast lookup of most recent issue for a given state and date + UNIQUE KEY `issue_by_state_and_date` (`state`, `date`, `issue`), + -- for fast lookup of a time-series for a given state and issue + KEY `date_by_issue_and_state` (`issue`, `state`, `date`), + -- for fast lookup of all states for a given date and issue + KEY `state_by_issue_and_date` (`issue`, `date`, `state`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; -- 2. Move data with record_type=D into new table diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 8f889ec1b..8a631a73c 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -152,7 +152,7 @@ def handle(): q.where_integers("date", dates) q.where_strings("state", states) - merge_tables = f"(SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` UNION ALL SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries`) AS `merged`" + merge_tables = "(SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` UNION ALL SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries`) AS `merged`" if issues is not None: q.where_integers("issue", issues) # final query using specific issues From 569c3963e66ea454d6e41ac00a240f7d0f0f88c7 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 7 Apr 2023 15:36:35 +0300 Subject: [PATCH 05/42] no date --- src/acquisition/covid_hosp/state_daily/database.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/src/acquisition/covid_hosp/state_daily/database.py b/src/acquisition/covid_hosp/state_daily/database.py index 27205e739..24e99b878 100644 --- a/src/acquisition/covid_hosp/state_daily/database.py +++ b/src/acquisition/covid_hosp/state_daily/database.py @@ -225,5 +225,4 @@ def __init__(self, *args, **kwargs): table_name=Database.TABLE_NAME, hhs_dataset_id=Network.DATASET_ID, columns_and_types=Database.ORDERED_CSV_COLUMNS, - key_columns=Database.KEY_COLS, - additional_fields=[Columndef('D', 'record_type', None)]) + key_columns=Database.KEY_COLS) From c2ee818ec02cf7c1da058b42bd33f5a9b93ef407 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 7 Apr 2023 15:39:48 +0300 Subject: [PATCH 06/42] Remove test checks --- .../acquisition/covid_hosp/state_daily/test_scenarios.py | 6 +++--- .../covid_hosp/state_timeseries/test_scenarios.py | 6 +++--- src/acquisition/covid_hosp/state_timeseries/database.py | 3 +-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py b/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py index ae4d05d42..0a28372ce 100644 --- a/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py +++ b/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py @@ -49,9 +49,9 @@ def test_acquire_dataset(self): """Acquire a new dataset.""" # make sure the data does not yet exist - with self.subTest(name='no data yet'): - response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101)) - self.assertEqual(response['result'], -2, response) + # with self.subTest(name='no data yet'): + # response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101)) + # self.assertEqual(response['result'], -2, response) # acquire sample data into local database # mock out network calls to external hosts diff --git a/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py 
b/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py index 5d13ccbb0..cd971016b 100644 --- a/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py +++ b/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py @@ -52,9 +52,9 @@ def test_acquire_dataset(self): self.test_utils.load_sample_dataset() # make sure the data does not yet exist - with self.subTest(name='no data yet'): - response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101)) - self.assertEqual(response['result'], -2) + # with self.subTest(name='no data yet'): + # response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101)) + # self.assertEqual(response['result'], -2) # acquire sample data into local database with self.subTest(name='first acquisition'): diff --git a/src/acquisition/covid_hosp/state_timeseries/database.py b/src/acquisition/covid_hosp/state_timeseries/database.py index 348d9fc0b..0b53965cd 100644 --- a/src/acquisition/covid_hosp/state_timeseries/database.py +++ b/src/acquisition/covid_hosp/state_timeseries/database.py @@ -225,5 +225,4 @@ def __init__(self, *args, **kwargs): table_name=Database.TABLE_NAME, hhs_dataset_id=Network.DATASET_ID, columns_and_types=Database.ORDERED_CSV_COLUMNS, - key_columns=Database.KEY_COLS, - additional_fields=[Columndef('T', 'record_type', None)]) + key_columns=Database.KEY_COLS) From 20334781e9b65a1891d6c8ffe56a44b165c72e28 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 7 Apr 2023 15:40:48 +0300 Subject: [PATCH 07/42] truncate more instead --- .../acquisition/covid_hosp/state_daily/test_scenarios.py | 7 ++++--- .../covid_hosp/state_timeseries/test_scenarios.py | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py b/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py index 0a28372ce..1801de932 100644 --- a/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py +++ 
b/integrations/acquisition/covid_hosp/state_daily/test_scenarios.py @@ -42,6 +42,7 @@ def setUp(self): with Database.connect() as db: with db.new_cursor() as cur: cur.execute('truncate table covid_hosp_state_daily') + cur.execute('truncate table covid_hosp_state_timeseries') cur.execute('truncate table covid_hosp_meta') @freeze_time("2021-03-16") @@ -49,9 +50,9 @@ def test_acquire_dataset(self): """Acquire a new dataset.""" # make sure the data does not yet exist - # with self.subTest(name='no data yet'): - # response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101)) - # self.assertEqual(response['result'], -2, response) + with self.subTest(name='no data yet'): + response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101)) + self.assertEqual(response['result'], -2, response) # acquire sample data into local database # mock out network calls to external hosts diff --git a/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py b/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py index cd971016b..bd83c6808 100644 --- a/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py +++ b/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py @@ -37,6 +37,7 @@ def setUp(self): # clear relevant tables with Database.connect() as db: with db.new_cursor() as cur: + cur.execute('truncate table covid_hosp_state_daily') cur.execute('truncate table covid_hosp_state_timeseries') cur.execute('truncate table covid_hosp_meta') From 23313b532c5df8d5f76925b8a0916c20e05caba3 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 7 Apr 2023 15:46:09 +0300 Subject: [PATCH 08/42] no D/T --- tests/acquisition/covid_hosp/state_daily/test_database.py | 2 +- tests/acquisition/covid_hosp/state_timeseries/test_database.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/acquisition/covid_hosp/state_daily/test_database.py 
b/tests/acquisition/covid_hosp/state_daily/test_database.py index 95401d7cc..ae9acd098 100644 --- a/tests/acquisition/covid_hosp/state_daily/test_database.py +++ b/tests/acquisition/covid_hosp/state_daily/test_database.py @@ -50,7 +50,7 @@ def test_insert_dataset(self): 17, 18, 19, 20, 21, 22, 23, 31, 24, 25, 15, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 29, 42, 43, 44, 45, 0, 29, 0, 29, 46, 47, 48, 49, 50, 51, 52, 58, 31, 32, 29, 32, 31, 196, 29, 189, 31, - 53, 54, 55, 56, 2, 29, 2, 29, 137, 31, 'D') + 53, 54, 55, 56, 2, 29, 2, 29, 137, 31) self.assertEqual(len(last_query_values), len(expected_query_values)) for actual, expected in zip(last_query_values, expected_query_values): diff --git a/tests/acquisition/covid_hosp/state_timeseries/test_database.py b/tests/acquisition/covid_hosp/state_timeseries/test_database.py index 24897d42d..ecea27f59 100644 --- a/tests/acquisition/covid_hosp/state_timeseries/test_database.py +++ b/tests/acquisition/covid_hosp/state_timeseries/test_database.py @@ -48,7 +48,7 @@ def test_insert_dataset(self): 24, 25, 13, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 26, 42, 43, 44, 45, 0, 21, 0, 22, 46, 47, 48, 49, 50, 51, 52, 49, 28, 10, 26, 7, 28, 17, 26, 14, 28, 53, 54, 55, 56, 0, 26, 0, 26, - 114, 28, 'T') + 114, 28) self.assertEqual(len(last_query_values), len(expected_query_values)) for actual, expected in zip(last_query_values, expected_query_values): From 926cf16936275d1a84645ef006955dd4e0c8e3a0 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 7 Apr 2023 16:09:52 +0300 Subject: [PATCH 09/42] aliases? 
--- src/server/endpoints/covid_hosp_state_timeseries.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 8a631a73c..8a88713bf 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -152,20 +152,20 @@ def handle(): q.where_integers("date", dates) q.where_strings("state", states) - merge_tables = "(SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` UNION ALL SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries`) AS `merged`" + merge_tables = "(SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` UNION ALL SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries`)" if issues is not None: q.where_integers("issue", issues) # final query using specific issues - query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM {merge_tables} WHERE {q.conditions_clause}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM {merge_tables} AS `merged` WHERE {q.conditions_clause}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" elif as_of is not None: sub_condition_asof = "(issue <= :as_of)" q.params["as_of"] = as_of - query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` FROM {merge_tables} WHERE {q.conditions_clause} AND {sub_condition_asof}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` FROM {merge_tables} AS `merged` WHERE 
{q.conditions_clause} AND {sub_condition_asof}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" else: # final query using most recent issues - subquery = f"(SELECT max(`issue`) `max_issue`, `date`, `state` FROM {merge_tables} WHERE {q.conditions_clause} GROUP BY `date`, `state`) x" + subquery = f"(SELECT max(`issue`) `max_issue`, `date`, `state` FROM {merge_tables} AS `merged_subq` WHERE {q.conditions_clause} GROUP BY `date`, `state`) x" condition = f"x.`max_issue` = {q.alias}.`issue` AND x.`date` = {q.alias}.`date` AND x.`state` = {q.alias}.`state`" - query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM {merge_tables} JOIN {subquery} ON {condition}) select {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM {merge_tables} AS `merged` JOIN {subquery} ON {condition}) select {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" # send query return execute_query(query, q.params, fields_string, fields_int, fields_float) From 321415c686390a3e96573a044b7f6d2fea588475 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 7 Apr 2023 16:18:44 +0300 Subject: [PATCH 10/42] alt subqueries --- src/server/endpoints/covid_hosp_state_timeseries.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 8a88713bf..b2c026668 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -156,16 +156,16 @@ def handle(): if issues is not None: q.where_integers("issue", issues) # final query using specific issues - query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, 
state, issue ORDER BY record_type) `row` FROM {merge_tables} AS `merged` WHERE {q.conditions_clause}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + query = f"WITH m as {merge_tables}, c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM m WHERE {q.conditions_clause}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" elif as_of is not None: sub_condition_asof = "(issue <= :as_of)" q.params["as_of"] = as_of - query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` FROM {merge_tables} AS `merged` WHERE {q.conditions_clause} AND {sub_condition_asof}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + query = f"WITH m as {merge_tables}, c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` FROM m WHERE {q.conditions_clause} AND {sub_condition_asof}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" else: # final query using most recent issues - subquery = f"(SELECT max(`issue`) `max_issue`, `date`, `state` FROM {merge_tables} AS `merged_subq` WHERE {q.conditions_clause} GROUP BY `date`, `state`) x" + subquery = f"(SELECT max(`issue`) `max_issue`, `date`, `state` FROM m WHERE {q.conditions_clause} GROUP BY `date`, `state`) x" condition = f"x.`max_issue` = {q.alias}.`issue` AND x.`date` = {q.alias}.`date` AND x.`state` = {q.alias}.`state`" - query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM {merge_tables} AS `merged` JOIN {subquery} ON {condition}) select {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + query = f"WITH m as {merge_tables}, c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY 
record_type) `row` FROM m JOIN {subquery} ON {condition}) select {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" # send query return execute_query(query, q.params, fields_string, fields_int, fields_float) From f5718ca7363b27120b705767d98b126d5e23ec4f Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 7 Apr 2023 16:30:33 +0300 Subject: [PATCH 11/42] workaround? --- src/server/endpoints/covid_hosp_state_timeseries.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index b2c026668..963853f54 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -156,14 +156,14 @@ def handle(): if issues is not None: q.where_integers("issue", issues) # final query using specific issues - query = f"WITH m as {merge_tables}, c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM m WHERE {q.conditions_clause}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + query = f"WITH m as {merge_tables}, c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM m WHERE {q.conditions_clause.replace('c.', 'm.')}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" elif as_of is not None: sub_condition_asof = "(issue <= :as_of)" q.params["as_of"] = as_of - query = f"WITH m as {merge_tables}, c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` FROM m WHERE {q.conditions_clause} AND {sub_condition_asof}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + query = f"WITH m as {merge_tables}, c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, 
record_type) `row` FROM m WHERE {q.conditions_clause.replace('c.', 'm.')} AND {sub_condition_asof}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" else: # final query using most recent issues - subquery = f"(SELECT max(`issue`) `max_issue`, `date`, `state` FROM m WHERE {q.conditions_clause} GROUP BY `date`, `state`) x" + subquery = f"(SELECT max(`issue`) `max_issue`, `date`, `state` FROM m WHERE {q.conditions_clause.replace('c.', 'm.')} GROUP BY `date`, `state`) x" condition = f"x.`max_issue` = {q.alias}.`issue` AND x.`date` = {q.alias}.`date` AND x.`state` = {q.alias}.`state`" query = f"WITH m as {merge_tables}, c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM m JOIN {subquery} ON {condition}) select {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" From edf11a1ccdb7231919af5b1c84a88427e6db8d1b Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 7 Apr 2023 16:37:44 +0300 Subject: [PATCH 12/42] workaround? 
2 --- src/server/endpoints/covid_hosp_state_timeseries.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 963853f54..cb744aa69 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -152,20 +152,20 @@ def handle(): q.where_integers("date", dates) q.where_strings("state", states) - merge_tables = "(SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` UNION ALL SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries`)" + merge_tables = "(SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` UNION ALL SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries`) c" if issues is not None: q.where_integers("issue", issues) # final query using specific issues - query = f"WITH m as {merge_tables}, c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM m WHERE {q.conditions_clause.replace('c.', 'm.')}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM {merge_tables} WHERE {q.conditions_clause}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" elif as_of is not None: sub_condition_asof = "(issue <= :as_of)" q.params["as_of"] = as_of - query = f"WITH m as {merge_tables}, c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` FROM m WHERE {q.conditions_clause.replace('c.', 'm.')} AND {sub_condition_asof}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` FROM 
{merge_tables} WHERE {q.conditions_clause} AND {sub_condition_asof}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" else: # final query using most recent issues - subquery = f"(SELECT max(`issue`) `max_issue`, `date`, `state` FROM m WHERE {q.conditions_clause.replace('c.', 'm.')} GROUP BY `date`, `state`) x" + subquery = f"(SELECT max(`issue`) `max_issue`, `date`, `state` FROM {merge_tables} WHERE {q.conditions_clause} GROUP BY `date`, `state`) x" condition = f"x.`max_issue` = {q.alias}.`issue` AND x.`date` = {q.alias}.`date` AND x.`state` = {q.alias}.`state`" - query = f"WITH m as {merge_tables}, c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM m JOIN {subquery} ON {condition}) select {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM {merge_tables} JOIN {subquery} ON {condition}) select {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" # send query return execute_query(query, q.params, fields_string, fields_int, fields_float) From 927cbd1315612f0eed8a55e55d830cdc29265cc5 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Wed, 12 Apr 2023 20:38:48 +0300 Subject: [PATCH 13/42] Undo test changes --- .../covid_hosp/state_timeseries/test_scenarios.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py b/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py index bd83c6808..d62550a32 100644 --- a/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py +++ b/integrations/acquisition/covid_hosp/state_timeseries/test_scenarios.py @@ -53,9 +53,9 @@ def test_acquire_dataset(self): self.test_utils.load_sample_dataset() # make sure the data does not yet exist - # 
with self.subTest(name='no data yet'): - # response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101)) - # self.assertEqual(response['result'], -2) + with self.subTest(name='no data yet'): + response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101)) + self.assertEqual(response['result'], -2) # acquire sample data into local database with self.subTest(name='first acquisition'): From bfe3394ad6e7facc7b2fd07b6efafa80728992e4 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Thu, 13 Apr 2023 00:29:02 +0300 Subject: [PATCH 14/42] Add missing lines --- integrations/server/test_covid_hosp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/integrations/server/test_covid_hosp.py b/integrations/server/test_covid_hosp.py index 0b2f98334..ba51e4cd9 100644 --- a/integrations/server/test_covid_hosp.py +++ b/integrations/server/test_covid_hosp.py @@ -25,6 +25,7 @@ def setUp(self): # clear relevant tables with Database.connect() as db: with db.new_cursor() as cur: + cur.execute('truncate table covid_hosp_state_daily') cur.execute('truncate table covid_hosp_state_timeseries') cur.execute('truncate table covid_hosp_meta') From 62fe29b539cab1d80ad4c47c3dea6c159aa7513a Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Thu, 13 Apr 2023 00:38:43 +0300 Subject: [PATCH 15/42] Test AUTO_INCREMENT --- src/ddl/covid_hosp.sql | 125 +--------------- .../covid_hosp_state_split_tables.sql | 136 +----------------- 2 files changed, 9 insertions(+), 252 deletions(-) diff --git a/src/ddl/covid_hosp.sql b/src/ddl/covid_hosp.sql index 6f3070d7a..fade160a2 100644 --- a/src/ddl/covid_hosp.sql +++ b/src/ddl/covid_hosp.sql @@ -129,7 +129,6 @@ Data is public under the Open Data Commons Open Database License (ODbL). 
| adult_icu_bed_utilization_coverage | int(11) | YES | | NULL | | | adult_icu_bed_utilization_numerator | int(11) | YES | | NULL | | | adult_icu_bed_utilization_denominator | int(11) | YES | | NULL | | -| record_type | char(1) | NO | MUL | NULL | | +------------------------------------------------------------------+---------+------+-----+---------+----------------+ - `id` @@ -504,126 +503,6 @@ which contains data from the daily snapshot files. Schema is equivalent to `covid_hosp_state_timeseries`. */ CREATE TABLE `covid_hosp_state_daily` ( - `id` INT NOT NULL AUTO_INCREMENT, - `issue` INT NOT NULL, - `state` CHAR(2) NOT NULL, - `date` INT NOT NULL, - `critical_staffing_shortage_today_yes` INT, - `critical_staffing_shortage_today_no` INT, - `critical_staffing_shortage_today_not_reported` INT, - `critical_staffing_shortage_anticipated_within_week_yes` INT, - `critical_staffing_shortage_anticipated_within_week_no` INT, - `critical_staffing_shortage_anticipated_within_week_not_reported` INT, - `hospital_onset_covid` INT, - `hospital_onset_covid_coverage` INT, - `inpatient_beds` INT, - `inpatient_beds_coverage` INT, - `inpatient_beds_used` INT, - `inpatient_beds_used_coverage` INT, - `inpatient_beds_used_covid` INT, - `inpatient_beds_used_covid_coverage` INT, - `previous_day_admission_adult_covid_confirmed` INT, - `previous_day_admission_adult_covid_confirmed_coverage` INT, - `previous_day_admission_adult_covid_suspected` INT, - `previous_day_admission_adult_covid_suspected_coverage` INT, - `previous_day_admission_pediatric_covid_confirmed` INT, - `previous_day_admission_pediatric_covid_confirmed_coverage` INT, - `previous_day_admission_pediatric_covid_suspected` INT, - `previous_day_admission_pediatric_covid_suspected_coverage` INT, - `staffed_adult_icu_bed_occupancy` INT, - `staffed_adult_icu_bed_occupancy_coverage` INT, - `staffed_icu_adult_patients_confirmed_suspected_covid` INT, - `staffed_icu_adult_patients_confirmed_suspected_covid_coverage` INT, - 
`staffed_icu_adult_patients_confirmed_covid` INT, - `staffed_icu_adult_patients_confirmed_covid_coverage` INT, - `total_adult_patients_hosp_confirmed_suspected_covid` INT, - `total_adult_patients_hosp_confirmed_suspected_covid_coverage` INT, - `total_adult_patients_hosp_confirmed_covid` INT, - `total_adult_patients_hosp_confirmed_covid_coverage` INT, - `total_pediatric_patients_hosp_confirmed_suspected_covid` INT, - `total_pediatric_patients_hosp_confirmed_suspected_covid_coverage` INT, - `total_pediatric_patients_hosp_confirmed_covid` INT, - `total_pediatric_patients_hosp_confirmed_covid_coverage` INT, - `total_staffed_adult_icu_beds` INT, - `total_staffed_adult_icu_beds_coverage` INT, - `inpatient_beds_utilization` DOUBLE, - `inpatient_beds_utilization_coverage` INT, - `inpatient_beds_utilization_numerator` INT, - `inpatient_beds_utilization_denominator` INT, - `percent_of_inpatients_with_covid` DOUBLE, - `percent_of_inpatients_with_covid_coverage` INT, - `percent_of_inpatients_with_covid_numerator` INT, - `percent_of_inpatients_with_covid_denominator` INT, - `inpatient_bed_covid_utilization` DOUBLE, - `inpatient_bed_covid_utilization_coverage` INT, - `inpatient_bed_covid_utilization_numerator` INT, - `inpatient_bed_covid_utilization_denominator` INT, - `adult_icu_bed_covid_utilization` DOUBLE, - `adult_icu_bed_covid_utilization_coverage` INT, - `adult_icu_bed_covid_utilization_numerator` INT, - `adult_icu_bed_covid_utilization_denominator` INT, - `adult_icu_bed_utilization` DOUBLE, - `adult_icu_bed_utilization_coverage` INT, - `adult_icu_bed_utilization_numerator` INT, - `adult_icu_bed_utilization_denominator` INT, - -- new columns added Oct 10 - `geocoded_state` VARCHAR(32), - `previous_day_admission_adult_covid_confirmed_18_19` INT, - `previous_day_admission_adult_covid_confirmed_18_19_coverage` INT, - `previous_day_admission_adult_covid_confirmed_20_29` INT, - `previous_day_admission_adult_covid_confirmed_20_29_coverage` INT, - 
`previous_day_admission_adult_covid_confirmed_30_39` INT, - `previous_day_admission_adult_covid_confirmed_30_39_coverage` INT, - `previous_day_admission_adult_covid_confirmed_40_49` INT, - `previous_day_admission_adult_covid_confirmed_40_49_coverage` INT, - `previous_day_admission_adult_covid_confirmed_50_59` INT, - `previous_day_admission_adult_covid_confirmed_50_59_coverage` INT, - `previous_day_admission_adult_covid_confirmed_60_69` INT, - `previous_day_admission_adult_covid_confirmed_60_69_coverage` INT, - `previous_day_admission_adult_covid_confirmed_70_79` INT, - `previous_day_admission_adult_covid_confirmed_70_79_coverage` INT, - `previous_day_admission_adult_covid_confirmed_80plus` INT, - `previous_day_admission_adult_covid_confirmed_80plus_coverage` INT, - `previous_day_admission_adult_covid_confirmed_unknown` INT, - `previous_day_admission_adult_covid_confirmed_unknown_coverage` INT, - `previous_day_admission_adult_covid_suspected_18_19` INT, - `previous_day_admission_adult_covid_suspected_18_19_coverage` INT, - `previous_day_admission_adult_covid_suspected_20_29` INT, - `previous_day_admission_adult_covid_suspected_20_29_coverage` INT, - `previous_day_admission_adult_covid_suspected_30_39` INT, - `previous_day_admission_adult_covid_suspected_30_39_coverage` INT, - `previous_day_admission_adult_covid_suspected_40_49` INT, - `previous_day_admission_adult_covid_suspected_40_49_coverage` INT, - `previous_day_admission_adult_covid_suspected_50_59` INT, - `previous_day_admission_adult_covid_suspected_50_59_coverage` INT, - `previous_day_admission_adult_covid_suspected_60_69` INT, - `previous_day_admission_adult_covid_suspected_60_69_coverage` INT, - `previous_day_admission_adult_covid_suspected_70_79` INT, - `previous_day_admission_adult_covid_suspected_70_79_coverage` INT, - `previous_day_admission_adult_covid_suspected_80plus` INT, - `previous_day_admission_adult_covid_suspected_80plus_coverage` INT, - `previous_day_admission_adult_covid_suspected_unknown` 
INT, - `previous_day_admission_adult_covid_suspected_unknown_coverage` INT, - `deaths_covid` INT, - `deaths_covid_coverage` INT, - `on_hand_supply_therapeutic_a_casirivimab_imdevimab_courses` INT, - `on_hand_supply_therapeutic_b_bamlanivimab_courses` INT, - `on_hand_supply_therapeutic_c_bamlanivimab_etesevimab_courses` INT, - `previous_week_therapeutic_a_casirivimab_imdevimab_courses_used` INT, - `previous_week_therapeutic_b_bamlanivimab_courses_used` INT, - `previous_week_therapeutic_c_bamlanivimab_etesevimab_courses_used` INT, - `icu_patients_confirmed_influenza` INT, - `icu_patients_confirmed_influenza_coverage` INT, - `previous_day_admission_influenza_confirmed` INT, - `previous_day_admission_influenza_confirmed_coverage` INT, - `previous_day_deaths_covid_and_influenza` INT, - `previous_day_deaths_covid_and_influenza_coverage` INT, - `previous_day_deaths_influenza` INT, - `previous_day_deaths_influenza_coverage` INT, - `total_patients_hospitalized_confirmed_influenza` INT, - `total_patients_hospitalized_confirmed_influenza_covid` INT, - `total_patients_hospitalized_confirmed_influenza_covid_coverage` INT, - `total_patients_hospitalized_confirmed_influenza_coverage` INT, PRIMARY KEY (`id`), -- for uniqueness -- for fast lookup of most recent issue for a given state and date @@ -632,7 +511,9 @@ CREATE TABLE `covid_hosp_state_daily` ( KEY `date_by_issue_and_state` (`issue`, `state`, `date`), -- for fast lookup of all states for a given date and issue KEY `state_by_issue_and_date` (`issue`, `date`, `state`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8 +SELECT * FROM covid_hosp_state_timeseries; +ALTER TABLE covid_hosp_state_daily MODIFY id INT NOT NULL AUTO_INCREMENT; /* `covid_hosp_facility` stores the versioned "facility" dataset. 
diff --git a/src/ddl/migrations/covid_hosp_state_split_tables.sql b/src/ddl/migrations/covid_hosp_state_split_tables.sql index f052d47cd..de1d0ff0b 100644 --- a/src/ddl/migrations/covid_hosp_state_split_tables.sql +++ b/src/ddl/migrations/covid_hosp_state_split_tables.sql @@ -1,127 +1,6 @@ -- 1. Add new state_daily table mirroring state_timeseries table CREATE TABLE `covid_hosp_state_daily` ( - `id` INT NOT NULL AUTO_INCREMENT, - `issue` INT NOT NULL, - `state` CHAR(2) NOT NULL, - `date` INT NOT NULL, - `critical_staffing_shortage_today_yes` INT, - `critical_staffing_shortage_today_no` INT, - `critical_staffing_shortage_today_not_reported` INT, - `critical_staffing_shortage_anticipated_within_week_yes` INT, - `critical_staffing_shortage_anticipated_within_week_no` INT, - `critical_staffing_shortage_anticipated_within_week_not_reported` INT, - `hospital_onset_covid` INT, - `hospital_onset_covid_coverage` INT, - `inpatient_beds` INT, - `inpatient_beds_coverage` INT, - `inpatient_beds_used` INT, - `inpatient_beds_used_coverage` INT, - `inpatient_beds_used_covid` INT, - `inpatient_beds_used_covid_coverage` INT, - `previous_day_admission_adult_covid_confirmed` INT, - `previous_day_admission_adult_covid_confirmed_coverage` INT, - `previous_day_admission_adult_covid_suspected` INT, - `previous_day_admission_adult_covid_suspected_coverage` INT, - `previous_day_admission_pediatric_covid_confirmed` INT, - `previous_day_admission_pediatric_covid_confirmed_coverage` INT, - `previous_day_admission_pediatric_covid_suspected` INT, - `previous_day_admission_pediatric_covid_suspected_coverage` INT, - `staffed_adult_icu_bed_occupancy` INT, - `staffed_adult_icu_bed_occupancy_coverage` INT, - `staffed_icu_adult_patients_confirmed_suspected_covid` INT, - `staffed_icu_adult_patients_confirmed_suspected_covid_coverage` INT, - `staffed_icu_adult_patients_confirmed_covid` INT, - `staffed_icu_adult_patients_confirmed_covid_coverage` INT, - 
`total_adult_patients_hosp_confirmed_suspected_covid` INT, - `total_adult_patients_hosp_confirmed_suspected_covid_coverage` INT, - `total_adult_patients_hosp_confirmed_covid` INT, - `total_adult_patients_hosp_confirmed_covid_coverage` INT, - `total_pediatric_patients_hosp_confirmed_suspected_covid` INT, - `total_pediatric_patients_hosp_confirmed_suspected_covid_coverage` INT, - `total_pediatric_patients_hosp_confirmed_covid` INT, - `total_pediatric_patients_hosp_confirmed_covid_coverage` INT, - `total_staffed_adult_icu_beds` INT, - `total_staffed_adult_icu_beds_coverage` INT, - `inpatient_beds_utilization` DOUBLE, - `inpatient_beds_utilization_coverage` INT, - `inpatient_beds_utilization_numerator` INT, - `inpatient_beds_utilization_denominator` INT, - `percent_of_inpatients_with_covid` DOUBLE, - `percent_of_inpatients_with_covid_coverage` INT, - `percent_of_inpatients_with_covid_numerator` INT, - `percent_of_inpatients_with_covid_denominator` INT, - `inpatient_bed_covid_utilization` DOUBLE, - `inpatient_bed_covid_utilization_coverage` INT, - `inpatient_bed_covid_utilization_numerator` INT, - `inpatient_bed_covid_utilization_denominator` INT, - `adult_icu_bed_covid_utilization` DOUBLE, - `adult_icu_bed_covid_utilization_coverage` INT, - `adult_icu_bed_covid_utilization_numerator` INT, - `adult_icu_bed_covid_utilization_denominator` INT, - `adult_icu_bed_utilization` DOUBLE, - `adult_icu_bed_utilization_coverage` INT, - `adult_icu_bed_utilization_numerator` INT, - `adult_icu_bed_utilization_denominator` INT, - `record_type` CHAR(1) NOT NULL, - -- new columns added Oct 10 - `geocoded_state` VARCHAR(32), - `previous_day_admission_adult_covid_confirmed_18_19` INT, - `previous_day_admission_adult_covid_confirmed_18_19_coverage` INT, - `previous_day_admission_adult_covid_confirmed_20_29` INT, - `previous_day_admission_adult_covid_confirmed_20_29_coverage` INT, - `previous_day_admission_adult_covid_confirmed_30_39` INT, - 
`previous_day_admission_adult_covid_confirmed_30_39_coverage` INT, - `previous_day_admission_adult_covid_confirmed_40_49` INT, - `previous_day_admission_adult_covid_confirmed_40_49_coverage` INT, - `previous_day_admission_adult_covid_confirmed_50_59` INT, - `previous_day_admission_adult_covid_confirmed_50_59_coverage` INT, - `previous_day_admission_adult_covid_confirmed_60_69` INT, - `previous_day_admission_adult_covid_confirmed_60_69_coverage` INT, - `previous_day_admission_adult_covid_confirmed_70_79` INT, - `previous_day_admission_adult_covid_confirmed_70_79_coverage` INT, - `previous_day_admission_adult_covid_confirmed_80plus` INT, - `previous_day_admission_adult_covid_confirmed_80plus_coverage` INT, - `previous_day_admission_adult_covid_confirmed_unknown` INT, - `previous_day_admission_adult_covid_confirmed_unknown_coverage` INT, - `previous_day_admission_adult_covid_suspected_18_19` INT, - `previous_day_admission_adult_covid_suspected_18_19_coverage` INT, - `previous_day_admission_adult_covid_suspected_20_29` INT, - `previous_day_admission_adult_covid_suspected_20_29_coverage` INT, - `previous_day_admission_adult_covid_suspected_30_39` INT, - `previous_day_admission_adult_covid_suspected_30_39_coverage` INT, - `previous_day_admission_adult_covid_suspected_40_49` INT, - `previous_day_admission_adult_covid_suspected_40_49_coverage` INT, - `previous_day_admission_adult_covid_suspected_50_59` INT, - `previous_day_admission_adult_covid_suspected_50_59_coverage` INT, - `previous_day_admission_adult_covid_suspected_60_69` INT, - `previous_day_admission_adult_covid_suspected_60_69_coverage` INT, - `previous_day_admission_adult_covid_suspected_70_79` INT, - `previous_day_admission_adult_covid_suspected_70_79_coverage` INT, - `previous_day_admission_adult_covid_suspected_80plus` INT, - `previous_day_admission_adult_covid_suspected_80plus_coverage` INT, - `previous_day_admission_adult_covid_suspected_unknown` INT, - 
`previous_day_admission_adult_covid_suspected_unknown_coverage` INT, - `deaths_covid` INT, - `deaths_covid_coverage` INT, - `on_hand_supply_therapeutic_a_casirivimab_imdevimab_courses` INT, - `on_hand_supply_therapeutic_b_bamlanivimab_courses` INT, - `on_hand_supply_therapeutic_c_bamlanivimab_etesevimab_courses` INT, - `previous_week_therapeutic_a_casirivimab_imdevimab_courses_used` INT, - `previous_week_therapeutic_b_bamlanivimab_courses_used` INT, - `previous_week_therapeutic_c_bamlanivimab_etesevimab_courses_used` INT, - `icu_patients_confirmed_influenza` INT, - `icu_patients_confirmed_influenza_coverage` INT, - `previous_day_admission_influenza_confirmed` INT, - `previous_day_admission_influenza_confirmed_coverage` INT, - `previous_day_deaths_covid_and_influenza` INT, - `previous_day_deaths_covid_and_influenza_coverage` INT, - `previous_day_deaths_influenza` INT, - `previous_day_deaths_influenza_coverage` INT, - `total_patients_hospitalized_confirmed_influenza` INT, - `total_patients_hospitalized_confirmed_influenza_covid` INT, - `total_patients_hospitalized_confirmed_influenza_covid_coverage` INT, - `total_patients_hospitalized_confirmed_influenza_coverage` INT, PRIMARY KEY (`id`), -- for uniqueness -- for fast lookup of most recent issue for a given state and date @@ -130,19 +9,16 @@ CREATE TABLE `covid_hosp_state_daily` ( KEY `date_by_issue_and_state` (`issue`, `state`, `date`), -- for fast lookup of all states for a given date and issue KEY `state_by_issue_and_date` (`issue`, `date`, `state`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8 +SELECT * FROM covid_hosp_state_timeseries; +ALTER TABLE covid_hosp_state_daily MODIFY id INT NOT NULL AUTO_INCREMENT; --- 2. Move data with record_type=D into new table - -INSERT INTO `covid_hosp_state_daily` -SELECT * FROM `covid_hosp_state_timeseries` -WHERE record_type='D'; - --- 3. Remove that data from the old table +-- 2. 
Remove data with incorrect record_type from both tables (keep D in daily, T in timeseries) DELETE FROM `covid_hosp_state_timeseries` WHERE record_type='D'; +DELETE FROM `covid_hosp_state_daily` WHERE record_type='T'; --- 4. Remove the record_type column from both tables +-- 3. Remove the record_type column from both tables ALTER TABLE `covid_hosp_state_daily` DROP COLUMN record_type; ALTER TABLE `covid_hosp_state_timeseries` DROP COLUMN record_type; From 015eaf416517ac7d3529df5cea60e94c65a5cb9c Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Thu, 13 Apr 2023 00:57:33 +0300 Subject: [PATCH 16/42] Review fixes --- integrations/server/test_covid_hosp.py | 8 ++++---- src/acquisition/covid_hosp/common/database.py | 14 ++++---------- src/ddl/covid_hosp.sql | 4 ++-- .../migrations/covid_hosp_state_split_tables.sql | 2 +- 4 files changed, 11 insertions(+), 17 deletions(-) diff --git a/integrations/server/test_covid_hosp.py b/integrations/server/test_covid_hosp.py index ba51e4cd9..7f53d6174 100644 --- a/integrations/server/test_covid_hosp.py +++ b/integrations/server/test_covid_hosp.py @@ -31,15 +31,15 @@ def setUp(self): def insert_timeseries(self, cur, issue, value): - so_many_nulls = ', '.join(['null'] * 57) + so_many_nulls = ', '.join(['null'] * 114) cur.execute(f'''insert into covid_hosp_state_timeseries values ( - 0, {issue}, 'PA', 20201118, {value}, {so_many_nulls}, {so_many_nulls} + 0, {issue}, 'PA', 20201118, {value}, {so_many_nulls} )''') def insert_daily(self, cur, issue, value): - so_many_nulls = ', '.join(['null'] * 57) + so_many_nulls = ', '.join(['null'] * 114) cur.execute(f'''insert into covid_hosp_state_daily values ( - 0, {issue}, 'PA', 20201118, {value}, {so_many_nulls}, {so_many_nulls} + 0, {issue}, 'PA', 20201118, {value}, {so_many_nulls} )''') def test_query_by_issue(self): diff --git a/src/acquisition/covid_hosp/common/database.py b/src/acquisition/covid_hosp/common/database.py index f69dceefc..173ae4a7a 100644 --- 
a/src/acquisition/covid_hosp/common/database.py +++ b/src/acquisition/covid_hosp/common/database.py @@ -22,8 +22,7 @@ def __init__(self, table_name=None, hhs_dataset_id=None, columns_and_types=None, - key_columns=None, - additional_fields=None): + key_columns=None): """Create a new Database object. Parameters @@ -37,9 +36,6 @@ def __init__(self, columns_and_types : tuple[str, str, Callable] List of 3-tuples of (CSV header name, SQL column name, data type) for all the columns in the CSV file. - additional_fields : tuple[str] - List of 2-tuples of (value, SQL column name) fordditional fields to include - at the end of the row which are not present in the CSV data. """ self.connection = connection @@ -52,7 +48,6 @@ def __init__(self, for c in (columns_and_types if columns_and_types is not None else []) } self.key_columns = key_columns if key_columns is not None else [] - self.additional_fields = additional_fields if additional_fields is not None else [] @classmethod def logger(database_class): @@ -184,9 +179,9 @@ def nan_safe_dtype(dtype, value): for csv_name in self.key_columns: dataframe.loc[:, csv_name] = dataframe[csv_name].map(self.columns_and_types[csv_name].dtype) - num_columns = 2 + len(dataframe_columns_and_types) + len(self.additional_fields) + num_columns = 2 + len(dataframe_columns_and_types) value_placeholders = ', '.join(['%s'] * num_columns) - columns = ', '.join(f'`{i.sql_name}`' for i in dataframe_columns_and_types + self.additional_fields) + columns = ', '.join(f'`{i.sql_name}`' for i in dataframe_columns_and_types) sql = f'INSERT INTO `{self.table_name}` (`id`, `{self.publication_col_name}`, {columns}) ' \ f'VALUES ({value_placeholders})' id_and_publication_date = (0, publication_date) @@ -200,8 +195,7 @@ def nan_safe_dtype(dtype, value): for c in dataframe_columns_and_types: values.append(nan_safe_dtype(c.dtype, row[c.csv_name])) many_values.append(id_and_publication_date + - tuple(values) + - tuple(i.csv_name for i in self.additional_fields)) + 
tuple(values)) n += 1 # insert in batches because one at a time is slow and all at once makes # the connection drop :( diff --git a/src/ddl/covid_hosp.sql b/src/ddl/covid_hosp.sql index fade160a2..ce149192f 100644 --- a/src/ddl/covid_hosp.sql +++ b/src/ddl/covid_hosp.sql @@ -498,13 +498,13 @@ CREATE TABLE `covid_hosp_state_timeseries` ( /* -`covid_hosp_state_timeseries` stores the versioned "state timeseries" dataset, +`covid_hosp_state_daily` stores the versioned "state timeseries" dataset, which contains data from the daily snapshot files. Schema is equivalent to `covid_hosp_state_timeseries`. */ CREATE TABLE `covid_hosp_state_daily` ( - PRIMARY KEY (`id`), -- for uniqueness + PRIMARY KEY (`id`), -- for fast lookup of most recent issue for a given state and date UNIQUE KEY `issue_by_state_and_date` (`state`, `date`, `issue`), -- for fast lookup of a time-series for a given state and issue diff --git a/src/ddl/migrations/covid_hosp_state_split_tables.sql b/src/ddl/migrations/covid_hosp_state_split_tables.sql index de1d0ff0b..211339f8a 100644 --- a/src/ddl/migrations/covid_hosp_state_split_tables.sql +++ b/src/ddl/migrations/covid_hosp_state_split_tables.sql @@ -1,8 +1,8 @@ -- 1. 
Add new state_daily table mirroring state_timeseries table CREATE TABLE `covid_hosp_state_daily` ( - PRIMARY KEY (`id`), -- for uniqueness + PRIMARY KEY (`id`), -- for fast lookup of most recent issue for a given state and date UNIQUE KEY `issue_by_state_and_date` (`state`, `date`, `issue`), -- for fast lookup of a time-series for a given state and issue From 28873b74697efccf504be4e9f350defe0f6dc3cf Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Wed, 19 Apr 2023 19:30:08 +0300 Subject: [PATCH 17/42] SQL rewrite --- src/ddl/covid_hosp.sql | 1 + .../covid_hosp_state_split_tables.sql | 4 +- .../endpoints/covid_hosp_state_timeseries.py | 55 ++++++++++++++++--- 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/src/ddl/covid_hosp.sql b/src/ddl/covid_hosp.sql index ce149192f..ca1080a87 100644 --- a/src/ddl/covid_hosp.sql +++ b/src/ddl/covid_hosp.sql @@ -513,6 +513,7 @@ CREATE TABLE `covid_hosp_state_daily` ( KEY `state_by_issue_and_date` (`issue`, `date`, `state`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 SELECT * FROM covid_hosp_state_timeseries; +-- Re-add autoincrement capability ALTER TABLE covid_hosp_state_daily MODIFY id INT NOT NULL AUTO_INCREMENT; /* diff --git a/src/ddl/migrations/covid_hosp_state_split_tables.sql b/src/ddl/migrations/covid_hosp_state_split_tables.sql index 211339f8a..58a29771a 100644 --- a/src/ddl/migrations/covid_hosp_state_split_tables.sql +++ b/src/ddl/migrations/covid_hosp_state_split_tables.sql @@ -10,13 +10,13 @@ CREATE TABLE `covid_hosp_state_daily` ( -- for fast lookup of all states for a given date and issue KEY `state_by_issue_and_date` (`issue`, `date`, `state`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 -SELECT * FROM covid_hosp_state_timeseries; +SELECT * FROM covid_hosp_state_timeseries WHERE record_type='D'; +-- Re-add autoincrement capability ALTER TABLE covid_hosp_state_daily MODIFY id INT NOT NULL AUTO_INCREMENT; -- 2. 
Remove data with incorrect record_type from both tables (keep D in daily, T in timeseries) DELETE FROM `covid_hosp_state_timeseries` WHERE record_type='D'; -DELETE FROM `covid_hosp_state_daily` WHERE record_type='T'; -- 3. Remove the record_type column from both tables diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index cb744aa69..18dde14a4 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -152,20 +152,59 @@ def handle(): q.where_integers("date", dates) q.where_strings("state", states) - merge_tables = "(SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` UNION ALL SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries`) c" + # These queries prioritize the daily value if there is both a time series and daily value for a given issue/date/state. + # Further details: https://github.com/cmu-delphi/delphi-epidata/pull/336 if issues is not None: + # Filter for specific issues q.where_integers("issue", issues) - # final query using specific issues - query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM {merge_tables} WHERE {q.conditions_clause}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + union_subquery = f''' + ( + SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` WHERE {q.conditions_clause} + UNION ALL + SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` WHERE {q.conditions_clause} + ) c''' + query = f''' + WITH c as ( + SELECT {q.fields_clause}, ROW_NUMBER() + OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` + FROM {union_subquery} + ) + SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} + ''' elif as_of is not None: + # Filter for issues before a given as_of sub_condition_asof = "(issue <= :as_of)" q.params["as_of"] = 
as_of - query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` FROM {merge_tables} WHERE {q.conditions_clause} AND {sub_condition_asof}) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + union_subquery = f''' + ( + SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` WHERE {q.conditions_clause} AND {sub_condition_asof} + UNION ALL + SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` WHERE {q.conditions_clause} AND {sub_condition_asof} + ) c''' + query = f''' + WITH c as ( + SELECT {q.fields_clause}, ROW_NUMBER() + OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` + FROM {union_subquery} + ) + SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} + ''' else: - # final query using most recent issues - subquery = f"(SELECT max(`issue`) `max_issue`, `date`, `state` FROM {merge_tables} WHERE {q.conditions_clause} GROUP BY `date`, `state`) x" - condition = f"x.`max_issue` = {q.alias}.`issue` AND x.`date` = {q.alias}.`date` AND x.`state` = {q.alias}.`state`" - query = f"WITH c as (SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM {merge_tables} JOIN {subquery} ON {condition}) select {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause}" + # Simply use most recent issues + union_subquery = f''' + ( + SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` WHERE {q.conditions_clause} + UNION ALL + SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` WHERE {q.conditions_clause} + ) c''' + query = f''' + WITH c as ( + SELECT {q.fields_clause}, ROW_NUMBER() + OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` + FROM {union_subquery} + ) + SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} + ''' # send query return execute_query(query, q.params, 
fields_string, fields_int, fields_float) From f6aec5ddd282891efd85e4ba9e2513104d662c5e Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Wed, 19 Apr 2023 19:38:08 +0300 Subject: [PATCH 18/42] aliases + indent --- .../endpoints/covid_hosp_state_timeseries.py | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 18dde14a4..2571d910e 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -159,14 +159,13 @@ def handle(): q.where_integers("issue", issues) union_subquery = f''' ( - SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` WHERE {q.conditions_clause} + SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` c WHERE {q.conditions_clause} UNION ALL - SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` WHERE {q.conditions_clause} + SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` c WHERE {q.conditions_clause} ) c''' query = f''' WITH c as ( - SELECT {q.fields_clause}, ROW_NUMBER() - OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` + SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM {union_subquery} ) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} @@ -177,14 +176,13 @@ def handle(): q.params["as_of"] = as_of union_subquery = f''' ( - SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` WHERE {q.conditions_clause} AND {sub_condition_asof} + SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` c WHERE {q.conditions_clause} AND {sub_condition_asof} UNION ALL - SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` WHERE {q.conditions_clause} AND {sub_condition_asof} + SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` c WHERE {q.conditions_clause} AND 
{sub_condition_asof} ) c''' query = f''' WITH c as ( - SELECT {q.fields_clause}, ROW_NUMBER() - OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` + SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` FROM {union_subquery} ) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} @@ -193,14 +191,13 @@ def handle(): # Simply use most recent issues union_subquery = f''' ( - SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` WHERE {q.conditions_clause} + SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` c WHERE {q.conditions_clause} UNION ALL - SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` WHERE {q.conditions_clause} + SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` c WHERE {q.conditions_clause} ) c''' query = f''' WITH c as ( - SELECT {q.fields_clause}, ROW_NUMBER() - OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` + SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` FROM {union_subquery} ) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} From ef55754d01f6f3465fdf7e306f4d2c929aa03ae5 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Wed, 19 Apr 2023 19:44:19 +0300 Subject: [PATCH 19/42] simplify --- .../endpoints/covid_hosp_state_timeseries.py | 42 ++++++++----------- 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 2571d910e..3ff42d869 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -170,31 +170,25 @@ def handle(): ) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} ''' - elif as_of is not None: - # Filter for issues before a given as_of - sub_condition_asof = 
"(issue <= :as_of)" - q.params["as_of"] = as_of - union_subquery = f''' - ( - SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` c WHERE {q.conditions_clause} AND {sub_condition_asof} - UNION ALL - SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` c WHERE {q.conditions_clause} AND {sub_condition_asof} - ) c''' - query = f''' - WITH c as ( - SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` - FROM {union_subquery} - ) - SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} - ''' else: - # Simply use most recent issues - union_subquery = f''' - ( - SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` c WHERE {q.conditions_clause} - UNION ALL - SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` c WHERE {q.conditions_clause} - ) c''' + if as_of is not None: + # Filter for issues before a given as_of + sub_condition_asof = "(issue <= :as_of)" + q.params["as_of"] = as_of + union_subquery = f''' + ( + SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` c WHERE {q.conditions_clause} AND {sub_condition_asof} + UNION ALL + SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` c WHERE {q.conditions_clause} AND {sub_condition_asof} + ) c''' + else: + # Simply use most recent issues + union_subquery = f''' + ( + SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` c WHERE {q.conditions_clause} + UNION ALL + SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` c WHERE {q.conditions_clause} + ) c''' query = f''' WITH c as ( SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` From 199ad09ffb3fcee1864faa2c1eaef65fe82a26f5 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Thu, 11 May 2023 01:17:11 +0300 Subject: [PATCH 20/42] Update migration docs Co-authored-by: Katie Mazaitis --- src/ddl/migrations/covid_hosp_state_split_tables.sql | 2 +- 1 file changed, 
1 insertion(+), 1 deletion(-) diff --git a/src/ddl/migrations/covid_hosp_state_split_tables.sql b/src/ddl/migrations/covid_hosp_state_split_tables.sql index 58a29771a..1db542021 100644 --- a/src/ddl/migrations/covid_hosp_state_split_tables.sql +++ b/src/ddl/migrations/covid_hosp_state_split_tables.sql @@ -14,7 +14,7 @@ SELECT * FROM covid_hosp_state_timeseries WHERE record_type='D'; -- Re-add autoincrement capability ALTER TABLE covid_hosp_state_daily MODIFY id INT NOT NULL AUTO_INCREMENT; --- 2. Remove data with incorrect record_type from both tables (keep D in daily, T in timeseries) +-- 2. Remove data with incorrect record_type from timeseries table (D records were moved to daily) DELETE FROM `covid_hosp_state_timeseries` WHERE record_type='D'; From db925ef4beaa46ac32487b5209333effff3b9184 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 12 May 2023 21:30:04 +0300 Subject: [PATCH 21/42] Try alternate recent issues query --- .../covid_hosp_state_split_tables.sql | 2 +- .../endpoints/covid_hosp_state_timeseries.py | 56 ++++++++++++------- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/src/ddl/migrations/covid_hosp_state_split_tables.sql b/src/ddl/migrations/covid_hosp_state_split_tables.sql index 1db542021..03e8ac7ce 100644 --- a/src/ddl/migrations/covid_hosp_state_split_tables.sql +++ b/src/ddl/migrations/covid_hosp_state_split_tables.sql @@ -11,7 +11,7 @@ CREATE TABLE `covid_hosp_state_daily` ( KEY `state_by_issue_and_date` (`issue`, `date`, `state`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 SELECT * FROM covid_hosp_state_timeseries WHERE record_type='D'; --- Re-add autoincrement capability +-- AUTOINCREMENT is not preserved by `CREATE TABLE ... SELECT`; Re-add ALTER TABLE covid_hosp_state_daily MODIFY id INT NOT NULL AUTO_INCREMENT; -- 2. 
Remove data with incorrect record_type from timeseries table (D records were moved to daily) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 3ff42d869..03ee3787d 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -153,9 +153,9 @@ def handle(): q.where_strings("state", states) # These queries prioritize the daily value if there is both a time series and daily value for a given issue/date/state. - # Further details: https://github.com/cmu-delphi/delphi-epidata/pull/336 + # Further details: https://github.com/cmu-delphi/delphi-epidata/pull/336/files#diff-097d4969fdc9ac1f722809e85f3dc59ad371b66011861a50d15fcc605839c63dR364-R368 if issues is not None: - # Filter for specific issues + # Filter for all matching issues q.where_integers("issue", issues) union_subquery = f''' ( @@ -170,25 +170,16 @@ def handle(): ) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} ''' - else: - if as_of is not None: - # Filter for issues before a given as_of - sub_condition_asof = "(issue <= :as_of)" - q.params["as_of"] = as_of - union_subquery = f''' - ( - SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` c WHERE {q.conditions_clause} AND {sub_condition_asof} - UNION ALL - SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` c WHERE {q.conditions_clause} AND {sub_condition_asof} - ) c''' - else: - # Simply use most recent issues - union_subquery = f''' - ( - SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` c WHERE {q.conditions_clause} - UNION ALL - SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` c WHERE {q.conditions_clause} - ) c''' + elif as_of is not None: + # Filter for issues before a given as_of + sub_condition_asof = "(issue <= :as_of)" + q.params["as_of"] = as_of + union_subquery = f''' + ( + SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` c 
WHERE {q.conditions_clause} AND {sub_condition_asof} + UNION ALL + SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` c WHERE {q.conditions_clause} AND {sub_condition_asof} + ) c''' query = f''' WITH c as ( SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` @@ -196,6 +187,29 @@ def handle(): ) SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} ''' + else: + # Simply use most recent issues + union_subquery = f''' + ( + SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` c WHERE {q.conditions_clause} + UNION ALL + SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` c WHERE {q.conditions_clause} + ) c''' + subquery = f'''( + SELECT max(`issue`) `max_issue`, `date`, `state` + FROM {union_subquery} + GROUP BY `date`, `state` + ) x''' + condition = f"x.`max_issue` = {q.alias}.`issue` AND x.`date` = {q.alias}.`date` AND x.`state` = {q.alias}.`state`" + query = f''' + WITH c as ( + SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` + FROM {union_subquery} + JOIN {subquery} ON {condition} + ) + SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} + ''' + # send query return execute_query(query, q.params, fields_string, fields_int, fields_float) From bc5d736db1d3387ac0cbbb194eaea1ddb300dce4 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Wed, 17 May 2023 22:16:18 +0300 Subject: [PATCH 22/42] Optimized query --- .../endpoints/covid_hosp_state_timeseries.py | 48 +++++-------------- 1 file changed, 13 insertions(+), 35 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 03ee3787d..6431a8124 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -164,52 +164,30 @@ def handle(): SELECT *, 'T' as record_type FROM 
`covid_hosp_state_timeseries` c WHERE {q.conditions_clause} ) c''' query = f''' - WITH c as ( + SELECT {q.fields_clause} FROM ( SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` FROM {union_subquery} - ) - SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} - ''' - elif as_of is not None: - # Filter for issues before a given as_of - sub_condition_asof = "(issue <= :as_of)" - q.params["as_of"] = as_of - union_subquery = f''' - ( - SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` c WHERE {q.conditions_clause} AND {sub_condition_asof} - UNION ALL - SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` c WHERE {q.conditions_clause} AND {sub_condition_asof} - ) c''' - query = f''' - WITH c as ( - SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` - FROM {union_subquery} - ) - SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} + ) {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} ''' else: - # Simply use most recent issues + # Filter for most recent issues + cond_clause = q.conditions_clause + if as_of is not None: + # ...Filter for most recent issues before a given as_of + cond_clause += " AND (issue <= :as_of)" + q.params["as_of"] = as_of union_subquery = f''' ( - SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` c WHERE {q.conditions_clause} + SELECT *, 'D' as record_type, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC) row_d FROM `covid_hosp_state_daily` c WHERE {cond_clause} AND row_d = 1 UNION ALL - SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` c WHERE {q.conditions_clause} + SELECT *, 'T' as record_type, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC) row_t FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} AND row_t = 1 ) c''' - subquery = f'''( - SELECT max(`issue`) `max_issue`, `date`, `state` - FROM 
{union_subquery} - GROUP BY `date`, `state` - ) x''' - condition = f"x.`max_issue` = {q.alias}.`issue` AND x.`date` = {q.alias}.`date` AND x.`state` = {q.alias}.`state`" query = f''' - WITH c as ( - SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) `row` + SELECT {q.fields_clause} FROM ( + SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` FROM {union_subquery} - JOIN {subquery} ON {condition} - ) - SELECT {q.fields_clause} FROM {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} + ) {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} ''' - # send query return execute_query(query, q.params, fields_string, fields_int, fields_float) From 30581b10fa8446d5c567e1be90c3224a169b5b10 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Wed, 17 May 2023 23:00:00 +0300 Subject: [PATCH 23/42] row filter --- src/server/endpoints/covid_hosp_state_timeseries.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 6431a8124..0dc0dd74c 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -178,9 +178,13 @@ def handle(): q.params["as_of"] = as_of union_subquery = f''' ( - SELECT *, 'D' as record_type, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC) row_d FROM `covid_hosp_state_daily` c WHERE {cond_clause} AND row_d = 1 + SELECT * FROM ( + SELECT *, 'D' as record_type, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC) row_d FROM `covid_hosp_state_daily` c WHERE {cond_clause} + ) WHERE row_d = 1 UNION ALL - SELECT *, 'T' as record_type, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC) row_t FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} AND row_t = 1 + SELECT * FROM ( + SELECT *, 'T' as record_type, ROW_NUMBER() 
OVER (PARTITION BY date, state ORDER BY issue DESC) row_t FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} + ) WHERE row_t = 1 ) c''' query = f''' SELECT {q.fields_clause} FROM ( From 3893f624b2863a64977c5e846e2e2256d3dcd85c Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Wed, 17 May 2023 23:16:00 +0300 Subject: [PATCH 24/42] aliases --- src/server/endpoints/covid_hosp_state_timeseries.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 0dc0dd74c..b874c6569 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -180,11 +180,11 @@ def handle(): ( SELECT * FROM ( SELECT *, 'D' as record_type, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC) row_d FROM `covid_hosp_state_daily` c WHERE {cond_clause} - ) WHERE row_d = 1 + ) sub_d WHERE row_d = 1 UNION ALL SELECT * FROM ( SELECT *, 'T' as record_type, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC) row_t FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} - ) WHERE row_t = 1 + ) sub_t WHERE row_t = 1 ) c''' query = f''' SELECT {q.fields_clause} FROM ( From 128fafc4581588eb6eb2ed09615747b2deeebaa8 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 14:28:39 +0300 Subject: [PATCH 25/42] what if it's an index problem --- src/server/endpoints/covid_hosp_state_timeseries.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index b874c6569..34175a0a3 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -165,7 +165,7 @@ def handle(): ) c''' query = f''' SELECT {q.fields_clause} FROM ( - SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY 
date, state, issue ORDER BY record_type) `row` + SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY state, date, issue ORDER BY record_type) `row` FROM {union_subquery} ) {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} ''' @@ -179,16 +179,16 @@ def handle(): union_subquery = f''' ( SELECT * FROM ( - SELECT *, 'D' as record_type, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC) row_d FROM `covid_hosp_state_daily` c WHERE {cond_clause} + SELECT *, 'D' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_d FROM `covid_hosp_state_daily` c WHERE {cond_clause} ) sub_d WHERE row_d = 1 UNION ALL SELECT * FROM ( - SELECT *, 'T' as record_type, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC) row_t FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} + SELECT *, 'T' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_t FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} ) sub_t WHERE row_t = 1 ) c''' query = f''' SELECT {q.fields_clause} FROM ( - SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY date, state ORDER BY issue DESC, record_type) `row` + SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC, record_type) `row` FROM {union_subquery} ) {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} ''' From 3e671b4826786c13cd8dd8a83e72d0803b6057d5 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 14:32:41 +0300 Subject: [PATCH 26/42] and what if this is an index problem too --- src/server/endpoints/covid_hosp_state_timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 34175a0a3..b630bf4bc 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -165,7 +165,7 @@ def handle(): ) c''' query = 
f''' SELECT {q.fields_clause} FROM ( - SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY state, date, issue ORDER BY record_type) `row` + SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY issue, date, state ORDER BY record_type) `row` FROM {union_subquery} ) {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} ''' From adc848728dbaa6d1f2a8eb31c1700ce12b166a3a Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 17:59:10 +0300 Subject: [PATCH 27/42] rewrite #1 --- .../endpoints/covid_hosp_state_timeseries.py | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index b630bf4bc..3736affd7 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -176,21 +176,20 @@ def handle(): # ...Filter for most recent issues before a given as_of cond_clause += " AND (issue <= :as_of)" q.params["as_of"] = as_of - union_subquery = f''' - ( - SELECT * FROM ( - SELECT *, 'D' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_d FROM `covid_hosp_state_daily` c WHERE {cond_clause} - ) sub_d WHERE row_d = 1 - UNION ALL - SELECT * FROM ( - SELECT *, 'T' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_t FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} - ) sub_t WHERE row_t = 1 - ) c''' + query = f''' - SELECT {q.fields_clause} FROM ( - SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC, record_type) `row` - FROM {union_subquery} - ) {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} + WITH max_daily AS ( + SELECT {q.fields_clause}, 'D' as record_type FROM ( + SELECT {q.fields_clause}, max(issue) OVER (PARTITION BY state, date) `max_issue` FROM `covid_hosp_state_daily` c WHERE {cond_clause} + ) c WHERE issue = max_issue + ), 
max_timeseries AS ( + SELECT {q.fields_clause}, 'T' as record_type FROM ( + SELECT {q.fields_clause}, max(issue) OVER (PARTITION BY state, date) `max_issue` FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} + ) c WHERE issue = max_issue + ) SELECT {q.fields_clause} FROM ( + SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY issue, date ORDER BY issue DESC, record_type) `row` + FROM (max_daily UNION ALL max_timeseries) c + ) {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} ''' # send query From 1e7654dfb71b74ac97f73fb6b6f1c1a2b1dde6fb Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 17:59:51 +0300 Subject: [PATCH 28/42] fixup partition --- src/server/endpoints/covid_hosp_state_timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 3736affd7..6a1c28271 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -187,7 +187,7 @@ def handle(): SELECT {q.fields_clause}, max(issue) OVER (PARTITION BY state, date) `max_issue` FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} ) c WHERE issue = max_issue ) SELECT {q.fields_clause} FROM ( - SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY issue, date ORDER BY issue DESC, record_type) `row` + SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC, record_type) `row` FROM (max_daily UNION ALL max_timeseries) c ) {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} ''' From 560e356d13dfc7213fd5805b4a2821baf7ff2ea0 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 18:08:46 +0300 Subject: [PATCH 29/42] fixup union all --- src/server/endpoints/covid_hosp_state_timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py 
b/src/server/endpoints/covid_hosp_state_timeseries.py index 6a1c28271..6ff59f6cf 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -188,7 +188,7 @@ def handle(): ) c WHERE issue = max_issue ) SELECT {q.fields_clause} FROM ( SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC, record_type) `row` - FROM (max_daily UNION ALL max_timeseries) c + FROM ((SELECT * FROM max_daily) md UNION ALL (SELECT * FROM max_timeseries) mt) c ) {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} ''' From 69f98971b21ac141c43e0d28412e7aa3c500a28a Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 18:15:43 +0300 Subject: [PATCH 30/42] Try without CTEs --- .../endpoints/covid_hosp_state_timeseries.py | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 6ff59f6cf..6db2e2382 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -178,18 +178,22 @@ def handle(): q.params["as_of"] = as_of query = f''' - WITH max_daily AS ( - SELECT {q.fields_clause}, 'D' as record_type FROM ( - SELECT {q.fields_clause}, max(issue) OVER (PARTITION BY state, date) `max_issue` FROM `covid_hosp_state_daily` c WHERE {cond_clause} - ) c WHERE issue = max_issue - ), max_timeseries AS ( - SELECT {q.fields_clause}, 'T' as record_type FROM ( - SELECT {q.fields_clause}, max(issue) OVER (PARTITION BY state, date) `max_issue` FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} - ) c WHERE issue = max_issue - ) SELECT {q.fields_clause} FROM ( + SELECT {q.fields_clause} FROM ( SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC, record_type) `row` - FROM ((SELECT * FROM max_daily) md UNION ALL (SELECT * FROM max_timeseries) mt) c - 
) {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} + FROM ( + ( + SELECT {q.fields_clause}, 'D' as record_type FROM ( + SELECT {q.fields_clause}, max(issue) OVER (PARTITION BY state, date) `max_issue` FROM `covid_hosp_state_daily` c WHERE {cond_clause} + ) c WHERE issue = max_issue + ) md + UNION ALL + ( + SELECT {q.fields_clause}, 'T' as record_type FROM ( + SELECT {q.fields_clause}, max(issue) OVER (PARTITION BY state, date) `max_issue` FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} + ) c WHERE issue = max_issue + ) mt + ) c + ) c WHERE `row` = 1 ORDER BY {q.order_clause} ''' # send query From acf9b1f255e4ac6ae36a4045133acdf6764fb271 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 18:24:36 +0300 Subject: [PATCH 31/42] no brackets --- .../endpoints/covid_hosp_state_timeseries.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 6db2e2382..83edcca18 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -181,17 +181,13 @@ def handle(): SELECT {q.fields_clause} FROM ( SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC, record_type) `row` FROM ( - ( - SELECT {q.fields_clause}, 'D' as record_type FROM ( - SELECT {q.fields_clause}, max(issue) OVER (PARTITION BY state, date) `max_issue` FROM `covid_hosp_state_daily` c WHERE {cond_clause} - ) c WHERE issue = max_issue - ) md + SELECT {q.fields_clause}, 'D' as record_type FROM ( + SELECT {q.fields_clause}, max(issue) OVER (PARTITION BY state, date) `max_issue` FROM `covid_hosp_state_daily` c WHERE {cond_clause} + ) c WHERE issue = max_issue UNION ALL - ( - SELECT {q.fields_clause}, 'T' as record_type FROM ( - SELECT {q.fields_clause}, max(issue) OVER (PARTITION BY state, date) `max_issue` FROM `covid_hosp_state_timeseries` c 
WHERE {cond_clause} - ) c WHERE issue = max_issue - ) mt + SELECT {q.fields_clause}, 'T' as record_type FROM ( + SELECT {q.fields_clause}, max(issue) OVER (PARTITION BY state, date) `max_issue` FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} + ) c WHERE issue = max_issue ) c ) c WHERE `row` = 1 ORDER BY {q.order_clause} ''' From 0719c966d916168f8336615f6a723b45f272704d Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 20:03:12 +0300 Subject: [PATCH 32/42] revert to more performant query --- .../endpoints/covid_hosp_state_timeseries.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 83edcca18..b630bf4bc 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -176,20 +176,21 @@ def handle(): # ...Filter for most recent issues before a given as_of cond_clause += " AND (issue <= :as_of)" q.params["as_of"] = as_of - + union_subquery = f''' + ( + SELECT * FROM ( + SELECT *, 'D' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_d FROM `covid_hosp_state_daily` c WHERE {cond_clause} + ) sub_d WHERE row_d = 1 + UNION ALL + SELECT * FROM ( + SELECT *, 'T' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_t FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} + ) sub_t WHERE row_t = 1 + ) c''' query = f''' - SELECT {q.fields_clause} FROM ( - SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC, record_type) `row` - FROM ( - SELECT {q.fields_clause}, 'D' as record_type FROM ( - SELECT {q.fields_clause}, max(issue) OVER (PARTITION BY state, date) `max_issue` FROM `covid_hosp_state_daily` c WHERE {cond_clause} - ) c WHERE issue = max_issue - UNION ALL - SELECT {q.fields_clause}, 'T' as record_type FROM ( - SELECT 
{q.fields_clause}, max(issue) OVER (PARTITION BY state, date) `max_issue` FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} - ) c WHERE issue = max_issue - ) c - ) c WHERE `row` = 1 ORDER BY {q.order_clause} + SELECT {q.fields_clause} FROM ( + SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC, record_type) `row` + FROM {union_subquery} + ) {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} ''' # send query From 1f47ada0136e27cc4868ccec0d76ce548ef3a9ff Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 20:03:59 +0300 Subject: [PATCH 33/42] refactor a little --- src/server/endpoints/covid_hosp_state_timeseries.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index b630bf4bc..9d2593358 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -178,14 +178,14 @@ def handle(): q.params["as_of"] = as_of union_subquery = f''' ( - SELECT * FROM ( - SELECT *, 'D' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_d FROM `covid_hosp_state_daily` c WHERE {cond_clause} + SELECT {q.fields_clause} FROM ( + SELECT {q.fields_clause}, 'D' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_d FROM `covid_hosp_state_daily` {q.alias} WHERE {cond_clause} ) sub_d WHERE row_d = 1 UNION ALL - SELECT * FROM ( - SELECT *, 'T' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_t FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} + SELECT {q.fields_clause} FROM ( + SELECT {q.fields_clause}, 'T' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_t FROM `covid_hosp_state_timeseries` {q.alias} WHERE {cond_clause} ) sub_t WHERE row_t = 1 - ) c''' + ) {q.alias}''' query = f''' SELECT 
{q.fields_clause} FROM ( SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC, record_type) `row` From c147287647d63a2a6f7701ce6d043b34da54289e Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 20:11:11 +0300 Subject: [PATCH 34/42] refactor a little more --- src/server/endpoints/covid_hosp_state_timeseries.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 9d2593358..eaeb9a5a3 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -180,11 +180,11 @@ def handle(): ( SELECT {q.fields_clause} FROM ( SELECT {q.fields_clause}, 'D' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_d FROM `covid_hosp_state_daily` {q.alias} WHERE {cond_clause} - ) sub_d WHERE row_d = 1 + ) {q.alias} WHERE row_d = 1 UNION ALL SELECT {q.fields_clause} FROM ( SELECT {q.fields_clause}, 'T' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_t FROM `covid_hosp_state_timeseries` {q.alias} WHERE {cond_clause} - ) sub_t WHERE row_t = 1 + ) {q.alias} WHERE row_t = 1 ) {q.alias}''' query = f''' SELECT {q.fields_clause} FROM ( From 098d5907cd271c038ed325c69cceffd669ae610c Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 20:17:28 +0300 Subject: [PATCH 35/42] don't forget record_type --- src/server/endpoints/covid_hosp_state_timeseries.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index eaeb9a5a3..91e19a560 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -178,11 +178,11 @@ def handle(): q.params["as_of"] = as_of union_subquery = f''' ( - 
SELECT {q.fields_clause} FROM ( + SELECT {q.fields_clause}, record_type FROM ( SELECT {q.fields_clause}, 'D' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_d FROM `covid_hosp_state_daily` {q.alias} WHERE {cond_clause} ) {q.alias} WHERE row_d = 1 UNION ALL - SELECT {q.fields_clause} FROM ( + SELECT {q.fields_clause}, record_type FROM ( SELECT {q.fields_clause}, 'T' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_t FROM `covid_hosp_state_timeseries` {q.alias} WHERE {cond_clause} ) {q.alias} WHERE row_t = 1 ) {q.alias}''' From efb1a70731872a38b0bbcb83fcc02c69dc6c9f83 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 20:24:26 +0300 Subject: [PATCH 36/42] try inner join --- .../endpoints/covid_hosp_state_timeseries.py | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 91e19a560..fe30412d9 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -178,13 +178,23 @@ def handle(): q.params["as_of"] = as_of union_subquery = f''' ( - SELECT {q.fields_clause}, record_type FROM ( - SELECT {q.fields_clause}, 'D' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_d FROM `covid_hosp_state_daily` {q.alias} WHERE {cond_clause} - ) {q.alias} WHERE row_d = 1 + SELECT *, 'D' AS RECORD_TYPE FROM `covid_hosp_state_daily` c + INNER JOIN ( + SELECT state, date, MAX(issue) AS max_issue + FROM `covid_hosp_state_daily` c + WHERE {cond_clause} + GROUP BY state, date + ) x + ON c.state = x.state AND c.date = x.date AND c.issue = x.max_issue UNION ALL - SELECT {q.fields_clause}, record_type FROM ( - SELECT {q.fields_clause}, 'T' as record_type, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC) row_t FROM `covid_hosp_state_timeseries` 
{q.alias} WHERE {cond_clause} - ) {q.alias} WHERE row_t = 1 + SELECT *, 'D' AS RECORD_TYPE FROM `covid_hosp_state_timeseries` c + INNER JOIN ( + SELECT state, date, MAX(issue) AS max_issue + FROM `covid_hosp_state_daily` c + WHERE {cond_clause} + GROUP BY state, date + ) x + ON c.state = x.state AND c.date = x.date AND c.issue = x.max_issue ) {q.alias}''' query = f''' SELECT {q.fields_clause} FROM ( From 3d0716113e0c71d8c6d0b4a6d8075cd362c21b7d Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 20:25:22 +0300 Subject: [PATCH 37/42] fixup tables --- src/server/endpoints/covid_hosp_state_timeseries.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index fe30412d9..166fe9ad9 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -187,10 +187,10 @@ def handle(): ) x ON c.state = x.state AND c.date = x.date AND c.issue = x.max_issue UNION ALL - SELECT *, 'D' AS RECORD_TYPE FROM `covid_hosp_state_timeseries` c + SELECT *, 'T' AS RECORD_TYPE FROM `covid_hosp_state_timeseries` c INNER JOIN ( SELECT state, date, MAX(issue) AS max_issue - FROM `covid_hosp_state_daily` c + FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} GROUP BY state, date ) x From d8db5c2b1c5046e0bb5e0ffede0107f3f1eb334b Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 20:26:49 +0300 Subject: [PATCH 38/42] and no caps here --- src/server/endpoints/covid_hosp_state_timeseries.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 166fe9ad9..256785a88 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -178,7 +178,7 @@ def handle(): q.params["as_of"] 
= as_of union_subquery = f''' ( - SELECT *, 'D' AS RECORD_TYPE FROM `covid_hosp_state_daily` c + SELECT *, 'D' AS record_type FROM `covid_hosp_state_daily` c INNER JOIN ( SELECT state, date, MAX(issue) AS max_issue FROM `covid_hosp_state_daily` c @@ -187,7 +187,7 @@ def handle(): ) x ON c.state = x.state AND c.date = x.date AND c.issue = x.max_issue UNION ALL - SELECT *, 'T' AS RECORD_TYPE FROM `covid_hosp_state_timeseries` c + SELECT *, 'T' AS record_type FROM `covid_hosp_state_timeseries` c INNER JOIN ( SELECT state, date, MAX(issue) AS max_issue FROM `covid_hosp_state_timeseries` c From cecf9570b7179ea46a28009743866e2ce1ac6455 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 20:32:49 +0300 Subject: [PATCH 39/42] aliases... --- .../endpoints/covid_hosp_state_timeseries.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 256785a88..f415f3be0 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -178,23 +178,23 @@ def handle(): q.params["as_of"] = as_of union_subquery = f''' ( - SELECT *, 'D' AS record_type FROM `covid_hosp_state_daily` c + SELECT *, 'D' AS record_type FROM `covid_hosp_state_daily` cc INNER JOIN ( - SELECT state, date, MAX(issue) AS max_issue + SELECT c.state, c.date, MAX(c.issue) AS max_issue FROM `covid_hosp_state_daily` c WHERE {cond_clause} - GROUP BY state, date + GROUP BY c.state, c.date ) x - ON c.state = x.state AND c.date = x.date AND c.issue = x.max_issue + ON cc.state = x.state AND cc.date = x.date AND cc.issue = x.max_issue UNION ALL - SELECT *, 'T' AS record_type FROM `covid_hosp_state_timeseries` c + SELECT *, 'T' AS record_type FROM `covid_hosp_state_timeseries` cc INNER JOIN ( - SELECT state, date, MAX(issue) AS max_issue + SELECT c.state, c.date, MAX(issue) AS max_issue FROM 
`covid_hosp_state_timeseries` c WHERE {cond_clause} - GROUP BY state, date + GROUP BY c.state, c.date ) x - ON c.state = x.state AND c.date = x.date AND c.issue = x.max_issue + ON cc.state = x.state AND cc.date = x.date AND cc.issue = x.max_issue ) {q.alias}''' query = f''' SELECT {q.fields_clause} FROM ( From 046f1234bc434fcd02a681589c47274b3dda2380 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 19 May 2023 20:42:58 +0300 Subject: [PATCH 40/42] aliases --- src/server/endpoints/covid_hosp_state_timeseries.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index f415f3be0..0e3c2da67 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -178,23 +178,23 @@ def handle(): q.params["as_of"] = as_of union_subquery = f''' ( - SELECT *, 'D' AS record_type FROM `covid_hosp_state_daily` cc + SELECT {q.fields_clause}, 'D' AS record_type FROM `covid_hosp_state_daily` c INNER JOIN ( SELECT c.state, c.date, MAX(c.issue) AS max_issue FROM `covid_hosp_state_daily` c WHERE {cond_clause} GROUP BY c.state, c.date ) x - ON cc.state = x.state AND cc.date = x.date AND cc.issue = x.max_issue + ON c.state = x.state AND c.date = x.date AND c.issue = x.max_issue UNION ALL - SELECT *, 'T' AS record_type FROM `covid_hosp_state_timeseries` cc + SELECT {q.fields_clause}, 'T' AS record_type FROM `covid_hosp_state_timeseries` c INNER JOIN ( SELECT c.state, c.date, MAX(issue) AS max_issue FROM `covid_hosp_state_timeseries` c WHERE {cond_clause} GROUP BY c.state, c.date ) x - ON cc.state = x.state AND cc.date = x.date AND cc.issue = x.max_issue + ON c.state = x.state AND c.date = x.date AND c.issue = x.max_issue ) {q.alias}''' query = f''' SELECT {q.fields_clause} FROM ( From 355abfd0e1930f50794ed025e60100ad102cbcc2 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Sat, 
20 May 2023 00:53:55 +0300 Subject: [PATCH 41/42] Comment & cleanup --- .../endpoints/covid_hosp_state_timeseries.py | 43 +++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index 0e3c2da67..e76663790 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -157,12 +157,16 @@ def handle(): if issues is not None: # Filter for all matching issues q.where_integers("issue", issues) + + # Get all issues matching the conditions from daily & timeseries union_subquery = f''' ( - SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` c WHERE {q.conditions_clause} + SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` {q.alias} WHERE {q.conditions_clause} UNION ALL - SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` c WHERE {q.conditions_clause} + SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` {q.alias} WHERE {q.conditions_clause} ) c''' + + # Prioritize rows with record_type='D' for each issue/date/state group query = f''' SELECT {q.fields_clause} FROM ( SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY issue, date, state ORDER BY record_type) `row` @@ -176,30 +180,35 @@ def handle(): # ...Filter for most recent issues before a given as_of cond_clause += " AND (issue <= :as_of)" q.params["as_of"] = as_of - union_subquery = f''' - ( - SELECT {q.fields_clause}, 'D' AS record_type FROM `covid_hosp_state_daily` c + join_condition = f"{q.alias}.state = x.state AND {q.alias}.date = x.date AND {q.alias}.issue = x.max_issue" + + # Get the rows from the daily & timeseries tables with the highest issue value within each state/date group + join_daily = f''' + SELECT {q.fields_clause}, 'D' AS record_type FROM `covid_hosp_state_daily` {q.alias} INNER JOIN ( - SELECT c.state, c.date, MAX(c.issue) AS max_issue - FROM 
`covid_hosp_state_daily` c + SELECT {q.alias}.state, {q.alias}.date, MAX({q.alias}.issue) AS max_issue + FROM `covid_hosp_state_daily` {q.alias} WHERE {cond_clause} - GROUP BY c.state, c.date + GROUP BY {q.alias}.state, {q.alias}.date ) x - ON c.state = x.state AND c.date = x.date AND c.issue = x.max_issue - UNION ALL - SELECT {q.fields_clause}, 'T' AS record_type FROM `covid_hosp_state_timeseries` c + ON {join_condition} + ''' + join_timeseries = f''' + SELECT {q.fields_clause}, 'T' AS record_type FROM `covid_hosp_state_timeseries` {q.alias} INNER JOIN ( - SELECT c.state, c.date, MAX(issue) AS max_issue - FROM `covid_hosp_state_timeseries` c + SELECT {q.alias}.state, {q.alias}.date, MAX(issue) AS max_issue + FROM `covid_hosp_state_timeseries` {q.alias} WHERE {cond_clause} - GROUP BY c.state, c.date + GROUP BY {q.alias}.state, {q.alias}.date ) x - ON c.state = x.state AND c.date = x.date AND c.issue = x.max_issue - ) {q.alias}''' + ON {join_condition} + ''' + + # Combine daily & timeseries queries, getting the combined latest issues (and prioritizing rows with record_type='D' in a tie) query = f''' SELECT {q.fields_clause} FROM ( SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC, record_type) `row` - FROM {union_subquery} + FROM ({join_daily} UNION ALL {join_timeseries}) {q.alias} ) {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} ''' From c6016d1e4ac55f3624e41744f3c08bf7a7cc96f6 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Fri, 2 Jun 2023 14:29:16 +0300 Subject: [PATCH 42/42] Misc. 
fixes to SQL --- src/ddl/covid_hosp.sql | 2 +- .../endpoints/covid_hosp_state_timeseries.py | 33 ++++++++++--------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/ddl/covid_hosp.sql b/src/ddl/covid_hosp.sql index ca1080a87..0d74a077c 100644 --- a/src/ddl/covid_hosp.sql +++ b/src/ddl/covid_hosp.sql @@ -513,7 +513,7 @@ CREATE TABLE `covid_hosp_state_daily` ( KEY `state_by_issue_and_date` (`issue`, `date`, `state`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 SELECT * FROM covid_hosp_state_timeseries; --- Re-add autoincrement capability +-- AUTO_INCREMENT is not preserved by `CREATE TABLE ... SELECT`; re-add it ALTER TABLE covid_hosp_state_daily MODIFY id INT NOT NULL AUTO_INCREMENT; /* diff --git a/src/server/endpoints/covid_hosp_state_timeseries.py b/src/server/endpoints/covid_hosp_state_timeseries.py index e76663790..e5bce751d 100644 --- a/src/server/endpoints/covid_hosp_state_timeseries.py +++ b/src/server/endpoints/covid_hosp_state_timeseries.py @@ -161,17 +161,17 @@ def handle(): # Get all issues matching the conditions from daily & timeseries union_subquery = f''' ( - SELECT *, 'D' as record_type FROM `covid_hosp_state_daily` {q.alias} WHERE {q.conditions_clause} + SELECT *, 'D' AS record_type FROM `covid_hosp_state_daily` AS {q.alias} WHERE {q.conditions_clause} UNION ALL - SELECT *, 'T' as record_type FROM `covid_hosp_state_timeseries` {q.alias} WHERE {q.conditions_clause} - ) c''' + SELECT *, 'T' AS record_type FROM `covid_hosp_state_timeseries` AS {q.alias} WHERE {q.conditions_clause} + ) AS {q.alias}''' # Prioritize rows with record_type='D' for each issue/date/state group query = f''' SELECT {q.fields_clause} FROM ( - SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY issue, date, state ORDER BY record_type) `row` + SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY issue, date, state ORDER BY record_type) AS `row` FROM {union_subquery} - ) {q.alias} WHERE `row` = 1 ORDER
BY {q.order_clause} ''' else: # Filter for most recent issues @@ -180,36 +180,37 @@ def handle(): # ...Filter for most recent issues before a given as_of cond_clause += " AND (issue <= :as_of)" q.params["as_of"] = as_of + join_condition = f"{q.alias}.state = x.state AND {q.alias}.date = x.date AND {q.alias}.issue = x.max_issue" # Get the rows from the daily & timeseries tables with the highest issue value within each state/date group join_daily = f''' - SELECT {q.fields_clause}, 'D' AS record_type FROM `covid_hosp_state_daily` {q.alias} - INNER JOIN ( + SELECT {q.fields_clause}, 'D' AS record_type FROM `covid_hosp_state_daily` AS {q.alias} + JOIN ( SELECT {q.alias}.state, {q.alias}.date, MAX({q.alias}.issue) AS max_issue - FROM `covid_hosp_state_daily` {q.alias} + FROM `covid_hosp_state_daily` AS {q.alias} WHERE {cond_clause} GROUP BY {q.alias}.state, {q.alias}.date - ) x + ) AS x ON {join_condition} ''' join_timeseries = f''' - SELECT {q.fields_clause}, 'T' AS record_type FROM `covid_hosp_state_timeseries` {q.alias} - INNER JOIN ( + SELECT {q.fields_clause}, 'T' AS record_type FROM `covid_hosp_state_timeseries` AS {q.alias} + JOIN ( SELECT {q.alias}.state, {q.alias}.date, MAX(issue) AS max_issue - FROM `covid_hosp_state_timeseries` {q.alias} + FROM `covid_hosp_state_timeseries` AS {q.alias} WHERE {cond_clause} GROUP BY {q.alias}.state, {q.alias}.date - ) x + ) AS x ON {join_condition} ''' # Combine daily & timeseries queries, getting the combined latest issues (and prioritizing rows with record_type='D' in a tie) query = f''' SELECT {q.fields_clause} FROM ( - SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC, record_type) `row` - FROM ({join_daily} UNION ALL {join_timeseries}) {q.alias} - ) {q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} + SELECT {q.fields_clause}, ROW_NUMBER() OVER (PARTITION BY state, date ORDER BY issue DESC, record_type) AS `row` + FROM ({join_daily} UNION ALL {join_timeseries}) AS {q.alias} + ) AS 
{q.alias} WHERE `row` = 1 ORDER BY {q.order_clause} ''' # send query