Skip to content

Commit b41e492

Browse files
committed
fixing covid explore wip
1 parent 87f0394 commit b41e492

File tree

5 files changed

+96
-53
lines changed

5 files changed

+96
-53
lines changed

R/utils.R

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -431,9 +431,10 @@ sort_by_quantile <- function(forecasts) {
431431

432432

433433
#' Print recent targets errors.
434-
get_recent_targets_errors <- function(recent_minutes = 60) {
435-
forecast_errors <- targets::tar_meta() %>%
436-
filter(time > Sys.time() - minutes(recent_minutes), !is.na(parent), !is.na(error)) %>%
434+
get_recent_targets_errors <- function(time_since = minutes(60)) {
435+
meta_df <- targets::tar_meta()
436+
forecast_errors <- meta_df %>%
437+
filter(time > Sys.time() - time_since, !is.na(parent), !is.na(error)) %>%
437438
arrange(desc(time)) %>%
438439
distinct(parent, error, .keep_all = TRUE) %>%
439440
select(time, parent, error) %>%
@@ -451,8 +452,8 @@ get_recent_targets_errors <- function(recent_minutes = 60) {
451452
}
452453
}
453454

454-
other_errors <- targets::tar_meta() %>%
455-
filter(time > Sys.time() - minutes(recent_minutes), !is.na(error)) %>%
455+
other_errors <- meta_df %>%
456+
filter(time > Sys.time() - time_since, !is.na(error)) %>%
456457
arrange(desc(time)) %>%
457458
distinct(error, .keep_all = TRUE) %>%
458459
select(time, name, error)
@@ -468,4 +469,6 @@ get_recent_targets_errors <- function(recent_minutes = 60) {
468469
))
469470
}
470471
}
472+
473+
return(invisible(meta_df %>% filter(time > Sys.time() - time_since)))
471474
}

reports/template.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,14 @@ Simplistic/low data methods:
2424
- [Flu flatline](flu-notebook-flatline.html)
2525
- [Flu climate](flu-notebook-climate_linear.html)
2626

27-
### Covid
27+
### Covid (new)
28+
29+
- [Covid AR with population scaling](covid-notebook-scaled_pop_main.html)
30+
- [Covid AR with population scaling and seasonal features](covid-notebook-scaled_pop_season.html)
31+
- [Covid AR with population scaling and exogenous features](covid-notebook-scaled_pop_exogenous.html)
32+
- [Covid Flatline](covid-notebook-flatline_forecaster.html)
33+
34+
### Covid (old)
2835

2936
- [Covid AR with population scaling](covid-notebook-1.html)
3037
- [Covid AR with population scaling and smoothed features](covid-notebook-2.html)

scripts/covid_hosp_explore.R

Lines changed: 60 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ fetch_args <- epidatr::fetch_args_list(return_empty = FALSE, timeout_seconds = 4
1919
# with prototyping the pipeline.
2020
dummy_mode <- as.logical(Sys.getenv("DUMMY_MODE", FALSE))
2121
# For testing, reduce dates
22-
forecast_generation_dates <- forecast_generation_dates[1:10]
23-
forecast_dates <- forecast_dates[1:10]
22+
# forecast_generation_dates <- forecast_generation_dates[1:10]
23+
# forecast_dates <- forecast_dates[1:10]
2424

2525

2626
# ================================ FORECASTER PARAMETERS ====================
@@ -199,6 +199,7 @@ get_partially_applied_forecaster <- function(id) {
199199
}
200200
}
201201

202+
202203
# ================================ TARGETS =================================
203204
# ================================ PARAMETERS TARGETS ======================
204205
parameter_targets <- list2(
@@ -232,7 +233,7 @@ data_targets <- list2(
232233
}
233234
),
234235
tar_target(
235-
name = hhs_latest_data,
236+
name = hhs_evaluation_data,
236237
command = {
237238
epidatr::pub_covidcast(
238239
source = "hhs",
@@ -242,20 +243,20 @@ data_targets <- list2(
242243
geo_values = "*",
243244
time_values = "*",
244245
fetch_args = fetch_args
245-
)
246+
) %>%
247+
select(signal, geo_value, time_value, value) %>%
248+
daily_to_weekly(keys = c("geo_value", "signal")) %>%
249+
select(signal, geo_value, target_end_date = time_value, true_value = value) %>%
250+
# Correction for timing offsets
251+
mutate(target_end_date = target_end_date + 3)
246252
}
247253
),
248254
tar_target(
249-
name = hhs_evaluation_data,
255+
name = state_geo_values,
250256
command = {
251-
hhs_latest_data %>%
252-
select(signal, geo_value, time_value, value) %>%
253-
daily_to_weekly(keys = c("geo_value", "signal")) %>%
254-
rename(
255-
true_value = value,
256-
target_end_date = time_value
257-
) %>%
258-
select(signal, geo_value, target_end_date, true_value)
257+
hhs_evaluation_data %>%
258+
pull(geo_value) %>%
259+
unique()
259260
}
260261
),
261262
tar_target(
@@ -490,13 +491,7 @@ forecasts_and_scores <- tar_map(
490491
forecast_scaled <- forecast
491492
actual_eval_data <- hhs_evaluation_data
492493
}
493-
# Fix for timing offsets
494-
actual_eval_data <- actual_eval_data %>% mutate(target_end_date = target_end_date + 3)
495-
state_geo_values <- actual_eval_data %>%
496-
pull(geo_value) %>%
497-
unique()
498494
forecast_scaled <- forecast_scaled %>%
499-
filter(geo_value %in% state_geo_values) %>%
500495
mutate(forecast_date = forecast_date + 3, target_end_date = target_end_date + 3) %>%
501496
rename("model" = "id")
502497

@@ -512,34 +507,52 @@ combined_forecasts_and_scores <- rlang::list2(
512507
tar_combine(
513508
delphi_forecasts,
514509
forecasts_and_scores[["forecast"]],
515-
command = dplyr::bind_rows(!!!.x) %>% rename(forecaster = id)
510+
command = {
511+
dplyr::bind_rows(!!!.x) %>%
512+
rename(forecaster = id) %>%
513+
filter(geo_value %in% state_geo_values) %>%
514+
mutate(forecast_date = forecast_date + 3, target_end_date = target_end_date + 3)
515+
}
516516
),
517517
tar_combine(
518518
delphi_scores,
519519
forecasts_and_scores[["score"]],
520-
command = dplyr::bind_rows(!!!.x) %>% rename(forecaster = id)
520+
command = {
521+
dplyr::bind_rows(!!!.x) %>%
522+
rename(forecaster = id) %>%
523+
filter(geo_value %in% state_geo_values)
524+
}
521525
),
522526
)
523527
external_forecasts_and_scores <- rlang::list2(
524528
tar_target(
525-
external_forecasts,
529+
outside_forecaster_subset,
530+
command = c("COVIDhub-baseline", "COVIDhub-trained_ensemble", "COVIDhub_CDC-ensemble")
531+
),
532+
tar_target(
533+
external_forecasts_file,
526534
command = {
527535
s3load("covid19_forecast_hub_2023.rds", bucket = "forecasting-team-data", verbose = FALSE)
528536
full_results
529537
}
530538
),
531539
tar_target(
532-
external_scores,
540+
external_forecasts,
533541
command = {
534-
actual_eval_data <- hhs_evaluation_data %>%
535-
mutate(target_end_date = target_end_date + 3)
536-
cmu_forecast_dates <- ref_time_values + 3
537-
filtered_forecasts <- external_forecasts %>%
542+
external_forecasts_file %>%
543+
filter(geo_value %in% state_geo_values, forecaster %in% outside_forecaster_subset) %>%
538544
mutate(forecast_date = forecast_date + 5, target_end_date = target_end_date + 5) %>%
539-
filter(forecast_date %in% cmu_forecast_dates) %>%
540-
rename(model = forecaster) %>%
545+
filter(forecast_date %in% (ref_time_values + 3)) %>%
541546
rename(prediction = value) %>%
542-
filter(!is.na(geo_value))
547+
mutate(prediction = prediction * 7)
548+
}
549+
),
550+
tar_target(
551+
external_scores,
552+
command = {
553+
actual_eval_data <- hhs_evaluation_data
554+
filtered_forecasts <- external_forecasts %>%
555+
rename(model = forecaster)
543556
evaluate_predictions(forecasts = filtered_forecasts, truth_data = actual_eval_data) %>%
544557
rename(forecaster = model)
545558
}
@@ -551,14 +564,28 @@ joined_forecasts_and_scores <- rlang::list2(
551564
tar_target(
552565
family_notebooks,
553566
command = {
554-
actual_eval_data <- hhs_evaluation_data %>%
555-
mutate(target_end_date = target_end_date + 3)
567+
actual_eval_data <- hhs_evaluation_data
556568
delphi_forecaster_subset <- forecaster_parameter_combinations[[forecaster_families]]$id
557-
outside_forecaster_subset <- c("COVIDhub-baseline", "COVIDhub-ensemble")
569+
558570
filtered_forecasts <- joined_forecasts %>%
559571
filter(forecaster %in% c(delphi_forecaster_subset, outside_forecaster_subset))
560572
filtered_scores <- joined_scores %>%
561573
filter(forecaster %in% c(delphi_forecaster_subset, outside_forecaster_subset))
574+
575+
# TODO: Write an assert to make sure that these dates are similar. It's a bit tricky.
576+
# actual_eval_data %>%
577+
# filter(target_end_date > "2023-09-01") %>%
578+
# distinct(target_end_date) %>%
579+
# pull(target_end_date) %>%
580+
# sort()
581+
# filtered_forecasts %>%
582+
# distinct(target_end_date) %>%
583+
# pull(target_end_date) %>%
584+
# sort()
585+
# filtered_scores %>%
586+
# distinct(target_end_date) %>%
587+
# pull(target_end_date) %>%
588+
# sort()
562589
forecaster_parameters <- forecaster_parameter_combinations[[forecaster_families]]
563590
rmarkdown::render(
564591
"scripts/reports/comparison-notebook.Rmd",

scripts/reports/comparison-notebook.Rmd

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,23 +40,23 @@ library(purrr)
4040
```
4141

4242
```{r}
43+
# outside_forecaster_subset <- c("COVIDhub-baseline", "COVIDhub-trained_ensemble", "COVIDhub_CDC-ensemble")
44+
# i <- 1
4345
# params <- list(
44-
# forecaster_parameter_combinations = tar_read(forecaster_parameter_combinations),
45-
# forecaster_family = "scaled_pop_season",
46-
# joined_forecasts = tar_read(joined_forecasts),
47-
# joined_scores = tar_read(joined_scores),
48-
# truth_data = tar_read(hhs_evaluation_data) %>%
49-
# select(-population) %>%
50-
# mutate(target_end_date = target_end_date + 3),
51-
# disease = "flu"
46+
# forecaster_family = forecaster_families[[i]],
47+
# forecaster_parameters = forecaster_parameter_combinations[[i]],
48+
# forecasts = tar_read(joined_forecasts) %>% filter(forecaster %in% c(forecaster_parameter_combinations[[i]]$id, outside_forecaster_subset)),
49+
# scores = tar_read(joined_scores) %>% filter(forecaster %in% c(forecaster_parameter_combinations[[i]]$id, outside_forecaster_subset)),
50+
# truth_data = tar_read(hhs_evaluation_data),
51+
# disease = "covid"
5252
# )
5353
5454
if (params$disease == "flu") {
5555
base_forecaster_name <- "FluSight-baseline"
5656
ensemble_forecaster_name <- "FluSight-ensemble"
5757
} else {
5858
base_forecaster_name <- "COVIDhub-baseline"
59-
ensemble_forecaster_name <- "COVIDhub-ensemble"
59+
ensemble_forecaster_name <- "COVIDhub_CDC-ensemble"
6060
}
6161
6262
# Load scores and filter them, get global variables
@@ -96,17 +96,23 @@ The table is sorted by ascending WIS and contains all the forecasters in this no
9696
if (params$disease == "flu") {
9797
ignore_keys <- c("forecaster", "keys_to_ignore", "pop_scaling")
9898
} else {
99-
ignore_keys <- c("forecaster", "keys_to_ignore")
99+
ignore_keys <- c("forecaster", "keys_to_ignore", "outcome")
100100
}
101101
102102
param_table <- params$forecaster_parameters %>%
103103
select(-any_of(ignore_keys)) %>%
104104
{
105105
if ("n_training" %in% colnames(.)) {
106-
(.) %>% mutate(n_training = as.character(n_training))
106+
. <- (.) %>% mutate(n_training = as.character(n_training))
107107
} else {
108108
.
109109
}
110+
if ("trainer" %in% colnames(.)) {
111+
. <- (.) %>% mutate(trainer = trainer[[1]])
112+
} else {
113+
.
114+
}
115+
.
110116
} %>%
111117
full_join(
112118
scores %>%

scripts/targets-common.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ suppressPackageStartupMessages({
44

55
# On tanka, we have 64 cores, but we leave some free to try to reduce thrashing
66
# and to allow for other users.
7-
if (parallel::detectCores() < 30) {
8-
num_workers <- parallel::detectCores() - 1L
7+
if (parallel::detectCores() == 64) {
8+
num_workers <- 30L
99
} else {
10-
num_workers <- parallel::detectCores() - 20L
10+
num_workers <- parallel::detectCores() - 1L
1111
}
1212

1313
main_controller <- crew_controller_local(

0 commit comments

Comments
 (0)