@@ -19,8 +19,8 @@ fetch_args <- epidatr::fetch_args_list(return_empty = FALSE, timeout_seconds = 4
19
19
# with prototyping the pipeline.
20
20
dummy_mode <- as.logical(Sys.getenv(" DUMMY_MODE" , FALSE ))
21
21
# For testing, reduce dates
22
- forecast_generation_dates <- forecast_generation_dates [1 : 10 ]
23
- forecast_dates <- forecast_dates [1 : 10 ]
22
+ # forecast_generation_dates <- forecast_generation_dates[1:10]
23
+ # forecast_dates <- forecast_dates[1:10]
24
24
25
25
26
26
# ================================ FORECASTER PARAMETERS ====================
@@ -199,6 +199,7 @@ get_partially_applied_forecaster <- function(id) {
199
199
}
200
200
}
201
201
202
+
202
203
# ================================ TARGETS =================================
203
204
# ================================ PARAMETERS TARGETS ======================
204
205
parameter_targets <- list2(
@@ -232,7 +233,7 @@ data_targets <- list2(
232
233
}
233
234
),
234
235
tar_target(
235
- name = hhs_latest_data ,
236
+ name = hhs_evaluation_data ,
236
237
command = {
237
238
epidatr :: pub_covidcast(
238
239
source = " hhs" ,
@@ -242,20 +243,20 @@ data_targets <- list2(
242
243
geo_values = " *" ,
243
244
time_values = " *" ,
244
245
fetch_args = fetch_args
245
- )
246
+ ) %> %
247
+ select(signal , geo_value , time_value , value ) %> %
248
+ daily_to_weekly(keys = c(" geo_value" , " signal" )) %> %
249
+ select(signal , geo_value , target_end_date = time_value , true_value = value ) %> %
250
+ # Correction for timing offsets
251
+ mutate(target_end_date = target_end_date + 3 )
246
252
}
247
253
),
248
254
tar_target(
249
- name = hhs_evaluation_data ,
255
+ name = state_geo_values ,
250
256
command = {
251
- hhs_latest_data %> %
252
- select(signal , geo_value , time_value , value ) %> %
253
- daily_to_weekly(keys = c(" geo_value" , " signal" )) %> %
254
- rename(
255
- true_value = value ,
256
- target_end_date = time_value
257
- ) %> %
258
- select(signal , geo_value , target_end_date , true_value )
257
+ hhs_evaluation_data %> %
258
+ pull(geo_value ) %> %
259
+ unique()
259
260
}
260
261
),
261
262
tar_target(
@@ -490,13 +491,7 @@ forecasts_and_scores <- tar_map(
490
491
forecast_scaled <- forecast
491
492
actual_eval_data <- hhs_evaluation_data
492
493
}
493
- # Fix for timing offsets
494
- actual_eval_data <- actual_eval_data %> % mutate(target_end_date = target_end_date + 3 )
495
- state_geo_values <- actual_eval_data %> %
496
- pull(geo_value ) %> %
497
- unique()
498
494
forecast_scaled <- forecast_scaled %> %
499
- filter(geo_value %in% state_geo_values ) %> %
500
495
mutate(forecast_date = forecast_date + 3 , target_end_date = target_end_date + 3 ) %> %
501
496
rename(" model" = " id" )
502
497
@@ -512,34 +507,52 @@ combined_forecasts_and_scores <- rlang::list2(
512
507
tar_combine(
513
508
delphi_forecasts ,
514
509
forecasts_and_scores [[" forecast" ]],
515
- command = dplyr :: bind_rows(!!! .x ) %> % rename(forecaster = id )
510
+ command = {
511
+ dplyr :: bind_rows(!!! .x ) %> %
512
+ rename(forecaster = id ) %> %
513
+ filter(geo_value %in% state_geo_values ) %> %
514
+ mutate(forecast_date = forecast_date + 3 , target_end_date = target_end_date + 3 )
515
+ }
516
516
),
517
517
tar_combine(
518
518
delphi_scores ,
519
519
forecasts_and_scores [[" score" ]],
520
- command = dplyr :: bind_rows(!!! .x ) %> % rename(forecaster = id )
520
+ command = {
521
+ dplyr :: bind_rows(!!! .x ) %> %
522
+ rename(forecaster = id ) %> %
523
+ filter(geo_value %in% state_geo_values )
524
+ }
521
525
),
522
526
)
523
527
external_forecasts_and_scores <- rlang :: list2(
524
528
tar_target(
525
- external_forecasts ,
529
+ outside_forecaster_subset ,
530
+ command = c(" COVIDhub-baseline" , " COVIDhub-trained_ensemble" , " COVIDhub_CDC-ensemble" )
531
+ ),
532
+ tar_target(
533
+ external_forecasts_file ,
526
534
command = {
527
535
s3load(" covid19_forecast_hub_2023.rds" , bucket = " forecasting-team-data" , verbose = FALSE )
528
536
full_results
529
537
}
530
538
),
531
539
tar_target(
532
- external_scores ,
540
+ external_forecasts ,
533
541
command = {
534
- actual_eval_data <- hhs_evaluation_data %> %
535
- mutate(target_end_date = target_end_date + 3 )
536
- cmu_forecast_dates <- ref_time_values + 3
537
- filtered_forecasts <- external_forecasts %> %
542
+ external_forecasts_file %> %
543
+ filter(geo_value %in% state_geo_values , forecaster %in% outside_forecaster_subset ) %> %
538
544
mutate(forecast_date = forecast_date + 5 , target_end_date = target_end_date + 5 ) %> %
539
- filter(forecast_date %in% cmu_forecast_dates ) %> %
540
- rename(model = forecaster ) %> %
545
+ filter(forecast_date %in% (ref_time_values + 3 )) %> %
541
546
rename(prediction = value ) %> %
542
- filter(! is.na(geo_value ))
547
+ mutate(prediction = prediction * 7 )
548
+ }
549
+ ),
550
+ tar_target(
551
+ external_scores ,
552
+ command = {
553
+ actual_eval_data <- hhs_evaluation_data
554
+ filtered_forecasts <- external_forecasts %> %
555
+ rename(model = forecaster )
543
556
evaluate_predictions(forecasts = filtered_forecasts , truth_data = actual_eval_data ) %> %
544
557
rename(forecaster = model )
545
558
}
@@ -551,14 +564,28 @@ joined_forecasts_and_scores <- rlang::list2(
551
564
tar_target(
552
565
family_notebooks ,
553
566
command = {
554
- actual_eval_data <- hhs_evaluation_data %> %
555
- mutate(target_end_date = target_end_date + 3 )
567
+ actual_eval_data <- hhs_evaluation_data
556
568
delphi_forecaster_subset <- forecaster_parameter_combinations [[forecaster_families ]]$ id
557
- outside_forecaster_subset <- c( " COVIDhub-baseline " , " COVIDhub-ensemble " )
569
+
558
570
filtered_forecasts <- joined_forecasts %> %
559
571
filter(forecaster %in% c(delphi_forecaster_subset , outside_forecaster_subset ))
560
572
filtered_scores <- joined_scores %> %
561
573
filter(forecaster %in% c(delphi_forecaster_subset , outside_forecaster_subset ))
574
+
575
+ # TODO: Write an assert to make sure that these dates are similar. It's a bit tricky.
576
+ # actual_eval_data %>%
577
+ # filter(target_end_date > "2023-09-01") %>%
578
+ # distinct(target_end_date) %>%
579
+ # pull(target_end_date) %>%
580
+ # sort()
581
+ # filtered_forecasts %>%
582
+ # distinct(target_end_date) %>%
583
+ # pull(target_end_date) %>%
584
+ # sort()
585
+ # filtered_scores %>%
586
+ # distinct(target_end_date) %>%
587
+ # pull(target_end_date) %>%
588
+ # sort()
562
589
forecaster_parameters <- forecaster_parameter_combinations [[forecaster_families ]]
563
590
rmarkdown :: render(
564
591
" scripts/reports/comparison-notebook.Rmd" ,
0 commit comments