Skip to content

Commit f2f56c2

Browse files
committed
fix: hub data Julia script, notebook
1 parent b41e492 commit f2f56c2

File tree

3 files changed

+22
-33
lines changed

3 files changed

+22
-33
lines changed

scripts/covid_hosp_explore.R

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ get_partially_applied_forecaster <- function(id) {
203203
# ================================ TARGETS =================================
204204
# ================================ PARAMETERS TARGETS ======================
205205
parameter_targets <- list2(
206-
tar_target(name = aheads, command = c(0, 7, 14, 21)),
206+
tar_target(name = aheads, command = c(0, 7, 14, 21, 28)),
207207
tar_target(name = ref_time_values, command = forecast_dates),
208208
# This is used for parameter lookup.
209209
tar_target(name = forecaster_parameter_grid, command = forecaster_parameter_combinations),
@@ -532,8 +532,10 @@ external_forecasts_and_scores <- rlang::list2(
532532
tar_target(
533533
external_forecasts_file,
534534
command = {
535-
s3load("covid19_forecast_hub_2023.rds", bucket = "forecasting-team-data", verbose = FALSE)
536-
full_results
535+
s3load("covid19_forecast_hub_2023_full_summed.rds", bucket = "forecasting-team-data", verbose = FALSE)
536+
full_results %>%
537+
mutate(target_end_date = as.Date(forecast_date) + 7 * as.numeric(week_ahead)) %>%
538+
rename(ahead = week_ahead)
537539
}
538540
),
539541
tar_target(

scripts/one_offs/read_covid_forecast_hub_data.jl

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,33 +2,32 @@
22
# specifically in the data-processed folder
33
# to get the rds, run
44
#
5-
# full_results <- readr::read_csv("../covid19-forecast-hub/data-processed/covid19-2023season-results.csv")
6-
# aws.s3::s3save(full_results, object = "covid19_forecast_hub_2023.rds", bucket = "forecasting-team-data")
5+
# full_results <- readr::read_csv("../OLDcovid19-forecast-hub/data-processed/covid19-2023season-results.csv")
6+
# aws.s3::s3save(full_results, object = "covid19_forecast_hub_2023_full_summed.rds", bucket = "forecasting-team-data")
77
#
8+
using Base: floatrange
89
using CSV
910
using DataFrames
1011
using DataFramesMeta
1112
using Dates
1213
using RData
14+
import Base.lowercase
1315
pwd()
14-
res = CSV.read("COVIDhub-ensemble/2023-10-02-COVIDhub-ensemble.csv", DataFrame)
15-
pathname = "COVIDhub-ensemble/"
16-
filename = "2023-10-02-COVIDhub-ensemble.csv"
16+
res = CSV.read("COVIDhub_CDC-ensemble/2023-10-02-COVIDhub_CDC-ensemble.csv", DataFrame)
17+
pathname = "COVIDhub_CDC-ensemble/"
18+
filename = "2023-10-02-COVIDhub_CDC-ensemble.csv"
1719
state_names = CSV.read("../data-locations/locations.csv", DataFrame)
1820
lowercase(m::Missing) = m
1921
@rtransform! state_names @passmissing :abbreviation = lowercase(:abbreviation)
2022
@select! state_names :abbreviation :location
21-
2223
function format_file(pathname, filename, state_names)
2324
if length(filename) < 10 ||
2425
match(r"[0-9]{4}-[0-9]{2}-[0-9]{2}", filename[1:10]) == nothing ||
2526
Date(filename[1:10]) < Date(2023, 1, 1)
2627
return DataFrame()
2728
end
2829
println(joinpath(pathname, filename))
29-
30-
res = CSV.read(joinpath(pathname, filename), DataFrame, missingstring="NA")
31-
30+
res = CSV.read(joinpath(pathname, filename), DataFrame, missingstring="NA", types=Dict("value" => Float64))
3231
if !("forecast_date" in names(res)) ||
3332
res[!, :forecast_date] |> minimum < Date(2023, 1, 1)
3433
return DataFrame()
@@ -41,7 +40,11 @@ function format_file(pathname, filename, state_names)
4140
end
4241
res = leftjoin(res, state_names, on=:location)
4342
@select! res :forecaster :geo_value = :abbreviation :forecast_date :target_end_date :ahead = :target :quantile :value
44-
res
43+
@chain res begin
44+
@rtransform :week_ahead = div(:ahead, 7)
45+
@groupby :forecaster :geo_value :forecast_date :week_ahead :quantile
46+
@combine :value = sum(:value)
47+
end
4548
end
4649
results = DataFrame[]
4750
for (root, dirs, files) in walkdir(".")
@@ -50,11 +53,4 @@ for (root, dirs, files) in walkdir(".")
5053
end
5154
end
5255
full_results = vcat(results...)
53-
CSV.write("covid19-2023season-results.csv", full_results)
54-
full_results[!, :forecaster] |> unique
55-
@rsubset! full_results :ahead % 7 == 0
56-
@rtransform! full_results :forecaster = :forecaster[3:end]
57-
"./fqfae"[3:end]
58-
3 % 7
59-
@rsubset full_results !ismissing(:geo_value) :forecast_date == Date(2023,11,13)
60-
@rsubset res :forecast_date == Date(2023,11,0)
56+
CSV.write("covid19-2023season-results.csv", full_results)

scripts/reports/comparison-notebook.Rmd

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -177,21 +177,11 @@ subtitle <- sprintf(
177177
format(max(forecast_dates), "%B %d, %Y")
178178
)
179179
p <- ggplot(
180-
normalized_df %>% filter(forecaster != ensemble_forecaster_name),
180+
normalized_df,
181181
aes(x = forecast_date, y = !!sym(var))
182182
) +
183183
geom_line(aes(color = forecaster, group = forecaster)) +
184184
geom_point(aes(color = forecaster, group = forecaster)) +
185-
geom_line(
186-
data = normalized_df %>% filter(forecaster == ensemble_forecaster_name),
187-
aes(x = forecast_date, y = !!sym(var)),
188-
color = "black", linetype = 2
189-
) +
190-
geom_point(
191-
data = normalized_df %>% filter(forecaster == ensemble_forecaster_name),
192-
aes(x = forecast_date, y = !!sym(var)),
193-
color = "black", shape = 21, fill = "white"
194-
) +
195185
geom_hline(yintercept = 1, linetype = 1, color = "black") +
196186
facet_grid(rows = vars(ahead)) +
197187
facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead = facets.label)) +
@@ -602,7 +592,7 @@ Fan plots showing the 90% prediction intervals for the forecasts made by the CMU
602592
if (params$disease == "flu") {
603593
plot_dates <- seq.Date(as.Date("2023-10-07"), by = "4 weeks", length.out = 8)
604594
} else {
605-
plot_dates <- seq.Date(as.Date("2023-07-03"), by = "4 weeks", length.out = 7)
595+
plot_dates <- seq.Date(as.Date("2023-10-07"), by = "4 weeks", length.out = 7)
606596
}
607597
```
608598

@@ -611,6 +601,7 @@ if (params$disease == "flu") {
611601
```{r}
612602
# We plot a subset of the dates and geos for the fan plot
613603
geo_vals <- c("ca", "fl", "pa", "tx", "ny")
604+
# geo_vals <- c("ca")
614605
forecast_subset <- params$forecasts %>%
615606
filter(
616607
geo_value %in% geo_vals,

0 commit comments

Comments
 (0)