Skip to content

Commit a96824c

Browse files
author
fullname
committed
WIP Bexar outlier detection
1 parent 93fee67 commit a96824c

File tree

1 file changed

+118
-0
lines changed

1 file changed

+118
-0
lines changed

slides/day1-afternoon.qmd

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -872,6 +872,124 @@ edfo |>
872872
theme(legend.position = c(.075, .8), legend.background = element_rect(fill = NA), legend.key = element_rect(fill = NA))
873873
```
874874

875+
876+
```{r outlier-bexar}
877+
edfo_bexar <-
878+
county_smoothed_cli_comparison |>
879+
filter(geo_value == "48029") |>
880+
select(geo_value, time_value, case) |>
881+
as_epi_df() |>
882+
group_by(geo_value) |>
883+
mutate(outlier_info = detect_outlr_rm(
884+
x = time_value, y = case
885+
)) |>
886+
ungroup()
887+
888+
edfo_bexar |>
889+
unnest() |>
890+
mutate(case_corrected = replacement) |>
891+
select(geo_value, time_value, case, case_corrected) |>
892+
pivot_longer(starts_with("case")) |>
893+
mutate(
894+
name = case_when(
895+
name == "case_corrected" ~ "corrected",
896+
TRUE ~ "original"
897+
),
898+
name = as.factor(name),
899+
name = fct_relevel(name, "original")
900+
) |>
901+
ggplot(aes(x = time_value)) +
902+
geom_line(aes(y = value, color = name)) +
903+
scale_color_brewer(palette = "Set1") +
904+
geom_hline(yintercept = 0) +
905+
facet_wrap(vars(geo_value), scales = "free_y", nrow = 1) +
906+
scale_x_date(minor_breaks = "month", date_labels = "%b %Y") +
907+
labs(x = "Date", y = "COVID-19 cases") +
908+
theme(legend.position = c(.075, .8), legend.background = element_rect(fill = NA), legend.key = element_rect(fill = NA))
909+
910+
edfo_bexar2 <-
911+
county_smoothed_cli_comparison |>
912+
filter(geo_value == "48029") |>
913+
select(geo_value, time_value, case) |>
914+
as_epi_df() |>
915+
group_by(geo_value) |>
916+
mutate(outlier_info = detect_outlr_rm(
917+
x = time_value, y = case, n = 60
918+
)) |>
919+
ungroup()
920+
921+
edfo_bexar2 |>
922+
unnest() |>
923+
mutate(case_corrected = replacement) |>
924+
select(geo_value, time_value, case, case_corrected) |>
925+
pivot_longer(starts_with("case")) |>
926+
mutate(
927+
name = case_when(
928+
name == "case_corrected" ~ "corrected",
929+
TRUE ~ "original"
930+
),
931+
name = as.factor(name),
932+
name = fct_relevel(name, "original")
933+
) |>
934+
ggplot(aes(x = time_value)) +
935+
geom_line(aes(y = value, color = name)) +
936+
scale_color_brewer(palette = "Set1") +
937+
geom_hline(yintercept = 0) +
938+
facet_wrap(vars(geo_value), scales = "free_y", nrow = 1) +
939+
scale_x_date(minor_breaks = "month", date_labels = "%b %Y") +
940+
labs(x = "Date", y = "COVID-19 cases") +
941+
theme(legend.position = c(.075, .8), legend.background = element_rect(fill = NA), legend.key = element_rect(fill = NA))
942+
943+
944+
sa_fips <- "48029"
945+
sa_start <- "2020-06-15"
946+
sa_end <- "2020-08-15"
947+
sa_anomaly_date <- as.Date("2020-07-16")
948+
sources <- c("jhu-csse", "fb-survey", "doctor-visits", "google-symptoms", "chng")
949+
signals <- c("confirmed_7dav_incidence_num", "smoothed_whh_cmnty_cli",
950+
"smoothed_adj_cli", "sum_anosmia_ageusia_smoothed_search",
951+
"smoothed_adj_outpatient_cli")
952+
reinhart <- map2(sources, signals,
953+
~ pub_covidcast(.x, .y, time_type = "day", geo_type = "county",
954+
time_values = epirange(sa_start, sa_end),
955+
geo_values = sa_fips) |>
956+
select(source, time_value, value))
957+
sa_cross <- reinhart[[1]]$value[33]
958+
sa_scale <- sd(reinhart[[1]]$value, na.rm = TRUE)
959+
reinhart[-1] <- map(
960+
reinhart[-1],
961+
~ mutate(.x, value = (value - value[33]) / sd(value, na.rm = TRUE) *
962+
sa_scale + sa_cross))
963+
reinhart <- list_rbind(reinhart)
964+
965+
g1 <- ggplot(reinhart |> filter(source == "jhu-csse"), aes(x = time_value, y = value)) +
966+
geom_vline(xintercept = sa_anomaly_date, color = "gray", linetype = "dashed",
967+
alpha = 0.75) +
968+
geom_line() +
969+
labs(x = "", y = "Cases", title = "Cases") +
970+
theme_bw()
971+
972+
labels <- c(
973+
`fb-survey` = "Survey-based CLI",
974+
chng = "Outpatient CLI",
975+
`google-symptoms` = "Google searches",
976+
`doctor-visits` = "Insurance claims"
977+
)
978+
979+
980+
g2 <- reinhart |>
981+
filter(source != "jhu-csse") |>
982+
ggplot(aes(x = time_value, y = value, color = source)) +
983+
geom_vline(xintercept = sa_anomaly_date, color = "gray", linetype = "dashed",
984+
size = 0.75, alpha = 0.75) +
985+
geom_line() +
986+
scale_color_delphi(labels = labels, name = "") +
987+
labs(x = "", y = "Signal value (rescaled)", title = "Auxiliary signals") +
988+
theme_bw()
989+
990+
cowplot::plot_grid(g1, g2, ncol = 2, rel_widths = c(.35, .65))
991+
```
992+
875993
## `epi_archive`: Collection of `epi_df`s
876994

877995
* full version history of a data set

0 commit comments

Comments
 (0)