# wrangle-paper.R
# Forked from rafalab/vacunaspr.
library(data.table)
library(tidyverse)
library(lubridate)
library(scales)
library(tidycensus)
# Directory holding the .rda files this script loads from and saves to.
rda_path <- "rdas"
# Year whose `poblacion_<year>` column is kept as the population estimate.
pop_year <- 2019
# Age groups --------------------------------------------------------------
# Collapse fine-grained age rows into the broader groups starting at
# `age_starts`.
#
# tab:        data.table with columns `start`, `end` and `poblacion`,
#             optionally `se`, plus any other grouping columns. The input is
#             not modified; the work is done on a copy.
# age_starts: numeric lower bounds of the target age groups; the last group
#             is open-ended.
#
# Returns a data.table keyed by the remaining grouping columns and `ageRange`
# (a factor labelled "a-b", with "a+" for the last group). `poblacion` is
# summed within each group; `se`, when present, is combined as the square
# root of the summed squares.
collapse_age <- function(tab, age_starts) {
  lower <- sort(age_starts)
  n_groups <- length(lower)
  age_labels <- c(
    paste(lower[-n_groups], lower[-1] - 1, sep = "-"),
    paste0(lower[n_groups], "+")
  )

  has_se <- "se" %in% names(tab)
  sum_vars <- if (has_se) c("poblacion", "se") else "poblacion"

  out <- copy(tab)
  # Rows are assigned to a group by their `start` age only.
  out[, ageRange := cut(start, c(age_starts, Inf),
                        labels = age_labels,
                        right = FALSE, include.lowest = TRUE)]
  out[, c("start", "end") := NULL]

  # Standard errors add on the variance scale: square, sum, then take the root.
  if (has_se) {
    out[, se := se^2]
  }
  group_cols <- setdiff(names(out), sum_vars)
  out <- out[, lapply(.SD, sum), keyby = group_cols, .SDcols = sum_vars]
  if (has_se) {
    out[, se := sqrt(se)]
  }

  out[, ageRange := factor(ageRange, levels = age_labels)]
  out[]
}
# Replace the 10-14 age row of each group with two rows, 10-11 and 12-14.
#
# tab: data.table with columns `start`, `end`, `poblacion`, `gender`,
#      optionally `se`, plus any other grouping columns.
#
# Rows whose `start` is 10 or 12 are taken out and, within each combination of
# the remaining columns, rebuilt as a 10-11 row with 2/5 of `poblacion` and a
# 12-14 row with 3/5 of it. When `se` is present it is scaled by the square
# root of the same shares. Presumably each group holds a single such row (the
# 10-14 one) -- verify against the input tables.
#
# Returns the untouched rows plus the rebuilt ones, ordered by `start` and
# `gender`.
split_10_14 <- function(tab) {
  has_se <- "se" %in% names(tab)

  untouched <- tab[!start %in% c(10, 12)]
  to_split <- tab[start %in% c(10, 12)]

  # Build the two replacement rows for one group of `to_split`.
  split_rows <- function(grp) {
    pieces <- list(
      start = c(10, 12),
      end = c(11, 14),
      poblacion = grp$poblacion * c(2, 3) / 5
    )
    if (has_se) {
      pieces$se <- grp$se * sqrt(c(2, 3) / 5)
    }
    do.call(data.table, pieces)
  }

  by_cols <- setdiff(names(to_split), c("start", "end", "poblacion", "se"))
  to_split <- to_split[, split_rows(.SD), by = by_cols]
  # Grouping moves the `by` columns to the front; restore the input's order.
  setcolorder(to_split, names(untouched))

  combined <- rbindlist(list(untouched, to_split))
  combined[order(start, gender)][]
}
# Load the population tables (file name suggests ACS estimates). `raw_pop` and
# `raw_pop_municipio`, used below, are not defined anywhere else in this file,
# so they are presumably restored by this load() -- TODO confirm.
load(file.path(rda_path, "population-tabs-acs.rda"))
# Municipality factor levels plus an extra "No reportado" level.
muni_levels <- c(levels(raw_pop_municipio$municipio), "No reportado")
# Manufacturer codes. Not used again in this file; presumably read by
# compute-counts.R -- the meaning of each code is not shown here.
manu_levels <- c("UNV", "MOD", "PFR", "JSN")
# Reference dates. Not used again in this file; the names suggest the first
# day of pediatric doses, boosters and J&J boosters -- verify against the
# script that consumes them.
first_ped_day <- make_date(2021, 11, 04)
first_booster_day <- make_date(2021, 8, 13)
first_jnj_booster_day <- make_date(2021, 10, 22)
## pick a year to use as population estimates
# For each table: rename the `poblacion_<pop_year>` column to `poblacion`,
# then drop every column whose name still contains "poblacion_". The renamed
# column no longer matches that pattern, so it is kept. `out` is reused as a
# scratch variable for the names being dropped.
setnames(raw_pop, paste0("poblacion_", pop_year), "poblacion")
out <- str_subset(names(raw_pop), "poblacion_")
raw_pop[, (out) := NULL]
setnames(raw_pop_municipio, paste0("poblacion_", pop_year), "poblacion")
out <- str_subset(names(raw_pop_municipio), "poblacion_")
raw_pop_municipio[, (out) := NULL]
## split 10-14
# Break the 10-14 row into 10-11 and 12-14; `age_starts` below has a group
# that starts at 12.
raw_pop <- split_10_14(raw_pop)
raw_pop_municipio <- split_10_14(raw_pop_municipio)
# Lower bounds of the age groups used for the collapsed population tables.
age_starts <- c(0, 5, 12, 18, 30, 40, 50, 60, 70, 80)
pop_by_age_gender <- collapse_age(raw_pop, age_starts)
pop_by_age_gender_municipio <- collapse_age(raw_pop_municipio, age_starts)
# Age-group labels, in order, taken from the factor levels set by collapse_age().
age_levels <- levels(pop_by_age_gender$ageRange)
# Total population and its standard error. Per-row standard errors are
# combined as the square root of the sum of their squares.
pr_pop <- sum(raw_pop$poblacion)
pr_pop_se <- sqrt(sum(raw_pop$se^2))
# Adult (18+) population and its standard error. Rows are selected with
# `start >= 18`: the earlier `end<=17` filter kept only the under-18 rows, the
# opposite of what the `pr_adult_pop` name promises. Filtering on `start`
# matches how collapse_age() assigns rows to groups, and 18 is one of the
# `age_starts` boundaries.
pr_adult_pop <- sum(raw_pop[start >= 18]$poblacion)
pr_adult_pop_se <- sqrt(sum(raw_pop[start >= 18]$se^2))
message("Wrangling cases.")
# `counts_age_starts` is not used again in this file; presumably
# compute-counts.R reads it -- verify.
counts_age_starts <- age_starts
# Run compute-counts.R in this session. The objects saved and removed below
# (`dat_cases_vax`, `pop_vax`, `pop_par`, `all_bst_combs`, `pop_unvax`,
# `counts`) are not defined in this file, so they are presumably created by
# that script -- TODO confirm.
source("compute-counts.R")
save(dat_cases_vax, file = file.path(rda_path, "dat_cases_vax.rda"))
# Remove intermediates that are not used again in this file, running the
# garbage collector after each removal.
rm(dat_cases_vax); gc();gc()
rm(pop_vax); gc(); gc()
rm(pop_par); gc(); gc()
rm(all_bst_combs); gc(); gc()
rm(pop_unvax); gc();gc()
# Prepare data for dashboard ----------------------------------------------
### collapse ageRanges
# NOTE(review): nothing is collapsed here and the output file is named
# "paper-counts.rda"; the header and comment above look carried over from
# another script -- confirm.
save(counts, file=file.path(rda_path ,"paper-counts.rda"))