-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path1_censobrDataExtraction.Rmd
80 lines (68 loc) · 1.85 KB
/
1_censobrDataExtraction.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
---
title: "Para Data Extraction Notebook"
output: html_notebook
---
```{r}
# Load necessary libraries
library(censobr)
library(arrow)
library(dplyr)
library(ggplot2)
```
## Download Census Tract data
```{r}
# Basic ("Basico") tract-level table from the 2010 census.
# `cache = FALSE` is passed only for this read; the other read_tracts() calls
# in this notebook leave the cache argument at its default.
tract_basico <- read_tracts(
  dataset = "Basico",
  year = 2010,
  cache = FALSE,
  showProgress = TRUE
)
```
```{r}
# Household income ("DomicilioRenda") tract-level table from the 2010 census.
tract_income <- read_tracts(
  dataset = "DomicilioRenda",
  year = 2010,
  showProgress = TRUE
)
```
```{r}
# Person-level ("Pessoa") tract table from the 2010 census; the pessoa01_*
# columns selected further down come from this table.
tract_pessoa <- read_tracts(
  dataset = "Pessoa",
  year = 2010,
  showProgress = TRUE
)
```
```{r}
# Look up variable definitions in the 2010 census tract data dictionary
# (uncomment the call below to run it)
# data_dictionary(year = 2010, dataset = "tracts", showProgress = FALSE)
```
```{r}
# Keep only the tract identifier plus the variables used in later chunks.

# Population
tract_basico <- select(tract_basico, "code_tract", "V002")

# Income (Reais)
tract_income <- select(tract_income, "code_tract", "V003")

# Population by age: the 32 consecutive columns pessoa01_V021 through
# pessoa01_V052, generated instead of being typed out one by one.
# NOTE(review): the "population by age" label is carried over from the
# original comment; confirm the meaning of these codes in the data dictionary.
tract_idade <- select(
  tract_pessoa,
  all_of(c("code_tract", sprintf("pessoa01_V%03d", 21:52)))
)
```
```{r}
# Merge all variables into one single data frame, keyed on the tract code.
# The join key is spelled out: after the column selection above, `code_tract`
# is the only column these tables share, so the result matches the implicit
# natural join while avoiding the "Joining with `by = ...`" message and
# guarding against a silent key change if more columns are selected later.
tracts_df_temp <- left_join(tract_basico, tract_income, by = "code_tract")
# collect() materialises the joined result as a local data frame
# (presumably the inputs are lazy Arrow objects -- confirm against
# read_tracts()'s return type).
tracts_df <- left_join(tracts_df_temp, tract_idade, by = "code_tract") |>
  collect()
```
```{r}
# Income per capita (Reais): tract income (V003) divided by population (V002).
# Note that `/` yields Inf or NaN where V002 is 0, and NA where a value is
# missing; such rows are kept as they are.
tracts_df <- mutate(tracts_df, income_pc = V003 / V002)
tracts_df |> head()
```
```{r}
# Export the merged table to CSV. `row.names = TRUE` also writes the row names
# as an extra, unnamed first column.
# NOTE(review): the file name ends in "para", but no state filter is visible
# in this notebook -- confirm whether the data should be restricted first.
write.csv(
  x = tracts_df,
  file = "censobr_tract_income_para.csv",
  row.names = TRUE
)
```