forked from jennybc/gapminder
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path07_fill-and-fix-continent.R
82 lines (74 loc) · 2.18 KB
/
07_fill-and-fix-continent.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#' ---
#' date: "`r format(Sys.Date())`"
#' output:
#'   html_document:
#'     keep_md: TRUE
#' ---
library(plyr)
suppressPackageStartupMessages(library(dplyr))
library(ggplot2)
library(readr)
## Read the input TSV, then store country and continent as factors so their
## levels can be inspected further down.
gap_dat <- read_tsv("05_gap-merged-with-china-1952.tsv")
gap_dat <- gap_dat %>%
  mutate(
    country = factor(country),
    continent = factor(continent)
  )
## Structure of the data, followed by the row count per continent level
## (summary() on a factor also reports the number of NAs).
str(gap_dat)
summary(gap_dat$continent)
#' Hmmmmm .... I've never heard of the continent of FSU.
## Keep only the rows tagged FSU and drop unused factor levels, so the
## remaining levels of country are exactly the countries in that group.
tmp <- droplevels(filter(gap_dat, continent == "FSU"))
levels(tmp$country)
#' Aha. FSU = Former Soviet Union.
#' Which countries do not have continent data?
## Same approach, this time for the rows whose continent is missing.
tmp <- droplevels(filter(gap_dat, is.na(continent)))
levels(tmp$country)
#' Populate missing values of continent.
## Hand-curated lookup table: one row per country, giving the continent value
## to apply to that country in the main data.
## tribble() replaces the deprecated frame_data(); it is called through its
## namespace because this script does not attach tibble by name.
cont_dat <- tibble::tribble(
  ~country,                ~continent,
  "Armenia",               "FSU",
  "Aruba",                 "Americas",
  "Australia",             "Oceania",
  "Bahamas",               "Americas",
  "Barbados",              "Americas",
  "Belize",                "Americas",
  "Canada",                "Americas",
  "French Guiana",         "Americas",
  "French Polynesia",      "Oceania",
  "Georgia",               "FSU",
  "Grenada",               "Americas",
  "Guadeloupe",            "Americas",
  "Haiti",                 "Americas",
  "Hong Kong, China",      "Asia",
  "Maldives",              "Asia",
  "Martinique",            "Americas",
  "Micronesia, Fed. Sts.", "Oceania",
  "Netherlands Antilles",  "Americas",
  "New Caledonia",         "Oceania",
  "Papua New Guinea",      "Oceania",
  "Reunion",               "Africa",
  "Samoa",                 "Oceania",
  "Sao Tome and Principe", "Africa",
  "Tonga",                 "Oceania",
  "Uzbekistan",            "FSU",
  "Vanuatu",               "Oceania"
)
## Attach the curated continent values from cont_dat to the main data.
## Where cont_dat has a value for a country it wins; otherwise the continent
## already present in gap_dat is kept.
##
## 2015-12-29
## A dplyr bug blocked the dplyr join at the time, so base merge() stands in
## for the left_join() shown here for reference:
##   left_join(cont_dat, by = "country")
## https://github.com/hadley/dplyr/issues/1559
##
## all.x = TRUE makes merge() a left join: every row of gap_dat is kept and a
## lookup country that is absent from gap_dat cannot add an all-NA row.
gap_dat <- gap_dat %>%
  merge(cont_dat, by = "country", all.x = TRUE) %>%
  tibble::as_tibble() %>%
  mutate(
    ## merge() suffixes the clashing columns: .x is from gap_dat (a factor),
    ## .y is from cont_dat. Compare as character, then rebuild the factor so
    ## its levels reflect the final set of continent values.
    continent = factor(coalesce(
      as.character(continent.y),
      as.character(continent.x)
    ))
  ) %>%
  select(-continent.x, -continent.y) %>%
  arrange(country, year)
gap_dat %>% str()
gap_dat$continent %>% summary()
## Fix the column order of the output file, then write it out as TSV.
my_vars <- c("country", "continent", "year", "lifeExp", "pop", "gdpPercap")
gap_dat <- gap_dat[, my_vars]
write_tsv(gap_dat, "07_gap-merged-with-continent.tsv")