tidytuesday/2019-12-26_sankey at master · zhiiiyang/tidytuesday

Name	Name	Last commit message	Last commit date
parent directory ..
README.Rmd	README.Rmd	update sankey	Aug 5, 2022
README.html	README.html	update the img of sankey repo	Dec 26, 2019
README.md	README.md	update the img of sankey repo	Dec 26, 2019
sankey.png	sankey.png	update the img of sankey repo	Dec 26, 2019
twitter.png	twitter.png	sankey diagram	Dec 26, 2019

Name

Last commit message

Last commit date

README.Rmd

update sankey

Aug 5, 2022

README.html

update the img of sankey repo

Dec 26, 2019

README.md

update the img of sankey repo

Dec 26, 2019

sankey.png

update the img of sankey repo

Dec 26, 2019

twitter.png

sankey diagram

Dec 26, 2019

title

author

date

output

Sankey diagram and Word-cloud for hashtags

Zhi Yang

12/24/2019

html_document

keep_md
true

library(rtweet)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:base':
## 
##     date

suppressMessages(library(dplyr))
library(networkD3)
library(wordcloud2)

# Load the Twitter API token(s) through rtweet.
# NOTE(review): `token` is not passed explicitly to any call visible in this
# document -- confirm whether the later rtweet calls rely on it implicitly.
token <- get_tokens()

Get my timeline

# Account whose activity is analysed.
targeted_user <- "zhiiiyang"

# Request up to 3000 statuses from the account's own timeline and up to 3000
# of the statuses it has liked.
tweet_list <- get_timeline(targeted_user, n = 3000)
fav_list <- get_favorites(targeted_user, n = 3000)

# Rows x columns actually returned for the timeline.
dim(tweet_list)

## [1] 1269   90

dim(fav_list)

## [1] 1578   91

Clean the hashtags by removing `NA` values

# Flatten the per-status `hashtags` list-columns into plain vectors and keep
# only the non-missing entries.
# NOTE(review): NA presumably marks statuses without any hashtag -- confirm.
tweet_hash <- Filter(Negate(is.na), unlist(tweet_list$hashtags))
fav_hash <- Filter(Negate(is.na), unlist(fav_list$hashtags))

length(tweet_hash)

## [1] 393

length(fav_hash)

## [1] 1226

Exclude hashtags with 10 or fewer occurrences

# One row per hashtag occurrence. `source` records whether the hashtag came
# from the user's own tweets or from liked tweets; `target` is the
# lower-cased hashtag prefixed with "#".
dat <- data.frame(
  source = c(
    rep("Tweets", length(tweet_hash)),
    rep("Likes", length(fav_hash))
  ),
  target = paste0("#", tolower(c(tweet_hash, fav_hash)))
)

# Count occurrences per (source, hashtag) pair, keep only pairs seen more
# than 10 times, and order by descending count. `ungroup()` removes the
# residual `source` grouping that `summarise()` leaves behind, so the link
# table handed to later steps is a plain, ungrouped tibble.
dat_sum <- dat %>%
  group_by(source, target) %>%
  summarise(value = n()) %>%
  ungroup() %>%
  filter(value > 10) %>%
  arrange(desc(value))

# Node table: every distinct source and hashtag label that survived the
# filter, sources first.
nodes <- data.frame(
  name = unique(c(
    as.character(dat_sum$source),
    as.character(dat_sum$target)
  ))
)

# Translate labels into zero-based positions in `nodes` (match() is
# one-based, hence the `- 1`); these are the ids the Sankey links refer to.
dat_sum$IDsource <- match(dat_sum$source, nodes$name) - 1
dat_sum$IDtarget <- match(dat_sum$target, nodes$name) - 1

create the network

# Draw the Sankey diagram: links flow from "Tweets"/"Likes" to hashtags,
# with link width given by the `value` column of `dat_sum`.
sankeyNetwork(
  Links = dat_sum,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "value",
  NodeID = "name",
  sinksRight = FALSE,
  fontFamily = "sans-serif",
  nodeWidth = 10,
  fontSize = 18,
  nodePadding = 15
)

create the word cloud

# Mask image shipped with the wordcloud2 package; it shapes the cloud.
figPath <- system.file("examples/t.png", package = "wordcloud2")

# Total occurrences per hashtag across tweets and likes combined. Counts
# above 100 are divided by 8 and rounded.
# NOTE(review): presumably this keeps a few dominant hashtags from dwarfing
# the rest of the cloud -- confirm the intent of the 100 / 8 constants.
Freq <- dat %>%
  group_by(target) %>%
  summarise(value = as.numeric(n())) %>%
  mutate(value = if_else(value > 100, round(value / 8), value)) %>%
  arrange(desc(value))

# wordcloud2() is given the columns as `word` and `freq`; the leading "#" is
# removed from each word before plotting.
names(Freq) <- c("word", "freq")
Freq[["word"]] <- stringr::str_remove(Freq[["word"]], "#")

wordcloud2(Freq, figPath = figPath, size = 1.5, color = "skyblue")

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Files

2019-12-26_sankey

2019-12-26_sankey

README.md

Get my timeline

clean hashtags by removing the `NA`

exclude hashtags less than 10 counts

create the network

create the word cloud

Files

2019-12-26_sankey

Directory actions

More options

Directory actions

More options

Latest commit

History

2019-12-26_sankey

Folders and files

parent directory

README.md

Get my timeline

clean hashtags by removing the NA

exclude hashtags less than 10 counts

create the network

create the word cloud

clean hashtags by removing the `NA`