Skip to content

Files

Latest commit

6cf6d9b · Aug 5, 2022

History

History

2019-12-26_sankey

Folders and files

NameName
Last commit message
Last commit date

parent directory

..
Aug 5, 2022
Dec 26, 2019
Dec 26, 2019
Dec 26, 2019
Dec 26, 2019
title author date output
Sankey diagram and Word-cloud for hashtags
Zhi Yang
12/24/2019
html_document
keep_md
true
library(rtweet)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
suppressMessages(library(dplyr))
library(networkD3)
library(wordcloud2)

# Retrieve the stored Twitter API token(s) with get_tokens() (presumably
# rtweet's, since rtweet is attached above).
# NOTE(review): `token` is not referenced again in the visible code; the
# later get_timeline()/get_favorites() calls do not pass it explicitly --
# confirm whether this assignment is still needed.
token <- get_tokens()

Get my timeline

# Download the tweets and the likes of one account. Both requests use the
# same `n`, so it is kept in a single place.
targeted_user <- "zhiiiyang"
max_items <- 3000

tweet_list <- get_timeline(targeted_user, n = max_items)
fav_list <- get_favorites(targeted_user, n = max_items)

# Rows x columns of each result, as rendered in the original document.
dim(tweet_list)
## [1] 1269   90
dim(fav_list)
## [1] 1578   91

Clean the hashtags by removing the NA values

# Flatten a list-column of hashtags into one vector and drop the NA entries
# (the NA entries are what the heading above says must be removed).
flatten_hashtags <- function(tag_list) {
  tags <- unlist(tag_list)
  tags[!is.na(tags)]
}

tweet_hash <- flatten_hashtags(tweet_list$hashtags)
fav_hash <- flatten_hashtags(fav_list$hashtags)

# Number of hashtags left in each set, as rendered in the original document.
length(tweet_hash)
## [1] 393
length(fav_hash)
## [1] 1226

Exclude hashtags that appear 10 times or fewer

# Long table with one row per hashtag occurrence: `source` says whether the
# hashtag came from a tweet or a like, `target` is the lower-cased hashtag
# with a leading "#".
dat <- data.frame(
  source = rep(
    c("Tweets", "Likes"),
    times = c(length(tweet_hash), length(fav_hash))
  ),
  target = paste0("#", tolower(c(tweet_hash, fav_hash)))
)

# Only (source, hashtag) pairs seen strictly more than this many times are
# kept as links.
min_count <- 10

# Count each (source, hashtag) pair. summarise() only peels off the last
# grouping variable, so the result would stay grouped by `source`; ungroup()
# makes sure the filter, the sort and the column assignments below all work
# on a plain, ungrouped table.
dat_sum <- dat %>%
  group_by(source, target) %>%
  summarise(value = n()) %>%
  ungroup() %>%
  filter(value > min_count) %>%
  arrange(desc(value))

# Node table: every distinct source and hashtag label, sources first.
nodes <- data.frame(
  name = unique(c(
    as.character(dat_sum$source),
    as.character(dat_sum$target)
  ))
)

# Translate the labels into zero-based row positions in `nodes`
# (match() is one-based, hence the "- 1").
dat_sum$IDsource <- match(dat_sum$source, nodes$name) - 1
dat_sum$IDtarget <- match(dat_sum$target, nodes$name) - 1

create the network

# Draw the Sankey diagram from the link table (`dat_sum`) and the node table
# (`nodes`); the Source/Target/Value/NodeID arguments name the columns to use.
sankeyNetwork(
  Links = dat_sum,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "value",
  NodeID = "name",
  sinksRight = FALSE,
  fontFamily = "sans-serif",
  nodeWidth = 10,
  fontSize = 18,
  nodePadding = 15
)

create the word cloud

# Image shipped with wordcloud2, passed below as the `figPath` argument.
figPath <- system.file("examples/t.png", package = "wordcloud2")

# Frequency table for the word cloud, built from every hashtag in `dat`
# (tweets and likes together):
#   * counts are stored as doubles so both if_else() branches share a type;
#   * any count above 100 is divided by 8 and rounded;
#   * rows are sorted by the adjusted count, largest first;
#   * the hashtag column is renamed to `word` and its "#" is stripped.
Freq <- dat %>%
  group_by(target) %>%
  summarise(freq = as.numeric(n())) %>%
  mutate(freq = if_else(freq > 100, round(freq / 8), freq)) %>%
  arrange(desc(freq)) %>%
  rename(word = target) %>%
  mutate(word = stringr::str_remove(word, "#"))

wordcloud2(Freq, figPath = figPath, size = 1.5, color = "skyblue")