-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmy-t-sne.R
105 lines (82 loc) · 3.52 KB
/
my-t-sne.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Michelle Li
# t-SNE code from van der Maaten/jdonaldson
# last updated: July 20, 2018
# successfully ran Nestorowa and Paul data, but not Grover data
# added option to select cluster(s) to highlight in tsne plots
# load required packages: tsne (can download from CRAN) and rgl for 3D plot
library(tsne)
library(rgl)
#### PREPARING DATA ----------------------------------------------
## NOTES ON OUR DATA:
## Nestorowa data: 1645 cells x 4290 genes
## Paul data: 8716 genes x 2730 cells (need transpose for t-SNE)
## Grover data: 46175 genes x 135 cells (need transpose for t-SNE)
## Rockne data: 12674 genes x 101 cells (need transpose for t-SNE)
## import data and convert into matrix
rawdata <- read.delim("~/GitHub/cell-diff-reu-2018/data/coordinates_gene_counts_flow_cytometry.txt", row.names=1)
# rawdata <- read.csv("~/GitHub/cell-diff-reu-2018/data/grover_expression.txt", row.names=1, sep="")
cleandata <- na.omit(rawdata) # remove NAs
data <- cleandata[,5:13] # choose flow cytometry columns
# data <- cleandata[,14:ncol(cleandata)] # choose gene exp columns
data <- as.matrix(data) # convert to matrix
## load .RData (Paul)
# load("~/GitHub/cell-diff-reu-2018/data/Paul_Cell_MARSseq_GSE72857.RData") # paul data
dataname <- "Nestorowa Data (Gene Exp)" # set name
## create factor for grouping
rownames <- row.names(data)
groups <- as.character(rownames)
groups[grepl("HSPC",groups)] <- "HSPC" # replace all labels with hspc
groups[grepl("LT",groups)] <- "LT.HSC" # replace with lt.hsc
groups[grepl("Prog",groups)] <- "PROG" # same for prog
groupsf <- factor(groups)
# groupsf <- factor(cluster.id) # paul data
nclusters <- nlevels(groupsf) # number of groups to cluster
colors <- rainbow(nclusters)
#### T-SNE -------------------------------------------------
# t-SNE DIMENSIONS: rows=cells, columns=genes
# Nestorowa: use original
# Paul/Grover/Rockne: use transpose
# data <- t(data)
# Note: can take very, very long with large datasets
# 200-300 max iterations tends to work well (default is 1000), 100 is too little
ydata3d <- tsne(data, k=3, max_iter = 200) # ydata = matrix(rnorm(k * n),n)
#### PLOTTING -------------------------------------------------
# par(mar = c(5.1,4.1,4.1,8.1), xpd = "TRUE") # use if you need extra space in margins for legend
## plot 2d
plot(x=ydata3d[,1], y=ydata3d[,2],
xlab="", ylab="",
col = colors[groupsf],
pch=20,
main = paste("2D t-SNE Plot of", dataname))
legend("topright", inset = c(0,0),
legend = levels(groupsf),
pch = 20,
col = colors,
ncol=3, cex=0.5, pt.cex=1)
## plot 3d
plot3d(x=ydata3d[,1], y=ydata3d[,2], z=ydata3d[,3],
xlab="", ylab="", zlab="",
col=colors[groupsf],
pch=20,
main = paste("3D t-SNE Plot of", dataname))
legend3d("topright", inset=c(0.1,0.1),
legend = levels(groupsf),
pch = 20,
col = colors,
ncol=3, cex=1)
## Plotting with highlighted cluster(s)
## NOTE: have not checked the part below in a while -- may not make sense.
## select cluster(s) to highlight
clusterindex <- 10 # select index of the cluster you want to visualize
color1 <- colors
color1[-clusterindex] <- "black"
## plot 2d select clusters
plot(x=ydata3d[,1], y=ydata3d[,2],
col = color1[groupsf],
pch=20,
main = paste("2D t-SNE Plot of Cluster", clusterindex, dataname, "(tsne)"))
## plot 3d select clusters
plot3d(x=ydata3d[,1], y=ydata3d[,2], z=ydata3d[,3],
col = color1[groupsf],
pch=20,
main = paste("3D t-SNE Plot of Cluster", clusterindex, dataname, "(tsne)"))