Skip to content

Commit cab2c69

Browse files
authored
Merge pull request #2 from elijahspina/elijahspina-patch-2
Add files via upload
2 parents f91abbd + 5212270 commit cab2c69

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+2098
-0
lines changed

R scripts/CCA_dim_test.R

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#This function takes the results of CCA
2+
# and tests independence of successive dimensions
3+
# using Wilks's Lambda.
4+
5+
#' Test successive canonical dimensions with Wilks' Lambda
#'
#' For i = 1..k, computes Wilks' Lambda for canonical dimensions i..k and
#' converts it to an F statistic with Rao's approximation.
#'
#' @param res CCA result; only `res$cor` (the canonical correlations) is read.
#' @param x,y The two variable sets the CCA was computed on, observations in
#'   rows and variables in columns (data frame or matrix). They default to
#'   the objects `data1` and `data2` found from the function's enclosing
#'   environment, which is what this function previously always read.
#' @return Numeric matrix with one row per dimension and the columns
#'   `WilksL`, `F`, `df1`, `df2` and `p`.
dim.test <- function(res, x = data1, y = data2) {
  ev <- 1 - res$cor^2
  n <- NROW(x)
  # NCOL() counts variables for matrices as well as data frames;
  # length() would count every cell of a matrix.
  p <- NCOL(x)
  q <- NCOL(y)
  k <- min(p, q)
  m <- n - 3 / 2 - (p + q) / 2
  w <- rev(cumprod(rev(ev)))

  # Dimension i is tested with p and q each reduced by (i - 1).
  idx <- seq_len(k)
  p_i <- p - idx + 1
  q_i <- q - idx + 1

  # Rao's s: the closed form is 0/0 when (p, q) is (1, 2) or (2, 1); the
  # approximation defines s = 1 whenever p^2 + q^2 - 5 is not positive.
  denom <- p_i^2 + q_i^2 - 5
  s <- rep(1, k)
  ok <- denom > 0
  s[ok] <- sqrt((p_i[ok]^2 * q_i[ok]^2 - 4) / denom[ok])

  d1 <- p_i * q_i
  d2 <- m * s - d1 / 2 + 1
  lam <- w[idx]^(1 / s)
  f <- (1 - lam) / lam * d2 / d1

  pv <- pf(f, d1, d2, lower.tail = FALSE)
  cbind(WilksL = w, F = f, df1 = d1, df2 = d2, p = pv)
}

R scripts/DEseq2 extras.txt

+184
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
# Packages used by this script (DESeq2 is loaded from the R 3.1.2 library)
library(DESeq2, lib.loc = "C:/Program Files/R/R-3.1.2/library")
library(vsn)
library(MVN)

# Raw count matrix, one row per miRNA
mcount <- read.delim(
  "C:/RNAseq/miRNA_data/count_strand_redo_rpkm5for3.txt",
  row.names = "miRNA"
)

# Sample annotation
#conditions <- c(0,0,0,1,1,1,3,3,3,8,8,8)
conditions <- c("B", "A", "A", "A", "B", "B", "C", "C", "C", "D", "D", "D")
design <- factor(conditions)
# cbind() drops the factor class, so `group` holds the integer codes of
# `design` ("1".."4") rather than the labels A-D.
coldata <- data.frame(cbind(sample = colnames(mcount), group = design))

# Check for recursiveness of data structures
is.recursive(mcount)
is.recursive(coldata)
is.recursive(design)

# Aggregate experiment data
dds <- DESeqDataSetFromMatrix(mcount, coldata, design = ~group)
23+
24+
#Perform DE analysis between 2 groups
25+
#dds <- DESeq(dds)
26+
#res <- results(dds)
27+
#resOrdered <- res[order(res$padj),]
28+
#summary(res)
29+
#plotMA(res, main="1cpm3 DESeq2", ylim=c(-2,2))
30+
#plotDispEsts(dds)
31+
#resSig <- subset(resOrdered, padj < 0.1)
32+
33+
#Add unshrunken max-likelihood estimates to results (for comparison only)
34+
#resMLE <- results(dds, addMLE=TRUE)
35+
#resOrderedMLE <- resMLE[order(resMLE$padj),]
36+
#summary(resMLE)
37+
#plotMA(resMLE, main="1cpm3 DESeq2", ylim=c(-2,2))
38+
#resSigMLE <- subset(resOrderedMLE, padj < 0.1)
39+
40+
# LRT test similar to edgeR: full model ~group against intercept-only
ddsLRT <- DESeq(dds, test = "LRT", full = ~group, reduced = ~1)
resLRT <- results(ddsLRT)
plotDispEsts(ddsLRT, main = "NB-GLM LRT Dispersion")

# `coldata$group` is built through cbind(), which replaces the factor
# labels A-D by their integer codes, so the levels available to
# `contrast` are "1".."4" (1 = A, 2 = B, 3 = C, 4 = D).
resLRT01 <- results(ddsLRT, contrast = c("group", "1", "2"))
resLRT03 <- results(ddsLRT, contrast = c("group", "1", "3"))
resLRT08 <- results(ddsLRT, contrast = c("group", "1", "4"))

# Sort each comparison by adjusted p-value and keep hits at padj < 0.1
resLRTOrdered01 <- resLRT01[order(resLRT01$padj), ]
resLRTOrdered03 <- resLRT03[order(resLRT03$padj), ]
resLRTOrdered08 <- resLRT08[order(resLRT08$padj), ]
resLRTSig01 <- subset(resLRTOrdered01, padj < 0.1)
resLRTSig03 <- subset(resLRTOrdered03, padj < 0.1)
resLRTSig08 <- subset(resLRTOrdered08, padj < 0.1)

plotMA(resLRT01, main = "0dpa v. 1dpa", ylim = c(-2, 2))
plotMA(resLRT03, main = "0dpa v. 3dpa", ylim = c(-2, 2))
plotMA(resLRT08, main = "0dpa v. 8dpa", ylim = c(-2, 2))

# Write the summaries to a text file. The section labels are printed
# explicitly: a bare string at top level is not echoed when the script is
# run through source(), so the labels would be missing from the file.
sink("C:/users/eli/desktop/1cpm3_nopara2_B_DESeq2_LRT_summaries.txt")
print("ANOVA-like Comparison")
summary(resLRT)
print("1dpa")
summary(resLRT01)
print("3dpa")
summary(resLRT03)
print("8dpa")
summary(resLRT08)
sink()
66+
67+
# Regularized-log transform of the fitted data set, exported as a
# tab-separated table
rld <- rlog(ddsLRT)
rlogMat <- assay(rld)

# Variance-stabilizing transform, same treatment
vsd <- varianceStabilizingTransformation(ddsLRT)
vstMat <- assay(vsd)

write.table(
  rlogMat,
  file = "C:/users/eli/desktop/1cpm3_nopara2_rlogMat.csv",
  quote = FALSE, sep = "\t"
)
write.table(
  vstMat,
  file = "C:/users/eli/desktop/1cpm3_nopara2_vstMat.csv",
  quote = FALSE, sep = "\t"
)
74+
75+
# Explore mean-variance of normal and transformed data

# The row filter has to exist before the first plot that indexes with it.
notAllZero <- rowSums(counts(ddsLRT)) > 0

# Normalized counts need size factors. Only the fitted object (ddsLRT)
# carries them; `dds` itself is never passed back through DESeq().
normCounts <- counts(ddsLRT, normalized = TRUE)[notAllZero, ]

meanSdPlot(normCounts, main = "Raw Counts")
par(mfrow = c(1, 3))
meanSdPlot(log2(normCounts + 1), main = "Log2")
meanSdPlot(assay(rld[notAllZero, ]), main = "rLog")
meanSdPlot(assay(vsd[notAllZero, ]), main = "VST")

dev.off()

# Plot PCA of VST normalized samples
plotPCA(vsd, intgroup = c("group"))
87+
88+
# Assess multivariate normality of normal and transformed data
# WARNING: running mardiaTest() on large data sets is very computationally
#          intensive and may crash your R session, monitor your CPU & RAM closely
mardiaTest(mcount, qqplot = TRUE)
mardiaTest(log(mcount + 1), qqplot = TRUE)
mardiaTest(rlogMat, qqplot = TRUE)
mardiaTest(vstMat, qqplot = TRUE)

# Assess multivariate outliers in normal and transformed data
# NOTE(review): this section used an object `vsdm` that is never created in
# this script; the VST matrix `vstMat` is assumed to be what was meant --
# confirm.
outL <- mvOutlier(vstMat, qqplot = TRUE, method = "quan")
outL.adj <- mvOutlier(vstMat, qqplot = TRUE, method = "adj.quan")
# Re-test normality on the data returned by each outlier pass
mardiaTest(outL$newData, qqplot = TRUE)
mardiaTest(outL.adj$newData, qqplot = TRUE)
# Second outlier pass on the output of the first
outL2 <- mvOutlier(outL$newData, qqplot = TRUE, method = "adj.quan")
mardiaTest(outL2$newData, qqplot = TRUE)
103+
104+
# Expanded model GLM analysis.
# This runs first: every diagnostic plot further down reads `ddsGLM` and
# `res01`, which are created here.
ddsGLM <- DESeq(dds, betaPrior = TRUE, modelMatrixType = "expanded")
plotDispEsts(ddsGLM, main = "miRNA NB-GLM Dispersion")

# Pairwise contrasts between the group levels "1".."4"; the object suffixes
# match the time points used in the plot titles (0, 1, 3, 8 dpa).
res01 <- results(ddsGLM, contrast = c("group", "1", "2"))
res03 <- results(ddsGLM, contrast = c("group", "1", "3"))
res08 <- results(ddsGLM, contrast = c("group", "1", "4"))
res13 <- results(ddsGLM, contrast = c("group", "2", "3"))
res18 <- results(ddsGLM, contrast = c("group", "2", "4"))
res38 <- results(ddsGLM, contrast = c("group", "3", "4"))

# Sorted by adjusted p-value
resOrdered01 <- res01[order(res01$padj), ]
resOrdered03 <- res03[order(res03$padj), ]
resOrdered08 <- res08[order(res08$padj), ]
resOrdered13 <- res13[order(res13$padj), ]
resOrdered18 <- res18[order(res18$padj), ]
resOrdered38 <- res38[order(res38$padj), ]

# Significant at padj < 0.1
resSig01 <- subset(resOrdered01, padj < 0.1)
resSig03 <- subset(resOrdered03, padj < 0.1)
resSig08 <- subset(resOrdered08, padj < 0.1)
resSig13 <- subset(resOrdered13, padj < 0.1)
resSig18 <- subset(resOrdered18, padj < 0.1)
resSig38 <- subset(resOrdered38, padj < 0.1)

plotMA(res01, main = "0dpa v. 1dpa", ylim = c(-2, 2))
plotMA(res03, main = "0dpa v. 3dpa", ylim = c(-2, 2))
plotMA(res08, main = "0dpa v. 8dpa", ylim = c(-2, 2))
plotMA(res13, main = "1dpa v. 3dpa", ylim = c(-2, 2))
plotMA(res18, main = "1dpa v. 8dpa", ylim = c(-2, 2))
plotMA(res38, main = "3dpa v. 8dpa", ylim = c(-2, 2))

sink("C:/users/eli/desktop/DESeq2_summaries.txt")
summary(res01)
summary(res03)
summary(res08)
summary(res13)
summary(res18)
summary(res38)
sink()

# Plot the maximum value of Cook's distance for each row over the rank of
# the test statistic, to justify its use as a filtering criterion for a
# given test
W <- res01$stat
maxCooks <- apply(assays(ddsGLM)[["cooks"]], 1, max)
idx <- !is.na(W)
plot(rank(W[idx]), maxCooks[idx],
     xlab = "rank of Wald statistic",
     ylab = "maximum Cook's distance per gene",
     ylim = c(0, 5), cex = .4, col = rgb(0, 0, 0, .3))
m <- ncol(ddsGLM)
p <- 3
# Reference line at the 99% quantile of F(p, m - p)
abline(h = qf(.99, p, m - p))

# Plot mean of normalized counts v. -log(pvalue) for a given test
plot(res01$baseMean + 1, -log10(res01$pvalue),
     log = "x", xlab = "mean of normalized counts",
     ylab = expression(-log[10](pvalue)),
     ylim = c(0, 30),
     cex = .4, col = rgb(0, 0, 0, .3))

# Bar plot of p-values for a given test, split by whether the gene's base
# mean exceeds the filter threshold stored on the results object
use <- res01$baseMean > attr(res01, "filterThreshold")
h1 <- hist(res01$pvalue[!use], breaks = 0:50 / 50, plot = FALSE)
h2 <- hist(res01$pvalue[use], breaks = 0:50 / 50, plot = FALSE)
colori <- c("do not pass" = "khaki", "pass" = "powderblue")
barplot(height = rbind(h1$counts, h2$counts), beside = FALSE,
        col = colori, space = 0, ylab = "frequency",
        main = "p-values for 0 v. 1dpa")
text(x = c(0, length(h1$counts)), y = 0, label = paste(c(0, 1)),
     adj = c(0.5, 1.7), xpd = NA)
legend("topright", fill = rev(colori), legend = rev(names(colori)))

# Export GLM results: one tab-separated file per comparison
orderedByTag <- list(
  "0-1dpa" = resOrdered01, "0-3dpa" = resOrdered03, "0-8dpa" = resOrdered08,
  "1-3dpa" = resOrdered13, "1-8dpa" = resOrdered18, "3-8dpa" = resOrdered38
)
for (tag in names(orderedByTag)) {
  write.table(as.data.frame(orderedByTag[[tag]]),
              file = paste0("C:/users/eli/desktop/DEseq2_", tag, ".txt"),
              quote = FALSE, sep = "\t")
}

# Export significant DE genes only
sigByTag <- list(
  "0-1dpa" = resSig01, "0-3dpa" = resSig03, "0-8dpa" = resSig08,
  "1-3dpa" = resSig13, "1-8dpa" = resSig18, "3-8dpa" = resSig38
)
for (tag in names(sigByTag)) {
  write.table(as.data.frame(sigByTag[[tag]]),
              file = paste0("C:/users/eli/desktop/DEseq2_", tag, "Sig.txt"),
              quote = FALSE, sep = "\t")
}

R scripts/DEseq2_nopara2.txt

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
library("DESeq2")

# Count table (rows named by the "miRNA" column) and the condition label
# of each of its ten sample columns
x <- read.delim(
  "C:/RNAseq/miRNA_data/1cpm3_nopara2_counts_10rep.txt",
  row.names = "miRNA"
)
conditions <- rep(c("0dpa", "1dpa", "3dpa", "8dpa"), times = c(2, 2, 3, 3))

#x <- read.delim("C:/RNAseq/polya_data/rpkm/count_strand_redo_rpkm5for3.txt", row.names="Gene")
#conditions <- c("A", "A", "A", "B", "B", "B", "C", "C", "C", "D", "D", "D")

design <- factor(conditions)
# cbind() stores the integer codes of `design` ("1".."4"), not its labels,
# in the `group` column.
sampleTable <- cbind(colnames(x), group = design)
coldata <- data.frame(sampleTable)
10+
11+
# DESeq: likelihood-ratio test of ~group against the intercept-only model
dds <- DESeqDataSetFromMatrix(x, coldata, design = ~group)
ddsLRT <- DESeq(dds, test = "LRT", full = ~group, reduced = ~1)
plotDispEsts(ddsLRT, main = "NB-GLM LRT Dispersion")

# Overall result, ordered by adjusted p-value, and its padj < 0.05 subset
resLRT <- results(ddsLRT)
resLRTOrdered <- resLRT[order(resLRT$padj), ]
resLRTSig <- subset(resLRT, padj < 0.05)

# Group levels "2", "3", "4" each contrasted against level "1".
# NOTE(review): with test = "LRT" the p-values presumably still come from
# the full-vs-reduced comparison and only the fold changes follow the
# contrast -- confirm the per-contrast padj filters below are intended.
resLRT01 <- results(ddsLRT, contrast = c("group", "2", "1"))
resLRTOrdered01 <- resLRT01[order(resLRT01$padj), ]
resLRTSig01 <- subset(resLRTOrdered01, padj < 0.05)

resLRT03 <- results(ddsLRT, contrast = c("group", "3", "1"))
resLRTOrdered03 <- resLRT03[order(resLRT03$padj), ]
resLRTSig03 <- subset(resLRTOrdered03, padj < 0.05)

resLRT08 <- results(ddsLRT, contrast = c("group", "4", "1"))
resLRTOrdered08 <- resLRT08[order(resLRT08$padj), ]
resLRTSig08 <- subset(resLRTOrdered08, padj < 0.05)

# MA plots, then console summaries
plotMA(resLRT01, main = "0dpa v. 1dpa", ylim = c(-2, 2))
plotMA(resLRT03, main = "0dpa v. 3dpa", ylim = c(-2, 2))
plotMA(resLRT08, main = "0dpa v. 8dpa", ylim = c(-2, 2))
summary(resLRT01)
summary(resLRT03)
summary(resLRT08)
summary(resLRT)
34+
35+
# Export results as tab-separated tables
write.table(as.data.frame(resLRTOrdered), file = "C:/rnaseq/mirna_data/clusters/10rep_redo2/DEseq2_1cpm3redo_nopara2_ANOVA-like_LRT_redo.txt", quote = FALSE, sep = "\t")
write.table(as.data.frame(resLRTOrdered01), file = "C:/rnaseq/mirna_data/clusters/10rep_redo2/DEseq2_1cpm3redo_nopara2_1dpa_redo.txt", quote = FALSE, sep = "\t")
write.table(as.data.frame(resLRTOrdered03), file = "C:/rnaseq/mirna_data/clusters/10rep_redo2/DEseq2_1cpm3redo_nopara2_3dpa_redo.txt", quote = FALSE, sep = "\t")
write.table(as.data.frame(resLRTOrdered08), file = "C:/rnaseq/mirna_data/clusters/10rep_redo2/DEseq2_1cpm3redo_nopara2_8dpa_redo.txt", quote = FALSE, sep = "\t")
#write.table(as.data.frame(resLRTOrdered), file="C:/rnaseq/polya_data/clusters/DESeq_5rpkm3redo_ANOVA-like_LRT_redo.txt", quote=FALSE, sep='\t')
#write.table(as.data.frame(resLRTOrdered01), file="C:/rnaseq/polya_data/clusters/DESeq_5rpkm3redo_1dpa_redo.txt", quote=FALSE, sep='\t')
#write.table(as.data.frame(resLRTOrdered03), file="C:/rnaseq/polya_data/clusters/DESeq_5rpkm3redo_3dpa_redo.txt", quote=FALSE, sep='\t')
#write.table(as.data.frame(resLRTOrdered08), file="C:/rnaseq/polya_data/clusters/DESeq_5rpkm3redo_8dpa_redo.txt", quote=FALSE, sep='\t')

# Export summaries. The section labels are printed explicitly: a bare
# string at top level is not echoed when the script is run through
# source(), so the labels would be missing from the file.
#sink("C:/rnaseq/polya_data/clusters/DESeq_5rpkm3redo_DESeq2_LRT_summaries_redo.txt")
sink("C:/rnaseq/mirna_data/clusters/10rep_redo2/DEseq2_1cpm3redo_nopara2_LRT_summaries_redo2.txt")
print("ANOVA-like Comparison")
summary(resLRT)
print("1dpa")
summary(resLRT01)
print("3dpa")
summary(resLRT03)
print("8dpa")
summary(resLRT08)
sink()
57+
58+
# Plot all DE genes one-by-one
library(ggplot2)  # ggplot(), aes(), geom_point(), ... are used below
#resLRTSig <- subset(resLRT, padj < 0.1)
sig <- rownames(resLRTSig)
#Adjust plotting lattice based on number of DE genes
#source("C:/users/eli/desktop/r_scripts/get_factors.r")
#L <- length(colnames(resLRTSig)
#F <- get_factors(L-L%%5)
#f <- c(F[length(F)/2], F[(length(F)/2) + 1)

# Base-graphics count plot for every significant gene
for (item in sig) {
  plotCounts(ddsLRT, gene = item, intgroup = "group", col = "blue", pch = 18)
}

# ggplot version of the same plots
for (item in sig) {
  d <- plotCounts(ddsLRT, gene = item, intgroup = "group", returnData = TRUE)
  genePlot <- ggplot(d, aes(x = group, y = count)) +
    geom_point(position = position_jitter(width = 0.1, height = 0)) +
    scale_y_log10(breaks = c(25, 100, 400))
  # A ggplot object is only drawn when printed, and nothing is
  # auto-printed inside a for loop.
  print(genePlot)
}
74+
75+
# Explore transformed data
library(vsn)  # provides meanSdPlot()

# Both transforms are computed once and reused for the plots and the
# export at the end of this section.
rld <- rlog(ddsLRT)
vsd <- varianceStabilizingTransformation(ddsLRT)
rlogMat <- assay(rld)
vstMat <- assay(vsd)

# PCA on the variance-stabilized data rather than on the untransformed
# data set
plotPCA(vsd, intgroup = c("group"))

# Mean-SD plots on rows with at least one count
notAllZero <- rowSums(counts(ddsLRT)) > 0
normCounts <- counts(ddsLRT, normalized = TRUE)[notAllZero, ]
meanSdPlot(normCounts, main = "Raw Counts")
par(mfrow = c(1, 3))
meanSdPlot(log2(normCounts + 1), main = "Log2")
meanSdPlot(assay(rld[notAllZero, ]), main = "rLog")
meanSdPlot(assay(vsd[notAllZero, ]), main = "VST")
par(mfrow = c(1, 1))

# Transformations: export both matrices as tab-separated tables
write.table(rlogMat, file = "C:/rnaseq/mirna_data/clusters/10rep_redo2/1cpm3_nopara2_rLogmat_redo2.txt", quote = FALSE, sep = "\t")
write.table(vstMat, file = "C:/rnaseq/mirna_data/clusters/10rep_redo2/1cpm3_nopara2_VSTmat_redo2.txt", quote = FALSE, sep = "\t")

0 commit comments

Comments
 (0)