|
| 1 | +#Load required packages |
| 2 | +library("DESeq2", lib.loc="C:/Program Files/R/R-3.1.2/library") |
| 3 | +library("vsn") |
| 4 | +library(MVN) |
| 5 | + |
| 6 | +#Load matrix of raw counts |
| 7 | +mcount <- read.delim("C:/RNAseq/miRNA_data/count_strand_redo_rpkm5for3.txt", row.names="miRNA") |
| 8 | + |
| 9 | +#Prepare sample annotation file |
| 10 | +#conditions <- c(0,0,0,1,1,1,3,3,3,8,8,8) |
| 11 | +conditions <- c("B", "A", "A", "A", "B", "B", "C", "C", "C", "D", "D", "D") |
| 12 | +design <- factor(conditions) |
| 13 | +coldata <- data.frame(cbind(colnames(mcount), design)) |
| 14 | +colnames(coldata) <- c("sample", "group") |
| 15 | + |
| 16 | +#Check for recursiveness of data structures |
| 17 | +is.recursive(mcount) |
| 18 | +is.recursive(coldata) |
| 19 | +is.recursive(design) |
| 20 | + |
| 21 | +#Aggregate experiment data |
| 22 | +dds <- DESeqDataSetFromMatrix(mcount, coldata, design=~group) |
| 23 | + |
| 24 | +#Perform DE analysis between 2 groups |
| 25 | +#dds <- DESeq(dds) |
| 26 | +#res <- results(dds) |
| 27 | +#resOrdered <- res[order(res$padj),] |
| 28 | +#summary(res) |
| 29 | +#plotMA(res, main="1cpm3 DESeq2", ylim=c(-2,2)) |
| 30 | +#plotDispEsts(dds) |
| 31 | +#resSig <- subset(resOrdered, padj < 0.1) |
| 32 | + |
| 33 | +#Add unshrunken max-likelihood estimates to results (for comparison only) |
| 34 | +#resMLE <- results(dds, addMLE=TRUE) |
| 35 | +#resOrderedMLE <- resMLE[order(resMLE$padj),] |
| 36 | +#summary(resMLE) |
| 37 | +#plotMA(resMLE, main="1cpm3 DESeq2", ylim=c(-2,2)) |
| 38 | +#resSigMLE <- subset(resOrderedMLE, padj < 0.1) |
| 39 | + |
| 40 | +#LRT test similar to EdgeR |
| 41 | +ddsLRT <- DESeq(dds, test="LRT", full=~group, reduced= ~ 1) |
| 42 | +resLRT <- results(ddsLRT) |
| 43 | +plotDispEsts(ddsLRT, main="NB-GLM LRT Dispersion") |
| 44 | +resLRT01 <- results(ddsLRT, contrast=c("group","A","B")) |
| 45 | +resLRT03 <- results(ddsLRT, contrast=c("group","A","C")) |
| 46 | +resLRT08 <- results(ddsLRT, contrast=c("group","A","D")) |
| 47 | +resLRTOrdered01 <- resLRT01[order(resLRT01$padj),] |
| 48 | +resLRTOrdered03 <- resLRT03[order(resLRT03$padj),] |
| 49 | +resLRTOrdered08 <- resLRT08[order(resLRT08$padj),] |
| 50 | +resLRTSig01 <- subset(resLRTOrdered01, padj < 0.1) |
| 51 | +resLRTSig03 <- subset(resLRTOrdered03, padj < 0.1) |
| 52 | +resLRTSig08 <- subset(resLRTOrdered08, padj < 0.1) |
| 53 | +plotMA(resLRT01, main="0dpa v. 1dpa", ylim=c(-2,2)) |
| 54 | +plotMA(resLRT03, main="0dpa v. 3dpa", ylim=c(-2,2)) |
| 55 | +plotMA(resLRT08, main="0dpa v. 8dpa", ylim=c(-2,2)) |
| 56 | +sink("C:/users/eli/desktop/1cpm3_nopara2_B_DESeq2_LRT_summaries.txt") |
| 57 | +"ANOVA-like Comparison" |
| 58 | +summary(resLRT) |
| 59 | +"1dpa" |
| 60 | +summary(resLRT01) |
| 61 | +"3dpa" |
| 62 | +summary(resLRT03) |
| 63 | +"8dpa" |
| 64 | +summary(resLRT08) |
| 65 | +sink() |
| 66 | + |
| 67 | +#Extract DESeq transformations of the data |
| 68 | +rld <- rlog(ddsLRT) |
| 69 | +vsd <- varianceStabilizingTransformation(ddsLRT) |
| 70 | +rlogMat <- assay(rld) |
| 71 | +vstMat <- assay(vsd) |
| 72 | +write.table(rlogMat, file="C:/users/eli/desktop/1cpm3_nopara2_rlogMat.csv", quote=FALSE, sep='\t') |
| 73 | +write.table(vstMat, file="C:/users/eli/desktop/1cpm3_nopara2_vstMat.csv", quote=FALSE, sep='\t') |
| 74 | + |
| 75 | +#Explore mean-variance of normal and transformed data |
| 76 | +meanSdPlot(counts(dds,normalized=TRUE)[notAllZero,], main="Raw Counts") |
| 77 | +par(mfrow=c(1,3)) |
| 78 | +notAllZero <- (rowSums(counts(dds))>0) |
| 79 | +meanSdPlot(log2(counts(dds,normalized=TRUE)[notAllZero,] + 1), main="Log2") |
| 80 | +meanSdPlot(assay(rld[notAllZero,]), main="rLog") |
| 81 | +meanSdPlot(assay(vsd[notAllZero,]), main="VST") |
| 82 | + |
| 83 | +dev.off() |
| 84 | + |
| 85 | +#Plot PCA of VST normalized samples |
| 86 | +plotPCA(vsd, intgroup=c("group")) |
| 87 | + |
| 88 | +#Assess multivariate normality of normal and transformed data |
| 89 | +#WARNING: runing mardiaTest() on large data sets is very computationally |
| 90 | +# intensive and may crash your R session, monitor your CPU & RAM closely |
| 91 | +mardiaTest(mcount, qqplot=TRUE) |
| 92 | +mardiaTest(log(mcount+1), qqplot=TRUE) |
| 93 | +mardiaTest(rlogMat, qqplot=TRUE) |
| 94 | +mardiaTest(vstMat, qqplot=TRUE) |
| 95 | + |
| 96 | +#Assess multivariate outliers in normal and transformed data |
| 97 | +outL <- mvOutlier(vsdm, qqplot=TRUE, method="quan") |
| 98 | +outL.adj <- mvOutlier(vsdm, qqplot=TRUE, method="adj.quan") |
| 99 | +mardiaTest(outL$newData, qqplot=TRUE) |
| 100 | +mardiaTest(outL.adj$newData, qqplot=TRUE) |
| 101 | +outL2 <- mvOutlier(outL$newData, qqplot=TRUE, method="adj.quan") |
| 102 | +mardiaTest(outL2$newData, qqplot=TRUE) |
| 103 | + |
| 104 | +#plot the maximum value of Cook�s distance for each row over the rank of the test statistic |
| 105 | +#to justify its use as a filtering criterion for a given test |
| 106 | +W <- res01$stat |
| 107 | +maxCooks <- apply(assays(ddsGLM)[["cooks"]],1,max) |
| 108 | +idx <- !is.na(W) |
| 109 | +plot(rank(W[idx]), maxCooks[idx], xlab="rank of Wald statistic", |
| 110 | +ylab="maximum Cook?s distance per gene", |
| 111 | +ylim=c(0,5), cex=.4, col=rgb(0,0,0,.3)) |
| 112 | +m <- ncol(ddsGLM) |
| 113 | +p <- 3 |
| 114 | +abline(h=qf(.99, p, m - p)) |
| 115 | + |
| 116 | +#plot mean of normalized counts v. -log(pvalue) for a given test |
| 117 | +plot(res01$baseMean+1, -log10(res01$pvalue), |
| 118 | + log="x", xlab="mean of normalized counts", |
| 119 | + ylab=expression(-log[10](pvalue)), |
| 120 | + ylim=c(0,30), |
| 121 | + cex=.4, col=rgb(0,0,0,.3)) |
| 122 | + |
| 123 | +#Bar plot of p-values for a given test |
| 124 | +use <- res01$baseMean > attr(res01,"filterThreshold") |
| 125 | +h1 <- hist(res01$pvalue[!use], breaks=0:50/50, plot=FALSE) |
| 126 | +h2 <- hist(res01$pvalue[use], breaks=0:50/50, plot=FALSE) |
| 127 | +colori <- c('do not pass'="khaki", 'pass'="powderblue") |
| 128 | +barplot(height = rbind(h1$counts, h2$counts), beside = FALSE, |
| 129 | +col = colori, space = 0, ylab="frequency", main="p-values for 0 v. 1dpa") |
| 130 | +text(x = c(0, length(h1$counts)), y = 0, label = paste(c(0,1)), |
| 131 | +adj = c(0.5,1.7), xpd=NA) |
| 132 | +legend("topright", fill=rev(colori), legend=rev(names(colori))) |
| 133 | + |
| 134 | +#Expanded model GLM analysis |
| 135 | +ddsGLM <- DESeq(dds, betaPrior=TRUE, modelMatrixType="expanded") |
| 136 | +plotDispEsts(ddsGLM, main="miRNA NB-GLM Dispersion") |
| 137 | +res01 <- results(ddsGLM, contrast=c("group","1","2")) |
| 138 | +res03 <- results(ddsGLM, contrast=c("group","1","3")) |
| 139 | +res08 <- results(ddsGLM, contrast=c("group","1","4")) |
| 140 | +res13 <- results(ddsGLM, contrast=c("group","2","3")) |
| 141 | +res18 <- results(ddsGLM, contrast=c("group","2","4")) |
| 142 | +res38 <- results(ddsGLM, contrast=c("group","3","4")) |
| 143 | +resOrdered01 <- res01[order(res01$padj),] |
| 144 | +resOrdered03 <- res03[order(res03$padj),] |
| 145 | +resOrdered08 <- res08[order(res08$padj),] |
| 146 | +resOrdered13 <- res13[order(res13$padj),] |
| 147 | +resOrdered18 <- res18[order(res18$padj),] |
| 148 | +resOrdered38 <- res38[order(res38$padj),] |
| 149 | +resSig01 <- subset(resOrdered01, padj < 0.1) |
| 150 | +resSig03 <- subset(resOrdered03, padj < 0.1) |
| 151 | +resSig08 <- subset(resOrdered08, padj < 0.1) |
| 152 | +resSig13 <- subset(resOrdered13, padj < 0.1) |
| 153 | +resSig18 <- subset(resOrdered18, padj < 0.1) |
| 154 | +resSig38 <- subset(resOrdered38, padj < 0.1) |
| 155 | +plotMA(res01, main="0dpa v. 1dpa", ylim=c(-2,2)) |
| 156 | +plotMA(res03, main="0dpa v. 3dpa", ylim=c(-2,2)) |
| 157 | +plotMA(res08, main="0dpa v. 8dpa", ylim=c(-2,2)) |
| 158 | +plotMA(res13, main="1dpa v. 3dpa", ylim=c(-2,2)) |
| 159 | +plotMA(res18, main="1dpa v. 8dpa", ylim=c(-2,2)) |
| 160 | +plotMA(res38, main="3dpa v. 8dpa", ylim=c(-2,2)) |
| 161 | +sink("C:/users/eli/desktop/DESeq2_summaries.txt") |
| 162 | +summary(res01) |
| 163 | +summary(res03) |
| 164 | +summary(res08) |
| 165 | +summary(res13) |
| 166 | +summary(res18) |
| 167 | +summary(res38) |
| 168 | +sink() |
| 169 | + |
| 170 | +#Export GLM results |
| 171 | +write.table(as.data.frame(resOrdered01), file="C:/users/eli/desktop/DEseq2_0-1dpa.txt", quote=FALSE, sep='\t') |
| 172 | +write.table(as.data.frame(resOrdered03), file="C:/users/eli/desktop/DEseq2_0-3dpa.txt", quote=FALSE, sep='\t') |
| 173 | +write.table(as.data.frame(resOrdered08), file="C:/users/eli/desktop/DEseq2_0-8dpa.txt", quote=FALSE, sep='\t') |
| 174 | +write.table(as.data.frame(resOrdered13), file="C:/users/eli/desktop/DEseq2_1-3dpa.txt", quote=FALSE, sep='\t') |
| 175 | +write.table(as.data.frame(resOrdered18), file="C:/users/eli/desktop/DEseq2_1-8dpa.txt", quote=FALSE, sep='\t') |
| 176 | +write.table(as.data.frame(resOrdered38), file="C:/users/eli/desktop/DEseq2_3-8dpa.txt", quote=FALSE, sep='\t') |
| 177 | + |
| 178 | +#Export significant DE genes only |
| 179 | +write.table(as.data.frame(resSig01), file="C:/users/eli/desktop/DEseq2_0-1dpaSig.txt", quote=FALSE, sep='\t') |
| 180 | +write.table(as.data.frame(resSig03), file="C:/users/eli/desktop/DEseq2_0-3dpaSig.txt", quote=FALSE, sep='\t') |
| 181 | +write.table(as.data.frame(resSig08), file="C:/users/eli/desktop/DEseq2_0-8dpaSig.txt", quote=FALSE, sep='\t') |
| 182 | +write.table(as.data.frame(resSig13), file="C:/users/eli/desktop/DEseq2_1-3dpaSig.txt", quote=FALSE, sep='\t') |
| 183 | +write.table(as.data.frame(resSig18), file="C:/users/eli/desktop/DEseq2_1-8dpaSig.txt", quote=FALSE, sep='\t') |
| 184 | +write.table(as.data.frame(resSig38), file="C:/users/eli/desktop/DEseq2_3-8dpaSig.txt", quote=FALSE, sep='\t') |
0 commit comments