Created the orphan branch 'public' and deleted experimental code not …

…used
leekgroup · Nov 13, 2014 · 2c11a2c · 2c11a2c
commit 2c11a2c
Show file tree

Hide file tree

Showing 13 changed files with 485 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1,5 @@
+# LIBD n36 project
+
+This repository is for hosting the code for the LIDB n36 project being undertaken by Andrew Jaffe's lab.
+
+If you have any questions about the files in this directory contact AE Jaffe <[email protected]>.
diff --git a/derAnalysis/der2Analysis.sh b/derAnalysis/der2Analysis.sh
@@ -0,0 +1,53 @@
+#!/bin/sh
+
+## Usage
+# sh der2Analysis.sh run1-v0.0.46
+
+# Directories
+MAINDIR=/dcs01/lieber/ajaffe/Brain/derRuns/libd_n36
+WDIR=${MAINDIR}/derAnalysis
+DATADIR=${MAINDIR}/derCoverageInfo
+
+# Define variables
+SHORT='der2A-n36'
+PREFIX=$1
+CORES=8
+
+# Construct shell files
+for chrnum in 22 21 Y 20 19 18 17 16 15 14 13 12 11 10 9 8 X 7 6 5 4 3 2 1
+#for chrnum in Y
+do
+	echo "Creating script for chromosome ${chrnum}"
+	chr="chr${chrnum}"
+	outdir="${PREFIX}/${chr}"
+	sname="${SHORT}.${PREFIX}.${chr}"
+	cat > ${WDIR}/.${sname}.sh <<EOF
+#!/bin/bash	
+#$ -cwd
+#$ -m e
+#$ -l mem_free=125G,h_vmem=25G,h_fsize=10G
+#$ -N ${sname}
+#$ -pe local ${CORES}
+
+echo "**** Job starts ****"
+date
+
+# Create output directory 
+mkdir -p ${WDIR}/${outdir}
+# Make logs directory
+mkdir -p ${WDIR}/${outdir}/logs
+
+# run derfinder2-analysis.R
+cd ${WDIR}/${PREFIX}/
+module load R/3.1.x
+Rscript ${WDIR}/derfinder2-analysis.R -d "${DATADIR}/${chr}CovInfo.Rdata" -c "${chrnum}" -m ${CORES} -v TRUE
+
+# Move log files into the logs directory
+mv ${WDIR}/${sname}.* ${WDIR}/${outdir}/logs/
+
+echo "**** Job ends ****"
+date
+EOF
+	call="qsub .${sname}.sh"
+	$call
+done
diff --git a/derAnalysis/der2Merge.sh b/derAnalysis/der2Merge.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+## Usage
+# sh der2Merge.sh run1-v0.0.46
+
+# Directories
+MAINDIR=/dcs01/lieber/ajaffe/Brain/derRuns/libd_n36
+WDIR=${MAINDIR}/derAnalysis
+
+# Define variables
+SHORT='der2M-n36'
+PREFIX=$1
+
+# Construct shell files
+outdir="${PREFIX}"
+sname="${SHORT}.${PREFIX}"
+echo "Creating script ${sname}"
+cat > ${WDIR}/.${sname}.sh <<EOF
+#!/bin/bash	
+echo "**** Job starts ****"
+date
+
+mkdir -p ${WDIR}/${outdir}/logs
+
+# merge results
+Rscript-3.1 -e "library(derfinder); load('/dcs01/lieber/ajaffe/Brain/derRuns/derfinderExample/derGenomicState/GenomicState.Hsapiens.UCSC.hg19.knownGene.Rdata'); mergeResults(prefix='${PREFIX}', genomicState=GenomicState.Hsapiens.UCSC.hg19.knownGene)"
+
+# Move log files into the logs directory
+mv ${WDIR}/${sname}.* ${WDIR}/${outdir}/logs/
+
+echo "**** Job ends ****"
+date
+EOF
+call="qsub -cwd -l mem_free=50G,h_vmem=100G,h_fsize=10G -N ${sname} -m e .${sname}.sh"
+echo $call
+$call
diff --git a/derAnalysis/der2Models.sh b/derAnalysis/der2Models.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+## Usage
+# sh der2Models.sh fetalVsInfant-v0.0.46 fetal infant
+
+# Directories
+MAINDIR=/dcs01/lieber/ajaffe/Brain/derRuns/libd_n36
+WDIR=${MAINDIR}/derAnalysis
+
+# Define variables
+SHORT='der2Mod-n36'
+PREFIX=$1
+GROUP1=$2
+GROUP2=$3
+
+# Construct shell files
+outdir="${PREFIX}"
+sname="${SHORT}.${PREFIX}"
+echo "Creating script ${sname}"
+cat > ${WDIR}/.${sname}.sh <<EOF
+#!/bin/bash	
+echo "**** Job starts ****"
+date
+
+mkdir -p ${WDIR}/${outdir}/logs
+
+# merge results
+cd ${WDIR}/${outdir}/
+Rscript-3.1 ${WDIR}/derfinder2-models.R -r "$GROUP1" -c "$GROUP2"
+
+# Move log files into the logs directory
+mv ${WDIR}/${sname}.* ${WDIR}/${outdir}/logs/
+
+echo "**** Job ends ****"
+date
+EOF
+call="qsub -cwd -l mem_free=50G,h_vmem=100G,h_fsize=10G -N ${sname} -m e .${sname}.sh"
+echo $call
+$call
diff --git a/derAnalysis/der2Report.sh b/derAnalysis/der2Report.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+## Usage
+# sh der2Report.sh run1-v0.0.46
+
+# Directories
+MAINDIR=/dcs01/lieber/ajaffe/Brain/derRuns/libd_n36
+WDIR=${MAINDIR}/derAnalysis
+
+# Define variables
+SHORT='der2R-n36'
+PREFIX=$1
+
+# Construct shell files
+outdir="${PREFIX}"
+sname="${SHORT}.${PREFIX}"
+echo "Creating script ${sname}"
+cat > ${WDIR}/.${sname}.sh <<EOF
+#!/bin/bash	
+echo "**** Job starts ****"
+date
+
+mkdir -p ${WDIR}/${outdir}/logs
+
+# merge results
+Rscript-3.1 -e "library(derfinderReport); load('${MAINDIR}/derCoverageInfo/fullCov.Rdata'); generateReport(prefix='${PREFIX}', browse=FALSE, nBestClusters=20, fullCov=fullCov, device='CairoPNG')"
+
+# Move log files into the logs directory
+mv ${WDIR}/${sname}.* ${WDIR}/${outdir}/logs/
+
+echo "**** Job ends ****"
+date
+EOF
+call="qsub -cwd -l mem_free=50G,h_vmem=100G,h_fsize=10G -N ${sname} -m e .${sname}.sh"
+echo $call
+$call
diff --git a/derAnalysis/derfinder2-analysis.R b/derAnalysis/derfinder2-analysis.R
@@ -0,0 +1,82 @@
+## Run derfinder's analysis steps with timing info
+
+## Load libraries
+library("getopt")
+
+## Available at https://github.com/lcolladotor/derfinder
+library("derfinder")
+
+## Specify parameters
+spec <- matrix(c(
+	'DFfile', 'd', 1, "character", "path to the .Rdata file with the results from loadCoverage()",
+	'chr', 'c', 1, "character", "Chromosome under analysis. Use X instead of chrX.",
+	'mcores', 'm', 1, "integer", "Number of cores",
+	'verbose' , 'v', 2, "logical", "Print status updates",
+	'help' , 'h', 0, "logical", "Display help"
+), byrow=TRUE, ncol=5)
+opt <- getopt(spec)
+
+## Testing the script
+test <- FALSE
+if(test) {
+	## Speficy it using an interactive R session and testing
+	test <- TRUE
+}
+
+## Test values
+if(test){
+	opt <- NULL
+	opt$DFfile <- "/dcs01/lieber/ajaffe/Brain/derRuns/libd_n36/derCoverageInfo/chr21CovInfo.Rdata"
+	opt$chr <- "21"
+	opt$mcores <- 1
+	opt$verbose <- NULL
+}
+
+## if help was asked for print a friendly message
+## and exit with a non-zero error code
+if (!is.null(opt$help)) {
+	cat(getopt(spec, usage=TRUE))
+	q(status=1)
+}
+
+## Default value for verbose = TRUE
+if (is.null(opt$verbose)) opt$verbose <- TRUE
+
+if(opt$verbose) message("Loading Rdata file with the output from loadCoverage()")
+load(opt$DFfile)
+
+## Make it easy to use the name later. Here I'm assuming the names were generated using output='auto' in loadCoverage()
+eval(parse(text=paste0("data <- ", "chr", opt$chr, "CovInfo")))
+eval(parse(text=paste0("rm(chr", opt$chr, "CovInfo)")))
+
+## Just for testing purposes
+if(test) {
+	tmp <- data
+	tmp$coverage <- tmp$coverage[1:1e6, ]
+	library("IRanges")
+	tmp$position[which(tmp$pos)[1e6 + 1]:length(tmp$pos)] <- FALSE
+	data <- tmp
+}
+
+## Load the models
+load("models.Rdata")
+
+## Load group information
+load("groupInfo.Rdata")
+
+## Load colsubsets used
+# load("colsubset.Rdata")
+
+## Run the analysis
+# analyzeChr(chr=opt$chr, coverageInfo=data, models=models, colsubset=colsubset, cutoffFstat=1e-04, cutoffType="theoretical", nPermute=100, seeds=seq_len(100) * 20140408, maxClusterGap=3000, groupInfo=groupInfo, subject="hg19", mc.cores=opt$mcores, lowMemDir=paste0("chr", opt$chr, "/chunksDir"), verbose=opt$verbose)
+
+## n36 analysis
+# analyzeChr(chr=opt$chr, coverageInfo=data, models=models, cutoffFstat=1e-08, cutoffType="theoretical", nPermute=1000, seeds=seq_len(1000), maxClusterGap=3000, groupInfo=groupInfo, subject="hg19", mc.cores=opt$mcores, lowMemDir=paste0("chr", opt$chr, "/chunksDir"), verbose=opt$verbose)
+## without lowMemDir
+analyzeChr(chr=opt$chr, coverageInfo=data, models=models, cutoffFstat=1e-08, cutoffType="theoretical", nPermute=1000, seeds=seq_len(1000), maxClusterGap=3000, groupInfo=groupInfo, subject="hg19", mc.cores=opt$mcores, verbose=opt$verbose)
+
+## Done
+if(opt$verbose) {
+	print(proc.time())
+	print(sessionInfo(), locale=FALSE)
+}
diff --git a/derAnalysis/derfinder2-models.R b/derAnalysis/derfinder2-models.R
@@ -0,0 +1,98 @@
+## Calculate the library adjustments and build the models
+
+library("getopt")
+
+## Available at https://github.com/lcolladotor/derfinder
+library("derfinder")
+
+## Specify parameters
+spec <- matrix(c(
+	'reference', 'r', 1, "character", "group 1 to use in the comparison",
+	'comparison', 'c', 1, "character", "group 2 to use in the comparison",
+	'help' , 'h', 0, "logical", "Display help"
+), byrow=TRUE, ncol=5)
+opt <- getopt(spec)
+
+## Testing the script
+test <- FALSE
+if(test) {
+	## Speficy it using an interactive R session and testing
+	test <- TRUE
+}
+
+## Test values
+if(test){
+	opt <- NULL
+	opt$group1 <- "(-1,0]"
+	opt$group2 <- "(0,1]"
+}
+
+## if help was asked for print a friendly message
+## and exit with a non-zero error code
+if (!is.null(opt$help)) {
+	cat(getopt(spec, usage=TRUE))
+	q(status=1)
+}
+
+
+## Load the coverage information
+load("../../derCoverageInfo/filteredCov.Rdata")
+
+## Identify the sampledirs
+dirs <- colnames(filteredCov[[1]]$coverage)
+
+##### Note that this whole section is for defining the models using makeModels()
+##### You can alternatively define them manually and/or use packages such as splines if needed.
+
+## Define the groups
+info <- read.table( "/home/epi/ajaffe/Lieber/Projects/RNAseq/n36/samples_for_rnaseq.txt", header=TRUE)
+info <- info[complete.cases(info),]
+## Match dirs with actual rows in the info table
+info$dir <- paste0("R", info$RNANum)
+match <- sapply(dirs, function(x) { which(info$dir == x)})
+info <- info[match, ]
+## Set the first group as the reference
+twogroups <- c(opt$reference, opt$comparison)
+cases <- c("(-1,0]", "(0,1]", "(1,10]", "(10,20]", "(20,50]", "(50,100]")
+names(cases) <- c("fetal", "infant", "child", "teen", "adult", "elderly")
+levels <- cases[match(twogroups, names(cases))]
+
+group <- factor(info$ageGroup, levels=levels)
+
+## Define colsubset
+colsubset <- which(!is.na(group))
+save(colsubset, file="colsubset.Rdata")
+
+## Update the group labels
+group <- group[!is.na(group)]
+
+## Determine sample size adjustments
+if(file.exists("sampleDepths.Rdata")) {
+	load("sampleDepths.Rdata")
+} else {
+	if(file.exists("collapsedFull.Rdata")) {
+		load("collapsedFull.Rdata")
+	} else {
+		## Load the un-filtered coverage information for getting the sample size adjustments
+		load("../../derCoverageInfo/fullCov.Rdata")
+
+		## Collapse
+		collapsedFull <- collapseFullCoverage(fullCov, colsubset=colsubset, save=TRUE)
+	}
+
+	## Get the adjustments
+	sampleDepths <- sampleDepth(collapsedFull=collapsedFull, probs = 1, nonzero = TRUE, scalefac = 32, center=FALSE, verbose=TRUE)
+	save(sampleDepths, file="sampleDepths.Rdata")
+}
+
+## Build the models
+models <- makeModels(sampleDepths=sampleDepths, testvars=group, adjustvars=NULL, testIntercept=FALSE)
+save(models, file="models.Rdata")
+
+## Save information used for analyzeChr(groupInfo)
+groupInfo <- group
+save(groupInfo, file="groupInfo.Rdata")
+
+## Done :-)
+proc.time()
+sessionInfo()
diff --git a/derAnalysis/fix-colnames-fullRegions-2014-03-29.R b/derAnalysis/fix-colnames-fullRegions-2014-03-29.R
@@ -0,0 +1,17 @@
+## Fix some names before running the report
+
+library("GenomicRanges")
+load("fullRegions.Rdata")
+load("groupInfo.Rdata")
+
+colnames(values(fullRegions))
+
+i <- grep("mean\\.", colnames(values(fullRegions)))
+colnames(values(fullRegions))[i] <- paste("mean", levels(groupInfo), sep="")
+
+i <- grep("log2FoldChange", colnames(values(fullRegions)))
+colnames(values(fullRegions))[i] <- paste("log2FoldChange", levels(groupInfo)[2:length(unique(groupInfo))], "vs", levels(groupInfo)[1], sep="")
+
+colnames(values(fullRegions))
+
+save(fullRegions, file="fullRegions.Rdata")