Skip to content

Commit

Permalink
Updated code to work with derfinder 1.0.8 (current release), build pr…
Browse files Browse the repository at this point in the history
…oper models for run2-v0.0.42 (initial file had models for a different analysis), link to BioC website instead of GitHub repo, and added a short description of files in the README
  • Loading branch information
lcolladotor committed Nov 13, 2014
1 parent 2c11a2c commit 812b2e5
Show file tree
Hide file tree
Showing 13 changed files with 79 additions and 91 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# LIBD n36 project

This repository is for hosting the code for the LIDB n36 project being undertaken by Andrew Jaffe's lab.
This repository is for hosting the code for the LIDB n36 project lead by AE Jaffe.

If you have any questions about the files in this directory contact AE Jaffe <[email protected]>.

Scripts use [derfinder](http://www.bioconductor.org/packages/release/bioc/html/derfinder.html) and are organized in the following way:

* [derCoverageInfo](derCoverageInfo/): contains scripts for processing data from BAM files, filtering and creating Rdata files.
* [fullCoverage](fullCoverage/): similar as _derCoverageInfo_ but without filtering data.
* [derAnalysis](derAnalysis/): has scripts for creating the models, running the derfinder analysis one chromosome at a time, merging the results, and creating a html report.
2 changes: 1 addition & 1 deletion derAnalysis/der2Analysis.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/sh

## Usage
# sh der2Analysis.sh run1-v0.0.46
# sh der2Analysis.sh run2-v0.0.42

# Directories
MAINDIR=/dcs01/lieber/ajaffe/Brain/derRuns/libd_n36
Expand Down
15 changes: 11 additions & 4 deletions derAnalysis/der2Merge.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#!/bin/sh

## Usage
# sh der2Merge.sh run1-v0.0.46
# sh der2Merge.sh run2-v0.0.42

## derfinder available at http://www.bioconductor.org/packages/release/bioc/html/derfinder.html

# Directories
MAINDIR=/dcs01/lieber/ajaffe/Brain/derRuns/libd_n36
Expand All @@ -16,21 +18,26 @@ outdir="${PREFIX}"
sname="${SHORT}.${PREFIX}"
echo "Creating script ${sname}"
cat > ${WDIR}/.${sname}.sh <<EOF
#!/bin/bash
#!/bin/bash
#$ -cwd
#$ -m e
#$ -l mem_free=50G,h_vmem=100G,h_fsize=10G
#$ -N ${sname}
echo "**** Job starts ****"
date
mkdir -p ${WDIR}/${outdir}/logs
# merge results
Rscript-3.1 -e "library(derfinder); load('/dcs01/lieber/ajaffe/Brain/derRuns/derfinderExample/derGenomicState/GenomicState.Hsapiens.UCSC.hg19.knownGene.Rdata'); mergeResults(prefix='${PREFIX}', genomicState=GenomicState.Hsapiens.UCSC.hg19.knownGene)"
module load R/3.1.x
Rscript -e "library(derfinder); load('/dcs01/lieber/ajaffe/Brain/derRuns/derfinderExample/derGenomicState/GenomicState.Hsapiens.UCSC.hg19.knownGene.Rdata'); mergeResults(prefix='${PREFIX}', genomicState=GenomicState.Hsapiens.UCSC.hg19.knownGene$fullGenome)"
# Move log files into the logs directory
mv ${WDIR}/${sname}.* ${WDIR}/${outdir}/logs/
echo "**** Job ends ****"
date
EOF
call="qsub -cwd -l mem_free=50G,h_vmem=100G,h_fsize=10G -N ${sname} -m e .${sname}.sh"
call="qsub .${sname}.sh"
echo $call
$call
17 changes: 10 additions & 7 deletions derAnalysis/der2Models.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/sh

## Usage
# sh der2Models.sh fetalVsInfant-v0.0.46 fetal infant
# sh der2Models.sh run2-v0.0.42

# Directories
MAINDIR=/dcs01/lieber/ajaffe/Brain/derRuns/libd_n36
Expand All @@ -10,30 +10,33 @@ WDIR=${MAINDIR}/derAnalysis
# Define variables
SHORT='der2Mod-n36'
PREFIX=$1
GROUP1=$2
GROUP2=$3

# Construct shell files
outdir="${PREFIX}"
sname="${SHORT}.${PREFIX}"
echo "Creating script ${sname}"
cat > ${WDIR}/.${sname}.sh <<EOF
#!/bin/bash
#!/bin/bash
#$ -cwd
#$ -m e
#$ -l mem_free=50G,h_vmem=100G,h_fsize=10G
#$ -N ${sname}
echo "**** Job starts ****"
date
mkdir -p ${WDIR}/${outdir}/logs
# merge results
cd ${WDIR}/${outdir}/
Rscript-3.1 ${WDIR}/derfinder2-models.R -r "$GROUP1" -c "$GROUP2"
module load R/3.1.x
Rscript ${WDIR}/derfinder2-models.R
# Move log files into the logs directory
mv ${WDIR}/${sname}.* ${WDIR}/${outdir}/logs/
echo "**** Job ends ****"
date
EOF
call="qsub -cwd -l mem_free=50G,h_vmem=100G,h_fsize=10G -N ${sname} -m e .${sname}.sh"
call="qsub .${sname}.sh"
echo $call
$call
$call
15 changes: 11 additions & 4 deletions derAnalysis/der2Report.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#!/bin/sh

## Usage
# sh der2Report.sh run1-v0.0.46
# sh der2Report.sh run2-v0.0.42

## regionReport available at http://www.bioconductor.org/packages/release/bioc/html/regionReport.html

# Directories
MAINDIR=/dcs01/lieber/ajaffe/Brain/derRuns/libd_n36
Expand All @@ -16,21 +18,26 @@ outdir="${PREFIX}"
sname="${SHORT}.${PREFIX}"
echo "Creating script ${sname}"
cat > ${WDIR}/.${sname}.sh <<EOF
#!/bin/bash
#!/bin/bash
#$ -cwd
#$ -m e
#$ -l mem_free=50G,h_vmem=100G,h_fsize=10G
#$ -N ${sname}
echo "**** Job starts ****"
date
mkdir -p ${WDIR}/${outdir}/logs
# merge results
Rscript-3.1 -e "library(derfinderReport); load('${MAINDIR}/derCoverageInfo/fullCov.Rdata'); generateReport(prefix='${PREFIX}', browse=FALSE, nBestClusters=20, fullCov=fullCov, device='CairoPNG')"
module load R/3.1.x
Rscript -e "library(regionReport); load('${MAINDIR}/derCoverageInfo/fullCov.Rdata'); derfinderReport(prefix='${PREFIX}', browse=FALSE, nBestClusters=20, fullCov=fullCov, device='CairoPNG')"
# Move log files into the logs directory
mv ${WDIR}/${sname}.* ${WDIR}/${outdir}/logs/
echo "**** Job ends ****"
date
EOF
call="qsub -cwd -l mem_free=50G,h_vmem=100G,h_fsize=10G -N ${sname} -m e .${sname}.sh"
call="qsub .${sname}.sh"
echo $call
$call
10 changes: 2 additions & 8 deletions derAnalysis/derfinder2-analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
## Load libraries
library("getopt")

## Available at https://github.com/lcolladotor/derfinder
## Available at http://www.bioconductor.org/packages/release/bioc/html/derfinder.html
library("derfinder")

## Specify parameters
Expand Down Expand Up @@ -64,16 +64,10 @@ load("models.Rdata")
## Load group information
load("groupInfo.Rdata")

## Load colsubsets used
# load("colsubset.Rdata")

## Run the analysis
# analyzeChr(chr=opt$chr, coverageInfo=data, models=models, colsubset=colsubset, cutoffFstat=1e-04, cutoffType="theoretical", nPermute=100, seeds=seq_len(100) * 20140408, maxClusterGap=3000, groupInfo=groupInfo, subject="hg19", mc.cores=opt$mcores, lowMemDir=paste0("chr", opt$chr, "/chunksDir"), verbose=opt$verbose)

## n36 analysis
# analyzeChr(chr=opt$chr, coverageInfo=data, models=models, cutoffFstat=1e-08, cutoffType="theoretical", nPermute=1000, seeds=seq_len(1000), maxClusterGap=3000, groupInfo=groupInfo, subject="hg19", mc.cores=opt$mcores, lowMemDir=paste0("chr", opt$chr, "/chunksDir"), verbose=opt$verbose)
## without lowMemDir
analyzeChr(chr=opt$chr, coverageInfo=data, models=models, cutoffFstat=1e-08, cutoffType="theoretical", nPermute=1000, seeds=seq_len(1000), maxClusterGap=3000, groupInfo=groupInfo, subject="hg19", mc.cores=opt$mcores, verbose=opt$verbose)
analyzeChr(chr=opt$chr, coverageInfo=data, models=models, cutoffFstat=1e-08, cutoffType="theoretical", nPermute=1000, seeds=seq_len(1000), maxClusterGap=3000, groupInfo=groupInfo, subject="hg19", mc.cores=opt$mcores, lowMemDir=file.path(tempdir(), paste0("chr", opt$chr), "chunksDir"), verbose=opt$verbose)

## Done
if(opt$verbose) {
Expand Down
52 changes: 4 additions & 48 deletions derAnalysis/derfinder2-models.R
Original file line number Diff line number Diff line change
@@ -1,40 +1,8 @@
## Calculate the library adjustments and build the models

library("getopt")

## Available at https://github.com/lcolladotor/derfinder
## Available at http://www.bioconductor.org/packages/release/bioc/html/derfinder.html
library("derfinder")

## Specify parameters
spec <- matrix(c(
'reference', 'r', 1, "character", "group 1 to use in the comparison",
'comparison', 'c', 1, "character", "group 2 to use in the comparison",
'help' , 'h', 0, "logical", "Display help"
), byrow=TRUE, ncol=5)
opt <- getopt(spec)

## Testing the script
test <- FALSE
if(test) {
## Speficy it using an interactive R session and testing
test <- TRUE
}

## Test values
if(test){
opt <- NULL
opt$group1 <- "(-1,0]"
opt$group2 <- "(0,1]"
}

## if help was asked for print a friendly message
## and exit with a non-zero error code
if (!is.null(opt$help)) {
cat(getopt(spec, usage=TRUE))
q(status=1)
}


## Load the coverage information
load("../../derCoverageInfo/filteredCov.Rdata")

Expand All @@ -51,20 +19,8 @@ info <- info[complete.cases(info),]
info$dir <- paste0("R", info$RNANum)
match <- sapply(dirs, function(x) { which(info$dir == x)})
info <- info[match, ]
## Set the first group as the reference
twogroups <- c(opt$reference, opt$comparison)
cases <- c("(-1,0]", "(0,1]", "(1,10]", "(10,20]", "(20,50]", "(50,100]")
names(cases) <- c("fetal", "infant", "child", "teen", "adult", "elderly")
levels <- cases[match(twogroups, names(cases))]

group <- factor(info$ageGroup, levels=levels)

## Define colsubset
colsubset <- which(!is.na(group))
save(colsubset, file="colsubset.Rdata")

## Update the group labels
group <- group[!is.na(group)]
## Set the control group as the reference
group <- relevel(info$ageGroup, "(0,1]")

## Determine sample size adjustments
if(file.exists("sampleDepths.Rdata")) {
Expand All @@ -77,7 +33,7 @@ if(file.exists("sampleDepths.Rdata")) {
load("../../derCoverageInfo/fullCov.Rdata")

## Collapse
collapsedFull <- collapseFullCoverage(fullCov, colsubset=colsubset, save=TRUE)
collapsedFull <- collapseFullCoverage(fullCov, save=TRUE)
}

## Get the adjustments
Expand Down
2 changes: 2 additions & 0 deletions derAnalysis/fix-colnames-fullRegions-2014-03-29.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ colnames(values(fullRegions))[i] <- paste("log2FoldChange", levels(groupInfo)[2:
colnames(values(fullRegions))

save(fullRegions, file="fullRegions.Rdata")

## Note: this might no longer be needed with the BioC version of derfinder, but we'll leave it here just in case.
15 changes: 10 additions & 5 deletions derCoverageInfo/derCoverageInfo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,36 @@ WDIR=${MAINDIR}/libd_n36/derCoverageInfo
DATADIR=${MAINDIR}/n36

# Define variables
SHORT='derCovInf'
SHORT='derCovInf-n36'

# Construct shell files
for chrnum in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X Y
do
chr="chr${chrnum}"
echo "Creating script for chromosome ${chr}"
cat > ${WDIR}/.${SHORT}.${chr}.sh <<EOF
#!/bin/bash
#!/bin/bash
#$ -cwd
#$ -m e
#$ -l mem_free=50G,h_vmem=100G,h_fsize=30G
#$ -N ${SHORT}.${chr}
echo "**** Job starts ****"
date
# Make logs directory
mkdir -p ${WDIR}/logs
# run makeDF()
R-devel -e "library(derfinder2); dirs <- makeBamList(datadir='${DATADIR}', samplepatt='bam$', bamterm=NULL); names(dirs) <- gsub('_accepted_hits.bam', '', names(dirs)); loadCoverage(dirs=dirs, chr='${chr}', cutoff=5, output='auto', verbose=TRUE); proc.time(); sessionInfo()"
# run loadCoverage()
module load R/3.1.x
R -e "library(derfinder); files <- rawFiles(datadir='${DATADIR}', samplepatt='bam$', bamterm=NULL); names(files) <- gsub('_accepted_hits.bam', '', names(files)); loadCoverage(files=files, chr='${chr}', cutoff=5, output='auto', verbose=TRUE); proc.time(); sessionInfo()"
## Move log files into the logs directory
mv ${SHORT}.${chr}.* ${WDIR}/logs/
echo "**** Job ends ****"
date
EOF
call="qsub -cwd -l jabba,mem_free=50G,h_vmem=100G,h_fsize=30G -N ${SHORT}.${chr} -m e ${WDIR}/.${SHORT}.${chr}.sh"
call="qsub ${WDIR}/.${SHORT}.${chr}.sh"
echo $call
$call
done
11 changes: 6 additions & 5 deletions derCoverageInfo/mergeCov.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
## Merge the coverage data
## Note that you would now use the fullCoverage(cutoff = 5) function which did not exist when this script was made.

library(IRanges)
library('IRanges')

## setup
chrs <- c(1:22, "X", "Y")
covList <- vector("list", 24)
names(covList) <- chrs
filteredCov <- vector("list", 24)
names(filteredCov) <- chrs

## Actual processing
for(chr in chrs) {
load(paste0("chr", chr, "CovInfo.Rdata"))
eval(parse(text=paste0("data <- ", "chr", chr, "CovInfo")))
covList[[chr]] <- data
filteredCov[[chr]] <- data
}

save(covList, file="covList.Rdata")
save(filteredCov, file="filteredCov.Rdata")
15 changes: 10 additions & 5 deletions fullCoverage/fullCoverage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,36 @@ WDIR=${MAINDIR}/libd_n36/fullCoverage
DATADIR=${MAINDIR}/n36

# Define variables
SHORT='fullCov'
SHORT='fullCov-n36'

# Construct shell files
for chrnum in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X Y
do
chr="chr${chrnum}"
echo "Creating script for chromosome ${chr}"
cat > ${WDIR}/.${SHORT}.${chr}.sh <<EOF
#!/bin/bash
#!/bin/bash
#$ -cwd
#$ -m e
#$ -l mem_free=50G,h_vmem=100G,h_fsize=30G
#$ -N ${SHORT}.${chr}
echo "**** Job starts ****"
date
# Make logs directory
mkdir -p ${WDIR}/logs
# run makeDF()
R-devel -e "library(derfinder2); dirs <- makeBamList(datadir='${DATADIR}', samplepatt='bam$', bamterm=NULL); names(dirs) <- gsub('_accepted_hits.bam', '', names(dirs)); loadCoverage(dirs=dirs, chr='${chr}', cutoff=NULL, output='auto', verbose=TRUE); proc.time(); sessionInfo()"
# run loadCoverage(cutoff = NULL)
module load R/3.1.x
R -e "library(derfinder); files <- rawFiles(datadir='${DATADIR}', samplepatt='bam$', bamterm=NULL); names(files) <- gsub('_accepted_hits.bam', '', names(files)); loadCoverage(files=files, chr='${chr}', cutoff=NULL, output='auto', verbose=TRUE); proc.time(); sessionInfo()"
## Move log files into the logs directory
mv ${SHORT}.${chr}.* ${WDIR}/logs/
echo "**** Job ends ****"
date
EOF
call="qsub -cwd -l jabba,mem_free=50G,h_vmem=100G,h_fsize=30G -N ${SHORT}.${chr} -m e ${WDIR}/.${SHORT}.${chr}.sh"
call="qsub ${WDIR}/.${SHORT}.${chr}.sh"
echo $call
$call
done
5 changes: 3 additions & 2 deletions fullCoverage/mergeFull.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## Merge the coverage data
## Note that you would now use the fullCoverage() function which did not exist when this script was made.

library(IRanges)
library('IRanges')

## setup
chrs <- c(1:22, "X", "Y")
Expand All @@ -14,4 +15,4 @@ for(chr in chrs) {
fullCov[[chr]] <- data$coverage
}

save(fullCov, file="fullCov.Rdata")
save(fullCov, file="../derCoverageInfo/fullCov.Rdata")
Loading

0 comments on commit 812b2e5

Please sign in to comment.