forked from broadgsa/gatk
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'unstable/master'
- Loading branch information
Showing
352 changed files
with
17,139 additions
and
10,080 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,7 @@ if ( onCMDLine ) { | |
inputFileName = args[1] | ||
outputPDF = args[2] | ||
} else { | ||
inputFileName = "~/Desktop/broadLocal/GATK/unstable/wgs.jobreport.txt" | ||
inputFileName = "Q-26618@gsa4.jobreport.txt" | ||
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/[email protected]" | ||
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt" | ||
outputPDF = NA | ||
|
@@ -129,9 +129,11 @@ plotGroup <- function(groupTable) { | |
# as above, but averaging over all iterations | ||
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration") | ||
if ( dim(sub)[1] > 1 ) { | ||
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) | ||
textplot(as.data.frame(sum), show.rownames=F) | ||
title(paste("Job summary for", name, "averaging over all iterations"), cex=3) | ||
try({ # need a try here because we will fail to reduce when there's just a single iteration | ||
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) | ||
textplot(as.data.frame(sum), show.rownames=F) | ||
title(paste("Job summary for", name, "averaging over all iterations"), cex=3) | ||
}, silent=T) | ||
} | ||
} | ||
|
||
|
@@ -149,6 +151,35 @@ convertUnits <- function(gatkReportData) { | |
lapply(gatkReportData, convertGroup) | ||
} | ||
|
||
# | ||
# Plots runtimes by analysis name and exechosts | ||
# | ||
# Useful to understand the performance of analysis jobs by hosts, | ||
# and to debug problematic nodes | ||
# | ||
plotTimeByHost <- function(gatkReportData) { | ||
fields = c("analysisName", "exechosts", "runtime") | ||
|
||
runtimes = data.frame() | ||
for ( report in gatkReportData ) { | ||
runtimes = rbind(runtimes, report[,fields]) | ||
} | ||
|
||
plotMe <- function(name, vis) { | ||
p = ggplot(data=runtimes, aes(x=exechosts, y=runtime, group=exechosts, color=exechosts)) | ||
p = p + facet_grid(analysisName ~ ., scale="free") | ||
p = p + vis() | ||
p = p + xlab("Job execution host") | ||
p = p + opts(title = paste(name, "of job runtimes by analysis name and execution host")) | ||
p = p + ylab(paste("Distribution of runtimes", RUNTIME_UNITS)) | ||
p = p + opts(axis.text.x=theme_text(angle=45, hjust=1, vjust=1)) | ||
print(p) | ||
} | ||
|
||
plotMe("Boxplot", geom_boxplot) | ||
plotMe("Jittered points", geom_jitter) | ||
} | ||
|
||
|
||
# read the table | ||
gatkReportData <- gsa.read.gatkreport(inputFileName) | ||
|
@@ -162,7 +193,9 @@ if ( ! is.na(outputPDF) ) { | |
plotJobsGantt(gatkReportData, T, F) | ||
plotJobsGantt(gatkReportData, F, F) | ||
plotProgressByTime(gatkReportData) | ||
plotTimeByHost(gatkReportData) | ||
for ( group in gatkReportData ) { | ||
print(group) | ||
plotGroup(group) | ||
} | ||
|
||
|
247 changes: 247 additions & 0 deletions
247
public/java/src/net/sf/picard/sam/MergingSamRecordIterator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,247 @@ | ||
/* | ||
* Copyright (c) 2011, The Broad Institute | ||
* | ||
* Permission is hereby granted, free of charge, to any person | ||
* obtaining a copy of this software and associated documentation | ||
* files (the "Software"), to deal in the Software without | ||
* restriction, including without limitation the rights to use, | ||
* copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
* copies of the Software, and to permit persons to whom the | ||
* Software is furnished to do so, subject to the following | ||
* conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be | ||
* included in all copies or substantial portions of the Software. | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | ||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | ||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
* OTHER DEALINGS IN THE SOFTWARE. | ||
*/ | ||
package net.sf.picard.sam; | ||
|
||
import net.sf.picard.PicardException; | ||
|
||
import java.util.*; | ||
import java.lang.reflect.Constructor; | ||
|
||
import net.sf.samtools.*; | ||
import net.sf.samtools.util.CloseableIterator; | ||
|
||
/** | ||
* Provides an iterator interface for merging multiple underlying iterators into a single | ||
* iterable stream. The underlying iterators/files must all have the same sort order unless | ||
* the requested output format is unsorted, in which case any combination is valid. | ||
*/ | ||
public class MergingSamRecordIterator implements CloseableIterator<SAMRecord> { | ||
private final PriorityQueue<ComparableSamRecordIterator> pq; | ||
private final SamFileHeaderMerger samHeaderMerger; | ||
private final Collection<SAMFileReader> readers; | ||
private final SAMFileHeader.SortOrder sortOrder; | ||
private final SAMRecordComparator comparator; | ||
|
||
private boolean initialized = false; | ||
private boolean iterationStarted = false; | ||
|
||
/** | ||
* Constructs a new merging iterator with the same set of readers and sort order as | ||
* provided by the header merger parameter. | ||
* @param headerMerger The merged header and contents of readers. | ||
* @param forcePresorted True to ensure that the iterator checks the headers of the readers for appropriate sort order. | ||
* @deprecated replaced by (SamFileHeaderMerger, Collection<SAMFileReader>, boolean) | ||
*/ | ||
public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final boolean forcePresorted) { | ||
this(headerMerger, headerMerger.getReaders(), forcePresorted); | ||
} | ||
|
||
/** | ||
* Constructs a new merging iterator with the same set of readers and sort order as | ||
* provided by the header merger parameter. | ||
* @param headerMerger The merged header and contents of readers. | ||
* @param assumeSorted false ensures that the iterator checks the headers of the readers for appropriate sort order. | ||
*/ | ||
public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, Collection<SAMFileReader> readers, final boolean assumeSorted) { | ||
this.samHeaderMerger = headerMerger; | ||
this.sortOrder = headerMerger.getMergedHeader().getSortOrder(); | ||
this.comparator = getComparator(); | ||
this.readers = readers; | ||
|
||
this.pq = new PriorityQueue<ComparableSamRecordIterator>(readers.size()); | ||
|
||
for (final SAMFileReader reader : readers) { | ||
if (!assumeSorted && this.sortOrder != SAMFileHeader.SortOrder.unsorted && | ||
reader.getFileHeader().getSortOrder() != this.sortOrder){ | ||
throw new PicardException("Files are not compatible with sort order"); | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* Add a given SAM file iterator to the merging iterator. Use this to restrict the merged iteration to a given genomic interval, | ||
* rather than iterating over every read in the backing file or stream. | ||
* @param reader Reader to add to the merging iterator. | ||
* @param iterator Iterator traversing over reader contents. | ||
*/ | ||
public void addIterator(final SAMFileReader reader, final CloseableIterator<SAMRecord> iterator) { | ||
if(iterationStarted) | ||
throw new PicardException("Cannot add another iterator; iteration has already begun"); | ||
if(!samHeaderMerger.containsHeader(reader.getFileHeader())) | ||
throw new PicardException("All iterators to be merged must be accounted for in the SAM header merger"); | ||
final ComparableSamRecordIterator comparableIterator = new ComparableSamRecordIterator(reader,iterator,comparator); | ||
addIfNotEmpty(comparableIterator); | ||
initialized = true; | ||
} | ||
|
||
private void startIterationIfRequired() { | ||
if(initialized) | ||
return; | ||
for(SAMFileReader reader: readers) | ||
addIterator(reader,reader.iterator()); | ||
iterationStarted = true; | ||
} | ||
|
||
/** | ||
* Close down all open iterators. | ||
*/ | ||
public void close() { | ||
// Iterators not in the priority queue have already been closed; only close down the iterators that are still in the priority queue. | ||
for(CloseableIterator<SAMRecord> iterator: pq) | ||
iterator.close(); | ||
} | ||
|
||
/** Returns true if any of the underlying iterators has more records, otherwise false. */ | ||
public boolean hasNext() { | ||
startIterationIfRequired(); | ||
return !this.pq.isEmpty(); | ||
} | ||
|
||
/** Returns the next record from the top most iterator during merging. */ | ||
public SAMRecord next() { | ||
startIterationIfRequired(); | ||
|
||
final ComparableSamRecordIterator iterator = this.pq.poll(); | ||
final SAMRecord record = iterator.next(); | ||
addIfNotEmpty(iterator); | ||
record.setHeader(this.samHeaderMerger.getMergedHeader()); | ||
|
||
// Fix the read group if needs be | ||
if (this.samHeaderMerger.hasReadGroupCollisions()) { | ||
final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.READ_GROUP_ID); | ||
if (oldGroupId != null ) { | ||
final String newGroupId = this.samHeaderMerger.getReadGroupId(iterator.getReader().getFileHeader(),oldGroupId); | ||
record.setAttribute(ReservedTagConstants.READ_GROUP_ID, newGroupId); | ||
} | ||
} | ||
|
||
// Fix the program group if needs be | ||
if (this.samHeaderMerger.hasProgramGroupCollisions()) { | ||
final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.PROGRAM_GROUP_ID); | ||
if (oldGroupId != null ) { | ||
final String newGroupId = this.samHeaderMerger.getProgramGroupId(iterator.getReader().getFileHeader(),oldGroupId); | ||
record.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID, newGroupId); | ||
} | ||
} | ||
|
||
// Fix up the sequence indexes if needs be | ||
if (this.samHeaderMerger.hasMergedSequenceDictionary()) { | ||
if (record.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { | ||
record.setReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iterator.getReader().getFileHeader(),record.getReferenceIndex())); | ||
} | ||
|
||
if (record.getReadPairedFlag() && record.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { | ||
record.setMateReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iterator.getReader().getFileHeader(),record.getMateReferenceIndex())); | ||
} | ||
} | ||
|
||
return record; | ||
} | ||
|
||
/** | ||
* Adds iterator to priority queue. If the iterator has more records it is added | ||
* otherwise it is closed and not added. | ||
*/ | ||
private void addIfNotEmpty(final ComparableSamRecordIterator iterator) { | ||
if (iterator.hasNext()) { | ||
pq.offer(iterator); | ||
} | ||
else { | ||
iterator.close(); | ||
} | ||
} | ||
|
||
/** Unsupported operation. */ | ||
public void remove() { | ||
throw new UnsupportedOperationException("MergingSAMRecorderIterator.remove()"); | ||
} | ||
|
||
/** | ||
* Get the right comparator for a given sort order (coordinate, alphabetic). In the | ||
* case of "unsorted" it will return a comparator that gives an arbitrary but reflexive | ||
* ordering. | ||
*/ | ||
private SAMRecordComparator getComparator() { | ||
// For unsorted build a fake comparator that compares based on object ID | ||
if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) { | ||
return new SAMRecordComparator() { | ||
public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) { | ||
return System.identityHashCode(lhs) - System.identityHashCode(rhs); | ||
} | ||
|
||
public int compare(final SAMRecord lhs, final SAMRecord rhs) { | ||
return fileOrderCompare(lhs, rhs); | ||
} | ||
}; | ||
} | ||
if (samHeaderMerger.hasMergedSequenceDictionary() && sortOrder.equals(SAMFileHeader.SortOrder.coordinate)) { | ||
return new MergedSequenceDictionaryCoordinateOrderComparator(); | ||
} | ||
|
||
// Otherwise try and figure out what kind of comparator to return and build it | ||
return this.sortOrder.getComparatorInstance(); | ||
} | ||
|
||
/** Returns the merged header that the merging iterator is working from. */ | ||
public SAMFileHeader getMergedHeader() { | ||
return this.samHeaderMerger.getMergedHeader(); | ||
} | ||
|
||
/** | ||
* Ugh. Basically does a regular coordinate compare, but looks up the sequence indices in the merged | ||
* sequence dictionary. I hate the fact that this extends SAMRecordCoordinateComparator, but it avoids | ||
* more copy & paste. | ||
*/ | ||
private class MergedSequenceDictionaryCoordinateOrderComparator extends SAMRecordCoordinateComparator { | ||
|
||
public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) { | ||
final int referenceIndex1 = getReferenceIndex(samRecord1); | ||
final int referenceIndex2 = getReferenceIndex(samRecord2); | ||
if (referenceIndex1 != referenceIndex2) { | ||
if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { | ||
return 1; | ||
} else if (referenceIndex2 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { | ||
return -1; | ||
} else { | ||
return referenceIndex1 - referenceIndex2; | ||
} | ||
} | ||
if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { | ||
// Both are unmapped. | ||
return 0; | ||
} | ||
return samRecord1.getAlignmentStart() - samRecord2.getAlignmentStart(); | ||
} | ||
|
||
private int getReferenceIndex(final SAMRecord samRecord) { | ||
if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { | ||
return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getReferenceIndex()); | ||
} | ||
if (samRecord.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { | ||
return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getMateReferenceIndex()); | ||
} | ||
return SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX; | ||
} | ||
} | ||
} |
Oops, something went wrong.