reduceSum )
// Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset.
double lodCutoff = 0.0;
for( final Tranche tranche : tranches ) {
- if( MathUtils.compareDoubles(tranche.ts, TS_FILTER_LEVEL, 0.0001)==0 ) {
+ if( MathUtils.compareDoubles(tranche.ts, TS_FILTER_LEVEL, 0.0001) == 0 ) {
lodCutoff = tranche.minVQSLod;
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
index 98a8ac92b6..555999bdb6 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
@@ -33,10 +33,10 @@
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
+import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts;
import org.broadinstitute.sting.utils.SampleUtils;
-import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
@@ -67,6 +67,19 @@
* VCF and then run SelectVariants to extract the common records with -select 'set == "Intersection"', as worked out
* in the detailed example on the wiki.
*
+ * Note that CombineVariants supports multi-threaded parallelism (8/15/12). This is particularly useful
+ * when converting from VCF to BCF2, which can be expensive. In this case each thread spends CPU time
+ * doing the conversion, and the GATK engine is smart enough to merge the partial BCF2 blocks together
+ * efficiency. However, since this merge runs in only one thread, you can quickly reach diminishing
+ * returns with the number of parallel threads. -nt 4 works well but -nt 8 may be too much.
+ *
+ * Some fine details about the merging algorithm:
+ *
+ * - As of GATK 2.1, when merging multiple VCF records at a site, the combined VCF record has the QUAL of
+ * the first VCF record with a non-MISSING QUAL value. The previous behavior was to take the
+ * max QUAL, which resulted in sometime strange downstream confusion
+ *
+ *
* Input
*
* One or more variant sets to combine.
@@ -100,7 +113,7 @@
*/
@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=-50,stop=50))
-public class CombineVariants extends RodWalker {
+public class CombineVariants extends RodWalker implements TreeReducible {
/**
* The VCF files to merge together
*
@@ -188,7 +201,8 @@ public void initialize() {
logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites only output option");
if ( PRIORITY_STRING == null ) {
- PRIORITY_STRING = Utils.join(",", vcfRods.keySet());
+ genotypeMergeOption = VariantContextUtils.GenotypeMergeType.UNSORTED;
+ //PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12)
logger.info("Priority string not provided, using arbitrary genotyping order: " + PRIORITY_STRING);
}
@@ -313,5 +327,10 @@ public Integer reduce(Integer counter, Integer sum) {
return counter + sum;
}
+ @Override
+ public Integer treeReduce(Integer lhs, Integer rhs) {
+ return reduce(lhs, rhs);
+ }
+
public void onTraversalDone(Integer sum) {}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java
index d223adefbe..f89bcb2a70 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java
@@ -34,15 +34,13 @@
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
/**
* Filters a lifted-over VCF file for ref bases that have been changed.
@@ -66,7 +64,7 @@ public void initialize() {
Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
- final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaDataInSortedOrder() : null, samples);
+ final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaDataInSortedOrder() : Collections.emptySet(), samples);
writer.writeHeader(vcfHeader);
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java
index 235eb1d9be..9fe499a036 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java
@@ -139,11 +139,11 @@ private int writeLeftAlignedIndel(final VariantContext vc, final ReferenceContex
final byte[] refSeq = ref.getBases();
// get the indel length
- int indelLength;
+ final int indelLength;
if ( vc.isSimpleDeletion() )
- indelLength = vc.getReference().length();
+ indelLength = vc.getReference().length() - 1;
else
- indelLength = vc.getAlternateAllele(0).length();
+ indelLength = vc.getAlternateAllele(0).length() - 1;
if ( indelLength > 200 ) {
writer.add(vc);
@@ -151,7 +151,7 @@ private int writeLeftAlignedIndel(final VariantContext vc, final ReferenceContex
}
// create an indel haplotype
- int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
+ final int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
final byte[] originalIndel = makeHaplotype(vc, refSeq, originalIndex, indelLength);
// create a CIGAR string to represent the event
@@ -170,11 +170,12 @@ private int writeLeftAlignedIndel(final VariantContext vc, final ReferenceContex
VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make();
//System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference));
- int indelIndex = originalIndex-difference;
- byte[] newBases = new byte[indelLength];
- System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength);
- Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
- newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]);
+ final int indelIndex = originalIndex-difference;
+ final byte[] newBases = new byte[indelLength + 1];
+ newBases[0] = refSeq[indelIndex-1];
+ System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 1, indelLength);
+ final Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
+ newVC = updateAllele(newVC, newAllele);
writer.add(newVC);
return 1;
@@ -195,7 +196,7 @@ private static byte[] makeHaplotype(VariantContext vc, byte[] ref, int indexOfRe
if ( vc.isSimpleDeletion() ) {
indexOfRef += indelLength;
} else {
- System.arraycopy(vc.getAlternateAllele(0).getBases(), 0, hap, currentPos, indelLength);
+ System.arraycopy(vc.getAlternateAllele(0).getBases(), 1, hap, currentPos, indelLength);
currentPos += indelLength;
}
@@ -205,14 +206,14 @@ private static byte[] makeHaplotype(VariantContext vc, byte[] ref, int indexOfRe
return hap;
}
- public static VariantContext updateAllele(VariantContext vc, Allele newAllele, Byte refBaseForIndel) {
+ public static VariantContext updateAllele(final VariantContext vc, final Allele newAllele) {
// create a mapping from original allele to new allele
HashMap alleleMap = new HashMap(vc.getAlleles().size());
if ( newAllele.isReference() ) {
alleleMap.put(vc.getReference(), newAllele);
- alleleMap.put(vc.getAlternateAllele(0), vc.getAlternateAllele(0));
+ alleleMap.put(vc.getAlternateAllele(0), Allele.create(newAllele.getBases()[0], false));
} else {
- alleleMap.put(vc.getReference(), vc.getReference());
+ alleleMap.put(vc.getReference(), Allele.create(newAllele.getBases()[0], true));
alleleMap.put(vc.getAlternateAllele(0), newAllele);
}
@@ -229,6 +230,6 @@ public static VariantContext updateAllele(VariantContext vc, Allele newAllele, B
newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
}
- return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).referenceBaseForIndel(refBaseForIndel).make();
+ return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make();
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java
index 0d9a4fc031..63209e98c5 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java
@@ -119,7 +119,6 @@ private void convertAndWrite(VariantContext vc, ReferenceContext ref) {
if ( toInterval != null ) {
// check whether the strand flips, and if so reverse complement everything
- // TODO -- make this work for indels (difficult because the 'previous base' context needed will be changing based on indel type/size)
if ( fromInterval.isPositiveStrand() != toInterval.isPositiveStrand() && vc.isPointEvent() ) {
vc = VariantContextUtils.reverseComplement(vc);
}
@@ -132,11 +131,10 @@ private void convertAndWrite(VariantContext vc, ReferenceContext ref) {
.attribute("OriginalStart", fromInterval.getStart()).make();
}
- VariantContext newVC = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
- if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
+ if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(vc) ) {
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
- originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),
- originalVC.getReference(), originalVC.getAlternateAllele(0), newVC.getReference(), newVC.getAlternateAllele(0)));
+ originalVC.getChr(), originalVC.getStart(), vc.getChr(), vc.getStart(),
+ originalVC.getReference(), originalVC.getAlternateAllele(0), vc.getReference(), vc.getAlternateAllele(0)));
}
writer.add(vc);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java
index f14f6c2a67..46a3a8cd1d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java
@@ -120,12 +120,6 @@ public class SelectHeaders extends RodWalker implements TreeRe
@Argument(fullName = "exclude_header_name", shortName = "xl_hn", doc = "Exclude header. Can be specified multiple times", required = false)
public Set XLheaderNames;
- /**
- * Note that reference inclusion takes precedence over other header matching. If set other reference lines may be excluded but the file name will still be added.
- */
- @Argument(fullName = "include_reference_name", shortName = "irn", doc = "If set the reference file name minus the file extension will be added to the headers", required = false)
- public boolean includeReference;
-
/**
* Note that interval name inclusion takes precedence over other header matching. If set other interval lines may be excluded but the intervals will still be added.
*/
@@ -162,10 +156,6 @@ public void initialize() {
// Select only the headers requested by name or expression.
headerLines = new LinkedHashSet(getSelectedHeaders(headerLines));
- // Optionally add in the reference.
- if (includeReference && getToolkit().getArguments().referenceFile != null)
- headerLines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, FilenameUtils.getBaseName(getToolkit().getArguments().referenceFile.getName())));
-
// Optionally add in the intervals.
if (includeIntervals && getToolkit().getArguments().intervals != null) {
for (IntervalBinding intervalBinding : getToolkit().getArguments().intervals) {
@@ -205,7 +195,7 @@ private Set getSelectedHeaders(Set headerLines) {
selectedHeaders = ListFileUtils.excludeMatching(selectedHeaders, headerKey, XLheaderNames, true);
// always include the contig lines
- selectedHeaders = VCFUtils.withUpdatedContigsAsLines(selectedHeaders, getToolkit().getArguments().referenceFile, getToolkit().getMasterSequenceDictionary());
+ selectedHeaders = VCFUtils.withUpdatedContigsAsLines(selectedHeaders, getToolkit().getArguments().referenceFile, getToolkit().getMasterSequenceDictionary(), true);
return selectedHeaders;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
index e4831eaf29..bfd9aa52f6 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
@@ -31,7 +31,6 @@
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts;
@@ -311,10 +310,6 @@ public class SelectVariants extends RodWalker implements TreeR
private File rsIDFile = null;
- @Hidden
- @Argument(fullName="outMVFile", shortName="outMVFile", doc="", required=false)
- private String outMVFile = null;
-
@Hidden
@Argument(fullName="fullyDecode", doc="If true, the incoming VariantContext will be fully decoded", required=false)
private boolean fullyDecode = false;
@@ -329,7 +324,7 @@ public class SelectVariants extends RodWalker implements TreeR
/* Private class used to store the intermediate variants in the integer random selection process */
- private class RandomVariantStructure {
+ private static class RandomVariantStructure {
private VariantContext vc;
RandomVariantStructure(VariantContext vcP) {
@@ -369,8 +364,6 @@ public enum NumberAlleleRestriction {
private int positionToAdd = 0;
private RandomVariantStructure [] variantArray;
- private PrintStream outMVFileStream = null;
-
//Random number generator for the genotypes to remove
private Random randomGenotypes = new Random();
@@ -470,6 +463,7 @@ public void initialize() {
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH;
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
+ UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
UAC.NO_SLOD = true;
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
headerLines.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null));
@@ -527,23 +521,6 @@ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentCo
if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1)
break;
- if (outMVFile != null){
- for( String familyId : mv.getViolationFamilies()){
- for(Sample sample : this.getSampleDB().getFamily(familyId)){
- if(sample.getParents().size() > 0){
- outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
- "childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
- vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
- sample.getMaternalID(), sample.getPaternalID(), sample.getID(),
- vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(),
- vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(),
- vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() );
-
- }
- }
- }
- }
-
if (DISCORDANCE_ONLY) {
Collection compVCs = tracker.getValues(discordanceTrack, context.getLocation());
if (!isDiscordant(vc, compVCs))
@@ -567,7 +544,7 @@ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentCo
VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS);
if ( REGENOTYPE && sub.isPolymorphicInSamples() && hasPLs(sub) ) {
- final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(tracker, ref, context, sub)).filters(sub.getFiltersMaybeNull());
+ final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(sub)).filters(sub.getFiltersMaybeNull());
addAnnotations(builder, sub);
sub = builder.make();
}
@@ -730,7 +707,13 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean exclu
if ( vc.getAlleles().size() != sub.getAlleles().size() )
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
- //Remove a fraction of the genotypes if needed
+ // if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags
+ if ( vc.getNSamples() != sub.getNSamples() ) {
+ builder.rmAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY);
+ builder.rmAttribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY);
+ }
+
+ // Remove a fraction of the genotypes if needed
if ( fractionGenotypes > 0 ){
ArrayList genotypes = new ArrayList();
for ( Genotype genotype : newGC ) {
@@ -767,17 +750,21 @@ private void addAnnotations(final VariantContextBuilder builder, final VariantCo
VariantContextUtils.calculateChromosomeCounts(builder, false);
+ boolean sawDP = false;
int depth = 0;
for (String sample : originalVC.getSampleNames()) {
Genotype g = originalVC.getGenotype(sample);
if ( ! g.isFiltered() ) {
- if ( g.hasDP() )
+ if ( g.hasDP() ) {
depth += g.getDP();
+ sawDP = true;
+ }
}
}
- builder.attribute("DP", depth);
+ if ( sawDP )
+ builder.attribute("DP", depth);
}
private void randomlyAddVariant(int rank, VariantContext vc) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
index 4b793a31ef..c92551a734 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
@@ -130,35 +130,16 @@ private void validate(VariantContext vc, RefMetaDataTracker tracker, ReferenceCo
return;
// get the true reference allele
- Allele reportedRefAllele = vc.getReference();
- Allele observedRefAllele = null;
- // insertions
- if ( vc.isSimpleInsertion() ) {
- observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING);
+ final Allele reportedRefAllele = vc.getReference();
+ final int refLength = reportedRefAllele.length();
+ if ( refLength > 100 ) {
+ logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", refLength, vc.getChr(), vc.getStart()));
+ return;
}
- // deletions
- else if ( vc.isSimpleDeletion() || vc.isMNP() ) {
- // we can't validate arbitrarily long deletions
- if ( reportedRefAllele.length() > 100 ) {
- logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart()));
- return;
- }
- // deletions are associated with the (position of) the last (preceding) non-deleted base;
- // hence to get actually deleted bases we need offset = 1
- int offset = vc.isMNP() ? 0 : 1;
- byte[] refBytes = ref.getBases();
- byte[] trueRef = new byte[reportedRefAllele.length()];
- for (int i = 0; i < reportedRefAllele.length(); i++)
- trueRef[i] = refBytes[i+offset];
- observedRefAllele = Allele.create(trueRef, true);
- }
- // SNPs, etc. but not mixed types because they are too difficult
- else if ( !vc.isMixed() ) {
- byte[] refByte = new byte[1];
- refByte[0] = ref.getBase();
- observedRefAllele = Allele.create(refByte, true);
- }
+ final byte[] observedRefBases = new byte[refLength];
+ System.arraycopy(ref.getBases(), 0, observedRefBases, 0, refLength);
+ final Allele observedRefAllele = Allele.create(observedRefBases);
// get the RS IDs
Set rsIDs = null;
@@ -171,10 +152,10 @@ else if ( !vc.isMixed() ) {
try {
switch( type ) {
case ALL:
- vc.extraStrictValidation(observedRefAllele, ref.getBase(), rsIDs);
+ vc.extraStrictValidation(reportedRefAllele, observedRefAllele, rsIDs);
break;
case REF:
- vc.validateReferenceBases(observedRefAllele, ref.getBase());
+ vc.validateReferenceBases(reportedRefAllele, observedRefAllele);
break;
case IDS:
vc.validateRSIDs(rsIDs);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java
index 7e82fc4540..3fba8fa77e 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java
@@ -8,8 +8,6 @@
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
-import org.broadinstitute.sting.utils.R.RScriptExecutorException;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@@ -18,7 +16,6 @@
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.io.*;
@@ -95,7 +92,6 @@ public void initialize() {
// write to the fam file, the first six columns of the standard ped file
// first, load data from the input meta data file
Map> metaValues = new HashMap>();
- Set samplesToUse = new HashSet();
logger.debug("Reading in metadata...");
try {
if ( metaDataFile.getAbsolutePath().endsWith(".fam") ) {
@@ -274,6 +270,7 @@ public void onTraversalDone(Integer numSites) {
inStream.read(readGenotypes);
outBed.write(readGenotypes);
}
+ inStream.close();
} catch (IOException e) {
throw new ReviewedStingException("Error reading form temp file for input.",e);
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
index 844c4d5fbe..b9577ca9b2 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
@@ -372,7 +372,7 @@ public void onTraversalDone(Integer sum) {}
// ----------------------------------------------------------------------------------------------------
public static abstract class Getter { public abstract String get(VariantContext vc); }
- public static Map getters = new HashMap();
+ public static final Map getters = new HashMap();
static {
// #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT
@@ -381,7 +381,7 @@ public static abstract class Getter { public abstract String get(VariantContext
getters.put("REF", new Getter() {
public String get(VariantContext vc) {
StringBuilder x = new StringBuilder();
- x.append(vc.getAlleleStringWithRefPadding(vc.getReference()));
+ x.append(vc.getReference().getDisplayString());
return x.toString();
}
});
@@ -393,7 +393,7 @@ public String get(VariantContext vc) {
for ( int i = 0; i < n; i++ ) {
if ( i != 0 ) x.append(",");
- x.append(vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(i)));
+ x.append(vc.getAlternateAllele(i));
}
return x.toString();
}
@@ -435,11 +435,8 @@ public String get(VariantContext vc) {
private static Object splitAltAlleles(VariantContext vc) {
final int numAltAlleles = vc.getAlternateAlleles().size();
if ( numAltAlleles == 1 )
- return vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(0));
+ return vc.getAlternateAllele(0);
- final List alleles = new ArrayList(numAltAlleles);
- for ( Allele allele : vc.getAlternateAlleles() )
- alleles.add(vc.getAlleleStringWithRefPadding(allele));
- return alleles;
+ return vc.getAlternateAlleles();
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java
index 787d4d9abc..78c9c4a1ca 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java
@@ -103,12 +103,6 @@ public class VariantsToVCF extends RodWalker {
@Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod", required=false)
protected String sampleName = null;
- /**
- * This argument is useful for fixing input VCFs with bad reference bases (the output will be a fixed version of the VCF).
- */
- @Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false)
- protected boolean fixReferenceBase = false;
-
private Set allowedGenotypeFormatStrings = new HashSet();
private boolean wroteHeader = false;
private Set samples;
@@ -140,10 +134,6 @@ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentCo
builder.genotypes(g);
}
- if ( fixReferenceBase ) {
- builder.referenceBaseForIndel(ref.getBase());
- }
-
writeRecord(builder.make(), tracker, ref.getLocus());
}
@@ -169,8 +159,8 @@ private Collection getVariantContexts(RefMetaDataTracker tracker
continue;
Map alleleMap = new HashMap(2);
- alleleMap.put(RawHapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isSimpleInsertion()));
- alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
+ alleleMap.put(RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion()));
+ alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
hapmap.setActualAlleles(alleleMap);
// also, use the correct positioning for insertions
diff --git a/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java b/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java
new file mode 100644
index 0000000000..8964c16cb8
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java
@@ -0,0 +1,53 @@
+package org.broadinstitute.sting.utils;
+
+/**
+ * Simple utility class that makes it convenient to print unit adjusted times
+ */
+public class AutoFormattingTime {
+ double timeInSeconds; // in Seconds
+ int precision; // for format
+
+ public AutoFormattingTime(double timeInSeconds, int precision) {
+ this.timeInSeconds = timeInSeconds;
+ this.precision = precision;
+ }
+
+ public AutoFormattingTime(double timeInSeconds) {
+ this(timeInSeconds, 1);
+ }
+
+ public double getTimeInSeconds() {
+ return timeInSeconds;
+ }
+
+ /**
+ * Instead of 10000 s, returns 2.8 hours
+ * @return
+ */
+ public String toString() {
+ double unitTime = timeInSeconds;
+ String unit = "s";
+
+ if ( timeInSeconds > 120 ) {
+ unitTime = timeInSeconds / 60; // minutes
+ unit = "m";
+
+ if ( unitTime > 120 ) {
+ unitTime /= 60; // hours
+ unit = "h";
+
+ if ( unitTime > 100 ) {
+ unitTime /= 24; // days
+ unit = "d";
+
+ if ( unitTime > 20 ) {
+ unitTime /= 7; // days
+ unit = "w";
+ }
+ }
+ }
+ }
+
+ return String.format("%6."+precision+"f %s", unitTime, unit);
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
index 393dd57358..2d7f51c3fb 100644
--- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
@@ -67,10 +67,10 @@ private Base(char base, int index) {
public static final byte DELETION_INDEX = 4;
public static final byte NO_CALL_INDEX = 5; // (this is 'N')
- public static int gIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'G');
- public static int cIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'C');
- public static int aIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'A');
- public static int tIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'T');
+ public static final int aIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'A');
+ public static final int cIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'C');
+ public static final int gIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'G');
+ public static final int tIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'T');
/// In genetics, a transition is a mutation changing a purine to another purine nucleotide (A <-> G) or
// a pyrimidine to another pyrimidine nucleotide (C <-> T).
@@ -227,14 +227,21 @@ static public int extendedBaseToBaseIndex(byte base) {
}
@Deprecated
- static public boolean isRegularBase(char base) {
+ static public boolean isRegularBase( final char base ) {
return simpleBaseToBaseIndex(base) != -1;
}
- static public boolean isRegularBase(byte base) {
+ static public boolean isRegularBase( final byte base ) {
return simpleBaseToBaseIndex(base) != -1;
}
+ static public boolean isAllRegularBases( final byte[] bases ) {
+ for( final byte base : bases) {
+ if( !isRegularBase(base) ) { return false; }
+ }
+ return true;
+ }
+
static public boolean isNBase(byte base) {
return base == 'N' || base == 'n';
}
@@ -431,6 +438,37 @@ static public String simpleComplement(String bases) {
return new String(simpleComplement(bases.getBytes()));
}
+ /**
+ * Returns the uppercased version of the bases
+ *
+ * @param bases the bases
+ * @return the upper cased version
+ */
+ static public byte[] convertToUpperCase(final byte[] bases) {
+ for ( int i = 0; i < bases.length; i++ ) {
+ if ( (char)bases[i] >= 'a' )
+ bases[i] = toUpperCaseBase(bases[i]);
+ }
+ return bases;
+ }
+
+ static public byte toUpperCaseBase(final byte base) {
+ switch (base) {
+ case 'a':
+ return 'A';
+ case 'c':
+ return 'C';
+ case 'g':
+ return 'G';
+ case 't':
+ return 'T';
+ case 'n':
+ return 'N';
+ default:
+ return base;
+ }
+ }
+
/**
* Returns the index of the most common base in the basecounts array. To be used with
* pileup.getBaseCounts.
diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
index 4f2b5b2eb7..77ecd295f3 100644
--- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
+++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
@@ -43,9 +43,6 @@
/**
* Factory class for creating GenomeLocs
*/
-@Invariant({
- "logger != null",
- "contigInfo != null"})
public final class GenomeLocParser {
private static Logger logger = Logger.getLogger(GenomeLocParser.class);
@@ -54,20 +51,39 @@ public final class GenomeLocParser {
// Ugly global variable defining the optional ordering of contig elements
//
// --------------------------------------------------------------------------------------------------------------
- private final MasterSequenceDictionary contigInfo;
+
+ /**
+ * This single variable holds the underlying SamSequenceDictionary used by the GATK. We assume
+ * it is thread safe.
+ */
+ final private SAMSequenceDictionary SINGLE_MASTER_SEQUENCE_DICTIONARY;
+
+ /**
+ * A thread-local caching contig info
+ */
+ private final ThreadLocal contigInfoPerThread =
+ new ThreadLocal();
+
+ /**
+ * @return a caching sequence dictionary appropriate for this thread
+ */
+ private CachingSequenceDictionary getContigInfo() {
+ if ( contigInfoPerThread.get() == null ) {
+ // initialize for this thread
+ logger.debug("Creating thread-local caching sequence dictionary for thread " + Thread.currentThread().getName());
+ contigInfoPerThread.set(new CachingSequenceDictionary(SINGLE_MASTER_SEQUENCE_DICTIONARY));
+ }
+
+ assert contigInfoPerThread.get() != null;
+
+ return contigInfoPerThread.get();
+ }
/**
* A wrapper class that provides efficient last used caching for the global
- * SAMSequenceDictionary underlying all of the GATK engine capabilities
+ * SAMSequenceDictionary underlying all of the GATK engine capabilities.
*/
- // todo -- enable when CoFoJa developers identify the problem (likely thread unsafe invariants)
-// @Invariant({
-// "dict != null",
-// "dict.size() > 0",
-// "lastSSR == null || dict.getSequence(lastContig).getSequenceIndex() == lastIndex",
-// "lastSSR == null || dict.getSequence(lastContig).getSequenceName() == lastContig",
-// "lastSSR == null || dict.getSequence(lastContig) == lastSSR"})
- private final class MasterSequenceDictionary {
+ private final class CachingSequenceDictionary {
final private SAMSequenceDictionary dict;
// cache
@@ -76,7 +92,7 @@ private final class MasterSequenceDictionary {
int lastIndex = -1;
@Requires({"dict != null", "dict.size() > 0"})
- public MasterSequenceDictionary(SAMSequenceDictionary dict) {
+ public CachingSequenceDictionary(SAMSequenceDictionary dict) {
this.dict = dict;
}
@@ -111,7 +127,6 @@ public synchronized final SAMSequenceRecord getSequence(final int index) {
return lastSSR;
else
return updateCache(null, index);
-
}
@Requires("contig != null")
@@ -125,12 +140,12 @@ public synchronized final int getSequenceIndex(final String contig) {
}
@Requires({"contig != null", "lastContig != null"})
- private final synchronized boolean isCached(final String contig) {
+ private synchronized boolean isCached(final String contig) {
return lastContig.equals(contig);
}
@Requires({"lastIndex != -1", "index >= 0"})
- private final synchronized boolean isCached(final int index) {
+ private synchronized boolean isCached(final int index) {
return lastIndex == index;
}
@@ -144,7 +159,7 @@ private final synchronized boolean isCached(final int index) {
*/
@Requires("contig != null || index >= 0")
@Ensures("result != null")
- private final synchronized SAMSequenceRecord updateCache(final String contig, int index ) {
+ private synchronized SAMSequenceRecord updateCache(final String contig, int index ) {
SAMSequenceRecord rec = contig == null ? dict.getSequence(index) : dict.getSequence(contig);
if ( rec == null ) {
throw new ReviewedStingException("BUG: requested unknown contig=" + contig + " index=" + index);
@@ -174,7 +189,7 @@ public GenomeLocParser(SAMSequenceDictionary seqDict) {
throw new UserException.CommandLineException("Failed to load reference dictionary");
}
- contigInfo = new MasterSequenceDictionary(seqDict);
+ SINGLE_MASTER_SEQUENCE_DICTIONARY = seqDict;
logger.debug(String.format("Prepared reference sequence contig dictionary"));
for (SAMSequenceRecord contig : seqDict.getSequences()) {
logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength()));
@@ -188,11 +203,11 @@ public GenomeLocParser(SAMSequenceDictionary seqDict) {
* @return True if the contig is valid. False otherwise.
*/
public final boolean contigIsInDictionary(String contig) {
- return contig != null && contigInfo.hasContig(contig);
+ return contig != null && getContigInfo().hasContig(contig);
}
public final boolean indexIsInDictionary(final int index) {
- return index >= 0 && contigInfo.hasContig(index);
+ return index >= 0 && getContigInfo().hasContig(index);
}
@@ -208,7 +223,7 @@ public final boolean indexIsInDictionary(final int index) {
public final SAMSequenceRecord getContigInfo(final String contig) {
if ( contig == null || ! contigIsInDictionary(contig) )
throw new UserException.MalformedGenomeLoc(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig));
- return contigInfo.getSequence(contig);
+ return getContigInfo().getSequence(contig);
}
/**
@@ -226,9 +241,9 @@ public final int getContigIndex(final String contig) {
@Requires("contig != null")
protected int getContigIndexWithoutException(final String contig) {
- if ( contig == null || ! contigInfo.hasContig(contig) )
+ if ( contig == null || ! getContigInfo().hasContig(contig) )
return -1;
- return contigInfo.getSequenceIndex(contig);
+ return getContigInfo().getSequenceIndex(contig);
}
/**
@@ -236,7 +251,7 @@ protected int getContigIndexWithoutException(final String contig) {
* @return
*/
public final SAMSequenceDictionary getContigs() {
- return contigInfo.dict;
+ return getContigInfo().dict;
}
// --------------------------------------------------------------------------------------------------------------
@@ -291,7 +306,7 @@ public GenomeLoc createGenomeLoc(String contig, int index, final int start, fina
* @return true if it's valid, false otherwise. If exceptOnError, then throws a UserException if invalid
*/
private boolean validateGenomeLoc(String contig, int contigIndex, int start, int stop, boolean mustBeOnReference, boolean exceptOnError) {
- if ( ! contigInfo.hasContig(contig) )
+ if ( ! getContigInfo().hasContig(contig) )
return vglHelper(exceptOnError, String.format("Unknown contig %s", contig));
if (stop < start)
@@ -300,8 +315,8 @@ private boolean validateGenomeLoc(String contig, int contigIndex, int start, int
if (contigIndex < 0)
return vglHelper(exceptOnError, String.format("The contig index %d is less than 0", contigIndex));
- if (contigIndex >= contigInfo.getNSequences())
- return vglHelper(exceptOnError, String.format("The contig index %d is greater than the stored sequence count (%d)", contigIndex, contigInfo.getNSequences()));
+ if (contigIndex >= getContigInfo().getNSequences())
+ return vglHelper(exceptOnError, String.format("The contig index %d is greater than the stored sequence count (%d)", contigIndex, getContigInfo().getNSequences()));
if ( mustBeOnReference ) {
if (start < 1)
@@ -310,7 +325,7 @@ private boolean validateGenomeLoc(String contig, int contigIndex, int start, int
if (stop < 1)
return vglHelper(exceptOnError, String.format("The stop position %d is less than 1", stop));
- int contigSize = contigInfo.getSequence(contigIndex).getSequenceLength();
+ int contigSize = getContigInfo().getSequence(contigIndex).getSequenceLength();
if (start > contigSize || stop > contigSize)
return vglHelper(exceptOnError, String.format("The genome loc coordinates %d-%d exceed the contig size (%d)", start, stop, contigSize));
}
@@ -558,7 +573,7 @@ public GenomeLoc incPos(GenomeLoc loc, int by) {
@Requires("contigName != null")
@Ensures("result != null")
public GenomeLoc createOverEntireContig(String contigName) {
- SAMSequenceRecord contig = contigInfo.getSequence(contigName);
+ SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
return createGenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength(), true);
}
@@ -573,7 +588,7 @@ public GenomeLoc createGenomeLocAtStart(GenomeLoc loc, int maxBasePairs) {
if (GenomeLoc.isUnmapped(loc))
return null;
String contigName = loc.getContig();
- SAMSequenceRecord contig = contigInfo.getSequence(contigName);
+ SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
int contigIndex = contig.getSequenceIndex();
int start = loc.getStart() - maxBasePairs;
@@ -598,7 +613,7 @@ public GenomeLoc createPaddedGenomeLoc(final GenomeLoc loc, final int padding) {
if (GenomeLoc.isUnmapped(loc))
return loc;
final String contigName = loc.getContig();
- final SAMSequenceRecord contig = contigInfo.getSequence(contigName);
+ final SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
final int contigIndex = contig.getSequenceIndex();
final int contigLength = contig.getSequenceLength();
@@ -619,7 +634,7 @@ public GenomeLoc createGenomeLocAtStop(GenomeLoc loc, int maxBasePairs) {
if (GenomeLoc.isUnmapped(loc))
return null;
String contigName = loc.getContig();
- SAMSequenceRecord contig = contigInfo.getSequence(contigName);
+ SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
int contigIndex = contig.getSequenceIndex();
int contigLength = contig.getSequenceLength();
diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
index 829e75682f..fcde1f419d 100755
--- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
@@ -27,6 +27,7 @@
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
+import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@@ -40,6 +41,7 @@ public class Haplotype {
protected final double[] quals;
private GenomeLoc genomeLocation = null;
private HashMap readLikelihoodsPerSample = null;
+ private HashMap readCountsPerSample = null;
private HashMap eventMap = null;
private boolean isRef = false;
private Cigar cigar;
@@ -83,18 +85,27 @@ public int hashCode() {
return Arrays.hashCode(bases);
}
- public void addReadLikelihoods( final String sample, final double[] readLikelihoods ) {
+ public void addReadLikelihoods( final String sample, final double[] readLikelihoods, final int[] readCounts ) {
if( readLikelihoodsPerSample == null ) {
readLikelihoodsPerSample = new HashMap();
}
readLikelihoodsPerSample.put(sample, readLikelihoods);
+ if( readCountsPerSample == null ) {
+ readCountsPerSample = new HashMap();
+ }
+ readCountsPerSample.put(sample, readCounts);
}
@Ensures({"result != null"})
public double[] getReadLikelihoods( final String sample ) {
return readLikelihoodsPerSample.get(sample);
}
-
+
+ @Ensures({"result != null"})
+ public int[] getReadCounts( final String sample ) {
+ return readCountsPerSample.get(sample);
+ }
+
public Set getSampleKeySet() {
return readLikelihoodsPerSample.keySet();
}
@@ -160,52 +171,24 @@ public void setCigar( final Cigar cigar ) {
}
@Requires({"refInsertLocation >= 0"})
- public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation ) {
-
- if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
+ public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation ) {
+ // refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
- if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
- return new Haplotype(bases.clone());
- }
- byte[] newHaplotype;
-
- try {
- if( refAllele.length() == altAllele.length() ) { // SNP or MNP
- newHaplotype = bases.clone();
- for( int iii = 0; iii < altAllele.length(); iii++ ) {
- newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
- }
- } else if( refAllele.length() < altAllele.length() ) { // insertion
- final int altAlleleLength = altAllele.length();
- newHaplotype = new byte[bases.length + altAlleleLength];
- for( int iii = 0; iii < bases.length; iii++ ) {
- newHaplotype[iii] = bases[iii];
- }
- for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) {
- newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
- }
- for( int iii = 0; iii < altAlleleLength; iii++ ) {
- newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
- }
- } else { // deletion
- final int shift = refAllele.length() - altAllele.length();
- newHaplotype = new byte[bases.length - shift];
- for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) {
- newHaplotype[iii] = bases[iii];
- }
- for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) {
- newHaplotype[iii] = bases[iii+shift];
- }
- }
- } catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
- return new Haplotype(bases.clone());
+ if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= bases.length ) { // desired change falls inside deletion so don't bother creating a new haplotype
+ return null;
}
-
- return new Haplotype(newHaplotype);
+ byte[] newHaplotypeBases = new byte[]{};
+ newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, 0, haplotypeInsertLocation)); // bases before the variant
+ newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
+ newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, haplotypeInsertLocation + refAllele.length(), bases.length)); // bases after the variant
+ return new Haplotype(newHaplotypeBases);
}
- public static LinkedHashMap makeHaplotypeListFromAlleles(List