diff --git a/build.xml b/build.xml
index ca5d22a5a7..f681ddafa3 100644
--- a/build.xml
+++ b/build.xml
@@ -76,7 +76,8 @@
     <property name="staging.dir" value="staging" />
     <property name="default.executable" value="none" />
 
-    <!-- Javadoc/Scaladoc directories -->
+    <!-- GATKDocs/Javadoc/Scaladoc directories -->
+    <property name="gatkdocs.dir" value="gatkdocs" />
     <property name="javadoc.dir" value="javadoc" />
     <property name="scaladoc.dir" value="scaladoc" />
 
@@ -92,12 +93,10 @@
     <!-- To disable for test targets, run with -Duse.contracts=false -->
     <!-- To enable for non-test targets, run with -Duse.contracts=true -->
     <property name="java.contracts.dir" value="${build.dir}/java/contracts" />
-    <property name="contracts.version" value="1.0-20110609" />
+    <property name="contracts.version" value="1.0-r139" />
     <property name="cofoja.jar" value="${lib.dir}/cofoja-${contracts.version}.jar"/>
     <property name="contract.dump.dir" value="dump" />
 
-    <property name="gatkdocs.dir" value="gatkdocs" />
-
     <!-- do we want to halt on failure of a unit test? default to yes (Bamboo uses 'no') -->
     <property name="halt" value="yes" />
 
@@ -208,19 +207,19 @@
         <include name="**/*.java" />
     </fileset>
 
-    <pathconvert property="external.build.dir">
-        <path path="${java.classes}"/>
-    </pathconvert>
+    <path id="external.build.dir">
+        <path path="${java.classes}" />
+    </path>
 
-    <pathconvert property="external.dist.dir">
+    <path id="external.dist.dir">
         <path path="${dist.dir}" />
-    </pathconvert>
+    </path>
 
     <!-- GATK dependencies consist of 3rd party plugins plus compiled GATK classes -->
-    <pathconvert property="external.gatk.classpath">
+    <path id="external.gatk.classpath">
         <path path="${java.classes}"/>
         <path refid="external.dependencies" />
-    </pathconvert>
+    </path>
 
     <!-- the path for resources that need to go into the GATK jar;
          any additional resources should go into this set. -->
@@ -430,9 +429,9 @@
 
     <target name="gatk.compile.external.source" depends="gatk.compile.internal.source" if="include.external">
         <subant target="compile" genericantfile="build.xml">
-            <property name="build.dir" value="${external.build.dir}" />
-            <property name="dist.dir" value="${external.dist.dir}" />
-            <property name="gatk.classpath" value="${external.gatk.classpath}" />
+            <property name="build.dir" refid="external.build.dir" />
+            <property name="dist.dir" refid="external.dist.dir" />
+            <property name="gatk.classpath" refid="external.gatk.classpath" />
             <fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
         </subant>
     </target>
@@ -680,9 +679,9 @@
         </jar>
 
         <subant target="dist" genericantfile="build.xml">
-            <property name="build.dir" value="${external.build.dir}" />
-            <property name="dist.dir" value="${external.dist.dir}" />
-            <property name="gatk.classpath" value="${external.gatk.classpath}" />
+            <property name="build.dir" refid="external.build.dir" />
+            <property name="dist.dir" refid="external.dist.dir" />
+            <property name="gatk.classpath" refid="external.gatk.classpath" />
             <fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
         </subant>
     </target>
diff --git a/ivy.xml b/ivy.xml
index b197d0714f..0761cb4111 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -52,7 +52,7 @@
         <dependency org="gov.nist" name="Jama" rev="1.0.2"/>
 
         <!-- Dependencies for the graph aligner -->
-        <dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
+        <dependency org="net.sf.jgrapht" name="jgrapht" rev="0.8.3"/>
 
         <!-- Dependencies for the html walker documention -->
         <dependency org="org.freemarker" name="freemarker" rev="2.3.18"/>
@@ -87,7 +87,7 @@
         <dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT" conf="test"/>
 
         <!-- Contracts for Java and dependencies -->
-        <dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-20110609"/>
+        <dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-r139"/>
         <dependency org="asm" name="asm-all" rev="3.3.1"/>
 
         <!-- POI, for reading pipeline files -->
diff --git a/public/R/scripts/org/broadinstitute/sting/gatk/walkers/bqsr/BQSR.R b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R
similarity index 74%
rename from public/R/scripts/org/broadinstitute/sting/gatk/walkers/bqsr/BQSR.R
rename to public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R
index 6c4dace1df..8a9eecf48c 100644
--- a/public/R/scripts/org/broadinstitute/sting/gatk/walkers/bqsr/BQSR.R
+++ b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R
@@ -1,8 +1,18 @@
 library("ggplot2")
+library(gplots)
+library("reshape")
+library("grid")
 library("tools") #For compactPDF in R 2.13+
+library(gsalib)
 
-args <- commandArgs(TRUE)
+
+if ( interactive() ) {
+  args <- c("NA12878.6.1.dedup.realign.recal.bqsr.grp.csv", "NA12878.6.1.dedup.realign.recal.bqsr.grp", NA)
+} else {
+  args <- commandArgs(TRUE)
+} 
 data <- read.csv(args[1])
+gsa.report <- gsa.read.gatkreport(args[2])
 data <- within(data, EventType <- factor(EventType, levels = rev(levels(EventType))))
 
 numRG = length(unique(data$ReadGroup))
@@ -82,20 +92,45 @@ for(cov in levels(data$CovariateName)) {    # for each covariate in turn
     
     p <- ggplot(d, aes(x=CovariateValue)) +
       xlab(paste(cov,"Covariate")) +
-      ylab("Number of Observations") +
+      ylab("No. of Observations (area normalized)") +
       blankTheme
-    d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations),alpha=0.6,binwidth=1,position="identity") + scale_fill_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) +     
-      scale_y_continuous(formatter="comma")
-    
+    d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations,y=..ndensity..),alpha=0.6,binwidth=1,position="identity")
+    d <- d + scale_fill_manual(values=c("maroon1","blue"))
+    d <- d + facet_grid(.~EventType) 
+#    d <- d + scale_y_continuous(formatter="comma")
   }
 }
 
-pdf(args[2],height=9,width=15)
+if ( ! is.na(args[3]) )
+  pdf(args[3],height=9,width=15)
+
+#frame()
+textplot(gsa.report$Arguments, show.rownames=F)
+title(
+  main="GATK BaseRecalibration report",
+  sub=date())
+
 distributeGraphRows(list(a,b,c), c(1,1,1))
 distributeGraphRows(list(d,e,f), c(1,1,1))
-dev.off()
 
+# format the overall information
+rt0 <- data.frame(
+  ReadGroup = gsa.report$RecalTable0$ReadGroup,
+  EventType = gsa.report$RecalTable0$EventType,
+  EmpiricalQuality = sprintf("%.1f", gsa.report$RecalTable0$EmpiricalQuality),
+  EstimatedQReported = sprintf("%.1f", gsa.report$RecalTable0$EstimatedQReported),
+  Observations = sprintf("%.2e", gsa.report$RecalTable0$Observations),
+  Errors = sprintf("%.2e", gsa.report$RecalTable0$Errors))  
+textplot(t(rt0), show.colnames=F)
+title("Overall error rates by event type")
+
+# plot per quality score recalibration table
+textplot(gsa.report$RecalTable1, show.rownames=F)
+title("Error rates by event type and initial quality score")
 
-if (exists('compactPDF')) {
-  compactPDF(args[2])
+if ( ! is.na(args[3]) ) {
+  dev.off()
+  if (exists('compactPDF')) {
+    compactPDF(args[3])  # compact the PDF opened above via pdf(args[3]); args[2] is the GATKReport input
+  }
 }
diff --git a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.variantqc.utils.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.variantqc.utils.R
index 19567e7e6f..45dacd835a 100644
--- a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.variantqc.utils.R
+++ b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.variantqc.utils.R
@@ -207,7 +207,7 @@ plotVariantQC <- function(metrics, measures, requestedStrat = "Sample",
   
   if ( requestedStrat == "Sample" ) {
     perSampleGraph <- perSampleGraph + geom_text(aes(label=strat), size=1.5) + geom_blank() # don't display a scale
-    perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)", formatter=function(x) "")
+    perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)")
   } else { # by AlleleCount
     perSampleGraph <- perSampleGraph + geom_point(aes(size=log10(nobs))) #+ geom_smooth(aes(weight=log10(nobs)))
     perSampleGraph <- perSampleGraph + scale_x_log10("AlleleCount")
diff --git a/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java b/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java
index 6c8fe18348..10326ef2ed 100644
--- a/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java
+++ b/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java
@@ -208,6 +208,7 @@ else if (basesThisLine != basesPerLine || bytesPerLine != bytesRead - endOfLastL
                         break;
                 }
             }
+            in.close();
             return sequenceIndex;
         }
         catch (IOException e) {
diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java
index bc0a5b63d6..7cd85cfd8e 100644
--- a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java
+++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java
@@ -12,10 +12,10 @@
  */
 public class Bases implements Iterable<Byte>
 {
-    public static byte A = 'A';
-    public static byte C = 'C';
-    public static byte G = 'G';
-    public static byte T = 'T';
+    public static final byte A = 'A';
+    public static final byte C = 'C';
+    public static final byte G = 'G';
+    public static final byte T = 'T';
 
     public static final Bases instance = new Bases();
 
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java
index a5647ec0f9..618120217a 100644
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java
@@ -26,6 +26,7 @@
 package org.broadinstitute.sting.commandline;
 
 import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
 import java.lang.annotation.Annotation;
 import java.util.List;
@@ -147,6 +148,9 @@ public ArgumentDefinition( ArgumentIOType ioType,
         this.exclusiveOf = exclusiveOf;
         this.validation = validation;
         this.validOptions = validOptions;
+
+        validateName(shortName);
+        validateName(fullName);
     }
 
     /**
@@ -192,6 +196,9 @@ else if( !isFullNameProvided )
         else
             shortName = null;
 
+        validateName(shortName);
+        validateName(fullName);
+
         this.ioType = ioType;
         this.argumentType = argumentType;
         this.fullName = fullName;
@@ -277,4 +284,14 @@ public static String getValidationRegex( Annotation annotation ) {
         String validation = (String)CommandLineUtils.getValue(annotation, "validation");
         return validation.trim().length() > 0 ? validation.trim() : null;
     }
+
+    /**
+     * Make sure the argument's name is valid: throws a ReviewedStingException if a non-null name begins with "-".
+     *
+     * @param name the name to check; a null name is accepted without error
+     */
+    private void validateName(final String name) {
+        if ( name != null && name.startsWith("-") )
+            throw new ReviewedStingException("Invalid argument definition: " + name + " begins with a -");
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java
index b47677b08e..474225e2a1 100644
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java
@@ -55,10 +55,8 @@ public ArgumentDefinitionGroup( String groupName, List<ArgumentDefinition> argum
      * Does the name of this argument group match the name of another?
      */
     public boolean groupNameMatches( ArgumentDefinitionGroup other ) {
-        if( this.groupName == null && other.groupName == null )
-            return true;
-        if( this.groupName == null && other.groupName != null )
-            return false;
+        if( this.groupName == null )
+            return other.groupName == null;
         return this.groupName.equals(other.groupName);
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
index c201e95f04..dd4a151bf1 100644
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
@@ -53,7 +53,7 @@ public abstract class ArgumentTypeDescriptor {
     /**
      * our log, which we want to capture anything from org.broadinstitute.sting
      */
-    protected static Logger logger = Logger.getLogger(ArgumentTypeDescriptor.class);
+    protected static final Logger logger = Logger.getLogger(ArgumentTypeDescriptor.class);
 
     /**
      * Fetch the given descriptor from the descriptor repository.
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java
index 26af49e12b..376b6f210a 100755
--- a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java
@@ -120,8 +120,8 @@ public static Tags parseTags(String argument, String tagString) {
      */
     private static final String TAG_TEXT = "[\\w\\-\\.\\=]*";
 
-    public static ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*--(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
+    public static final ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*--(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
                                                                           ArgumentDefinitions.FullNameDefinitionMatcher) {};
-    public static ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*-(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
+    public static final ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*-(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
                                                                            ArgumentDefinitions.ShortNameDefinitionMatcher) {};
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java
index c6bb4a27c0..0286cdc524 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java
@@ -130,8 +130,8 @@ private void authorizeGATKRun() {
              getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) {
             if ( getArgumentCollection().gatkKeyFile == null ) {
                 throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " +
-                                        "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " +
-                                        "for more information and instructions on how to obtain a key.");
+                                        "Please see " + GATKRunReport.PHONE_HOME_DOCS_URL +
+                                        " for more information and instructions on how to obtain a key.");
             }
             else {
                 PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
index b1ad19e696..312d31727d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
@@ -130,6 +130,12 @@ private static void checkForMaskedUserErrors(final Throwable t) {
 
         // can't close tribble index when writing
         if ( message.indexOf("Unable to close index for") != -1 )
+            exitSystemWithUserError(new UserException(t.getCause() == null ? message : t.getCause().getMessage()));
+
+        // disk is full (the cause's getMessage() may be null, so guard it before searching)
+        if ( message.indexOf("No space left on device") != -1 )
+            exitSystemWithUserError(new UserException(t.getMessage()));
+        if ( t.getCause() != null && t.getCause().getMessage() != null && t.getCause().getMessage().indexOf("No space left on device") != -1 )
             exitSystemWithUserError(new UserException(t.getCause().getMessage()));
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index 5d6fb75ed6..e76cde43ab 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -233,10 +233,6 @@ public Object execute() {
         if (args.nonDeterministicRandomSeed)
             resetRandomGenerator(System.currentTimeMillis());
 
-        // TODO -- REMOVE ME WHEN WE STOP BCF testing
-        if ( args.USE_SLOW_GENOTYPES )
-            GenotypeBuilder.MAKE_FAST_BY_DEFAULT = false;
-
         // if the use specified an input BQSR recalibration table then enable on the fly recalibration
         if (args.BQSR_RECAL_FILE != null)
             setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.disableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN, args.emitOriginalQuals);
@@ -797,6 +793,14 @@ private SAMDataSource createReadsDataSource(GATKArgumentCollection argCollection
         if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
             throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
 
+        if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
+            throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
+
+        boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class);
+
+        if (argCollection.keepProgramRecords)
+            removeProgramRecords = false;
+
         return new SAMDataSource(
                 samReaderIDs,
                 threadAllocation,
@@ -813,7 +817,8 @@ private SAMDataSource createReadsDataSource(GATKArgumentCollection argCollection
                 getWalkerBAQQualityMode(),
                 refReader,
                 getBaseRecalibration(),
-                argCollection.defaultBaseQualities);
+                argCollection.defaultBaseQualities,
+                removeProgramRecords);
     }
 
     /**
@@ -840,20 +845,9 @@ private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(Collecti
                                                                             SAMSequenceDictionary sequenceDictionary,
                                                                             GenomeLocParser genomeLocParser,
                                                                             ValidationExclusion.TYPE validationExclusionType) {
-        VCFHeader header = null;
-        if ( getArguments().repairVCFHeader != null ) {
-            try {
-                final PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(getArguments().repairVCFHeader));
-                header = (VCFHeader)new VCFCodec().readHeader(pbs).getHeaderValue();
-                pbs.close();
-            } catch ( IOException e ) {
-                throw new UserException.CouldNotReadInputFile(getArguments().repairVCFHeader, e);
-            }
-        }
-
-        RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, header, validationExclusionType);
+        final RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, validationExclusionType);
 
-        List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
+        final List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
         for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
             dataSources.add(new ReferenceOrderedDataSource(fileDescriptor,
                                                            builder,
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index 3fd3857c5d..bbbd96cf1c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -57,8 +57,6 @@ public class GATKArgumentCollection {
     public GATKArgumentCollection() {
     }
 
-    public Map<String, String> walkerArgs = new HashMap<String, String>();
-
     // parameters and their defaults
     @Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
     public List<String> samFiles = new ArrayList<String>();
@@ -66,10 +64,10 @@ public GATKArgumentCollection() {
     @Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
     public Integer readBufferSize = null;
 
-    @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
+    @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false)
     public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
 
-    @Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
+    @Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false)
     public File gatkKeyFile = null;
 
     @Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
@@ -249,6 +247,12 @@ public static DownsamplingMethod getDefaultDownsamplingMethod() {
     @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
     public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
 
+    @Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we override the Walker's default and remove program records from the SAM header", required = false)
+    public boolean removeProgramRecords = false;
+
+    @Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false)
+    public boolean keepProgramRecords = false;
+
     @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime.  For expert users only who know what they are doing.  We do not support usage of this argument.", required = false)
     public ValidationExclusion.TYPE unsafe;
 
@@ -375,19 +379,5 @@ public static DownsamplingMethod getDefaultDownsamplingMethod() {
     @Hidden
     public boolean generateShadowBCF = false;
     // TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
-
-    @Argument(fullName="useSlowGenotypes",shortName = "useSlowGenotypes",doc="",required=false)
-    @Hidden
-    public boolean USE_SLOW_GENOTYPES = false;
-    // TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
-
-    /**
-     * The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file
-     * and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other
-     * VCF file that GATK reads in.  This allows us to have in effect a master set of header records and use these
-     * to fill in any missing ones in input VCF files.
-     */
-    @Argument(fullName="repairVCFHeader", shortName = "repairVCFHeader", doc="If provided, whenever we read a VCF file we will use the header in this file to repair the header of the input VCF files", required=false)
-    public File repairVCFHeader = null;
 }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java
new file mode 100644
index 0000000000..f30fc03160
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java
@@ -0,0 +1,62 @@
+package org.broadinstitute.sting.gatk.arguments;
+
+import org.broadinstitute.sting.commandline.Advanced;
+import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Input;
+import org.broadinstitute.sting.commandline.RodBinding;
+import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
+import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 8/20/12
+ * A collection of arguments that are common to the various callers.
+ * This is pulled out so that every caller isn't exposed to the arguments from every other caller.
+ */
+
+public class StandardCallerArgumentCollection {
+    /**
+     * The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
+     * het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
+     */
+    @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
+    public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;
+
+    @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
+    public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
+
+    @Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
+    public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
+
+    /**
+     * The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
+     * confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
+     * is the default).
+     */
+    @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false)
+    public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
+
+    /**
+     * This argument allows you to emit low quality calls as filtered records.
+     */
+    @Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false)
+    public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
+
+    /**
+     * When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provided in this rod binding
+     */
+    @Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
+    public RodBinding<VariantContext> alleles;
+
+    /**
+     * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN_ALLELES),
+     * then only this many alleles will be used.  Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
+     * scales exponentially based on the number of alternate alleles.  Unless there is a good reason to change the default value, we highly recommend
+     * that you not play around with this parameter.
+     */
+    @Advanced
+    @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
+    public int MAX_ALTERNATE_ALLELES = 3;
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java
index 142c8a1785..01e24df671 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java
@@ -118,7 +118,7 @@ private void getStates(ShardDataProvider provider, SAMRecord rec) {
                 rec.getAlignmentStart(),
                 stop);
         states = new ArrayList<RMDDataState>();
-        if (provider != null && provider.getReferenceOrderedData() != null)
+        if (provider.getReferenceOrderedData() != null)
             for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
                 states.add(new RMDDataState(dataSource, dataSource.seek(range)));
     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
index 0fa4234b35..7f0a0c4c0c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
@@ -89,6 +89,11 @@ public class SAMDataSource {
      */
     private final SAMFileReader.ValidationStringency validationStringency;
 
+    /**
+     * Do we want to remove the program records from this data source?
+     */
+    private final boolean removeProgramRecords;
+
     /**
      * Store BAM indices for each reader present.
      */
@@ -200,7 +205,8 @@ public SAMDataSource(
                 BAQ.QualityMode.DONT_MODIFY,
                 null, // no BAQ
                 null, // no BQSR
-                (byte) -1);
+                (byte) -1,
+                false);
     }
 
     /**
@@ -233,7 +239,8 @@ public SAMDataSource(
             BAQ.QualityMode qmode,
             IndexedFastaSequenceFile refReader,
             BaseRecalibration bqsrApplier,
-            byte defaultBaseQualities) {
+            byte defaultBaseQualities,
+            boolean removeProgramRecords) {
         this.readMetrics = new ReadMetrics();
         this.genomeLocParser = genomeLocParser;
 
@@ -249,6 +256,7 @@ public SAMDataSource(
             dispatcher = null;
 
         validationStringency = strictness;
+        this.removeProgramRecords = removeProgramRecords;
         if(readBufferSize != null)
             ReadShard.setReadBufferSize(readBufferSize);
         else {
@@ -748,7 +756,7 @@ protected synchronized SAMReaderID getReaderID(SAMFileReader reader) {
         private synchronized void createNewResource() {
             if(allResources.size() > maxEntries)
                 throw new ReviewedStingException("Cannot create a new resource pool.  All resources are in use.");
-            SAMReaders readers = new SAMReaders(readerIDs, validationStringency);
+            SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords);
             allResources.add(readers);
             availableResources.add(readers);
         }
@@ -777,9 +785,11 @@ private class SAMReaders implements Iterable<SAMFileReader> {
         /**
          * Derive a new set of readers from the Reads metadata.
          * @param readerIDs reads to load.
+         * TODO: validationStringency is not used here
          * @param validationStringency validation stringency.
+         * @param removeProgramRecords indicate whether to clear program records from the readers
          */
-        public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
+        public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) {
             final int totalNumberOfFiles = readerIDs.size();
             int readerNumber = 1;
             final SimpleTimer timer = new SimpleTimer().start();
@@ -790,6 +800,9 @@ public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStr
             long lastTick = timer.currentTime();
             for(final SAMReaderID readerID: readerIDs) {
                 final ReaderInitializer init = new ReaderInitializer(readerID).call();
+                if (removeProgramRecords) {
+                    init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
+                }
                 if (threadAllocation.getNumIOThreads() > 0) {
                     inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
                 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java
index 4ecfe472df..c02ae7d99b 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java
@@ -45,7 +45,6 @@
 import java.io.File;
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.LinkedList;
 import java.util.List;
 
 /**
@@ -56,30 +55,31 @@ public class ReferenceDataSource {
     private IndexedFastaSequenceFile reference;
 
     /** our log, which we want to capture anything from this class */
-    protected static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class);
+    protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class);
 
     /**
      * Create reference data source from fasta file
      * @param fastaFile Fasta file to be used as reference
      */
     public ReferenceDataSource(File fastaFile) {
-
         // does the fasta file exist? check that first...
         if (!fastaFile.exists())
             throw new UserException("The fasta file you specified (" + fastaFile.getAbsolutePath() + ") does not exist.");
 
-        File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
-        File dictFile;
-        if (fastaFile.getAbsolutePath().endsWith("fa")) {
-            dictFile = new File(fastaFile.getAbsolutePath().replace(".fa", ".dict"));
-        }
-        else
-         dictFile = new File(fastaFile.getAbsolutePath().replace(".fasta", ".dict"));
+        final boolean isGzipped = fastaFile.getAbsolutePath().endsWith(".gz");
+
+        final File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
+
+        // determine the name for the dict file; for gzipped input the extension test must look past the trailing ".gz"
+        final String fastaExt = (fastaFile.getAbsolutePath().endsWith(isGzipped ? "fa.gz" : "fa") ? ".fa" : ".fasta" ) + (isGzipped ? ".gz" : "");
+        final File dictFile = new File(fastaFile.getAbsolutePath().replace(fastaExt, ".dict"));
 
         /*
-         if index file does not exist, create it manually
-          */
+        * if index file does not exist, create it manually
+        */
         if (!indexFile.exists()) {
+            if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
+
             logger.info(String.format("Index file %s does not exist. Trying to create it now.", indexFile.getAbsolutePath()));
             FSLockWithShared indexLock = new FSLockWithShared(indexFile,true);
             try {
@@ -96,7 +96,7 @@ public ReferenceDataSource(File fastaFile) {
             }
             catch(UserException e) {
                 // Rethrow all user exceptions as-is; there should be more details in the UserException itself. 
-                throw e;    
+                throw e;
             }
             catch (Exception e) {
                 // If lock creation succeeded, the failure must have been generating the index.
@@ -115,6 +115,8 @@ public ReferenceDataSource(File fastaFile) {
         * This has been filed in trac as (PIC-370) Want programmatic interface to CreateSequenceDictionary
         */
         if (!dictFile.exists()) {
+            if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
+
             logger.info(String.format("Dict file %s does not exist. Trying to create it now.", dictFile.getAbsolutePath()));
 
             /*
@@ -219,9 +221,9 @@ public Iterable<Shard> createShardsOverEntireReference(final SAMDataSource reads
             for(int shardStart = 1; shardStart <= refSequenceRecord.getSequenceLength(); shardStart += maxShardSize) {
                 final int shardStop = Math.min(shardStart+maxShardSize-1, refSequenceRecord.getSequenceLength());
                 shards.add(new LocusShard(parser,
-                                          readsDataSource,
-                                          Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)),
-                                          null));
+                        readsDataSource,
+                        Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)),
+                        null));
             }
         }
         return shards;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
index 5080997084..95e39b7c6b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
@@ -58,7 +58,7 @@
 
 /** Shards and schedules data in manageable chunks. */
 public abstract class MicroScheduler implements MicroSchedulerMBean {
-    protected static Logger logger = Logger.getLogger(MicroScheduler.class);
+    protected static final Logger logger = Logger.getLogger(MicroScheduler.class);
 
     /**
      * Counts the number of instances of the class that are currently alive.
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java
index 632638f64f..390da0cced 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java
@@ -66,13 +66,13 @@ public boolean isReadyForReduce() {
      * @return Result of the reduce.
      */
     public Object call() {
-        Object result = null;
+        Object result;
 
         final long startTime = System.currentTimeMillis();
 
         try {
             if( lhs == null )
-                result = lhs.get();
+                result = null;
                 // todo -- what the hell is this above line?  Shouldn't it be the two below?
 //            if( lhs == null )
 //                throw new IllegalStateException(String.format("Insufficient data on which to reduce; lhs = %s, rhs = %s", lhs, rhs) );
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java
index 9a1455859e..cda7392ae5 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java
@@ -29,9 +29,19 @@
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMRecord;
 
+import java.util.Iterator;
+
 /**
  * Filter out reads with wonky cigar strings.
  *
+ *  - No reads with Hard/Soft clips in the middle of the cigar
+ *  - No reads starting with deletions (with or without preceding clips)
+ *  - No reads ending in deletions (with or without follow-up clips)
+ *  - No reads that are fully hard or soft clipped
+ *  - No reads that have consecutive indels in the cigar (II, DD, ID or DI)
+ *
+ *  ps: apparently an empty cigar is okay...
+ *
  * @author ebanks
  * @version 0.1
  */
@@ -40,28 +50,72 @@ public class BadCigarFilter extends ReadFilter {
 
     public boolean filterOut(final SAMRecord rec) {
         final Cigar c = rec.getCigar();
-        if( c.isEmpty() ) { return false; }                                                                             // if there is no Cigar then it can't be bad
 
-        boolean previousElementWasIndel = false;
-        CigarOperator lastOp = c.getCigarElement(0).getOperator();
+        // if there is no Cigar then it can't be bad
+        if( c.isEmpty() ) {
+            return false;
+        }
+
+        Iterator<CigarElement> elementIterator = c.getCigarElements().iterator();
 
-        if (lastOp == CigarOperator.D)                                                                                  // filter out reads starting with deletion
+        CigarOperator firstOp = CigarOperator.H;
+        while (elementIterator.hasNext() && (firstOp == CigarOperator.H || firstOp == CigarOperator.S)) {
+            CigarOperator op = elementIterator.next().getOperator();
+
+            // No reads with Hard/Soft clips in the middle of the cigar
+            if (firstOp != CigarOperator.H && op == CigarOperator.H) {
+                    return true;
+            }
+            firstOp = op;
+        }
+
+        // No reads starting with deletions (with or without preceding clips)
+        if (firstOp == CigarOperator.D) {
             return true;
-        
-        for (CigarElement ce : c.getCigarElements()) {
-            CigarOperator op = ce.getOperator();
-            if (op == CigarOperator.D || op == CigarOperator.I) {
-                if (previousElementWasIndel)
-                    return true;                                                                                        // filter out reads with adjacent I/D
-
-                previousElementWasIndel = true;
+        }
+
+        boolean hasMeaningfulElements = (firstOp != CigarOperator.H && firstOp != CigarOperator.S);
+        boolean previousElementWasIndel = firstOp == CigarOperator.I;
+        CigarOperator lastOp = firstOp;
+        CigarOperator previousOp = firstOp;
+
+        while (elementIterator.hasNext()) {
+            CigarOperator op = elementIterator.next().getOperator();
+
+            if (op != CigarOperator.S && op != CigarOperator.H) {
+
+                // No reads with Hard/Soft clips in the middle of the cigar
+                if (previousOp == CigarOperator.S || previousOp == CigarOperator.H)
+                    return true;
+
+                lastOp = op;
+
+                if (!hasMeaningfulElements && op.consumesReadBases()) {
+                    hasMeaningfulElements = true;
+                }
+
+                if (op == CigarOperator.I || op == CigarOperator.D) {
+
+                    // No reads that have consecutive indels in the cigar (II, DD, ID or DI)
+                    if (previousElementWasIndel) {
+                        return true;
+                    }
+                    previousElementWasIndel = true;
+                }
+                else {
+                    previousElementWasIndel = false;
+                }
+            }
+            // No reads with Hard/Soft clips in the middle of the cigar
+            else if (op == CigarOperator.S && previousOp == CigarOperator.H) {
+                return true;
             }
-            else                                                                                                        // this is a regular base (match/mismatch/hard or soft clip)
-                previousElementWasIndel = false;                                                                        // reset the previous element
 
-            lastOp = op;
+            previousOp = op;
         }
 
-        return lastOp == CigarOperator.D;
+        // No reads ending in deletions (with or without follow-up clips)
+        // No reads that are fully hard or soft clipped
+        return lastOp == CigarOperator.D || !hasMeaningfulElements;
     }
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java b/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java
index 999deddd1f..636787c69f 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java
@@ -119,7 +119,7 @@ private <T> File createTempFile( Stub<T> stub ) {
 
         try {
             tempFile = File.createTempFile( stub.getClass().getName(), null );
-            tempFile.deleteOnExit();
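+            //tempFile.deleteOnExit(); // TODO(review): disabled, so cleanup relies on the consumer deleting the file after merging -- confirm every Storage does, else temp files leak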
         }
         catch( IOException ex ) {
             throw new UserException.BadTmpDir("Unable to create temporary file for stub: " + stub.getClass().getName() );
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java
index cb8786be10..300e801e62 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java
@@ -62,6 +62,7 @@ public SAMFileWriterStorage( SAMFileWriterStub stub, File file ) {
         if (stub.getGenerateMD5())
             factory.setCreateMd5File(true);
         // Adjust max records in RAM.
+        // TODO -- this doesn't actually work because of a bug in Picard; do not use until fixed
         if(stub.getMaxRecordsInRam() != null)
             factory.setMaxRecordsInRam(stub.getMaxRecordsInRam());
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java
index fb05a6b047..0f5290db7a 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java
@@ -27,9 +27,10 @@
 import net.sf.samtools.util.BlockCompressedOutputStream;
 import org.apache.log4j.Logger;
 import org.broad.tribble.AbstractFeatureReader;
+import org.broad.tribble.FeatureCodec;
 import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub;
+import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
 import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -60,6 +61,7 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
     protected final File file;
     protected OutputStream stream;
     protected final VariantContextWriter writer;
+    boolean closed = false;
 
     /**
      * Constructs an object which will write directly into the output file provided by the stub.
@@ -81,6 +83,18 @@ else if ( stub.getOutputStream() != null ) {
             throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
     }
 
+    /**
+     * Constructs an object which will redirect into a different file.
+     * @param stub Stub to use when synthesizing file / header info.
+     * @param tempFile File into which to direct the output data.
+     */
+    public VariantContextWriterStorage(VariantContextWriterStub stub, File tempFile) {
+        logger.debug("Creating temporary output file " + tempFile.getAbsolutePath() + " for VariantContext output.");
+        this.file = tempFile;
+        this.writer = vcfWriterToFile(stub, file, false);
+        writer.writeHeader(stub.getVCFHeader());
+    }
+
     /**
      * common initialization routine for multiple constructors
      * @param stub Stub to use when constructing the output file.
@@ -139,19 +153,6 @@ public void add(final VariantContext vc) {
         }
     }
 
-
-    /**
-     * Constructs an object which will redirect into a different file.
-     * @param stub Stub to use when synthesizing file / header info.
-     * @param tempFile File into which to direct the output data.
-     */
-    public VariantContextWriterStorage(VariantContextWriterStub stub, File tempFile) {
-        logger.debug("Creating temporary VCF file " + tempFile.getAbsolutePath() + " for VCF output.");
-        this.file = tempFile;
-        this.writer = vcfWriterToFile(stub, file, false);
-        writer.writeHeader(stub.getVCFHeader());
-    }
-
     public void add(VariantContext vc) {
         writer.add(vc);
     }
@@ -172,20 +173,34 @@ public void close() {
         if(file != null)
             logger.debug("Closing temporary file " + file.getAbsolutePath());
         writer.close();
+        closed = true;
     }
 
     public void mergeInto(VariantContextWriterStorage target) {
         try {
-            String sourceFilePath = file.getAbsolutePath();
-            String targetFilePath = target.file != null ? target.file.getAbsolutePath() : "/dev/stdin";
-            logger.debug(String.format("Merging %s into %s",sourceFilePath,targetFilePath));
-            AbstractFeatureReader<VariantContext> source = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
+            if ( ! closed )
+                throw new ReviewedStingException("Writer not closed, but we are merging into the file!");
+            final String targetFilePath = target.file != null ? target.file.getAbsolutePath() : "/dev/stdout"; // a target without a file writes to a stream
+            logger.debug(String.format("Merging %s into %s",file.getAbsolutePath(),targetFilePath));
+
+            // use the feature manager to determine the right codec for the tmp file
+            // that way we don't assume it's a specific type
+            final FeatureManager.FeatureDescriptor fd = new FeatureManager().getByFiletype(file);
+            if ( fd == null )
+                throw new ReviewedStingException("Unexpectedly couldn't find valid codec for temporary output file " + file);
+
+            final FeatureCodec<VariantContext> codec = fd.getCodec();
+            final AbstractFeatureReader<VariantContext> source =
+                    AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), codec, false);
             
-            for ( VariantContext vc : source.iterator() ) {
+            for ( final VariantContext vc : source.iterator() ) {
                 target.writer.add(vc);
             }
 
             source.close();
+            file.delete(); // this should be last to aid in debugging when the process fails
+        } catch (UserException e) {
+            throw new ReviewedStingException("BUG: intermediate file " + file + " is malformed, got error while reading", e);
         } catch (IOException e) {
             throw new UserException.CouldNotReadInputFile(file, "Error reading file in VCFWriterStorage: ", e);
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java
index 09766f1273..5e1132d459 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java
@@ -47,6 +47,7 @@
 public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
     public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header";
     public static final String SITES_ONLY_ARG_NAME = "sites_only";
+    public static final String FORCE_BCF = "bcf";
     public static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
 
     //
@@ -96,7 +97,11 @@ public boolean supports( Class type ) {
 
     @Override
     public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
-        return Arrays.asList( createDefaultArgumentDefinition(source), createNoCommandLineHeaderArgumentDefinition(),createSitesOnlyArgumentDefinition());
+        return Arrays.asList(
+                createDefaultArgumentDefinition(source),
+                createNoCommandLineHeaderArgumentDefinition(),
+                createSitesOnlyArgumentDefinition(),
+                createBCFArgumentDefinition() );
     }
 
     /**
@@ -117,7 +122,7 @@ public String typeDefaultDocString(ArgumentSource source) {
     public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
         if(!source.isRequired())
             throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");        
-        VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, false, argumentSources, false, false);
+        VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
         engine.addOutput(stub);
         return stub;
     }
@@ -141,15 +146,15 @@ public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type ty
         if(writerFile == null && !source.isRequired())
             throw new MissingArgumentValueException(defaultArgumentDefinition);
 
-        // Should we compress the output stream?
-        boolean compress = isCompressed(writerFileName);
-
-        boolean skipWritingCmdLineHeader = argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches);
-        boolean doNotWriteGenotypes = argumentIsPresent(createSitesOnlyArgumentDefinition(),matches);
-
         // Create a stub for the given object.
-        VariantContextWriterStub stub = (writerFile != null) ? new VariantContextWriterStub(engine, writerFile, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes)
-                                                  : new VariantContextWriterStub(engine, defaultOutputStream, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes);
+        final VariantContextWriterStub stub = (writerFile != null)
+                ? new VariantContextWriterStub(engine, writerFile, argumentSources)
+                : new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
+
+        stub.setCompressed(isCompressed(writerFileName));
+        stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches));
+        stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches));
+        stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches));
 
         // WARNING: Side effects required by engine!
         parsingEngine.addTags(stub,getArgumentTags(matches));
@@ -159,8 +164,8 @@ public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type ty
     }
 
     /**
-     * Creates the optional compression level argument for the BAM file.
-     * @return Argument definition for the BAM file itself.  Will not be null.
+     * Creates the optional no_cmdline_in_header argument definition for the VCF writer.
+     * @return Argument definition for the no_cmdline_in_header flag.  Will not be null.
      */
     private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() {
         return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
@@ -179,8 +184,8 @@ private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() {
     }
 
     /**
-     * Creates the optional compression level argument for the BAM file.
-     * @return Argument definition for the BAM file itself.  Will not be null.
+     * Creates the optional sites_only argument definition
+     * @return Argument definition for the sites_only flag.  Will not be null.
      */
     private ArgumentDefinition createSitesOnlyArgumentDefinition() {
         return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
@@ -198,6 +203,26 @@ private ArgumentDefinition createSitesOnlyArgumentDefinition() {
                                        null );
     }
 
+    /**
+     * Creates the optional bcf argument definition
+     * @return Argument definition for the bcf flag.  Will not be null.
+     */
+    private ArgumentDefinition createBCFArgumentDefinition() {
+        return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
+                boolean.class,
+                FORCE_BCF,
+                FORCE_BCF,
+                "force BCF output, regardless of the file's extension",
+                false,
+                true,
+                false,
+                true,
+                null,
+                null,
+                null,
+                null );
+    }
+
     /**
      * Returns true if the file will be compressed.
      * @param writerFileName Name of the file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java
index 6ed889eb67..260a7efdaf 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java
@@ -35,6 +35,7 @@
 import org.broadinstitute.sting.utils.variantcontext.writer.Options;
 import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
 
 import java.io.File;
 import java.io.OutputStream;
@@ -78,7 +79,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
     /**
      * Should we emit a compressed output stream?
      */
-    private final boolean isCompressed;
+    private boolean isCompressed = false;
 
     /**
      * A hack: push the argument sources into the VCF header so that the VCF header
@@ -89,12 +90,17 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
     /**
      * Should the header be written out?  A hidden argument.
      */
-    private final boolean skipWritingCommandLineHeader;
+    private boolean skipWritingCommandLineHeader = false;
 
     /**
      * Should we not write genotypes even when provided?
      */
-    private final boolean doNotWriteGenotypes;
+    private boolean doNotWriteGenotypes = false;
+
+    /**
+     * Should we force BCF writing regardless of the file extension?
+     */
+    private boolean forceBCF = false;
 
     /**
      * Connects this stub with an external stream capable of serving the
@@ -107,19 +113,13 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
      *
      * @param engine engine.
      * @param genotypeFile  file to (ultimately) create.
-     * @param isCompressed  should we compress the output stream?
      * @param argumentSources sources.
-     * @param skipWritingCommandLineHeader skip writing header.
-     * @param doNotWriteGenotypes do not write genotypes.
      */
-    public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) {
+    public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, Collection<Object> argumentSources) {
         this.engine = engine;
         this.genotypeFile = genotypeFile;
         this.genotypeStream = null;
-        this.isCompressed = isCompressed;
         this.argumentSources = argumentSources;
-        this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
-        this.doNotWriteGenotypes = doNotWriteGenotypes;
     }
 
     /**
@@ -127,19 +127,13 @@ public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile,
      *
      * @param engine engine.
      * @param genotypeStream  stream to (ultimately) write.
-     * @param isCompressed  should we compress the output stream?
      * @param argumentSources sources.
-     * @param skipWritingCommandLineHeader skip writing header.
-     * @param doNotWriteGenotypes do not write genotypes.
      */
-    public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) {
+    public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, Collection<Object> argumentSources) {
         this.engine = engine;
         this.genotypeFile = null;
         this.genotypeStream = new PrintStream(genotypeStream);
-        this.isCompressed = isCompressed;
         this.argumentSources = argumentSources;
-        this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
-        this.doNotWriteGenotypes = doNotWriteGenotypes;
     }
 
     /**
@@ -166,6 +160,22 @@ public boolean isCompressed() {
         return isCompressed;
     }
 
+    public void setCompressed(boolean compressed) {
+        isCompressed = compressed;
+    }
+
+    public void setSkipWritingCommandLineHeader(boolean skipWritingCommandLineHeader) {
+        this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
+    }
+
+    public void setDoNotWriteGenotypes(boolean doNotWriteGenotypes) {
+        this.doNotWriteGenotypes = doNotWriteGenotypes;
+    }
+
+    public void setForceBCF(boolean forceBCF) {
+        this.forceBCF = forceBCF;
+    }
+
     /**
      * Gets the master sequence dictionary from the engine associated with this stub
      * @link GenomeAnalysisEngine.getMasterSequenceDictionary
@@ -186,6 +196,9 @@ public EnumSet<Options> getWriterOptions(boolean indexOnTheFly) {
         if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
         if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
 
+        if ( forceBCF || (getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile())) )
+            options.add(Options.FORCE_BCF);
+
         return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
index 6ff9f3bd5d..f970691898 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
@@ -159,7 +159,7 @@ public CigarOperator stepForwardOnGenome() {
                     return stepForwardOnGenome();
                 } else {
                     if (curElement != null && curElement.getOperator() == CigarOperator.D)
-                        throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads ending in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar");
+                        throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
                         
                     // Reads that contain indels model the genomeOffset as the following base in the reference.  Because
                     // we fall into this else block only when indels end the read, increment genomeOffset  such that the
@@ -185,7 +185,7 @@ public CigarOperator stepForwardOnGenome() {
                     break;
                 case D: // deletion w.r.t. the reference
                     if (readOffset < 0)             // we don't want reads starting with deletion, this is a malformed cigar string
-                        throw new UserException.MalformedBAM(read, "Read starting with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads starting in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar");
+                        throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
                     // should be the same as N case
                     genomeOffset++;
                     done = true;
@@ -195,6 +195,8 @@ public CigarOperator stepForwardOnGenome() {
                     done = true;
                     break;
                 case M:
+                case EQ:
+                case X:
                     readOffset++;
                     genomeOffset++;
                     done = true;
@@ -279,7 +281,6 @@ public AlignmentContext next() {
      */
     private void lazyLoadNextAlignmentContext() {
         while (nextAlignmentContext == null && readStates.hasNext()) {
-            // this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
             readStates.collectPendingReads();
 
             final GenomeLoc location = getLocation();
@@ -378,7 +379,7 @@ private void updateReadStates() {
                 CigarOperator op = state.stepForwardOnGenome();
                 if (op == null) {
                     // we discard the read only when we are past its end AND indel at the end of the read (if any) was
-                    // already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe
+                    // already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
                     // as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
                     it.remove();                                                // we've stepped off the end of the object
                 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
index f190cbcfd7..b60a7845aa 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
@@ -86,13 +86,14 @@ public class GATKRunReport {
     private static File REPORT_SENTINEL = new File(REPORT_DIR.getAbsolutePath() + "/ENABLE");
 
     // number of milliseconds before the S3 put operation is timed-out:
-    private static final long S3PutTimeOut = 30 * 1000;
+    private static final long S3PutTimeOut = 10 * 1000;
 
+    public static final String PHONE_HOME_DOCS_URL = "http://gatkforums.broadinstitute.org/discussion/1250/what-is-phone-home-and-how-does-it-affect-me#latest";
 
     /**
      * our log
      */
-    protected static Logger logger = Logger.getLogger(GATKRunReport.class);
+    protected static final Logger logger = Logger.getLogger(GATKRunReport.class);
 
 
     @Element(required = false, name = "id")
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java
index fe069c2d97..2b46414a80 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java
@@ -163,43 +163,58 @@ public static List<String> getAlleleList(OldDbSNPFeature feature) {
         @Override        
         public VariantContext convert(String name, Object input, ReferenceContext ref) {
             OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
-            if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
+
+            int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
+            if ( index < 0 )
+                return null; // we weren't given enough reference context to create the VariantContext
+
+            final byte refBaseForIndel = ref.getBases()[index];
+
+            boolean addPaddingBase;
+            if ( isSNP(dbsnp) || isMNP(dbsnp) )
+                addPaddingBase = false;
+            else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
+                addPaddingBase = VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
+            else
+                return null; // can't handle anything else
+
+            Allele refAllele;
+            if ( dbsnp.getNCBIRefBase().equals("-") )
+                refAllele = Allele.create(refBaseForIndel, true);
+            else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
                 return null;
-            Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);
+            else
+                refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true);
 
-            if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
-                // add the reference allele
-                List<Allele> alleles = new ArrayList<Allele>();
-                alleles.add(refAllele);
+            final List<Allele> alleles = new ArrayList<Allele>();
+            alleles.add(refAllele);
 
-                // add all of the alt alleles
-                boolean sawNullAllele = refAllele.isNull();
-                for ( String alt : getAlternateAlleleList(dbsnp) ) {
-                    if ( ! Allele.acceptableAlleleBases(alt) ) {
-                        //System.out.printf("Excluding dbsnp record %s%n", dbsnp);
-                        return null;
-                    }
-                    Allele altAllele = Allele.create(alt, false);
-                    alleles.add(altAllele);
-                    if ( altAllele.isNull() )
-                        sawNullAllele = true;
-                }
+            // add all of the alt alleles
+            for ( String alt : getAlternateAlleleList(dbsnp) ) {
+                if ( Allele.wouldBeNullAllele(alt.getBytes()))
+                    alt = "";
+                else if ( ! Allele.acceptableAlleleBases(alt) )
+                    return null;
 
-                Map<String, Object> attributes = new HashMap<String, Object>();
+                alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false));
+            }
 
-                int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
-                if ( index < 0 )
-                    return null; // we weren't given enough reference context to create the VariantContext
-                Byte refBaseForIndel = new Byte(ref.getBases()[index]);
-
-                final VariantContextBuilder builder = new VariantContextBuilder();
-                builder.source(name).id(dbsnp.getRsID());
-                builder.loc(dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0));
-                builder.alleles(alleles);
-                builder.referenceBaseForIndel(refBaseForIndel);
-                return builder.make();
-            } else
-                return null; // can't handle anything else
+            final VariantContextBuilder builder = new VariantContextBuilder();
+            builder.source(name).id(dbsnp.getRsID());
+            builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 1 : 0));
+            builder.alleles(alleles);
+            return builder.make();
+        }
+
+        private static List<String> stripNullDashes(final List<String> alleles) {
+            final List<String> newAlleles = new ArrayList<String>(alleles.size());
+            for ( final String allele : alleles ) {
+                if ( allele.equals("-") )
+                    newAlleles.add("");
+                else
+                    newAlleles.add(allele);
+            }
+            return newAlleles;
         }
     }
 
@@ -294,7 +309,6 @@ public VariantContext convert(String name, Object input, ReferenceContext ref) {
             int index = hapmap.getStart() - ref.getWindow().getStart();
             if ( index < 0 )
                 return null; // we weren't given enough reference context to create the VariantContext
-            Byte refBaseForIndel = new Byte(ref.getBases()[index]);
 
             HashSet<Allele> alleles = new HashSet<Allele>();
             Allele refSNPAllele = Allele.create(ref.getBase(), true);
@@ -351,7 +365,7 @@ public VariantContext convert(String name, Object input, ReferenceContext ref) {
             long end = hapmap.getEnd();
             if ( deletionLength > 0 )
                 end += deletionLength;
-            VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).referenceBaseForIndel(refBaseForIndel).make();
+            VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make();
             return vc;
        }
     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java
index b5d5deedb8..a2fe946412 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java
@@ -85,18 +85,16 @@ public int compareTo(FeatureDescriptor o) {
 
     private final PluginManager<FeatureCodec> pluginManager;
     private final Collection<FeatureDescriptor> featureDescriptors = new TreeSet<FeatureDescriptor>();
-    private final VCFHeader headerForRepairs;
     private final boolean lenientVCFProcessing;
 
     /**
      * Construct a FeatureManager without a master VCF header
      */
     public FeatureManager() {
-        this(null, false);
+        this(false);
     }
 
-    public FeatureManager(final VCFHeader headerForRepairs, final boolean lenientVCFProcessing) {
-        this.headerForRepairs = headerForRepairs;
+    public FeatureManager(final boolean lenientVCFProcessing) {
         this.lenientVCFProcessing = lenientVCFProcessing;
         pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec");
 
@@ -255,8 +253,6 @@ public FeatureCodec createCodec(FeatureDescriptor descriptor, String name, Genom
             ((NameAwareCodec)codex).setName(name);
         if ( codex instanceof ReferenceDependentFeatureCodec )
             ((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
-        if ( codex instanceof VCFCodec )
-            ((VCFCodec)codex).setHeaderForRepairs(headerForRepairs);
         if ( codex instanceof AbstractVCFCodec && lenientVCFProcessing )
             ((AbstractVCFCodec)codex).disableOnTheFlyModifications();
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java
index e183fe1698..81fe73075d 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java
@@ -89,17 +89,15 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
      * please talk through your approach with the SE team.
      * @param dict Sequence dictionary to use.
      * @param genomeLocParser Location parser to use.
-     * @param headerForRepairs a VCF header that should be used to repair VCF headers.  Can be null
      * @param validationExclusionType Types of validations to exclude, for sequence dictionary verification.
      */
     public RMDTrackBuilder(final SAMSequenceDictionary dict,
                            final GenomeLocParser genomeLocParser,
-                           final VCFHeader headerForRepairs,
                            ValidationExclusion.TYPE validationExclusionType) {
         this.dict = dict;
         this.validationExclusionType = validationExclusionType;
         this.genomeLocParser = genomeLocParser;
-        this.featureManager = new FeatureManager(headerForRepairs, GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType));
+        this.featureManager = new FeatureManager(GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType));
     }
 
     /**
@@ -111,18 +109,6 @@ public FeatureManager getFeatureManager() {
         return featureManager;
     }
 
-    /**
-     * Same as full constructor but makes one without a header for repairs
-     * @param dict
-     * @param genomeLocParser
-     * @param validationExclusionType
-     */
-    public RMDTrackBuilder(final SAMSequenceDictionary dict,
-                           final GenomeLocParser genomeLocParser,
-                           ValidationExclusion.TYPE validationExclusionType) {
-        this(dict, genomeLocParser, null, validationExclusionType);
-    }
-
     /**
      * create a RMDTrack of the specified type
      *
diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
index bec1ea5430..47bc48f816 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
@@ -89,9 +89,9 @@ private void loadReport(File file) {
             reader = new BufferedReader(new FileReader(file));
             reportHeader = reader.readLine();
         } catch (FileNotFoundException e) {
-            throw new ReviewedStingException("Could not open file : " + file);
+            throw new UserException.CouldNotReadInputFile(file, "it does not exist");
         } catch (IOException e) { 
-            throw new ReviewedStingException("Could not read file : " + file);                            
+            throw new UserException.CouldNotReadInputFile(file, e);
         }   
 
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java
index 7a272e1551..3b4bdd087d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java
@@ -208,11 +208,23 @@ public GATKReportTable(final String tableName, final String tableDescription, fi
     }
 
     /**
-     * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
-     *
-     * @param name the name of the table or column
-     * @return true if the name is valid, false if otherwise
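+     * Create a new, empty GATKReportTable with the same name, description, and columns as tableToCopy; copying row data is not supported
+     * @param tableToCopy the table whose structure is duplicated (copyData must be false, otherwise an IllegalArgumentException is thrown)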
      */
+    public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) {
+        this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID);
+        for ( final GATKReportColumn column : tableToCopy.getColumnInfo() )
+            addColumn(column.getColumnName(), column.getFormat());
+        if ( copyData )
+            throw new IllegalArgumentException("sorry, copying data in GATKReportTable isn't supported");
+    }
+
+    /**
+     * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
+     *
+     * @param name the name of the table or column
+     * @return true if the name is valid, false otherwise
+     */
     private boolean isValidName(String name) {
         Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX);
         Matcher m = p.matcher(name);
@@ -490,6 +502,17 @@ public Object get(final Object rowID, final String columnName) {
         return get(rowIdToIndex.get(rowID), columnNameToIndex.get(columnName));
     }
 
+    /**
+     * Get a value from the given position in the table
+     *
+     * @param rowIndex    the index of the row
+     * @param columnName  the name of the column
+     * @return the value stored at the specified position in the table
+     */
+    public Object get(final int rowIndex, final String columnName) {
+        return get(rowIndex, columnNameToIndex.get(columnName));
+    }
+
     /**
      * Get a value from the given position in the table
      *
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
index 4ef2555243..abc71e5499 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
@@ -62,54 +62,6 @@ public ProcessingHistory(double elapsedSeconds, GenomeLoc loc, long unitsProcess
 
     }
 
-    /**
-     * Simple utility class that makes it convenient to print unit adjusted times
-     */
-    private static class MyTime {
-        double t;           // in Seconds
-        int precision;      // for format
-
-        public MyTime(double t, int precision) {
-            this.t = t;
-            this.precision = precision;
-        }
-
-        public MyTime(double t) {
-            this(t, 1);
-        }
-
-        /**
-         * Instead of 10000 s, returns 2.8 hours
-         * @return
-         */
-        public String toString() {
-            double unitTime = t;
-            String unit = "s";
-
-            if ( t > 120 ) {
-                unitTime = t / 60; // minutes
-                unit = "m";
-
-                if ( unitTime > 120 ) {
-                    unitTime /= 60; // hours
-                    unit = "h";
-
-                    if ( unitTime > 100 ) {
-                        unitTime /= 24; // days
-                        unit = "d";
-
-                        if ( unitTime > 20 ) {
-                            unitTime /= 7; // days
-                            unit = "w";
-                        }
-                    }
-                }
-            }
-
-            return String.format("%6."+precision+"f %s", unitTime, unit);
-        }
-    }
-
     /** lock object to sure updates to history are consistent across threads */
     private static final Object lock = new Object();
     LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>();
@@ -140,7 +92,7 @@ public String toString() {
     GenomeLocSortedSet targetIntervals = null;
 
     /** our log, which we want to capture anything from this class */
-    protected static Logger logger = Logger.getLogger(TraversalEngine.class);
+    protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
 
     protected GenomeAnalysisEngine engine;
 
@@ -280,20 +232,20 @@ private void printProgress(GenomeLoc loc, ReadMetrics metrics, boolean mustPrint
 
             ProcessingHistory last = updateHistory(loc,cumulativeMetrics);
 
-            final MyTime elapsed = new MyTime(last.elapsedSeconds);
-            final MyTime bpRate = new MyTime(secondsPerMillionBP(last));
-            final MyTime unitRate = new MyTime(secondsPerMillionElements(last));
+            final AutoFormattingTime elapsed = new AutoFormattingTime(last.elapsedSeconds);
+            final AutoFormattingTime bpRate = new AutoFormattingTime(secondsPerMillionBP(last));
+            final AutoFormattingTime unitRate = new AutoFormattingTime(secondsPerMillionElements(last));
             final double fractionGenomeTargetCompleted = calculateFractionGenomeTargetCompleted(last);
-            final MyTime estTotalRuntime = new MyTime(elapsed.t / fractionGenomeTargetCompleted);
-            final MyTime timeToCompletion = new MyTime(estTotalRuntime.t - elapsed.t);
+            final AutoFormattingTime estTotalRuntime = new AutoFormattingTime(elapsed.getTimeInSeconds() / fractionGenomeTargetCompleted);
+            final AutoFormattingTime timeToCompletion = new AutoFormattingTime(estTotalRuntime.getTimeInSeconds() - elapsed.getTimeInSeconds());
 
             if ( printProgress ) {
                 lastProgressPrintTime = curTime;
 
                 // dynamically change the update rate so that short running jobs receive frequent updates while longer jobs receive fewer updates
-                if ( estTotalRuntime.t > TWELVE_HOURS_IN_SECONDS )
+                if ( estTotalRuntime.getTimeInSeconds() > TWELVE_HOURS_IN_SECONDS )
                     PROGRESS_PRINT_FREQUENCY = 60 * 1000; // in milliseconds
-                else if ( estTotalRuntime.t > TWO_HOURS_IN_SECONDS )
+                else if ( estTotalRuntime.getTimeInSeconds() > TWO_HOURS_IN_SECONDS )
                     PROGRESS_PRINT_FREQUENCY = 30 * 1000; // in milliseconds
                 else
                     PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
@@ -308,8 +260,9 @@ else if ( estTotalRuntime.t > TWO_HOURS_IN_SECONDS )
                 lastPerformanceLogPrintTime = curTime;
                 synchronized(performanceLogLock) {
                     performanceLog.printf("%.2f\t%d\t%.2e\t%d\t%.2e\t%.2e\t%.2f\t%.2f%n",
-                            elapsed.t, nRecords, unitRate.t, last.bpProcessed, bpRate.t,
-                            fractionGenomeTargetCompleted, estTotalRuntime.t, timeToCompletion.t);
+                            elapsed.getTimeInSeconds(), nRecords, unitRate.getTimeInSeconds(), last.bpProcessed,
+                            bpRate.getTimeInSeconds(), fractionGenomeTargetCompleted, estTotalRuntime.getTimeInSeconds(),
+                            timeToCompletion.getTimeInSeconds());
                 }
             }
         }
@@ -401,7 +354,7 @@ public void setPerformanceLogFileName(String fileName) {
 
         synchronized(performanceLogLock) {
             // Ignore multiple calls to reset the same lock.
-            if(performanceLogFile != null && performanceLogFile.equals(fileName))
+            if(performanceLogFile != null && performanceLogFile.equals(file))
                 return;
 
             // Close an existing log
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
index 1b9c12fb06..979e0f2d60 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@@ -13,6 +13,7 @@
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocSortedSet;
 import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
+import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
@@ -28,7 +29,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
     /**
      * our log, which we want to capture anything from this class
      */
-    protected static Logger logger = Logger.getLogger(TraversalEngine.class);
+    protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
 
     private final LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion> workQueue = new LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion>();
     private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
@@ -69,8 +70,7 @@ public T traverse( final ActiveRegionWalker<M,T> walker,
                     for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) {
                         final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
                         if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
-                            final double isActiveProb = ( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 );
-                            profile.add(fakeLoc, isActiveProb);
+                            profile.add(fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ));
                         }
                     }
                 }
@@ -86,8 +86,7 @@ public T traverse( final ActiveRegionWalker<M,T> walker,
 
                 // Call the walkers isActive function for this locus and add them to the list to be integrated later
                 if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
-                    final double isActiveProb = walkerActiveProb(walker, tracker, refContext, locus, location);
-                    profile.add(location, isActiveProb);
+                    profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location));
                 }
 
                 // Grab all the previously unseen reads from this pileup and add them to the massive read list
@@ -144,11 +143,11 @@ public T traverse( final ActiveRegionWalker<M,T> walker,
     //
     // --------------------------------------------------------------------------------
 
-    private final double walkerActiveProb(final ActiveRegionWalker<M,T> walker,
+    private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
                                           final RefMetaDataTracker tracker, final ReferenceContext refContext,
                                           final AlignmentContext locus, final GenomeLoc location) {
         if ( walker.hasPresetActiveRegions() ) {
-            return walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0;
+            return new ActivityProfileResult(walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
         } else {
             return walker.isActive( tracker, refContext, locus );
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java
index 5c9b833120..a5a6919a22 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java
@@ -19,7 +19,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
     /**
      * our log, which we want to capture anything from this class
      */
-    protected static Logger logger = Logger.getLogger(TraversalEngine.class);
+    protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
 
     @Override
     protected String getTraversalType() {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java
index dd4402d82f..ebaac40af6 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java
@@ -24,7 +24,7 @@
 public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<M,T>,ReadShardDataProvider> {
 
     /** our log, which we want to capture anything from this class */
-    protected static Logger logger = Logger.getLogger(TraverseReadPairs.class);
+    protected static final Logger logger = Logger.getLogger(TraverseReadPairs.class);
 
     @Override
     protected String getTraversalType() {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java
index 24b8ac986b..d29e9a5f27 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java
@@ -51,7 +51,7 @@
  */
 public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,ReadShardDataProvider> {    
     /** our log, which we want to capture anything from this class */
-    protected static Logger logger = Logger.getLogger(TraverseReads.class);
+    protected static final Logger logger = Logger.getLogger(TraverseReads.class);
 
     @Override
     protected String getTraversalType() {
@@ -75,8 +75,6 @@ public T traverse(ReadWalker<M,T> walker,
         if( !dataProvider.hasReads() )
             throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
 
-        boolean needsReferenceBasesP = WalkerManager.isRequired(walker, DataSource.REFERENCE_BASES);
-
         ReadView reads = new ReadView(dataProvider);
         ReadReferenceView reference = new ReadReferenceView(dataProvider);
 
@@ -91,7 +89,7 @@ public T traverse(ReadWalker<M,T> walker,
             ReferenceContext refContext = null;
 
             // get the array of characters for the reference sequence, since we're a mapped read
-            if (needsReferenceBasesP && !read.getReadUnmappedFlag() && dataProvider.hasReference())
+            if (!read.getReadUnmappedFlag() && dataProvider.hasReference())
                 refContext = reference.getReferenceContext(read);
 
             // update the number of reads we've seen
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
index e38e166eac..cbe7913531 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
@@ -12,6 +12,7 @@
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.GenomeLocSortedSet;
+import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
 import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
 import org.broadinstitute.sting.utils.interval.IntervalSetRule;
 import org.broadinstitute.sting.utils.interval.IntervalUtils;
@@ -27,10 +28,11 @@
  */
 
 @By(DataSource.READS)
-@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
+@Requires({DataSource.READS, DataSource.REFERENCE})
 @PartitionBy(PartitionType.READ)
 @ActiveRegionExtension(extension=50,maxRegion=1500)
 @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
+@RemoveProgramRecords
 public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
 
     @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
@@ -72,7 +74,7 @@ public boolean wantsNonPrimaryReads() {
     }
 
     // Determine probability of active status over the AlignmentContext
-    public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
+    public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
 
     // Map over the ActiveRegion
     public abstract MapType map(final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReads.java
index beafd0870b..4eaa166929 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReads.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReads.java
@@ -574,7 +574,7 @@ public String toString() {
         }
     }
 
-    public class ReadClipperWithData extends ReadClipper {
+    public static class ReadClipperWithData extends ReadClipper {
         private ClippingData data;
 
         public ReadClipperWithData(GATKSAMRecord read, List<SeqToClip> clipSeqs) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/DataSource.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/DataSource.java
index a152ab1374..1f93c67a62 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/DataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/DataSource.java
@@ -16,8 +16,18 @@
  * Allow user to choose between a number of different data sources.
  */
 public enum DataSource {
+    /**
+     * Does this walker require read (BAM) data to work?
+     */
     READS,
+
+    /**
+     * Does this walker require reference data to work?
+     */
     REFERENCE,
-    REFERENCE_BASES,         // Do I actually need the reference bases passed to the walker?
+
+    /**
+     * Does this walker require reference-ordered data (VCF) to work?
+     */
     REFERENCE_ORDERED_DATA
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java
index e94d01d5af..3b18dda44c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java
@@ -16,9 +16,10 @@
  * To change this template use File | Settings | File Templates.
  */
 @By(DataSource.READS)
-@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
+@Requires({DataSource.READS,DataSource.REFERENCE})
 @PartitionBy(PartitionType.LOCUS)
 @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
+@RemoveProgramRecords
 public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
     // Do we actually want to operate on the context?
     public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java
index 0eb3a628db..2a6ecdb8cb 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java
@@ -64,9 +64,17 @@
  */
 @DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
 public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> {
+
+    private static final String verboseDelimiter = "@"; // it's ugly to use "@" but it's literally the only usable character not allowed in read names
+
     @Output
     PrintStream out;
 
+    /**
+     * Adds 'verbose' output in addition to the standard pileup output.  The verbose output contains the number of spanning deletions,
+     * and for each read in the pileup it has the read name, offset in the base string, read length, and read mapping quality.  These
+     * per-read items are delimited with an '@' character.
+     */
     @Argument(fullName="showVerbose",shortName="verbose",doc="Add an extra verbose section to the pileup output")
     public boolean SHOW_VERBOSE = false;
 
@@ -116,8 +124,6 @@ private String getReferenceOrderedData( RefMetaDataTracker tracker ) {
         return rodString;
     }
 
-    private static final String verboseDelimiter = "@"; // it's ugly to use "@" but it's literally the only usable character not allowed in read names
-
     private static String createVerboseOutput(final ReadBackedPileup pileup) {
         final StringBuilder sb = new StringBuilder();
         boolean isFirst = true;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java
index 8933bd73e9..77e3af93f1 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java
@@ -12,7 +12,7 @@
  * Time: 2:52:28 PM
  * To change this template use File | Settings | File Templates.
  */
-@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
+@Requires({DataSource.READS, DataSource.REFERENCE})
 @PartitionBy(PartitionType.READ)
 public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
     public boolean requiresOrderedReads() { return false; }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/RefWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/RefWalker.java
index 1d3debb48f..45bd14d4e6 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/RefWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/RefWalker.java
@@ -8,7 +8,7 @@
  * To change this template use File | Settings | File Templates.
  */
 @By(DataSource.REFERENCE)
-@Requires({DataSource.REFERENCE, DataSource.REFERENCE_BASES})
+@Requires({DataSource.REFERENCE})
 @Allows(DataSource.REFERENCE)
 public abstract class RefWalker<MapType, ReduceType> extends LocusWalker<MapType, ReduceType> {
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java
new file mode 100644
index 0000000000..d9abc79258
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java
@@ -0,0 +1,21 @@
+package org.broadinstitute.sting.gatk.walkers;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: thibault
+ * Date: 8/2/12
+ * Time: 1:58 PM
+ * To change this template use File | Settings | File Templates.
+ */
+
+import java.lang.annotation.*;
+
+/**
+ * Indicates that program records should be removed from SAM headers by default for this walker
+ */
+@Documented
+@Inherited
+@Retention(RetentionPolicy.RUNTIME)
+@Target(ElementType.TYPE)
+public @interface RemoveProgramRecords {
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java
index 0b919da18c..bd884892c0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java
@@ -65,12 +65,12 @@ protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> re
                 // by design, first element in LinkedHashMap was ref allele
                 double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
 
-                for (Allele a : el.keySet()) {
+                for (Map.Entry<Allele, Double> entry : el.entrySet()) {
 
-                    if (a.isReference())
-                        refLikelihood =el.get(a);
+                    if (entry.getKey().isReference())
+                        refLikelihood = entry.getValue();
                     else {
-                        double like = el.get(a);
+                        double like = entry.getValue();
                         if (like >= altLikelihood)
                             altLikelihood = like;
                     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java
index 28ca77f183..39b5e84dca 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java
@@ -22,19 +22,10 @@
 /**
  * Total (unfiltered) depth over all samples.
  *
- * This and AD are complementary fields that are two important ways of thinking about the depth of the data for this sample
- * at this site.  The DP field describe the total depth of reads that passed the Unified Genotypers internal
- * quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
- * The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
- * REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
- * power I have to determine the genotype of the sample at this site, while the AD tells me how many times
- * I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
- * the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like
- * to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
- * normally be excluded from the statistical calculations going into GQ and QUAL.
- *
- * Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with
- * -dcov D is N * D
+ * While the sample-level (FORMAT) DP field describes the total depth of reads that passed the Unified Genotyper's
+ * internal quality control metrics (like MAPQ > 17, for example), the INFO field DP represents the unfiltered depth
+ * over all samples.  Note, though, that the DP is affected by downsampling (-dcov), so the max value one can obtain for
+ * N samples with -dcov D is N * D.
  */
 public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
index 523aa81b13..5d83ddd510 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
@@ -24,10 +24,10 @@
 /**
  * The depth of coverage of each VCF allele in this sample.
  *
- * This and DP are complementary fields that are two important ways of thinking about the depth of the data for this sample
- * at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal
- * quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
- * The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
+ * The AD and DP are complementary fields that are two important ways of thinking about the depth of the data for this
+ * sample at this site.  While the sample-level (FORMAT) DP field describes the total depth of reads that passed the
+ * Unified Genotyper's internal quality control metrics (like MAPQ > 17, for example), the AD values (one for each of
+ * REF and ALT fields) are the unfiltered counts of all reads that carried with them the
  * REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
  * power I have to determine the genotype of the sample at this site, while the AD tells me how many times
  * I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
@@ -35,17 +35,13 @@
  * to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
  * normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
  * the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
- * are actually present and correctly left-aligned in the alignments themselves). Because of this fact and
+ * are unambiguously informative about the alternate allele). Because of this fact and
  * because the AD includes reads and bases that were filtered by the Unified Genotyper, <b>one should not base
  * assumptions about the underlying genotype based on it</b>; instead, the genotype likelihoods (PLs) are what
- * determine the genotype calls (see below).
+ * determine the genotype calls.
  */
 public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
 
-    private static final String REF_ALLELE = "REF";
-
-    private static final String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
-
     public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
         if ( g == null || !g.isCalled() )
             return;
@@ -53,10 +49,10 @@ public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, Ref
         if ( vc.isSNP() )
             annotateSNP(stratifiedContext, vc, gb);
         else if ( vc.isIndel() )
-            annotateIndel(stratifiedContext, vc, gb);
+            annotateIndel(stratifiedContext, ref.getBase(), vc, gb);
     }
 
-    private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
+    private void annotateSNP(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {
 
         HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
         for ( Allele allele : vc.getAlleles() )
@@ -77,62 +73,47 @@ private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc,
         gb.AD(counts);
     }
 
-    private void annotateIndel(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
+    private void annotateIndel(final AlignmentContext stratifiedContext, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) {
         ReadBackedPileup pileup = stratifiedContext.getBasePileup();
         if ( pileup == null )
             return;
 
-        final HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
-        alleleCounts.put(REF_ALLELE, 0);
+        final HashMap<Allele, Integer> alleleCounts = new HashMap<Allele, Integer>();
         final Allele refAllele = vc.getReference();
 
-        for ( Allele allele : vc.getAlternateAlleles() ) {
-
-            if ( allele.isNoCall() ) {
-                continue; // this does not look so good, should we die???
-            }
-
-            alleleCounts.put(getAlleleRepresentation(allele), 0);
+        for ( final Allele allele : vc.getAlleles() ) {
+            alleleCounts.put(allele, 0);
         }
 
         for ( PileupElement p : pileup ) {
             if ( p.isBeforeInsertion() ) {
 
-                final String b = p.getEventBases();
-                if ( alleleCounts.containsKey(b) ) {
-                    alleleCounts.put(b, alleleCounts.get(b)+1);
+                final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false);
+                if ( alleleCounts.containsKey(insertion) ) {
+                    alleleCounts.put(insertion, alleleCounts.get(insertion)+1);
                 }
 
             } else if ( p.isBeforeDeletionStart() ) {
-                    if ( p.getEventLength() == refAllele.length() ) {
-                        // this is indeed the deletion allele recorded in VC
-                        final String b = DEL;
-                        if ( alleleCounts.containsKey(b) ) {
-                            alleleCounts.put(b, alleleCounts.get(b)+1);
-                        }
+                if ( p.getEventLength() == refAllele.length() - 1 ) {
+                    // this is indeed the deletion allele recorded in VC
+                    final Allele deletion = Allele.create(refBase);
+                    if ( alleleCounts.containsKey(deletion) ) {
+                        alleleCounts.put(deletion, alleleCounts.get(deletion)+1);
                     }
+                }
             } else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) {
-                alleleCounts.put(REF_ALLELE, alleleCounts.get(REF_ALLELE)+1);
+                alleleCounts.put(refAllele, alleleCounts.get(refAllele)+1);
             }
         }
 
-        int[] counts = new int[alleleCounts.size()];
-        counts[0] = alleleCounts.get(REF_ALLELE);
+        final int[] counts = new int[alleleCounts.size()];
+        counts[0] = alleleCounts.get(refAllele);
         for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
-            counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) );
+            counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) );
 
         gb.AD(counts);
     }
 
-    private String getAlleleRepresentation(Allele allele) {
-        if ( allele.isNull() ) { // deletion wrt the ref
-             return DEL;
-        } else { // insertion, pass actual bases
-            return allele.getBaseString();
-        }
-
-    }
-
  //   public String getIndelBases()
     public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
index 4669cfef81..1316705991 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
@@ -291,8 +291,8 @@ private static int[][] getIndelContingencyTable(Map<String, AlignmentContext> st
         
         int[][] table = new int[2][2];
 
-        for ( String sample : stratifiedContexts.keySet() ) {
-            final AlignmentContext context = stratifiedContexts.get(sample);
+        for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
+            final AlignmentContext context = sample.getValue();
             if ( context == null )
                 continue;
 
@@ -313,12 +313,12 @@ private static int[][] getIndelContingencyTable(Map<String, AlignmentContext> st
 
                     double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
 
-                    for (Allele a : el.keySet()) {
+                    for (Map.Entry<Allele,Double> entry : el.entrySet()) {
 
-                        if (a.isReference())
-                            refLikelihood =el.get(a);
+                        if (entry.getKey().isReference())
+                            refLikelihood = entry.getValue();
                         else {
-                            double like = el.get(a);
+                            double like = entry.getValue();
                             if (like >= altLikelihood)
                                 altLikelihood = like;
                         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java
index 45444e05d7..c6d8883c52 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java
@@ -103,7 +103,7 @@ else if (vc.isIndel() || vc.isMixed()) {
         return map;
     }
 
-    private class HaplotypeComparator implements Comparator<Haplotype> {
+    private static class HaplotypeComparator implements Comparator<Haplotype> {
 
         public int compare(Haplotype a, Haplotype b) {
             if (a.getQualitySum() < b.getQualitySum())
@@ -362,8 +362,8 @@ private Double scoreIndelsAgainstHaplotypes(final ReadBackedPileup pileup) {
                 // Score all the reads in the pileup, even the filtered ones
                 final double[] scores = new double[el.size()];
                 int i = 0;
-                for (Allele a : el.keySet()) {
-                    scores[i++] = -el.get(a);
+                for (Map.Entry<Allele, Double> a : el.entrySet()) {
+                    scores[i++] = -a.getValue();
                     if (DEBUG) {
                         System.out.printf("  vs. haplotype %d = %f%n", i - 1, scores[i - 1]);
                     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
index c7fb7ecbab..31067e3868 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
@@ -61,12 +61,12 @@ protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> re
                 // by design, first element in LinkedHashMap was ref allele
                 double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
 
-                for (Allele a : el.keySet()) {
+                for (Map.Entry<Allele,Double> a : el.entrySet()) {
 
-                    if (a.isReference())
-                        refLikelihood =el.get(a);
+                    if (a.getKey().isReference())
+                        refLikelihood = a.getValue();
                     else {
-                        double like = el.get(a);
+                        double like = a.getValue();
                         if (like >= altLikelihood)
                             altLikelihood = like;
                     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
index 630344992d..3456041c7e 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
@@ -87,11 +87,11 @@ protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> re
                 LinkedHashMap<Allele, Double> el = indelLikelihoodMap.get(p);           // retrieve likelihood information corresponding to this read
                 double refLikelihood = 0.0, altLikelihood = Double.NEGATIVE_INFINITY;   // by design, first element in LinkedHashMap was ref allele
 
-                for (Allele a : el.keySet()) {
-                    if (a.isReference())
-                        refLikelihood = el.get(a);
+                for (Map.Entry<Allele,Double> a : el.entrySet()) {
+                    if (a.getKey().isReference())
+                        refLikelihood = a.getValue();
                     else {
-                        double like = el.get(a);
+                        double like = a.getValue();
                         if (like >= altLikelihood)
                             altLikelihood = like;
                     }
@@ -100,7 +100,6 @@ protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> re
                 int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset());
                 final int numAlignedBases = getNumAlignedBases(p.getRead());
 
-                int rp = readPos;
                 if (readPos > numAlignedBases / 2) {
                     readPos = numAlignedBases - (readPos + 1);
                 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java
index eced387b36..f220ecbd28 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java
@@ -66,8 +66,8 @@ public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompati
         return map;
     }
 
-    public static final String[] keyNames = {STR_PRESENT, REPEAT_UNIT_KEY,REPEATS_PER_ALLELE_KEY };
-    public static final VCFInfoHeaderLine[] descriptions = {
+    protected static final String[] keyNames = {STR_PRESENT, REPEAT_UNIT_KEY,REPEATS_PER_ALLELE_KEY };
+    protected static final VCFInfoHeaderLine[] descriptions = {
             new VCFInfoHeaderLine(STR_PRESENT, 0, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"),
             new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"),
             new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java
index 0aa05cf89c..83b10dd91d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java
@@ -30,7 +30,6 @@
 import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.utils.GenomeLoc;
@@ -142,9 +141,6 @@ public void initialize() {
         hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_G", "This 'G' site was set to monomorphic by Beagle"));
         hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_T", "This 'T' site was set to monomorphic by Beagle"));
 
-        // Open output file specified by output VCF ROD
-        final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
-
         if ( comp.isBound() ) {
             hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site"));
             hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site"));
@@ -250,8 +246,6 @@ public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentC
 
             // Beagle always produces genotype strings based on the strings we input in the likelihood file.
             String refString = vc_input.getReference().getDisplayString();
-            if (refString.length() == 0) // ref was null
-                refString = Allele.NULL_ALLELE_STRING;
 
             Allele bglAlleleA, bglAlleleB;
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java
index 0224d76a4e..d11747766a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java
@@ -239,7 +239,7 @@ public void writeBeagleOutput(VariantContext preferredVC, VariantContext otherVC
         if ( markers != null ) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t");
         for ( Allele allele : preferredVC.getAlleles() ) {
             String bglPrintString;
-            if (allele.isNoCall() || allele.isNull())
+            if (allele.isNoCall())
                 bglPrintString = "-";
             else
                 bglPrintString = allele.getBaseString();  // get rid of * in case of reference allele
@@ -351,7 +351,6 @@ private void initializeVcfWriter() {
     }
 
     public static class CachingFormatter {
-        private int maxCacheSize = 0;
         private String format;
         private LRUCache<Double, String> cache;
 
@@ -379,7 +378,6 @@ public String format(double value) {
         }
 
         public CachingFormatter(String format, int maxCacheSize) {
-            this.maxCacheSize = maxCacheSize;
             this.format = format;
             this.cache = new LRUCache<Double, String>(maxCacheSize);
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java
index e183a95d80..a6a6d484e1 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java
@@ -149,7 +149,7 @@ public void writeUnphasedBeagleOutput(VariantContext vc, boolean makeMissing) {
 
         // write out the alleles at this site
         for ( Allele allele : vc.getAlleles() ) {
-            beagleOut.append(allele.isNoCall() || allele.isNull() ? "-" : allele.getBaseString()).append(" ");
+            beagleOut.append(allele.isNoCall() ? "-" : allele.getBaseString()).append(" ");
         }
 
         // write out sample level genotypes
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java
index 122958ac27..a6d82d5b3b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java
@@ -28,6 +28,8 @@
 import org.broadinstitute.sting.commandline.Gatherer;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.recalibration.RecalUtils;
+import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
 
 import java.io.File;
 import java.io.FileNotFoundException;
@@ -71,11 +73,11 @@ public void gather(List<File> inputs, File output) {
         if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
             final File recal_out = new File(output.getName() + ".original");
             final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
-            RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
+            RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
         }
         else if (!RAC.NO_PLOTS) {
             final File recal_out = new File(output.getName() + ".recal");
-            RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
+            RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
         }
 
         generalReport.output(outputFile);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
index 06f42cbad7..91d982f201 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
@@ -34,6 +34,7 @@
 import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.*;
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.baq.BAQ;
 import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
 import org.broadinstitute.sting.utils.collections.Pair;
@@ -41,6 +42,9 @@
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.recalibration.QuantizationInfo;
+import org.broadinstitute.sting.utils.recalibration.RecalUtils;
+import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
 import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
@@ -103,13 +107,13 @@
 @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
 @By(DataSource.READS)
 @ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class})                                   // only look at covered loci, not every loci of the reference file
-@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})                                         // filter out all reads with zero or unavailable mapping quality
+@Requires({DataSource.READS, DataSource.REFERENCE})                                         // filter out all reads with zero or unavailable mapping quality
 @PartitionBy(PartitionType.LOCUS)                                                                                       // this walker requires both -I input.bam and -R reference.fasta
 public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> {
     @ArgumentCollection
     private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();                          // all the command line arguments for BQSR and it's covariates
 
-    private QuantizationInfo quantizationInfo;                                                                          // an object that keeps track of the information necessary for quality score quantization 
+    private QuantizationInfo quantizationInfo;                                                                          // an object that keeps track of the information necessary for quality score quantization
     
     private RecalibrationTables recalibrationTables;
 
@@ -132,6 +136,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
      */
     public void initialize() {
 
+        // TODO -- remove me after the 2.1 release
+        if ( getToolkit().getArguments().numberOfThreads > 1 )
+            throw new UserException("We have temporarily disabled the ability to run BaseRecalibrator multi-threaded for performance reasons.  We hope to have this fixed for the next GATK release (2.2) and apologize for the inconvenience.");
+
         // check for unsupported access
         if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals)
             throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument");
@@ -143,12 +151,12 @@ public void initialize() {
             throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION);
 
         if (RAC.LIST_ONLY) {
-            RecalDataManager.listAvailableCovariates(logger);
+            RecalUtils.listAvailableCovariates(logger);
             System.exit(0);
         }
         RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE;                                          // if we have a recalibration file, record it so it goes on the report table
 
-        Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC);       // initialize the required and optional covariates
+        Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC);       // initialize the required and optional covariates
         ArrayList<Covariate> requiredCovariates = covariates.getFirst();
         ArrayList<Covariate> optionalCovariates = covariates.getSecond();
 
@@ -222,17 +230,17 @@ public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentConte
 
                 if (readNotSeen(read)) {
                     read.setTemporaryAttribute(SEEN_ATTRIBUTE, true);
-                    RecalDataManager.parsePlatformForRead(read, RAC);
-                    if (RecalDataManager.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
+                    RecalUtils.parsePlatformForRead(read, RAC);
+                    if (!RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
                         read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true);
                         continue;
                     }
-                    read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(read, requestedCovariates));
+                    read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
                 }
 
                 if (!ReadUtils.isSOLiDRead(read) ||                                                                     // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
-                    RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
-                        RecalDataManager.isColorSpaceConsistent(read, offset))
+                    RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING ||
+                        RecalUtils.isColorSpaceConsistent(read, offset))
                     recalibrationEngine.updateDataForPileupElement(p, ref.getBase());                                                             // This base finally passed all the checks for a good base, so add it to the big data hashmap
             }
             countedSites++;
@@ -242,9 +250,9 @@ public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentConte
     }
 
     /**
-     * Initialize the reduce step by creating a PrintStream from the filename specified as an argument to the walker.
+     * Initialize the reduce step by returning 0L
      *
-     * @return returns A PrintStream created from the -recalFile filename argument specified to the walker
+     * @return 0L, the starting value for the reduce sum
      */
     public Long reduceInit() {
         return 0L;
@@ -271,13 +279,16 @@ public Long treeReduce(Long sum1, Long sum2) {
     public void onTraversalDone(Long result) {
         logger.info("Calculating quantized quality scores...");
         quantizeQualityScores();
+
+        logger.info("Writing recalibration report...");
+        generateReport();
+        logger.info("...done!");
+
         if (!RAC.NO_PLOTS) {
             logger.info("Generating recalibration plots...");
             generatePlots();
         }
-        logger.info("Writing recalibration report...");
-        generateReport();
-        logger.info("...done!");
+
         logger.info("Processed: " + result + " sites");
     }
 
@@ -285,10 +296,10 @@ private void generatePlots() {
         File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
         if (recalFile != null) {
             RecalibrationReport report = new RecalibrationReport(recalFile);
-            RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
+            RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
         }
         else
-            RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
+            RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
     }
 
 
@@ -309,7 +320,7 @@ private void generateReport() {
             throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created");
         }
 
-        RecalDataManager.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
+        RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
     }
 }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java
deleted file mode 100644
index d7e8e16b5f..0000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java
+++ /dev/null
@@ -1,109 +0,0 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
-
-import org.broadinstitute.sting.utils.QualityUtils;
-
-/*
- * Copyright (c) 2010 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * Created by IntelliJ IDEA.
- * User: rpoplin
- * Date: Jan 6, 2010
- *
- * An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed.
- * Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
- */
-
-public class Datum {
-
-    long numObservations;                                                                                     // number of bases seen in total
-    long numMismatches;                                                                                       // number of bases seen that didn't match the reference
-
-    private static final int SMOOTHING_CONSTANT = 1;                                                                    // used when calculating empirical qualities to avoid division by zero
-
-    //---------------------------------------------------------------------------------------------------------------
-    //
-    // constructors
-    //
-    //---------------------------------------------------------------------------------------------------------------
-
-    public Datum() {
-        numObservations = 0L;
-        numMismatches = 0L;
-    }
-
-    public Datum(long numObservations, long numMismatches) {
-        this.numObservations = numObservations;
-        this.numMismatches = numMismatches;
-    }
-
-    //---------------------------------------------------------------------------------------------------------------
-    //
-    // increment methods
-    //
-    //---------------------------------------------------------------------------------------------------------------
-
-    synchronized void increment(final long incObservations, final long incMismatches) {
-        numObservations += incObservations;
-        numMismatches += incMismatches;
-    }
-
-    synchronized void increment(final boolean isError) {
-        numObservations++;
-        numMismatches += isError ? 1:0;
-    }
-
-    //---------------------------------------------------------------------------------------------------------------
-    //
-    // methods to derive empirical quality score
-    //
-    //---------------------------------------------------------------------------------------------------------------
-
-    double empiricalQualDouble() {
-        final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
-        final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, for example
-        final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
-        return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
-    }
-
-    byte empiricalQualByte() {
-        final double doubleMismatches = (double) (numMismatches);
-        final double doubleObservations = (double) (numObservations);
-        return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations);                                    // This is capped at Q40
-    }
-
-    @Override
-    public String toString() {
-        return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        if (!(o instanceof Datum))
-            return false;
-        Datum other = (Datum) o;
-        return numMismatches == other.numMismatches && numObservations == other.numObservations;
-    }
-}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java
deleted file mode 100755
index 9b00b18768..0000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java
+++ /dev/null
@@ -1,148 +0,0 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
-
-/*
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-import com.google.java.contract.Ensures;
-import com.google.java.contract.Requires;
-import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.QualityUtils;
-
-import java.util.Random;
-
-/**
- * Created by IntelliJ IDEA.
- * User: rpoplin
- * Date: Nov 3, 2009
- *
- * An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
- */
-
-public class RecalDatum extends Datum {
-
-    private static final double UNINITIALIZED = -1.0;
-
-    private double estimatedQReported;                                                                                  // estimated reported quality score based on combined data's individual q-reporteds and number of observations
-    private double empiricalQuality;                                                                                    // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
-
-
-    //---------------------------------------------------------------------------------------------------------------
-    //
-    // constructors
-    //
-    //---------------------------------------------------------------------------------------------------------------
-
-    public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
-        numObservations = _numObservations;
-        numMismatches = _numMismatches;
-        estimatedQReported = reportedQuality;
-        empiricalQuality = UNINITIALIZED;
-    }
-
-    public RecalDatum(final RecalDatum copy) {
-        this.numObservations = copy.numObservations;
-        this.numMismatches = copy.numMismatches;
-        this.estimatedQReported = copy.estimatedQReported;
-        this.empiricalQuality = copy.empiricalQuality;
-    }
-
-    public void combine(final RecalDatum other) {
-        final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
-        increment(other.numObservations, other.numMismatches);
-        estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
-        empiricalQuality = UNINITIALIZED;
-    }
-
-    @Override
-    public void increment(final boolean isError) {
-        super.increment(isError);
-        empiricalQuality = UNINITIALIZED;
-    }
-
-    @Requires("empiricalQuality == UNINITIALIZED")
-    @Ensures("empiricalQuality != UNINITIALIZED")
-    protected final void calcEmpiricalQuality() {
-        empiricalQuality = empiricalQualDouble();                                                                       // cache the value so we don't call log over and over again
-    }
-
-    public void setEstimatedQReported(final double estimatedQReported) {
-        this.estimatedQReported = estimatedQReported;
-    }
-
-    public final double getEstimatedQReported() {
-        return estimatedQReported;
-    }
-
-    public void setEmpiricalQuality(final double empiricalQuality) {
-        this.empiricalQuality = empiricalQuality;
-    }
-
-    public final double getEmpiricalQuality() {
-        if (empiricalQuality == UNINITIALIZED)
-            calcEmpiricalQuality();
-        return empiricalQuality;
-    }
-
-    @Override
-    public String toString() {
-        return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
-    }
-
-    public String stringForCSV() {
-        return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
-    }
-
-    private double calcExpectedErrors() {
-        return (double) this.numObservations * qualToErrorProb(estimatedQReported);
-    }
-
-    private double qualToErrorProb(final double qual) {
-        return Math.pow(10.0, qual / -10.0);
-    }
-
-    public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
-        final Random random = new Random();
-        final int nObservations = random.nextInt(maxObservations);
-        final int nErrors = random.nextInt(maxErrors);
-        final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
-        return new RecalDatum(nObservations, nErrors, (byte)qual);
-    }
-
-    /**
-     * We don't compare the estimated quality reported because it may be different when read from
-     * report tables.
-     *
-     * @param o the other recal datum
-     * @return true if the two recal datums have the same number of observations, errors and empirical quality.
-     */
-    @Override
-    public boolean equals(Object o) {
-        if (!(o instanceof RecalDatum))
-            return false;
-        RecalDatum other = (RecalDatum) o;
-        return super.equals(o) &&
-               MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
-    }
-}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
index 2a94426a7e..f4b00925eb 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
@@ -29,6 +29,7 @@
 import org.broadinstitute.sting.commandline.*;
 import org.broadinstitute.sting.gatk.report.GATKReportTable;
 import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.recalibration.RecalUtils;
 
 import java.io.File;
 import java.util.Collections;
@@ -100,7 +101,7 @@ public class RecalibrationArgumentCollection {
      * reads which have had the reference inserted because of color space inconsistencies.
      */
     @Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
-    public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO;
+    public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO;
 
     /**
      * CountCovariates and TableRecalibration accept a --solid_nocall_strategy <MODE> flag which governs how the recalibrator handles
@@ -108,7 +109,7 @@ public class RecalibrationArgumentCollection {
      * their color space tag can not be recalibrated.
      */
     @Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false)
-    public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
+    public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
 
     /**
      * The context covariate will use a context of this size to calculate it's covariate value for base mismatches
@@ -174,44 +175,44 @@ public class RecalibrationArgumentCollection {
 
     public File recalibrationReport = null;
 
-    public GATKReportTable generateReportTable() {
+    public GATKReportTable generateReportTable(final String covariateNames) {
         GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
         argumentsTable.addColumn("Argument");
-        argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
+        argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
         argumentsTable.addRowID("covariate", true);
-        argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
+        argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, covariateNames);
         argumentsTable.addRowID("no_standard_covs", true);
-        argumentsTable.set("no_standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
+        argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
         argumentsTable.addRowID("run_without_dbsnp", true);
-        argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
+        argumentsTable.set("run_without_dbsnp", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
         argumentsTable.addRowID("solid_recal_mode", true);
-        argumentsTable.set("solid_recal_mode", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
+        argumentsTable.set("solid_recal_mode", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
         argumentsTable.addRowID("solid_nocall_strategy", true);
-        argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
+        argumentsTable.set("solid_nocall_strategy", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
         argumentsTable.addRowID("mismatches_context_size", true);
-        argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
+        argumentsTable.set("mismatches_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
         argumentsTable.addRowID("indels_context_size", true);
-        argumentsTable.set("indels_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
+        argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
         argumentsTable.addRowID("mismatches_default_quality", true);
-        argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
+        argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
         argumentsTable.addRowID("insertions_default_quality", true);
-        argumentsTable.set("insertions_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
+        argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
         argumentsTable.addRowID("low_quality_tail", true);
-        argumentsTable.set("low_quality_tail", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
+        argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
         argumentsTable.addRowID("default_platform", true);
-        argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
+        argumentsTable.set("default_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
         argumentsTable.addRowID("force_platform", true);
-        argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
+        argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
         argumentsTable.addRowID("quantizing_levels", true);
-        argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
+        argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
         argumentsTable.addRowID("keep_intermediate_files", true);
-        argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
+        argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
         argumentsTable.addRowID("no_plots", true);
-        argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
+        argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
         argumentsTable.addRowID("recalibration_report", true);
-        argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
+        argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
         argumentsTable.addRowID("binary_tag_name", true);
-        argumentsTable.set("binary_tag_name", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
+        argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
         return argumentsTable;
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java
index aa62a18bc0..38e3069393 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java
@@ -1,5 +1,6 @@
 package org.broadinstitute.sting.gatk.walkers.bqsr;
 
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java
index a24506d079..08c7da7543 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java
@@ -25,10 +25,14 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
 import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.recalibration.EventType;
+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.utils.recalibration.RecalDatum;
 import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java
index c964b0b4b0..e17c6cdb7b 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java
@@ -24,6 +24,7 @@
 
 package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
 
+import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.commandline.Output;
 import org.broadinstitute.sting.gatk.CommandLineGATK;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@@ -34,6 +35,7 @@
 import org.broadinstitute.sting.gatk.walkers.PartitionBy;
 import org.broadinstitute.sting.gatk.walkers.PartitionType;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
 import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
 
 import java.io.PrintStream;
@@ -45,15 +47,17 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
     @Output(required = true)
     private PrintStream out;
 
+    @Argument(fullName = "coverage_threshold", shortName = "cov", doc = "The minimum allowable coverage to be considered covered", required = false)
+    private int coverageThreshold = 20;
+
     @Override
     // Look to see if the region has sufficient coverage
-    public double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
+    public ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
 
         int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup());
 
         // note the linear probability scale
-        int coverageThreshold = 20;
-        return Math.min((double) depth / coverageThreshold, 1);
+        return new ActivityProfileResult(Math.min((double) depth / coverageThreshold, 1));
 
     }
 
@@ -74,9 +78,9 @@ public Long reduceInit() {
     public Long reduce(final GenomeLoc value, Long reduce) {
         if (value != null) {
             out.println(value.toString());
-            return reduce++;
-        } else
-            return reduce;
+            reduce++;
+        }
+        return reduce;
     }
 
     @Override
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java
index 0fc2d8929e..9f6258eee3 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java
@@ -264,7 +264,7 @@ boolean hasValidMate(GATKSAMRecord read, ThresHolder thresholds) {
             return false;
 
         // different contigs
-        if (read.getMateReferenceIndex() != read.getReferenceIndex())
+        if (!read.getMateReferenceIndex().equals(read.getReferenceIndex()))
             return false;
 
         // unmapped
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java
index 2d372ca9f0..0d4db55608 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java
@@ -104,7 +104,14 @@ public boolean canRead(File file) {
-            InputStream fstream = new BufferedInputStream(new FileInputStream(file));
-            if ( !BlockCompressedInputStream.isValidFile(fstream) )
-                return false;
-            new BlockCompressedInputStream(fstream).read(buffer, 0, BAM_MAGIC.length);
+            final InputStream fstream = new BufferedInputStream(new FileInputStream(file));
+            try {
+                if ( !BlockCompressedInputStream.isValidFile(fstream) )
+                    return false;
+                // the BlockCompressedInputStream only wraps fstream, so closing fstream in the
+                // finally block releases the file handle on every exit path (early return or exception)
+                final BlockCompressedInputStream BCIS = new BlockCompressedInputStream(fstream);
+                BCIS.read(buffer, 0, BAM_MAGIC.length);
+            } finally {
+                fstream.close();
+            }
             return Arrays.equals(buffer, BAM_MAGIC);
         } catch ( IOException e ) {
             return false;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java
index 2f48de2d36..7315fe5033 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java
@@ -224,7 +224,7 @@ private static DiffElement fromString(String tree, DiffElement parent) {
         // X=(A=A B=B C=(D=D))
         String[] parts = tree.split("=", 2);
         if ( parts.length != 2 )
-            throw new ReviewedStingException("Unexpected tree structure: " + tree + " parts=" + parts);
+            throw new ReviewedStingException("Unexpected tree structure: " + tree);
         String name = parts[0];
         String value = parts[1];
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java
index 480a1fc29c..5e4ea5f813 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java
@@ -90,8 +90,14 @@ private DiffNode tableToNode(GATKReportTable table, DiffNode root) {
     public boolean canRead(File file) {
         try {
             final String HEADER = GATKReport.GATKREPORT_HEADER_PREFIX;
-            char[] buff = new char[HEADER.length()];
-            new FileReader(file).read(buff, 0, HEADER.length());
+            final char[] buff = new char[HEADER.length()];
+            final FileReader FR = new FileReader(file);
+            try {
+                FR.read(buff, 0, HEADER.length());
+            } finally {
+                // release the file handle even when read() throws
+                FR.close();
+            }
             String firstLine = new String(buff);
             return firstLine.startsWith(HEADER);
         } catch (IOException e) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java
index 9c9a75fc4e..8fbd37e30b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java
@@ -47,7 +47,10 @@
  * <p>
  * Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
  * Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
- * Note that if there are multiple variants at a site, it takes the first one seen.
+ * Several important notes:
+ * 1) if there are multiple variants that start at a site, it chooses one of them randomly.
+ * 2) when there are overlapping indels (but with different start positions) only the first will be chosen.
+ * 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions).
  * Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
  *
  * <h2>Input</h2>
@@ -102,16 +105,16 @@ public Pair<GenomeLoc, String> map(RefMetaDataTracker tracker, ReferenceContext
         String refBase = String.valueOf((char)ref.getBase());
 
         // Check to see if we have a called snp
-        for ( VariantContext vc : tracker.getValues(variants) ) {
+        for ( VariantContext vc : tracker.getValues(variants, ref.getLocus()) ) {
             if ( vc.isFiltered() )
                 continue;
 
             if ( vc.isSimpleDeletion()) {
-                deletionBasesRemaining = vc.getReference().length();
+                deletionBasesRemaining = vc.getReference().length() - 1;
                 // delete the next n bases, not this one
                 return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
             } else if ( vc.isSimpleInsertion()) {
-                return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString()));
+                return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
             } else if (vc.isSNP()) {
                 return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
             }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
index 432bbd6d76..08a3334863 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
@@ -46,8 +46,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
 
     public enum Model {
         /** The default model with the best performance in all cases */
-        EXACT,
-        POOL
+        EXACT
     }
 
     protected int N;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java
index cef09a9131..7dcc95361f 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java
@@ -148,8 +148,8 @@ private Map<String, Integer> countConsensusAlleles(ReferenceContext ref,
                     boolean foundKey = false;
                     // copy of hashmap into temp arrayList
                     ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
-                    for (String s : consensusIndelStrings.keySet()) {
-                        cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
+                    for (Map.Entry<String, Integer> s : consensusIndelStrings.entrySet()) {
+                        cList.add(new Pair<String, Integer>(s.getKey(), s.getValue()));
                     }
 
                     if (read.getAlignmentEnd() == loc.getStart()) {
@@ -246,18 +246,19 @@ private List<Allele> consensusCountsToAlleles(final ReferenceContext ref,
                 // get ref bases of accurate deletion
                 final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
                 stop = loc.getStart() + dLen;
-                final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
+                final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference - 1, startIdxInReference + dLen);   // add reference padding
 
                 if (Allele.acceptableAlleleBases(refBases, false)) {
                     refAllele = Allele.create(refBases, true);
-                    altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
+                    altAllele = Allele.create(ref.getBase(), false);
                 }
                 else continue; // don't go on with this allele if refBases are non-standard
             } else {
                 // insertion case
-                if (Allele.acceptableAlleleBases(s, false)) { // don't allow N's in insertions
-                    refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
-                    altAllele = Allele.create(s, false);
+                final String insertionBases = (char)ref.getBase() + s;  // add reference padding
+                if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions
+                    refAllele = Allele.create(ref.getBase(), true);
+                    altAllele = Allele.create(insertionBases, false);
                     stop = loc.getStart();
                 }
                 else continue; // go on to next allele if consensus insertion has any non-standard base.
@@ -267,7 +268,6 @@ private List<Allele> consensusCountsToAlleles(final ReferenceContext ref,
             final VariantContextBuilder builder = new VariantContextBuilder().source("");
             builder.loc(loc.getContig(), loc.getStart(), stop);
             builder.alleles(Arrays.asList(refAllele, altAllele));
-            builder.referenceBaseForIndel(ref.getBase());
             builder.noGenotypes();
             if (doMultiAllelicCalls) {
                 vcs.add(builder.make());
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
index 4253ff3ad5..6fdc926d53 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
@@ -59,10 +59,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
     public enum Model {
         SNP,
         INDEL,
-        BOTH,
-        POOLSNP,
-        POOLINDEL,
-        POOLBOTH
+        GeneralPloidySNP,
+        GeneralPloidyINDEL,
+        BOTH
     }
 
     public enum GENOTYPING_MODE {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
index 230d6c324f..bedffa6909 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
@@ -35,7 +35,6 @@
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.Haplotype;
-import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
 import org.broadinstitute.sting.utils.variantcontext.*;
@@ -48,8 +47,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
     private boolean DEBUG = false;
     private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
     private PairHMMIndelErrorModel pairModel;
-    private boolean allelesArePadded;
-    
+
     private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
             new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
                 protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
@@ -105,25 +103,21 @@ public VariantContext getLikelihoods(final RefMetaDataTracker tracker,
             indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
             haplotypeMap.clear();
 
-            Pair<List<Allele>,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
-            alleleList = pair.first;
-            allelesArePadded = pair.second;
+            alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
             if (alleleList.isEmpty())
                 return null;
         }
 
-
         getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements
         if (haplotypeMap == null || haplotypeMap.isEmpty())
             return null;
 
         // start making the VariantContext
         // For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base.
-        
-        final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded);
+        final int endLoc = loc.getStart() + alleleList.get(0).length() - 1;
         final int eventLength = getEventLength(alleleList);
 
-        final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
+        final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList);
 
         // create the genotypes; no-call everyone for now
         GenotypesContext genotypes = GenotypesContext.create();
@@ -160,15 +154,6 @@ public static HashMap<PileupElement, LinkedHashMap<Allele, Double>> getIndelLike
         return indelLikelihoodMap.get();
     }
 
-    public static int computeEndLocation(final List<Allele> alleles, final GenomeLoc loc, final boolean allelesArePadded) {
-        Allele refAllele = alleles.get(0);
-        int endLoc = loc.getStart() + refAllele.length()-1;
-        if (allelesArePadded)
-            endLoc++;
-
-        return endLoc;
-    }
-
     public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
                                                  final ReferenceContext ref,
                                                  final GenomeLoc loc,
@@ -213,16 +198,15 @@ public static int getEventLength(List<Allele> alleleList) {
 
     }
     
-    public static Pair<List<Allele>,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker,
+    public static List<Allele> getInitialAlleleList(final RefMetaDataTracker tracker,
                                                     final ReferenceContext ref,
                                                     final Map<String, AlignmentContext> contexts,
                                                     final AlignmentContextUtils.ReadOrientation contextType,
                                                     final GenomeLocParser locParser,
                                                     final UnifiedArgumentCollection UAC,
                                                     final boolean ignoreSNPAllelesWhenGenotypingIndels) {
-        
+
         List<Allele> alleles = new ArrayList<Allele>();
-        boolean allelesArePadded = true;
         if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
             VariantContext vc = null;
             for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
@@ -235,7 +219,7 @@ public static Pair<List<Allele>,Boolean> getInitialAlleleList(final RefMetaDataT
             }
            // ignore places where we don't have a variant
             if (vc == null)
-                return new Pair<List<Allele>,Boolean>(alleles,false);
+                return alleles;
 
             if (ignoreSNPAllelesWhenGenotypingIndels) {
                 // if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
@@ -248,15 +232,11 @@ public static Pair<List<Allele>,Boolean> getInitialAlleleList(final RefMetaDataT
             } else {
                 alleles.addAll(vc.getAlleles());
             }
-            if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1)
-                allelesArePadded = false;
-
-
 
         } else {
-            alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
+            alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
         }
-        return new Pair<List<Allele>,Boolean> (alleles,allelesArePadded);
+        return alleles;
     }
 
     // Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
index c767cf783e..07d5d2f2d7 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
@@ -208,7 +208,7 @@ public ReadBackedPileup createBAQedPileup( final ReadBackedPileup pileup ) {
         return new ReadBackedPileupImpl( pileup.getLocation(), BAQedElements );
     }
 
-    public class BAQedPileupElement extends PileupElement {
+    public static class BAQedPileupElement extends PileupElement {
         public BAQedPileupElement( final PileupElement PE ) {
             super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeDeletedBase(), PE.isAfterDeletedBase(), PE.isBeforeInsertion(), PE.isAfterInsertion(), PE.isNextToSoftClip());
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
index 020f7904d8..30c0f3e18d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
@@ -26,11 +26,12 @@
 package org.broadinstitute.sting.gatk.walkers.genotyper;
 
 import org.broadinstitute.sting.commandline.*;
+import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
 
 
-public class UnifiedArgumentCollection {
+public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
 
     @Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false)
     public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
@@ -42,13 +43,6 @@ public class UnifiedArgumentCollection {
     @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
     protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
 
-    /**
-     * The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
-     * het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
-     */
-    @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
-    public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;
-
     /**
      * The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily
      * distinguish between PCR errors vs. sequencing errors.  The practical implication for this value is that it
@@ -57,26 +51,6 @@ public class UnifiedArgumentCollection {
     @Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
     public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
 
-    @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
-    public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
-
-    @Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
-    public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
-
-    /**
-     * The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
-     * confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
-     * is the default).
-     */
-    @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false)
-    public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
-
-    /**
-     * This argument allows you to emit low quality calls as filtered records.
-     */
-    @Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false)
-    public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
-
     /**
      * Note that calculating the SLOD increases the runtime by an appreciable amount.
      */
@@ -90,12 +64,6 @@ public class UnifiedArgumentCollection {
     @Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false)
     public boolean ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = false;
 
-    /**
-     * When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding
-     */
-    @Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
-    public RodBinding<VariantContext> alleles;
-
     /**
      * The minimum confidence needed in a given base for it to be used in variant calling.  Note that the base quality of a base
      * is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value.
@@ -107,18 +75,8 @@ public class UnifiedArgumentCollection {
     @Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
     public Double MAX_DELETION_FRACTION = 0.05;
 
-    /**
-     * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
-     * then only this many alleles will be used.  Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
-     * scales exponentially based on the number of alternate alleles.  Unless there is a good reason to change the default value, we highly recommend
-     * that you not play around with this parameter.
-     */
-    @Advanced
-    @Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
-    public int MAX_ALTERNATE_ALLELES = 3;
-
     @Hidden
-    @Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false)
+    @Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAltAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false)
     public boolean CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = false;
 
     // indel-related arguments
@@ -139,19 +97,18 @@ public class UnifiedArgumentCollection {
     @Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false)
     public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25;
 
-
     /**
      * This argument informs the prior probability of having an indel at a site.
      */
     @Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
     public double INDEL_HETEROZYGOSITY = 1.0/8000;
 
-    @Hidden
-    @Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false)
+    @Advanced
+    @Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty, as Phred-scaled probability.  I.e., 30 => 10^(-30/10)", required = false)
     public byte INDEL_GAP_CONTINUATION_PENALTY = 10;
 
-    @Hidden
-    @Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false)
+    @Advanced
+    @Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty, as Phred-scaled probability.  I.e., 30 => 10^(-30/10)", required = false)
     public byte INDEL_GAP_OPEN_PENALTY = 45;
 
     @Hidden
@@ -181,7 +138,6 @@ Generalized ploidy argument (debug only): squash all reads into a single pileup
        Generalized ploidy argument (debug only): When building site error models, ignore lane information and build only
        sample-level error model
      */
-
     @Argument(fullName = "ignoreLaneInfo", shortName = "ignoreLane", doc = "Ignore lane when building error model, error model is then per-site", required = false)
     public boolean IGNORE_LANE_INFO = false;
 
@@ -263,7 +219,6 @@ public UnifiedArgumentCollection clone() {
         uac.referenceSampleName = referenceSampleName;
         uac.samplePloidy = samplePloidy;
-        uac.maxQualityScore = minQualityScore;
         uac.maxQualityScore = maxQualityScore;
         uac.phredScaledPrior = phredScaledPrior;
         uac.minPower = minPower;
         uac.minReferenceDepth = minReferenceDepth;
@@ -276,5 +231,16 @@ public UnifiedArgumentCollection clone() {
         return uac;
     }
 
-
+    public UnifiedArgumentCollection() { }
+
+    public UnifiedArgumentCollection( final StandardCallerArgumentCollection SCAC ) {
+        super();
+        this.alleles = SCAC.alleles;
+        this.GenotypingMode = SCAC.GenotypingMode;
+        this.heterozygosity = SCAC.heterozygosity;
+        this.MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES;
+        this.OutputMode = SCAC.OutputMode;
+        this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
+        this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
index cd1815d82e..507806fbef 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@@ -82,7 +82,7 @@
  *   -o snps.raw.vcf \
  *   -stand_call_conf [50.0] \
  *   -stand_emit_conf 10.0 \
- *   -dcov [50] \
+ *   -dcov [50 for 4x, 200 for >30x WGS or Whole exome] \
  *   [-L targets.interval_list]
  * </pre>
  *
@@ -241,7 +241,7 @@ public void initialize() {
         } else {
             // in full mode: check for consistency in ploidy/pool calling arguments
             // check for correct calculation models
-            if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
+/*            if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
                 // polyploidy requires POOL GL and AF calculation models to be specified right now
                 if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL
                         && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH)   {
@@ -252,6 +252,7 @@ public void initialize() {
                     throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2");
 
             }
+  */
             // get all of the unique sample names
             if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
                 samples.clear();
@@ -311,8 +312,8 @@ public static Set<VCFHeaderLine> getHeaderInfo(final UnifiedArgumentCollection U
 
         // add the pool values for each genotype
         if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
-            headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed, for this pool"));
-            headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed, for this pool"));
+            headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
+            headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
         }
         if (UAC.referenceSampleName != null) {
             headerInfo.add(new VCFInfoHeaderLine(VCFConstants.REFSAMPLE_DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Total reference sample depth"));
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index 32564984a9..3d9724ffb1 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -37,7 +37,6 @@
 import org.broadinstitute.sting.utils.*;
 import org.broadinstitute.sting.utils.baq.BAQ;
 import org.broadinstitute.sting.utils.classloader.PluginManager;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -51,6 +50,7 @@
 
 public class UnifiedGenotyperEngine {
     public static final String LOW_QUAL_FILTER_NAME = "LowQual";
+    private static final String GPSTRING = "GeneralPloidy"; // prefix prepended to the GL and AF model names when UAC.samplePloidy differs from VariantContextUtils.DEFAULT_PLOIDY
 
     public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
 
@@ -184,7 +184,7 @@ public List<VariantCallContext> calculateLikelihoodsAndGenotypes(final RefMetaDa
             for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) {
                 final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
                 if ( stratifiedContexts == null ) {
-                    results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext) : null);
+                    results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null);
                 }
                 else {
                     final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
@@ -203,7 +203,7 @@ private List<VariantCallContext> addMissingSamples(final List<VariantCallContext
         final List<VariantCallContext> withAllSamples = new ArrayList<VariantCallContext>(calls.size());
         for ( final VariantCallContext call : calls ) {
             if ( call == null )
-                withAllSamples.add(call);
+                withAllSamples.add(null);
             else {
                 final VariantContext withoutMissing = VariantContextUtils.addMissingSamples(call, allSamples);
                 withAllSamples.add(new VariantCallContext(withoutMissing, call.confidentlyCalled, call.shouldEmit));
@@ -259,6 +259,16 @@ public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, Referen
         return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model);
     }
 
+    /**
+     * Compute genotypes from a GL-annotated variant context alone: the four context arguments of the
+     * full overload are passed as null and the SNP likelihood model is always used.
+     * @param vc         the GL-annotated variant context
+     * @return the VariantCallContext object
+     */
+    public VariantCallContext calculateGenotypes(VariantContext vc) {
+        return calculateGenotypes(null, null, null, null, vc, GenotypeLikelihoodsCalculationModel.Model.SNP, false);
+    }
+
 
     // ---------------------------------------------------------------------------------------------------------
     //
@@ -274,7 +284,7 @@ private VariantContext calculateLikelihoods(RefMetaDataTracker tracker, Referenc
             glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
         }
 
-        return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
+        return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
     }
 
     private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
@@ -283,7 +293,7 @@ private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, Refe
             VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles);
             if ( vcInput == null )
                 return null;
-            vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).referenceBaseForIndel(vcInput.getReferenceBaseForIndel()).make();
+            vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).make();
         } else {
             // deal with bad/non-standard reference bases
             if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) )
@@ -408,11 +418,6 @@ else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_M
         builder.log10PError(phredScaledConfidence/-10.0);
         if ( ! passesCallThreshold(phredScaledConfidence) )
             builder.filters(filter);
-        if ( limitedContext ) {
-            builder.referenceBaseForIndel(vc.getReferenceBaseForIndel());
-        } else {
-            builder.referenceBaseForIndel(refContext.getBase());
-        }
 
         // create the genotypes
         final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true,ploidy);
@@ -438,10 +443,11 @@ else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_M
         // add the MLE AC and AF annotations
         if ( alleleCountsofMLE.size() > 0 ) {
             attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE);
-            final double AN = (double)builder.make().getCalledChrCount();
+            final int AN = builder.make().getCalledChrCount();
             final ArrayList<Double> MLEfrequencies = new ArrayList<Double>(alleleCountsofMLE.size());
+            // the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1)
             for ( int AC : alleleCountsofMLE )
-                MLEfrequencies.add((double)AC / AN);
+                MLEfrequencies.add(Math.min(1.0, (double)AC / (double)AN));
             attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
         }
 
@@ -493,8 +499,8 @@ else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_M
 
         // if we are subsetting alleles (either because there were too many or because some were not polymorphic)
         // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
-        if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed
-            vcCall = VCFAlleleClipper.reverseTrimAlleles(vcCall);
+        if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext )
+            vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);
 
         if ( annotationEngine != null && !limitedContext ) {
             // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
@@ -646,25 +652,24 @@ private List<GenotypeLikelihoodsCalculationModel.Model> getGLModelsToUse(final R
         if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
             modelPrefix = UAC.GLmodel.name().toUpperCase().replaceAll("BOTH","");
 
+        if (!UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
+            modelPrefix = GPSTRING + modelPrefix;
+
         // if we're genotyping given alleles and we have a requested SNP at this position, do SNP
         if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
-            final VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
+            final VariantContext vcInput = getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
             if ( vcInput == null )
                 return models;
 
             if ( vcInput.isSNP() )  {
                 // ignore SNPs if the user chose INDEL mode only
-                if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
+                if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("SNP") )
                     models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP"));
-                else if ( UAC.GLmodel.name().toUpperCase().contains("SNP") )
-                    models.add(UAC.GLmodel);
             }
             else if ( vcInput.isIndel() || vcInput.isMixed() ) {
                 // ignore INDELs if the user chose SNP mode only
-                if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
+                if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("INDEL") )
                     models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
-                else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
-                    models.add(UAC.GLmodel);
             }
             // No support for other types yet
         }
@@ -674,7 +679,7 @@ else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
                 models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
             }
             else {
-                models.add(UAC.GLmodel);
+                models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+UAC.GLmodel.name().toUpperCase()));
             }
         }
 
@@ -736,12 +741,19 @@ private static Map<String, GenotypeLikelihoodsCalculationModel> getGenotypeLikel
     }
 
     private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) {
+
         List<Class<? extends AlleleFrequencyCalculationModel>> afClasses = new PluginManager<AlleleFrequencyCalculationModel>(AlleleFrequencyCalculationModel.class).getPlugins();
 
+        // user-specified name
+        String afModelName = UAC.AFmodel.name();
+
+        if (!afModelName.contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
+            afModelName = GPSTRING + afModelName;
+
         for (int i = 0; i < afClasses.size(); i++) {
             Class<? extends AlleleFrequencyCalculationModel> afClass = afClasses.get(i);
             String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase();
-            if (UAC.AFmodel.name().equalsIgnoreCase(key)) {
+            if (afModelName.equalsIgnoreCase(key)) {
                 try {
                     Object args[] = new Object[]{UAC,N,logger,verboseWriter};
                     Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class);
@@ -758,7 +770,7 @@ private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObje
 
     public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) {
         if ( tracker == null || ref == null || logger == null )
-            throw new ReviewedStingException("Bad arguments: tracker=" + tracker + " ref=" + ref + " logger=" + logger);
+            return null;
         VariantContext vc = null;
 
         // search for usable record
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java
index 3dd51fa7d7..68365adf7e 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java
@@ -124,7 +124,7 @@ private SAMRecord remove(TreeSet<SAMRecord> treeSet) {
         return first;
     }
 
-    private class SAMRecordHashObject {
+    private static class SAMRecordHashObject {
         public SAMRecord record;
         public boolean wasModified;
 
@@ -305,7 +305,7 @@ public boolean iSizeTooBigToMove(SAMRecord read) {
     }
 
     public static boolean iSizeTooBigToMove(SAMRecord read, int maxInsertSizeForMovingReadPairs) {
-        return ( read.getReadPairedFlag() && ! read.getMateUnmappedFlag() && read.getReferenceName() != read.getMateReferenceName() ) // maps to different chromosomes
+        return ( read.getReadPairedFlag() && ! read.getMateUnmappedFlag() && !read.getReferenceName().equals(read.getMateReferenceName()) ) // maps to different chromosomes
                 || Math.abs(read.getInferredInsertSize()) > maxInsertSizeForMovingReadPairs;     // we won't try to move such a read
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java
index 26023bd2ff..3a10620aad 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java
@@ -35,6 +35,7 @@
 
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.Map;
 
 public class HaplotypeIndelErrorModel {
 
@@ -427,8 +428,8 @@ public double[] computeReadHaplotypeLikelihoods(ReadBackedPileup pileup, HashMap
             // for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi))
             // = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent
             int j=0;
-            for (Allele a: haplotypesInVC.keySet()) {
-                readLikelihoods[i][j]= computeReadLikelihoodGivenHaplotype(haplotypesInVC.get(a), read);
+            for (Map.Entry<Allele,Haplotype> a: haplotypesInVC.entrySet()) {
+                readLikelihoods[i][j]= computeReadLikelihoodGivenHaplotype(a.getValue(), read);
                 if (DEBUG) {
                     System.out.print(read.getReadName()+" ");
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
index 2153525abb..d61b9e9b6a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
@@ -872,7 +872,13 @@ private void generateAlternateConsensesFromKnownIndels(final Set<Consensus> altC
         for ( VariantContext knownIndel : knownIndelsToTry ) {
             if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() )
                 continue;
-            byte[] indelStr = knownIndel.isSimpleInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length());
+            final byte[] indelStr;
+            if ( knownIndel.isSimpleInsertion() ) {
+                final byte[] fullAllele = knownIndel.getAlternateAllele(0).getBases();
+                indelStr = Arrays.copyOfRange(fullAllele, 1, fullAllele.length); // remove ref padding
+            } else {
+                indelStr = Utils.dupBytes((byte)'-', knownIndel.getReference().length() - 1);
+            }
             int start = knownIndel.getStart() - leftmostIndex + 1;
             Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel);
             if ( c != null )
@@ -1019,7 +1025,9 @@ private Consensus createAlternateConsensus(final int indexOnRef, final Cigar c,
                 elements.add(ce);
                 break;
             case M:
-                altIdx += elementLength;
+            case EQ:
+            case X:
+                    altIdx += elementLength;
             case N:
                 if ( reference.length < refIdx + elementLength )
                     ok_flag = false;
@@ -1281,6 +1289,8 @@ private boolean alternateReducesEntropy(final List<AlignedRead> reads, final byt
                 int elementLength = ce.getLength();
                 switch ( ce.getOperator() ) {
                     case M:
+                    case EQ:
+                    case X:
                         for (int k = 0 ; k < elementLength ; k++, refIdx++, altIdx++ ) {
                             if ( refIdx >= reference.length )
                                 break;
@@ -1426,6 +1436,8 @@ private void getUnclippedBases() {
                         fromIndex += elementLength;
                         break;
                     case M:
+                    case EQ:
+                    case X:
                     case I:
                         System.arraycopy(actualReadBases, fromIndex, readBases, toIndex, elementLength);
                         System.arraycopy(actualBaseQuals, fromIndex, baseQuals, toIndex, elementLength);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java
index 6047a15b43..b08def44fc 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java
@@ -73,19 +73,7 @@ public class LeftAlignIndels extends ReadWalker<Integer, Integer> {
     @Output(required=false, doc="Output bam")
     protected StingSAMFileWriter writer = null;
 
-    /**
-     * If set too low, the tool may run out of system file descriptors needed to perform sorting; if too high, the tool
-     * may run out of memory.  We recommend that you additionally tell Java to use a temp directory with plenty of available
-     * space (by setting java.io.tempdir on the command-line).
-     */
-    @Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time by the output writer", required=false)
-    protected int MAX_RECORDS_IN_RAM = 500000;
-
-    public void initialize() {
-        // set up the output writer
-        if ( writer != null )
-            writer.setMaxRecordsInRam(MAX_RECORDS_IN_RAM);
-    }
+    public void initialize() {} // intentionally empty: this walker performs no setup of its own
 
     private void emit(final SAMRecord read) {
         if ( writer != null )
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
index f49e78469a..65c5a2fbce 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
@@ -25,6 +25,7 @@
 
 package org.broadinstitute.sting.gatk.walkers.indels;
 
+import com.google.java.contract.Ensures;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.utils.Haplotype;
 import org.broadinstitute.sting.utils.MathUtils;
@@ -175,7 +176,8 @@ public synchronized double[] computeDiploidReadHaplotypeLikelihoods(ReadBackedPi
         
     }
 
-    public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final ReadBackedPileup pileup, 
+    @Ensures("result != null && result.length == pileup.getNumberOfElements()")
+    public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final ReadBackedPileup pileup,
                                                                           final LinkedHashMap<Allele, Haplotype> haplotypeMap, 
                                                                           final ReferenceContext ref,
                                                                           final int eventLength, 
@@ -349,7 +351,9 @@ public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final Read
                             previousHaplotypeSeen = haplotypeBases.clone();
 
                             readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals,
-                                    contextLogGapOpenProbabilities, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities,
+                                    (read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities),
+                                    (read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities),
+                                    contextLogGapContinuationProbabilities,
                                     startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray);
 
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
index 02e4d414de..fc6df69029 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
@@ -332,7 +332,7 @@ static private Event mergeEvents(Event left, Event right) {
 
     private enum EVENT_TYPE { POINT_EVENT, INDEL_EVENT, BOTH }
 
-    class EventPair {
+    static class EventPair {
         public Event left, right;
         public TreeSet<GenomeLoc> intervals = new TreeSet<GenomeLoc>();
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java
index 0fd047f584..b0c09f78e5 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java
@@ -1131,12 +1131,13 @@ public void printVCFLine(VariantContextWriter vcf, IndelPrecall call, boolean di
         List<Allele> alleles = new ArrayList<Allele>(2); // actual observed (distinct!) alleles at the site
         List<Allele> homref_alleles = null; // when needed, will contain two identical copies of ref allele - needed to generate hom-ref genotype
 
+        final byte referencePaddingBase = refBases[(int)start-1];
 
         if ( call.getVariant() == null ) {
-            // we will need to cteate genotype with two (hom) ref alleles (below).
+            // we will need to create genotype with two (hom) ref alleles (below).
             // we can not use 'alleles' list here, since that list is supposed to contain
             // only *distinct* alleles observed at the site or VCFContext will frown upon us...
-            alleles.add( Allele.create(refBases[(int)start-1],true) );
+            alleles.add( Allele.create(referencePaddingBase,true) );
             homref_alleles = new ArrayList<Allele>(2);
             homref_alleles.add( alleles.get(0));
             homref_alleles.add( alleles.get(0));
@@ -1145,7 +1146,7 @@ public void printVCFLine(VariantContextWriter vcf, IndelPrecall call, boolean di
             // (Genotype will tell us whether it is an actual call or not!)
             int event_length = call.getVariant().lengthOnRef();
             if ( event_length < 0 ) event_length = 0;
-            fillAlleleList(alleles,call);
+            fillAlleleList(alleles,call,referencePaddingBase);
             stop += event_length;
         }
 
@@ -1165,7 +1166,7 @@ public void printVCFLine(VariantContextWriter vcf, IndelPrecall call, boolean di
             filters.add("NoCall");
         }
         VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles)
-                .genotypes(genotypes).filters(filters).referenceBaseForIndel(refBases[(int)start-1]).make();
+                .genotypes(genotypes).filters(filters).make();
         vcf.add(vc);
     }
 
@@ -1175,16 +1176,16 @@ public void printVCFLine(VariantContextWriter vcf, IndelPrecall call, boolean di
      * @param l
      * @param call
      */
-    private void fillAlleleList(List<Allele> l, IndelPrecall call) {
+    private void fillAlleleList(List<Allele> l, IndelPrecall call, byte referencePaddingBase) { // referencePaddingBase is prepended to the indel bases so that neither allele is empty
         int event_length = call.getVariant().lengthOnRef();
         if ( event_length == 0 ) { // insertion
 
-            l.add( Allele.create(Allele.NULL_ALLELE_STRING,true) );
-            l.add( Allele.create(call.getVariant().getBases(), false ));
+            l.add( Allele.create(referencePaddingBase,true) ); // ref allele: the padding base alone
+            l.add( Allele.create((char)referencePaddingBase + call.getVariant().getBases(), false )); // alt allele: padding base + inserted bases; the (char) cast makes '+' append the base character, not the byte's decimal value
 
         } else { //deletion:
-            l.add( Allele.create(call.getVariant().getBases(), true ));
-            l.add( Allele.create(Allele.NULL_ALLELE_STRING,false) );
+            l.add( Allele.create((char)referencePaddingBase + call.getVariant().getBases(), true )); // ref allele: padding base + deleted bases; (char) cast needed for the same reason as in the insertion branch
+            l.add( Allele.create(referencePaddingBase,false) ); // alt allele: the padding base alone
         }
     }
 
@@ -1218,19 +1219,20 @@ public void printVCFLine(VariantContextWriter vcf, IndelPrecall nCall, IndelPrec
 //        }
         boolean homRefT = ( tCall.getVariant() == null );
         boolean homRefN = ( nCall.getVariant() == null );
+        final byte referencePaddingBase = refBases[(int)start-1];
         if ( tCall.getVariant() == null && nCall.getVariant() == null) {
             // no indel at all  ; create base-representation ref/ref alleles for genotype construction
-            alleles.add( Allele.create(refBases[(int)start-1],true) );
+            alleles.add( Allele.create(referencePaddingBase,true) );
         } else {
             // we got indel(s)
             int event_length = 0;
             if ( tCall.getVariant() != null ) {
                 // indel in tumor
                 event_length = tCall.getVariant().lengthOnRef();
-                fillAlleleList(alleles, tCall);
+                fillAlleleList(alleles, tCall, referencePaddingBase);
             } else {
                 event_length = nCall.getVariant().lengthOnRef();
-                fillAlleleList(alleles, nCall);
+                fillAlleleList(alleles, nCall, referencePaddingBase);
             }
             if ( event_length > 0 ) stop += event_length;
         }
@@ -1262,7 +1264,7 @@ public void printVCFLine(VariantContextWriter vcf, IndelPrecall nCall, IndelPrec
         }
 
         VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles)
-                .genotypes(genotypes).filters(filters).attributes(attrs).referenceBaseForIndel(refBases[(int)start-1]).make();
+                .genotypes(genotypes).filters(filters).attributes(attrs).make();
         vcf.add(vc);
     }
 
@@ -1302,7 +1304,7 @@ public Integer reduce(Integer value, Integer sum) {
 
     @Override
     public Integer reduceInit() {
-        return new Integer(0);
+        return 0;
     }
 
 
@@ -2055,7 +2057,9 @@ public ExpandedSAMRecord(SAMRecord r, byte [] ref, long offset, IndelListener l)
                     break; // do not count gaps or clipped bases
                 case I:
                 case M:
-                    readLength += cel.getLength();
+                case EQ:
+                case X:
+                        readLength += cel.getLength();
                     break; // advance along the gapless block in the alignment
                 default :
                     throw new IllegalArgumentException("Unexpected operator in cigar string: "+cel.getOperator());
@@ -2092,7 +2096,9 @@ public ExpandedSAMRecord(SAMRecord r, byte [] ref, long offset, IndelListener l)
 
                     break;
                 case M:
-                    for ( int k = 0; k < ce.getLength(); k++, posOnRef++, posOnRead++ ) {
+                case EQ:
+                case X:
+                        for ( int k = 0; k < ce.getLength(); k++, posOnRef++, posOnRead++ ) {
                         if ( readBases[posOnRead] != ref[posOnRef] )  { // mismatch!
                             mms++;
                             mismatch_flags[posOnRef] = 1;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
index 3cf1d485ea..bbd4bf92f3 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
@@ -426,10 +426,10 @@ private void setTrios(){
         Map<String,Set<Sample>> families = this.getSampleDB().getFamilies();
         Set<Sample> family;
         ArrayList<Sample> parents;
-        for(String familyID : families.keySet()){
-            family = families.get(familyID);
+        for(Map.Entry<String,Set<Sample>> familyEntry : families.entrySet()){
+            family = familyEntry.getValue();
             if(family.size()<2 || family.size()>3){
-                logger.info(String.format("Caution: Family %s has %d members; At the moment Phase By Transmission only supports trios and parent/child pairs. Family skipped.",familyID,family.size()));
+                logger.info(String.format("Caution: Family %s has %d members; At the moment Phase By Transmission only supports trios and parent/child pairs. Family skipped.",familyEntry.getKey(),family.size()));
             }
             else{
                 for(Sample familyMember : family){
@@ -438,7 +438,7 @@ private void setTrios(){
                         if(family.containsAll(parents))
                             this.trios.add(familyMember);
                         else
-                            logger.info(String.format("Caution: Family %s skipped as it is not a trio nor a parent/child pair; At the moment Phase By Transmission only supports trios and parent/child pairs. Family skipped.",familyID));
+                            logger.info(String.format("Caution: Family %s skipped as it is not a trio nor a parent/child pair; At the moment Phase By Transmission only supports trios and parent/child pairs. Family skipped.",familyEntry.getKey()));
                         break;
                     }
                 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PreciseNonNegativeDouble.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PreciseNonNegativeDouble.java
index b68739b48e..d3f4f6266e 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PreciseNonNegativeDouble.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PreciseNonNegativeDouble.java
@@ -86,7 +86,7 @@ public int compareTo(PreciseNonNegativeDouble other) {
         if (Math.abs(logValDiff) <= EQUALS_THRESH)
             return 0; // this.equals(other)
 
-        return new Double(Math.signum(logValDiff)).intValue();
+        return (int)Math.signum(logValDiff);
     }
 
     public boolean equals(PreciseNonNegativeDouble other) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
index f16deb701e..d8ae6b28b0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
@@ -288,7 +288,7 @@ public PhasingStatsAndOutput map(RefMetaDataTracker tracker, ReferenceContext re
     private VariantContext reduceVCToSamples(VariantContext vc, Set<String> samplesToPhase) {
 //        for ( String sample : samplesToPhase )
 //            logger.debug(String.format("  Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
-        VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true);
+        VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
 //        logger.debug("original VC = " + vc);
 //        logger.debug("sub      VC = " + subvc);
         return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
@@ -870,7 +870,7 @@ private List<GenotypeAndReadBases> trimWindow(List<GenotypeAndReadBases> listHet
 
             int useOnLeft, useOnRight;
             if (numOnLeft <= numOnRight) {
-                int halfToUse = new Double(Math.floor(numToUse / 2.0)).intValue(); // skimp on the left [floor], and be generous with the right side
+                int halfToUse = numToUse / 2; // skimp on the left [floor], and be generous with the right side
                 useOnLeft = Math.min(halfToUse, numOnLeft);
                 useOnRight = Math.min(numToUse - useOnLeft, numOnRight);
             }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
index 9676704c2b..9d96dedefb 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
@@ -25,6 +25,7 @@
 import java.io.File;
 import java.io.PrintStream;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.List;
 
@@ -262,20 +263,33 @@ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentCo
                 sequenceInvalid = true;
                 invReason.add("SITE_IS_FILTERED");
             }
+
+            String refString = validate.getReference().getDisplayString();
+            String altString = validate.getAlternateAllele(0).getDisplayString();
+
             if ( validate.isIndel() ) {
                 sequence.append(Character.toUpperCase((char)ref.getBase()));
                 rawSequence.append(Character.toUpperCase((char)ref.getBase()));
+                final byte[] refAllele = validate.getReference().getBases();
+                refString = new String(Arrays.copyOfRange(refAllele, 1, refAllele.length));
+                if ( refString.isEmpty() )
+                    refString = "-";
+                final byte[] altAllele = validate.getAlternateAllele(0).getBases();
+                altString = new String(Arrays.copyOfRange(altAllele, 1, altAllele.length));
+                if ( altString.isEmpty() )
+                    altString = "-";
             }
+
             sequence.append('[');
-            sequence.append(validate.getAlternateAllele(0).toString());
+            sequence.append(altString);
             sequence.append('/');
-            sequence.append(validate.getReference().toString());
+            sequence.append(refString);
             sequence.append(']');
             // do this to the raw sequence to -- the indeces will line up that way
             rawSequence.append('[');
-            rawSequence.append(validate.getAlternateAllele(0).getBaseString());
+            rawSequence.append(altString);
             rawSequence.append('/');
-            rawSequence.append(validate.getReference().getBaseString());
+            rawSequence.append(refString);
             rawSequence.append(']');
             allelePos = ref.getLocus();
             if ( indelCounter > 0 ) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java
index e54dc63888..3e48520a78 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java
@@ -43,7 +43,7 @@ public boolean selectSiteInSamples(VariantContext vc) {
             return true;
         // want to include a site in the given samples if it is *likely* to be variant (via the EXACT model)
         // first subset to the samples
-        VariantContext subContext = vc.subContextFromSamples(samples, true);
+        VariantContext subContext = vc.subContextFromSamples(samples);
 
         // now check to see (using EXACT model) whether this should be variant
         // do we want to apply a prior? maybe user-spec?
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java
index 0f55524a6e..de832b1089 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java
@@ -45,7 +45,7 @@ public boolean selectSiteInSamples(VariantContext vc) {
         if ( samples == null || samples.isEmpty() )
             return true;
 
-        VariantContext subContext =  vc.subContextFromSamples(samples, false);
+        VariantContext subContext =  vc.subContextFromSamples(samples);
         if ( subContext.isPolymorphicInSamples() ) {
             return true;
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java
index af6a520026..67ddc47ff8 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java
@@ -26,7 +26,6 @@
 
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@@ -40,14 +39,11 @@ public class GenomeEvent implements Comparable {
     final protected GenomeLoc loc;
     /** A set of the alleles segregating in this context */
     final protected List<Allele> alleles;
-    final protected Byte refBase;
 //    final protected HashMap<String, Object> attributes;
 
-    public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List<Allele> alleles, HashMap<String, Object> attributes,
-                       byte base) {
+    public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List<Allele> alleles, HashMap<String, Object> attributes) {
         this.loc = parser.createGenomeLoc(contig, start, stop);
         this.alleles = alleles;
-        this.refBase = base;
 //        this.attributes = attributes;
     }
 
@@ -68,7 +64,7 @@ public int compareTo(final Object o) {
 
     public VariantContext createVariantContextFromEvent() {
         return new VariantContextBuilder("event", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
-                .log10PError(0.0).referenceBaseForIndel(refBase).make();
+                .log10PError(0.0).make();
 
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java
index 4b68eed2ea..7c1d63f028 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java
@@ -115,7 +115,7 @@ public void logCurrentSiteData(VariantContext vc, boolean selectedInTargetSample
 
         // create bare-bones event and log in corresponding bin
         // attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes
-        GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel());
+        GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes);
 
         binnedEventArray[binIndex].add(event);
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java
index eda75d6475..4019c56312 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java
@@ -65,7 +65,7 @@ public void logCurrentSiteData(VariantContext vc, boolean selectedInTargetSample
         }
         // create bare-bones event and log in corresponding bin
         // attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes
-        GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel());
+        GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes);
         binnedEventArray.add(event);
 
     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java
index 0b395bc626..58cd147370 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java
@@ -500,7 +500,10 @@ private enum EvalCompMatchType { NO_MATCH, STRICT, LENIENT }
 
     @Requires({"eval != null", "comp != null"})
     private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
-        // find all of the matching comps
+        if ( comp.getType() == VariantContext.Type.NO_VARIATION || eval.getType() == VariantContext.Type.NO_VARIATION )
+            // if either of these is NO_VARIATION, treat the pair as a LENIENT match
+            return EvalCompMatchType.LENIENT;
+
         if ( comp.getType() != eval.getType() )
             return EvalCompMatchType.NO_MATCH;
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java
index 2a759f2f5a..97814075ca 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java
@@ -183,13 +183,13 @@ private static GATKReport initializeGATKReport(final Collection<VariantStratifie
                     throw new ReviewedStingException("Datamap is empty for analysis " + scanner.getAnalysis());
                 
                 // add DataPoint's for each field marked as such
-                for (final Field field : datamap.keySet()) {
+                for (final Map.Entry<Field, DataPoint> field : datamap.entrySet()) {
                     try {
-                        field.setAccessible(true);
+                        field.getKey().setAccessible(true);
 
                         // this is an atomic value, add a column for it
-                        final String format = datamap.get(field).format();
-                        table.addColumn(field.getName(), format);
+                        final String format = field.getValue().format();
+                        table.addColumn(field.getKey().getName(), format);
                     } catch (SecurityException e) {
                         throw new StingException("SecurityException: " + e);
                     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java
index 88bf3aef99..b87a8ee85d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java
@@ -41,7 +41,7 @@ public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceCont
         ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
 
         int numHetsHere = 0;
-        float numGenosHere = 0;
+        int numGenosHere = 0;
         int numIndsHere = 0;
 
         for (final Genotype genotype : vc.getGenotypes()) {
@@ -56,7 +56,7 @@ public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceCont
                 //increment stats for pairwise mismatches
 
                 for (Allele allele : genotype.getAlleles()) {
-                    if (allele.isNonNull() && allele.isCalled()) {
+                    if (allele.isCalled()) {
                         String alleleString = allele.toString();
                         alleleCounts.putIfAbsent(alleleString, 0);
                         alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
@@ -68,7 +68,7 @@ public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceCont
             //only if have one called genotype at least
             this.numSites++;
 
-            this.totalHet += numHetsHere / numGenosHere;
+            this.totalHet += numHetsHere / (double)numGenosHere;
 
             //compute based on num sites
             float harmonicFactor = 0;
@@ -79,7 +79,7 @@ public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceCont
 
             //now compute pairwise mismatches
             float numPairwise = 0;
-            float numDiffs = 0;
+            int numDiffs = 0;
             for (String allele1 : alleleCounts.keySet()) {
                 int allele1Count = alleleCounts.get(allele1);
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java
index 6c4fcd26d8..fe24379765 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java
@@ -57,9 +57,12 @@ public void updateTiTv(VariantContext vc, boolean updateStandard) {
         }
     }
 
-    public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-        if (vc1 != null) updateTiTv(vc1, false);
-        if (vc2 != null) updateTiTv(vc2, true);
+    @Override
+    public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if (eval != null)
+            updateTiTv(eval, false);
+        if (comp != null)
+            updateTiTv(comp, true);
     }
 
     @Override
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java
index 158f20b61e..2b1bd9c62c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java
@@ -19,6 +19,8 @@
  * it computes the AC from the genotypes themselves.  If no AC can be computed, 0 is used.
  */
 public class AlleleCount extends VariantStratifier {
+    int nchrom;
+
     @Override
     public void initialize() {
         // we can only work with a single eval VCF, and it must have genotypes
@@ -26,7 +28,8 @@ public void initialize() {
             throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification only works with a single eval vcf");
 
         // There are 2 x n sample chromosomes for diploids
-        int nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * 2;
+        // TODO -- generalize to handle multiple ploidy
+        nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * 2;
         if ( nchrom < 2 )
             throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification requires an eval vcf with at least one sample");
 
@@ -43,7 +46,8 @@ public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker t
             int AC = 0; // by default, the site is considered monomorphic
 
             if ( eval.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && eval.isBiallelic() ) {
-                AC = eval.getAttributeAsInt(VCFConstants.MLE_ALLELE_COUNT_KEY, 0);
+                // the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1)
+                AC = Math.min(eval.getAttributeAsInt(VCFConstants.MLE_ALLELE_COUNT_KEY, 0), nchrom);
             } else if ( eval.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) && eval.isBiallelic() ) {
                 AC = eval.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0);
             } else if ( eval.isVariant() ) {
@@ -51,6 +55,12 @@ public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker t
                     AC = Math.max(AC, eval.getCalledChrCount(allele));
             }
 
+            // make sure that the AC isn't invalid
+            if ( AC > nchrom )
+                throw new UserException.MalformedVCF(String.format("The AC value (%d) at position %s:%d " +
+                        "is larger than the number of chromosomes over all samples (%d)", AC,
+                        eval.getChr(), eval.getStart(), nchrom));
+
             return Collections.singletonList((Object) AC);
         } else {
             return Collections.emptyList();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java
index 693bdf1988..2ad08d8064 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java
@@ -28,7 +28,7 @@ public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker t
             final Collection<VariantContext> knownComps = tracker.getValues(knowns, ref.getLocus());
             for ( final VariantContext c : knownComps ) {
                 // loop over sites, looking for something that matches the type eval
-                if ( eval.getType() == c.getType() ) {
+                if ( eval.getType() == c.getType() || eval.getType() == VariantContext.Type.NO_VARIATION ) {
                     return KNOWN_STATES;
                 }
             }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
index 3dcc1f85f1..e84b0b10e1 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
@@ -197,7 +197,9 @@ public VariantContext getSubsetOfVariantContext(VariantContext vc, String sample
      * @return a new VariantContext with just the requested samples
      */
     public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
-        return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, false));
+        // if we want to preserve AC0 sites as polymorphic we need to not rederive alleles
+        final boolean deriveAlleles = variantEvalWalker.ignoreAC0Sites();
+        return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, deriveAlleles));
     }
 
     public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) {
@@ -262,12 +264,8 @@ public VariantContext ensureAnnotations(final VariantContext vc, final VariantCo
                 // First, filter the VariantContext to represent only the samples for evaluation
                 VariantContext vcsub = vc;
 
-                if (subsetBySample && vc.hasGenotypes()) {
-                    if ( variantEvalWalker.isSubsettingToSpecificSamples() )
-                        vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
-                    else
-                        vcsub = ensureAnnotations(vc, vc);
-                }
+                if (subsetBySample && vc.hasGenotypes())
+                    vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
 
                 if ((byFilter || !vcsub.isFiltered())) {
                     addMapping(mapping, VariantEval.getAllSampleName(), vcsub);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java
index d457395289..af07783992 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java
@@ -177,6 +177,7 @@ private static void writeTranchesDebuggingInfo(File f, List<VariantDatum> tranch
                 double runningValue = metric.getRunningMetric(i);
                 out.printf("%.4f %d %.4f%n", d.lod, score, runningValue);
             }
+            out.close();
         } catch (FileNotFoundException e) {
             throw new UserException.CouldNotCreateOutputFile(f, e);
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
index 45fdad4f88..e88505f992 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
@@ -235,7 +235,7 @@ private static double decodeAnnotation( final String annotationKey, final Varian
         double value;
 
         try {
-            value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) );
+            value = vc.getAttributeAsDouble( annotationKey, Double.NaN );
             if( Double.isInfinite(value) ) { value = Double.NaN; }
             if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM
                   value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java
index 32350f0fa2..a85129d78a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java
@@ -52,6 +52,7 @@ public class VariantDatum implements Comparable<VariantDatum> {
     public int worstAnnotation;
     public MultivariateGaussian assignment; // used in K-means implementation 
 
+    @Override
     public int compareTo( final VariantDatum other ) {
         return Double.compare(this.lod, other.lod);
     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
index ab2ff61769..c670ad2fdb 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@@ -331,7 +331,7 @@ public void onTraversalDone( final ExpandingArrayList<VariantDatum> reduceSum )
         // Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset.
         double lodCutoff = 0.0;
         for( final Tranche tranche : tranches ) {
-            if( MathUtils.compareDoubles(tranche.ts, TS_FILTER_LEVEL, 0.0001)==0 ) {
+            if( MathUtils.compareDoubles(tranche.ts, TS_FILTER_LEVEL, 0.0001) == 0 ) {
                 lodCutoff = tranche.minVQSLod;
             }
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
index 98a8ac92b6..555999bdb6 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
@@ -33,10 +33,10 @@
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.Reference;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
+import org.broadinstitute.sting.gatk.walkers.TreeReducible;
 import org.broadinstitute.sting.gatk.walkers.Window;
 import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts;
 import org.broadinstitute.sting.utils.SampleUtils;
-import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.codecs.vcf.*;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
@@ -67,6 +67,19 @@
  * VCF and then run SelectVariants to extract the common records with -select 'set == "Intersection"', as worked out
  * in the detailed example on the wiki.
  *
+ * Note that CombineVariants supports multi-threaded parallelism (8/15/12).  This is particularly useful
+ * when converting from VCF to BCF2, which can be expensive.  In this case each thread spends CPU time
+ * doing the conversion, and the GATK engine is smart enough to merge the partial BCF2 blocks together
+ * efficiently.  However, since this merge runs in only one thread, you can quickly reach diminishing
+ * returns with the number of parallel threads.  -nt 4 works well but -nt 8 may be too much.
+ *
+ * Some fine details about the merging algorithm:
+ *   <ul>
+ *   <li> As of GATK 2.1, when merging multiple VCF records at a site, the combined VCF record has the QUAL of
+ *      the first VCF record with a non-MISSING QUAL value.  The previous behavior was to take the
+ *      max QUAL, which sometimes resulted in strange downstream confusion</li>
+ *   </ul>
+ *
  * <h2>Input</h2>
  * <p>
  * One or more variant sets to combine.
@@ -100,7 +113,7 @@
  */
 @DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
 @Reference(window=@Window(start=-50,stop=50))
-public class CombineVariants extends RodWalker<Integer, Integer> {
+public class CombineVariants extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
     /**
      * The VCF files to merge together
      *
@@ -188,7 +201,8 @@ public void initialize() {
             logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites only output option");
 
         if ( PRIORITY_STRING == null ) {
-            PRIORITY_STRING = Utils.join(",", vcfRods.keySet());
+            genotypeMergeOption = VariantContextUtils.GenotypeMergeType.UNSORTED;
+            //PRIORITY_STRING = Utils.join(",", vcfRods.keySet());  Deleted by Ami (7/10/12)
             logger.info("Priority string not provided, using arbitrary genotyping order: " + PRIORITY_STRING);
         }
 
@@ -313,5 +327,10 @@ public Integer reduce(Integer counter, Integer sum) {
         return counter + sum;
     }
 
+    @Override
+    public Integer treeReduce(Integer lhs, Integer rhs) {
+        return reduce(lhs, rhs); // same combination rule as reduce(): the two partial counts are added
+    }
+
     public void onTraversalDone(Integer sum) {}
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java
index d223adefbe..f89bcb2a70 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java
@@ -34,15 +34,13 @@
 import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.utils.SampleUtils;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
 import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
 import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
 
 /**
  * Filters a lifted-over VCF file for ref bases that have been changed.
@@ -66,7 +64,7 @@ public void initialize() {
         Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
         Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
 
-        final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaDataInSortedOrder() : null, samples);
+        final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaDataInSortedOrder() : Collections.<VCFHeaderLine>emptySet(), samples);
         writer.writeHeader(vcfHeader);
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java
index 235eb1d9be..9fe499a036 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java
@@ -139,11 +139,11 @@ private int writeLeftAlignedIndel(final VariantContext vc, final ReferenceContex
         final byte[] refSeq = ref.getBases();
 
         // get the indel length
-        int indelLength;
+        final int indelLength;
         if ( vc.isSimpleDeletion() )
-            indelLength = vc.getReference().length();
+            indelLength = vc.getReference().length() - 1;
         else
-            indelLength = vc.getAlternateAllele(0).length();
+            indelLength = vc.getAlternateAllele(0).length() - 1;
 
         if ( indelLength > 200 ) {
             writer.add(vc);
@@ -151,7 +151,7 @@ private int writeLeftAlignedIndel(final VariantContext vc, final ReferenceContex
         }
 
         // create an indel haplotype
-        int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
+        final int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
         final byte[] originalIndel = makeHaplotype(vc, refSeq, originalIndex, indelLength);
 
         // create a CIGAR string to represent the event
@@ -170,11 +170,12 @@ private int writeLeftAlignedIndel(final VariantContext vc, final ReferenceContex
             VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make();
             //System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference));
 
-            int indelIndex = originalIndex-difference;
-            byte[] newBases = new byte[indelLength];
-            System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength);
-            Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
-            newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]);
+            final int indelIndex = originalIndex-difference;
+            final byte[] newBases = new byte[indelLength + 1];
+            newBases[0] = refSeq[indelIndex-1];
+            System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 1, indelLength);
+            final Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
+            newVC = updateAllele(newVC, newAllele);
 
             writer.add(newVC);
             return 1;
@@ -195,7 +196,7 @@ private static byte[] makeHaplotype(VariantContext vc, byte[] ref, int indexOfRe
         if ( vc.isSimpleDeletion() ) {
             indexOfRef += indelLength;
         } else {
-            System.arraycopy(vc.getAlternateAllele(0).getBases(), 0, hap, currentPos, indelLength);
+            System.arraycopy(vc.getAlternateAllele(0).getBases(), 1, hap, currentPos, indelLength);
             currentPos += indelLength;
         }
 
@@ -205,14 +206,14 @@ private static byte[] makeHaplotype(VariantContext vc, byte[] ref, int indexOfRe
         return hap;
     }
 
-    public static VariantContext updateAllele(VariantContext vc, Allele newAllele, Byte refBaseForIndel) {
+    public static VariantContext updateAllele(final VariantContext vc, final Allele newAllele) {
         // create a mapping from original allele to new allele
         HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
         if ( newAllele.isReference() ) {
             alleleMap.put(vc.getReference(), newAllele);
-            alleleMap.put(vc.getAlternateAllele(0), vc.getAlternateAllele(0));
+            alleleMap.put(vc.getAlternateAllele(0), Allele.create(newAllele.getBases()[0], false));
         } else {
-            alleleMap.put(vc.getReference(), vc.getReference());
+            alleleMap.put(vc.getReference(), Allele.create(newAllele.getBases()[0], true));
             alleleMap.put(vc.getAlternateAllele(0), newAllele);
         }
 
@@ -229,6 +230,6 @@ public static VariantContext updateAllele(VariantContext vc, Allele newAllele, B
             newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
         }
 
-        return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).referenceBaseForIndel(refBaseForIndel).make();
+        return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make();
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java
index 0d9a4fc031..63209e98c5 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java
@@ -119,7 +119,6 @@ private void convertAndWrite(VariantContext vc, ReferenceContext ref) {
 
         if ( toInterval != null ) {
             // check whether the strand flips, and if so reverse complement everything
-            // TODO -- make this work for indels (difficult because the 'previous base' context needed will be changing based on indel type/size)
             if ( fromInterval.isPositiveStrand() != toInterval.isPositiveStrand() && vc.isPointEvent() ) {
                 vc = VariantContextUtils.reverseComplement(vc);
             }
@@ -132,11 +131,10 @@ private void convertAndWrite(VariantContext vc, ReferenceContext ref) {
                         .attribute("OriginalStart", fromInterval.getStart()).make();
             }
 
-            VariantContext newVC = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
-            if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
+            if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(vc) ) {
                 logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
-                        originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),
-                        originalVC.getReference(), originalVC.getAlternateAllele(0), newVC.getReference(), newVC.getAlternateAllele(0)));
+                        originalVC.getChr(), originalVC.getStart(), vc.getChr(), vc.getStart(),
+                        originalVC.getReference(), originalVC.getAlternateAllele(0), vc.getReference(), vc.getAlternateAllele(0)));
             }
 
             writer.add(vc);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java
index f14f6c2a67..46a3a8cd1d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java
@@ -120,12 +120,6 @@ public class SelectHeaders extends RodWalker<Integer, Integer> implements TreeRe
     @Argument(fullName = "exclude_header_name", shortName = "xl_hn", doc = "Exclude header. Can be specified multiple times", required = false)
     public Set<String> XLheaderNames;
 
-    /**
-     * Note that reference inclusion takes precedence over other header matching. If set other reference lines may be excluded but the file name will still be added.
-     */
-    @Argument(fullName = "include_reference_name", shortName = "irn", doc = "If set the reference file name minus the file extension will be added to the headers", required = false)
-    public boolean includeReference;
-
     /**
      * Note that interval name inclusion takes precedence over other header matching. If set other interval lines may be excluded but the intervals will still be added.
      */
@@ -162,10 +156,6 @@ public void initialize() {
         // Select only the headers requested by name or expression.
         headerLines = new LinkedHashSet<VCFHeaderLine>(getSelectedHeaders(headerLines));
 
-        // Optionally add in the reference.
-        if (includeReference && getToolkit().getArguments().referenceFile != null)
-            headerLines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, FilenameUtils.getBaseName(getToolkit().getArguments().referenceFile.getName())));
-
         // Optionally add in the intervals.
         if (includeIntervals && getToolkit().getArguments().intervals != null) {
             for (IntervalBinding<Feature> intervalBinding : getToolkit().getArguments().intervals) {
@@ -205,7 +195,7 @@ private Set<VCFHeaderLine> getSelectedHeaders(Set<VCFHeaderLine> headerLines) {
             selectedHeaders = ListFileUtils.excludeMatching(selectedHeaders, headerKey, XLheaderNames, true);
 
         // always include the contig lines
-        selectedHeaders = VCFUtils.withUpdatedContigsAsLines(selectedHeaders, getToolkit().getArguments().referenceFile, getToolkit().getMasterSequenceDictionary());
+        selectedHeaders = VCFUtils.withUpdatedContigsAsLines(selectedHeaders, getToolkit().getArguments().referenceFile, getToolkit().getMasterSequenceDictionary(), true);
         return selectedHeaders;
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
index e4831eaf29..bfd9aa52f6 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
@@ -31,7 +31,6 @@
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.samples.Sample;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.gatk.walkers.TreeReducible;
 import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts;
@@ -311,10 +310,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
     private File rsIDFile = null;
 
 
-    @Hidden
-    @Argument(fullName="outMVFile", shortName="outMVFile", doc="", required=false)
-    private String outMVFile = null;
-
     @Hidden
     @Argument(fullName="fullyDecode", doc="If true, the incoming VariantContext will be fully decoded", required=false)
     private boolean fullyDecode = false;
@@ -329,7 +324,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
 
 
     /* Private class used to store the intermediate variants in the integer random selection process */
-    private class RandomVariantStructure {
+    private static class RandomVariantStructure {
         private VariantContext vc;
 
         RandomVariantStructure(VariantContext vcP) {
@@ -369,8 +364,6 @@ public enum NumberAlleleRestriction {
     private int positionToAdd = 0;
     private RandomVariantStructure [] variantArray;
 
-    private PrintStream outMVFileStream = null;
-
     //Random number generator for the genotypes to remove
     private Random randomGenotypes = new Random();
 
@@ -470,6 +463,7 @@ public void initialize() {
             final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
             UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH;
             UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
+            UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
             UAC.NO_SLOD = true;
             UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
             headerLines.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null));
@@ -527,23 +521,6 @@ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentCo
             if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1)
                 break;
 
-            if (outMVFile != null){
-                for( String familyId : mv.getViolationFamilies()){
-                    for(Sample sample : this.getSampleDB().getFamily(familyId)){
-                        if(sample.getParents().size() > 0){
-                            outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
-                                    "childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
-                                    vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(),  vc.getCalledChrCount(vc.getAlternateAllele(0)),
-                                    sample.getMaternalID(), sample.getPaternalID(), sample.getID(),
-                                    vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(),
-                                    vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(),
-                                    vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString()  );
-
-                        }
-                    }
-                }
-            }
-
             if (DISCORDANCE_ONLY) {
                 Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation());
                 if (!isDiscordant(vc, compVCs))
@@ -567,7 +544,7 @@ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentCo
             VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS);
 
             if ( REGENOTYPE && sub.isPolymorphicInSamples() && hasPLs(sub) ) {
-                final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(tracker, ref, context, sub)).filters(sub.getFiltersMaybeNull());
+                final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(sub)).filters(sub.getFiltersMaybeNull());
                 addAnnotations(builder, sub);
                 sub = builder.make();
             }
@@ -730,7 +707,13 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean exclu
         if ( vc.getAlleles().size() != sub.getAlleles().size() )
             newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
 
-        //Remove a fraction of the genotypes if needed
+        // if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags
+        if ( vc.getNSamples() != sub.getNSamples() ) {
+            builder.rmAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY);
+            builder.rmAttribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY);
+        }
+
+        // Remove a fraction of the genotypes if needed
         if ( fractionGenotypes > 0 ){
             ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
             for ( Genotype genotype : newGC ) {
@@ -767,17 +750,21 @@ private void addAnnotations(final VariantContextBuilder builder, final VariantCo
 
         VariantContextUtils.calculateChromosomeCounts(builder, false);
 
+        boolean sawDP = false;
         int depth = 0;
         for (String sample : originalVC.getSampleNames()) {
             Genotype g = originalVC.getGenotype(sample);
 
             if ( ! g.isFiltered() ) {
-                if ( g.hasDP() )
+                if ( g.hasDP() ) {
                     depth += g.getDP();
+                    sawDP = true;
+                }
             }
         }
 
-        builder.attribute("DP", depth);
+        if ( sawDP )
+            builder.attribute("DP", depth);
     }
 
     private void randomlyAddVariant(int rank, VariantContext vc) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
index 4b793a31ef..c92551a734 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
@@ -130,35 +130,16 @@ private void validate(VariantContext vc, RefMetaDataTracker tracker, ReferenceCo
             return;
 
         // get the true reference allele
-        Allele reportedRefAllele = vc.getReference();
-        Allele observedRefAllele = null;
-        // insertions
-        if ( vc.isSimpleInsertion() ) {
-            observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING);
+        final Allele reportedRefAllele = vc.getReference();
+        final int refLength = reportedRefAllele.length();
+        if ( refLength > 100 ) {
+            logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", refLength, vc.getChr(), vc.getStart()));
+            return;
         }
-        // deletions
-        else if ( vc.isSimpleDeletion() || vc.isMNP() ) {
-            // we can't validate arbitrarily long deletions
-            if ( reportedRefAllele.length() > 100 ) {
-                logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart()));
-                return;
-            }
 
-            // deletions are associated with the (position of) the last (preceding) non-deleted base;
-            // hence to get actually deleted bases we need offset = 1
-            int offset = vc.isMNP() ? 0 : 1;
-            byte[] refBytes = ref.getBases();
-            byte[] trueRef = new byte[reportedRefAllele.length()];
-            for (int i = 0; i < reportedRefAllele.length(); i++)
-                trueRef[i] = refBytes[i+offset];
-            observedRefAllele = Allele.create(trueRef, true);
-        }
-        // SNPs, etc. but not mixed types because they are too difficult
-        else if ( !vc.isMixed() ) {
-            byte[] refByte = new byte[1];
-            refByte[0] = ref.getBase();
-            observedRefAllele = Allele.create(refByte, true);
-        }
+        final byte[] observedRefBases = new byte[refLength];
+        System.arraycopy(ref.getBases(), 0, observedRefBases, 0, refLength);
+        final Allele observedRefAllele = Allele.create(observedRefBases);
 
         // get the RS IDs
         Set<String> rsIDs = null;
@@ -171,10 +152,10 @@ else if ( !vc.isMixed() ) {
         try {
             switch( type ) {
                 case ALL:
-                    vc.extraStrictValidation(observedRefAllele, ref.getBase(), rsIDs);
+                    vc.extraStrictValidation(reportedRefAllele, observedRefAllele, rsIDs);
                     break;
                 case REF:
-                    vc.validateReferenceBases(observedRefAllele, ref.getBase());
+                    vc.validateReferenceBases(reportedRefAllele, observedRefAllele);
                     break;
                 case IDS:
                     vc.validateRSIDs(rsIDs);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java
index 7e82fc4540..3fba8fa77e 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java
@@ -8,8 +8,6 @@
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
-import org.broadinstitute.sting.utils.R.RScriptExecutorException;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@@ -18,7 +16,6 @@
 import org.broadinstitute.sting.utils.text.XReadLines;
 import org.broadinstitute.sting.utils.variantcontext.Genotype;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
 import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
 
 import java.io.*;
@@ -95,7 +92,6 @@ public void initialize() {
         // write to the fam file, the first six columns of the standard ped file
         // first, load data from the input meta data file
         Map<String,Map<String,String>> metaValues = new HashMap<String,Map<String,String>>();
-        Set<String> samplesToUse = new HashSet<String>();
         logger.debug("Reading in metadata...");
         try {
             if ( metaDataFile.getAbsolutePath().endsWith(".fam") ) {
@@ -274,6 +270,7 @@ public void onTraversalDone(Integer numSites) {
                     inStream.read(readGenotypes);
                     outBed.write(readGenotypes);
                 }
+                inStream.close();
             } catch (IOException e) {
                 throw new ReviewedStingException("Error reading form temp file for input.",e);
             }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
index 844c4d5fbe..b9577ca9b2 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
@@ -372,7 +372,7 @@ public void onTraversalDone(Integer sum) {}
     // ----------------------------------------------------------------------------------------------------
 
     public static abstract class Getter { public abstract String get(VariantContext vc); }
-    public static Map<String, Getter> getters = new HashMap<String, Getter>();
+    public static final Map<String, Getter> getters = new HashMap<String, Getter>();
 
     static {
         // #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT
@@ -381,7 +381,7 @@ public static abstract class Getter { public abstract String get(VariantContext
         getters.put("REF", new Getter() {
             public String get(VariantContext vc) {
                 StringBuilder x = new StringBuilder();
-                x.append(vc.getAlleleStringWithRefPadding(vc.getReference()));
+                x.append(vc.getReference().getDisplayString());
                 return x.toString();
             }
         });
@@ -393,7 +393,7 @@ public String get(VariantContext vc) {
 
                 for ( int i = 0; i < n; i++ ) {
                     if ( i != 0 ) x.append(",");
-                    x.append(vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(i)));
+                    x.append(vc.getAlternateAllele(i));
                 }
                 return x.toString();
             }
@@ -435,11 +435,8 @@ public String get(VariantContext vc) {
     private static Object splitAltAlleles(VariantContext vc) {
         final int numAltAlleles = vc.getAlternateAlleles().size();
         if ( numAltAlleles == 1 )
-            return vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(0));
+            return vc.getAlternateAllele(0);
 
-        final List<String> alleles = new ArrayList<String>(numAltAlleles);
-        for ( Allele allele : vc.getAlternateAlleles() )
-            alleles.add(vc.getAlleleStringWithRefPadding(allele));
-        return alleles;
+        return vc.getAlternateAlleles();
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java
index 787d4d9abc..78c9c4a1ca 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java
@@ -103,12 +103,6 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
     @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod", required=false)
     protected String sampleName = null;
 
-    /**
-     * This argument is useful for fixing input VCFs with bad reference bases (the output will be a fixed version of the VCF).
-     */
-    @Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false)
-    protected boolean fixReferenceBase = false;
-
     private Set<String> allowedGenotypeFormatStrings = new HashSet<String>();
     private boolean wroteHeader = false;
     private Set<String> samples;
@@ -140,10 +134,6 @@ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentCo
                 builder.genotypes(g);
             }
 
-            if ( fixReferenceBase ) {
-                builder.referenceBaseForIndel(ref.getBase());
-            }
-
             writeRecord(builder.make(), tracker, ref.getLocus());
         }
 
@@ -169,8 +159,8 @@ private Collection<VariantContext> getVariantContexts(RefMetaDataTracker tracker
                             continue;
 
                         Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
-                        alleleMap.put(RawHapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isSimpleInsertion()));
-                        alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
+                        alleleMap.put(RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion()));
+                        alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
                         hapmap.setActualAlleles(alleleMap);
 
                         // also, use the correct positioning for insertions
diff --git a/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java b/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java
new file mode 100644
index 0000000000..8964c16cb8
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java
@@ -0,0 +1,53 @@
+package org.broadinstitute.sting.utils;
+
+/**
+ * Simple utility class that makes it convenient to print unit adjusted times (seconds up to weeks)
+ */
+public class AutoFormattingTime {
+    double timeInSeconds;           // the time being formatted, always stored in seconds
+    int precision;      // number of digits after the decimal point in toString()
+
+    public AutoFormattingTime(double timeInSeconds, int precision) {
+        this.timeInSeconds = timeInSeconds;
+        this.precision = precision;
+    }
+
+    public AutoFormattingTime(double timeInSeconds) {
+        this(timeInSeconds, 1); // default precision: one decimal place
+    }
+
+    public double getTimeInSeconds() {
+        return timeInSeconds;
+    }
+
+    /**
+     * Renders the time in the largest unit its thresholds allow (s, m, h, d, w); e.g. 10000 s prints as 2.8 h
+     * @return the scaled value followed by its unit suffix, formatted as "%6.Nf %s" where N is the precision
+     */
+    public String toString() {
+        double unitTime = timeInSeconds;
+        String unit = "s";
+
+        if ( timeInSeconds > 120 ) { // more than 120 seconds: report minutes
+            unitTime = timeInSeconds / 60; // minutes
+            unit = "m";
+
+            if ( unitTime > 120 ) { // more than 120 minutes: report hours
+                unitTime /= 60; // hours
+                unit = "h";
+
+                if ( unitTime > 100 ) { // more than 100 hours: report days
+                    unitTime /= 24; // days
+                    unit = "d";
+
+                    if ( unitTime > 20 ) { // more than 20 days: report weeks
+                        unitTime /= 7; // weeks
+                        unit = "w";
+                    }
+                }
+            }
+        }
+
+        return String.format("%6."+precision+"f %s", unitTime, unit);
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
index 393dd57358..2d7f51c3fb 100644
--- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
@@ -67,10 +67,10 @@ private Base(char base, int index) {
     public static final byte DELETION_INDEX = 4;
     public static final byte NO_CALL_INDEX = 5; // (this is 'N')
 
-    public static int gIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'G');
-    public static int cIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'C');
-    public static int aIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'A');
-    public static int tIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'T');
+    public static final int aIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'A');
+    public static final int cIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'C');
+    public static final int gIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'G');
+    public static final int tIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'T');
 
     /// In genetics, a transition is a mutation changing a purine to another purine nucleotide (A <-> G) or
     // a pyrimidine to another pyrimidine nucleotide (C <-> T).
@@ -227,14 +227,21 @@ static public int extendedBaseToBaseIndex(byte base) {
     }
 
     @Deprecated
-    static public boolean isRegularBase(char base) {
+    static public boolean isRegularBase( final char base ) {
         return simpleBaseToBaseIndex(base) != -1;
     }
 
-    static public boolean isRegularBase(byte base) {
+    static public boolean isRegularBase( final byte base ) {
         return simpleBaseToBaseIndex(base) != -1;
     }
 
+    static public boolean isAllRegularBases( final byte[] bases ) {
+        for( final byte base : bases) {
+            if( !isRegularBase(base) ) { return false; }
+        }
+        return true;
+    }
+
     static public boolean isNBase(byte base) {
         return base == 'N' || base == 'n';
     }
@@ -431,6 +438,37 @@ static public String simpleComplement(String bases) {
         return new String(simpleComplement(bases.getBytes()));
     }
 
+    /**
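+     * Upper-cases the bases in place (a, c, g, t and n become A, C, G, T and N) and returns the same array
+     *
+     * @param bases   the bases; this array is modified
+     * @return the array that was passed in, now upper cased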
+     */
+    static public byte[] convertToUpperCase(final byte[] bases) {
+        for ( int i = 0; i < bases.length; i++ ) {
+            if ( (char)bases[i] >= 'a' )
+                bases[i] = toUpperCaseBase(bases[i]);
+        }
+        return bases;
+    }
+
+    static public byte toUpperCaseBase(final byte base) {
+        switch (base) {
+            case 'a':
+                return 'A';
+            case 'c':
+                return 'C';
+            case 'g':
+                return 'G';
+            case 't':
+                return 'T';
+            case 'n':
+                return 'N';
+            default:
+                return base;
+        }
+    }
+
     /**
      * Returns the index of the most common base in the basecounts array. To be used with
      * pileup.getBaseCounts.
diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
index 4f2b5b2eb7..77ecd295f3 100644
--- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
+++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
@@ -43,9 +43,6 @@
 /**
  * Factory class for creating GenomeLocs
  */
-@Invariant({
-        "logger != null",
-        "contigInfo != null"})
 public final class GenomeLocParser {
     private static Logger logger = Logger.getLogger(GenomeLocParser.class);
 
@@ -54,20 +51,39 @@ public final class GenomeLocParser {
     // Ugly global variable defining the optional ordering of contig elements
     //
     // --------------------------------------------------------------------------------------------------------------
-    private final MasterSequenceDictionary contigInfo;
+
+    /**
+     * This single variable holds the underlying SAMSequenceDictionary used by the GATK.  We assume
+     * it is thread safe.
+     */
+    final private SAMSequenceDictionary SINGLE_MASTER_SEQUENCE_DICTIONARY;
+
+    /**
+     * A thread-local cache of contig info: each thread gets its own CachingSequenceDictionary
+     */
+    private final ThreadLocal<CachingSequenceDictionary> contigInfoPerThread =
+            new ThreadLocal<CachingSequenceDictionary>();
+
+    /**
+     * @return a caching sequence dictionary appropriate for this thread
+     */
+    private CachingSequenceDictionary getContigInfo() {
+        if ( contigInfoPerThread.get() == null ) {
+            // initialize for this thread
+            logger.debug("Creating thread-local caching sequence dictionary for thread " + Thread.currentThread().getName());
+            contigInfoPerThread.set(new CachingSequenceDictionary(SINGLE_MASTER_SEQUENCE_DICTIONARY));
+        }
+
+        assert contigInfoPerThread.get() != null;
+
+        return contigInfoPerThread.get();
+    }
 
     /**
      * A wrapper class that provides efficient last used caching for the global
-     * SAMSequenceDictionary underlying all of the GATK engine capabilities
+     * SAMSequenceDictionary underlying all of the GATK engine capabilities.
      */
-    // todo -- enable when CoFoJa developers identify the problem (likely thread unsafe invariants)
-//    @Invariant({
-//            "dict != null",
-//            "dict.size() > 0",
-//            "lastSSR == null || dict.getSequence(lastContig).getSequenceIndex() == lastIndex",
-//            "lastSSR == null || dict.getSequence(lastContig).getSequenceName() == lastContig",
-//            "lastSSR == null || dict.getSequence(lastContig) == lastSSR"})
-    private final class MasterSequenceDictionary {
+    private final class CachingSequenceDictionary {
         final private SAMSequenceDictionary dict;
 
         // cache
@@ -76,7 +92,7 @@ private final class MasterSequenceDictionary {
         int lastIndex = -1;
 
         @Requires({"dict != null", "dict.size() > 0"})
-        public MasterSequenceDictionary(SAMSequenceDictionary dict) {
+        public CachingSequenceDictionary(SAMSequenceDictionary dict) {
             this.dict = dict;
         }
 
@@ -111,7 +127,6 @@ public synchronized final SAMSequenceRecord getSequence(final int index) {
                 return lastSSR;
             else
                 return updateCache(null, index);
-
         }
 
         @Requires("contig != null")
@@ -125,12 +140,12 @@ public synchronized final int getSequenceIndex(final String contig) {
         }
 
         @Requires({"contig != null", "lastContig != null"})
-        private final synchronized boolean isCached(final String contig) {
+        private synchronized boolean isCached(final String contig) {
             return lastContig.equals(contig);
         }
 
         @Requires({"lastIndex != -1", "index >= 0"})
-        private final synchronized boolean isCached(final int index) {
+        private synchronized boolean isCached(final int index) {
             return lastIndex == index;
         }
 
@@ -144,7 +159,7 @@ private final synchronized boolean isCached(final int index) {
          */
         @Requires("contig != null || index >= 0")
         @Ensures("result != null")
-        private final synchronized SAMSequenceRecord updateCache(final String contig, int index ) {
+        private synchronized SAMSequenceRecord updateCache(final String contig, int index ) {
             SAMSequenceRecord rec = contig == null ? dict.getSequence(index) : dict.getSequence(contig);
             if ( rec == null ) {
                 throw new ReviewedStingException("BUG: requested unknown contig=" + contig + " index=" + index);
@@ -174,7 +189,7 @@ public GenomeLocParser(SAMSequenceDictionary seqDict) {
             throw new UserException.CommandLineException("Failed to load reference dictionary");
         }
 
-        contigInfo = new MasterSequenceDictionary(seqDict);
+        SINGLE_MASTER_SEQUENCE_DICTIONARY = seqDict;
         logger.debug(String.format("Prepared reference sequence contig dictionary"));
         for (SAMSequenceRecord contig : seqDict.getSequences()) {
             logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength()));
@@ -188,11 +203,11 @@ public GenomeLocParser(SAMSequenceDictionary seqDict) {
      * @return True if the contig is valid.  False otherwise.
      */
     public final boolean contigIsInDictionary(String contig) {
-        return contig != null && contigInfo.hasContig(contig);
+        return contig != null && getContigInfo().hasContig(contig);
     }
 
     public final boolean indexIsInDictionary(final int index) {
-        return index >= 0 && contigInfo.hasContig(index);
+        return index >= 0 && getContigInfo().hasContig(index);
     }
 
 
@@ -208,7 +223,7 @@ public final boolean indexIsInDictionary(final int index) {
     public final SAMSequenceRecord getContigInfo(final String contig) {
         if ( contig == null || ! contigIsInDictionary(contig) )
             throw new UserException.MalformedGenomeLoc(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig));
-        return contigInfo.getSequence(contig);
+        return getContigInfo().getSequence(contig);
     }
 
     /**
@@ -226,9 +241,9 @@ public final int getContigIndex(final String contig) {
 
     @Requires("contig != null")
     protected int getContigIndexWithoutException(final String contig) {
-        if ( contig == null || ! contigInfo.hasContig(contig) )
+        if ( contig == null || ! getContigInfo().hasContig(contig) )
             return -1;
-        return contigInfo.getSequenceIndex(contig);
+        return getContigInfo().getSequenceIndex(contig);
     }
 
     /**
@@ -236,7 +251,7 @@ protected int getContigIndexWithoutException(final String contig) {
      * @return
      */
     public final SAMSequenceDictionary getContigs() {
-        return contigInfo.dict;
+        return getContigInfo().dict;
     }
 
     // --------------------------------------------------------------------------------------------------------------
@@ -291,7 +306,7 @@ public GenomeLoc createGenomeLoc(String contig, int index, final int start, fina
      * @return true if it's valid, false otherwise.  If exceptOnError, then throws a UserException if invalid
      */
     private boolean validateGenomeLoc(String contig, int contigIndex, int start, int stop, boolean mustBeOnReference, boolean exceptOnError) {
-        if ( ! contigInfo.hasContig(contig) )
+        if ( ! getContigInfo().hasContig(contig) )
             return vglHelper(exceptOnError, String.format("Unknown contig %s", contig));
 
         if (stop < start)
@@ -300,8 +315,8 @@ private boolean validateGenomeLoc(String contig, int contigIndex, int start, int
         if (contigIndex < 0)
             return vglHelper(exceptOnError, String.format("The contig index %d is less than 0", contigIndex));
 
-        if (contigIndex >= contigInfo.getNSequences())
-            return vglHelper(exceptOnError, String.format("The contig index %d is greater than the stored sequence count (%d)", contigIndex, contigInfo.getNSequences()));
+        if (contigIndex >= getContigInfo().getNSequences())
+            return vglHelper(exceptOnError, String.format("The contig index %d is greater than the stored sequence count (%d)", contigIndex, getContigInfo().getNSequences()));
 
         if ( mustBeOnReference ) {
             if (start < 1)
@@ -310,7 +325,7 @@ private boolean validateGenomeLoc(String contig, int contigIndex, int start, int
             if (stop < 1)
                 return vglHelper(exceptOnError, String.format("The stop position %d is less than 1", stop));
 
-            int contigSize = contigInfo.getSequence(contigIndex).getSequenceLength();
+            int contigSize = getContigInfo().getSequence(contigIndex).getSequenceLength();
             if (start > contigSize || stop > contigSize)
                 return vglHelper(exceptOnError, String.format("The genome loc coordinates %d-%d exceed the contig size (%d)", start, stop, contigSize));
         }
@@ -558,7 +573,7 @@ public GenomeLoc incPos(GenomeLoc loc, int by) {
     @Requires("contigName != null")
     @Ensures("result != null")
     public GenomeLoc createOverEntireContig(String contigName) {
-        SAMSequenceRecord contig = contigInfo.getSequence(contigName);
+        SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
         return createGenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength(), true);
     }
 
@@ -573,7 +588,7 @@ public GenomeLoc createGenomeLocAtStart(GenomeLoc loc, int maxBasePairs) {
         if (GenomeLoc.isUnmapped(loc))
             return null;
         String contigName = loc.getContig();
-        SAMSequenceRecord contig = contigInfo.getSequence(contigName);
+        SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
         int contigIndex = contig.getSequenceIndex();
 
         int start = loc.getStart() - maxBasePairs;
@@ -598,7 +613,7 @@ public GenomeLoc createPaddedGenomeLoc(final GenomeLoc loc, final int padding) {
         if (GenomeLoc.isUnmapped(loc))
             return loc;
         final String contigName = loc.getContig();
-        final SAMSequenceRecord contig = contigInfo.getSequence(contigName);
+        final SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
         final int contigIndex = contig.getSequenceIndex();
         final int contigLength = contig.getSequenceLength();
 
@@ -619,7 +634,7 @@ public GenomeLoc createGenomeLocAtStop(GenomeLoc loc, int maxBasePairs) {
         if (GenomeLoc.isUnmapped(loc))
             return null;
         String contigName = loc.getContig();
-        SAMSequenceRecord contig = contigInfo.getSequence(contigName);
+        SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
         int contigIndex = contig.getSequenceIndex();
         int contigLength = contig.getSequenceLength();
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
index 829e75682f..fcde1f419d 100755
--- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
@@ -27,6 +27,7 @@
 import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
 import net.sf.samtools.Cigar;
+import org.apache.commons.lang.ArrayUtils;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
@@ -40,6 +41,7 @@ public class Haplotype {
     protected final double[] quals;
     private GenomeLoc genomeLocation = null;
     private HashMap<String, double[]> readLikelihoodsPerSample = null;
+    private HashMap<String, int[]> readCountsPerSample = null;
     private HashMap<Integer, VariantContext> eventMap = null;
     private boolean isRef = false;
     private Cigar cigar;
@@ -83,18 +85,27 @@ public int hashCode() {
         return Arrays.hashCode(bases);
     }
 
-    public void addReadLikelihoods( final String sample, final double[] readLikelihoods ) {
+    public void addReadLikelihoods( final String sample, final double[] readLikelihoods, final int[] readCounts ) {
         if( readLikelihoodsPerSample == null ) {
             readLikelihoodsPerSample = new HashMap<String, double[]>();
         }
         readLikelihoodsPerSample.put(sample, readLikelihoods);
+        if( readCountsPerSample == null ) {
+            readCountsPerSample = new HashMap<String, int[]>();
+        }
+        readCountsPerSample.put(sample, readCounts);
     }
 
     @Ensures({"result != null"})
     public double[] getReadLikelihoods( final String sample ) {
         return readLikelihoodsPerSample.get(sample);
     }
-    
+
+    @Ensures({"result != null"})
+    public int[] getReadCounts( final String sample ) {
+        return readCountsPerSample.get(sample);
+    }
+
     public Set<String> getSampleKeySet() {
         return readLikelihoodsPerSample.keySet();
     }
@@ -160,52 +171,24 @@ public void setCigar( final Cigar cigar ) {
     }
 
     @Requires({"refInsertLocation >= 0"})
-    public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation ) {
-
-        if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
+    public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation ) {
+        // refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
         final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
-        if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
-            return new Haplotype(bases.clone());
-        }
-        byte[] newHaplotype;
-
-        try {
-            if( refAllele.length() == altAllele.length() ) { // SNP or MNP
-                newHaplotype = bases.clone();
-                for( int iii = 0; iii < altAllele.length(); iii++ ) {
-                    newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
-                }
-            } else if( refAllele.length() < altAllele.length() ) { // insertion                
-                final int altAlleleLength = altAllele.length();
-                newHaplotype = new byte[bases.length + altAlleleLength];
-                for( int iii = 0; iii < bases.length; iii++ ) {
-                    newHaplotype[iii] = bases[iii];
-                }
-                for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) {
-                    newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
-                }
-                for( int iii = 0; iii < altAlleleLength; iii++ ) {
-                    newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
-                }
-            } else { // deletion
-                final int shift = refAllele.length() - altAllele.length();
-                newHaplotype = new byte[bases.length - shift];
-                for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) {
-                    newHaplotype[iii] = bases[iii];
-                }
-                for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) {
-                    newHaplotype[iii] = bases[iii+shift];
-                }
-            }
-        } catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
-            return new Haplotype(bases.clone());
+        if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= bases.length ) { // desired change falls inside a deletion or the ref allele reaches the end of the haplotype, so don't create a new haplotype
+            return null;
         }
-
-        return new Haplotype(newHaplotype);
+        byte[] newHaplotypeBases = new byte[]{};
+        newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, 0, haplotypeInsertLocation)); // bases before the variant
+        newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
+        newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, haplotypeInsertLocation + refAllele.length(), bases.length)); // bases after the variant
+        return new Haplotype(newHaplotypeBases);
     }
 
-    public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
-                                                               final int haplotypeSize, final int numPrefBases) {
+    public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(final List<Allele> alleleList,
+                                                                               final int startPos,
+                                                                               final ReferenceContext ref,
+                                                                               final int haplotypeSize,
+                                                                               final int numPrefBases) {
 
         LinkedHashMap<Allele,Haplotype> haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
 
@@ -216,7 +199,6 @@ public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<
                 refAllele = a;
                 break;
             }
-
         }
 
         if (refAllele == null)
@@ -224,19 +206,12 @@ public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<
 
         byte[] refBases = ref.getBases();
 
+        final int startIdxInReference = 1 + startPos - numPrefBases - ref.getWindow().getStart();
+        final String basesBeforeVariant = new String(Arrays.copyOfRange(refBases, startIdxInReference, startIdxInReference + numPrefBases));
 
-        int startIdxInReference = (int)(1+startPos-numPrefBases-ref.getWindow().getStart());
-        //int numPrefBases = (int)(vc.getStart()-ref.getWindow().getStart()+1); // indel vc starts one before event
-
-
-        byte[] basesBeforeVariant = Arrays.copyOfRange(refBases,startIdxInReference,startIdxInReference+numPrefBases);
-        int startAfter = startIdxInReference+numPrefBases+ refAllele.getBases().length;
         // protect against long events that overrun available reference context
-        if (startAfter > refBases.length)
-            startAfter = refBases.length;
-        byte[] basesAfterVariant = Arrays.copyOfRange(refBases,
-                startAfter, refBases.length);
-
+        final int startAfter = Math.min(startIdxInReference + numPrefBases + refAllele.getBases().length - 1, refBases.length);
+        final String basesAfterVariant = new String(Arrays.copyOfRange(refBases, startAfter, refBases.length));
 
         // Create location for all haplotypes
         final int startLoc = ref.getWindow().getStart() + startIdxInReference;
@@ -244,16 +219,14 @@ public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<
 
         final GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc);
 
-
         for (final Allele a : alleleList) {
 
-            byte[] alleleBases = a.getBases();
+            final byte[] alleleBases = a.getBases();
             // use string concatenation
-            String haplotypeString = new String(basesBeforeVariant) + new String(alleleBases) + new String(basesAfterVariant);
+            String haplotypeString = basesBeforeVariant + new String(Arrays.copyOfRange(alleleBases, 1, alleleBases.length)) + basesAfterVariant;
             haplotypeString = haplotypeString.substring(0,haplotypeSize);
 
-           haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus));
-
+            haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus));
         }
 
         return haplotypeMap;
diff --git a/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java b/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java
index ecb381e3ff..d1bc755831 100755
--- a/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java
+++ b/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java
@@ -199,9 +199,9 @@ public static double calculatePUniformApproximation(int n, int m, long u) {
         else if ( z > n ) { return 0.0; }
         else {
             if ( z > ((double) n) /2 ) {
-                return 1.0-1/((double)Arithmetic.factorial(n))*uniformSumHelper(z, (int) Math.floor(z), n, 0);
+                return 1.0-1/(Arithmetic.factorial(n))*uniformSumHelper(z, (int) Math.floor(z), n, 0);
             } else {
-                return 1/((double)Arithmetic.factorial(n))*uniformSumHelper(z, (int) Math.floor(z), n, 0);
+                return 1/(Arithmetic.factorial(n))*uniformSumHelper(z, (int) Math.floor(z), n, 0);
             }
         }
     }
diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
index e024253c96..7d1561fc5c 100644
--- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@@ -210,6 +210,13 @@ public static long sum(int[] x) {
         return total;
     }
 
+    public static int sum(byte[] x) {
+        int total = 0;
+        for (byte v : x)
+            total += (int)v;
+        return total;
+    }
+
     /**
      * Calculates the log10 cumulative sum of an array with log10 probabilities
      *
@@ -760,7 +767,7 @@ public static byte average(byte[] vals) {
         for (byte v : vals) {
             sum += v;
         }
-        return (byte) Math.floor(sum / vals.length);
+        return (byte) (sum / vals.length);
     }
 
     public static double averageDouble(List<Double> vals) {
@@ -1037,7 +1044,6 @@ public static Comparable orderStatisticSearch(int orderStat, List<Comparable> li
         // the list is assumed *not* to be sorted
 
         final Comparable x = list.get(orderStat);
-        ListIterator iterator = list.listIterator();
         ArrayList lessThanX = new ArrayList();
         ArrayList equalToX = new ArrayList();
         ArrayList greaterThanX = new ArrayList();
@@ -1596,7 +1602,17 @@ public static <E extends Number> Double dotProduct(E[] v1, E[] v2) {
             result += v1[k].doubleValue() * v2[k].doubleValue();
 
         return result;
+    }
+
+    public static double dotProduct(double[] v1, double[] v2) {
+        if (v1.length != v2.length)
+            throw new UserException("BUG: vectors v1, v2 of different size in vectorSum()");
 
+        double result = 0.0;
+        for (int k = 0; k < v1.length; k++)
+            result += v1[k] * v2[k];
+
+        return result;
     }
 
     public static double[] vectorLog10(double v1[]) {
diff --git a/public/java/src/org/broadinstitute/sting/utils/PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/PairHMM.java
index 9fcb97a4dd..15f7a78695 100644
--- a/public/java/src/org/broadinstitute/sting/utils/PairHMM.java
+++ b/public/java/src/org/broadinstitute/sting/utils/PairHMM.java
@@ -36,7 +36,7 @@
  */
 
 public class PairHMM {
-    private static final int MAX_CACHED_QUAL = (int)Byte.MAX_VALUE;
+    private static final Byte MAX_CACHED_QUAL = Byte.MAX_VALUE;
     private static final byte DEFAULT_GOP = (byte) 45;
     private static final byte DEFAULT_GCP = (byte) 10;
     private static final double BANDING_TOLERANCE = 22.0;
diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
index 4acc0e2c3a..1242e5b001 100755
--- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
@@ -69,9 +69,14 @@ static public double qualToProbLog10(byte qual) {
      * @return a probability (0.0 - 1.0)
      */
     static private double qualToErrorProbRaw(int qual) {
+        return qualToErrorProb((double) qual);
+    }
+
+    public static double qualToErrorProb(final double qual) {
         return Math.pow(10.0, ((double) qual)/-10.0);
     }
 
+
     static public double qualToErrorProb(byte qual) {
         return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
     }
diff --git a/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java b/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java
index 92d73a5ced..500598765a 100755
--- a/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java
+++ b/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java
@@ -377,59 +377,6 @@ private double wk(int k) {
         return w_open+(k-1)*w_extend; // gap
     }
 
-    private void print(int[][] s) {
-        for ( int i = 0 ; i < s.length ; i++) {
-            for ( int j = 0; j < s[i].length ; j++ ) {
-                System.out.printf(" %4d",s[i][j]);
-            }
-            System.out.println();
-        }
-    }
-
-    private void print(double[][] s) {
-        for ( int i = 0 ; i < s.length ; i++) {
-            for ( int j = 0; j < s[i].length ; j++ ) {
-                System.out.printf(" %4g",s[i][j]);
-            }
-            System.out.println();
-        }
-    }
-
-    private void print(int[][] s, String a, String b) {
-
-        System.out.print("        ");
-        for ( int j = 1 ; j < s[0].length ; j++) System.out.printf(" %4c",b.charAt(j-1)) ;
-        System.out.println();
-
-        for ( int i = 0 ; i < s.length ; i++) {
-            if ( i > 0 ) System.out.print(a.charAt(i-1));
-            else System.out.print(' ');
-            System.out.print("  ");
-            for ( int j = 0; j < s[i].length ; j++ ) {
-                System.out.printf(" %4d",s[i][j]);
-            }
-            System.out.println();
-        }
-    }
-
-
-    private void print(double[][] s, String a, String b) {
-
-        System.out.print("");
-        for ( int j = 1 ; j < s[0].length ; j++) System.out.printf(" %4c",b.charAt(j-1)) ;
-        System.out.println();
-
-        for ( int i = 0 ; i < s.length ; i++) {
-            if ( i > 0 ) System.out.print(a.charAt(i-1));
-            else System.out.print(' ');
-            System.out.print("  ");
-            for ( int j = 0; j < s[i].length ; j++ ) {
-                System.out.printf(" %2.1f",s[i][j]);
-            }
-            System.out.println();
-        }
-    }
-
     private void print(double[] s, byte[] a, byte[] b) {
            int n = a.length+1;
            int m = b.length+1;
@@ -713,11 +660,11 @@ static Boolean extractSingleBooleanArg(String argname, Map<String,List<String>>
             System.err.println("Only one "+argname+" argument is allowed");
             System.exit(1);
         }
-        if ( l.get(0).equals("true") ) return new Boolean(true);
-        if ( l.get(0).equals("false") ) return new Boolean(false);
+        if ( l.get(0).equals("true") ) return Boolean.valueOf(true);
+        if ( l.get(0).equals("false") ) return Boolean.valueOf(false);
         System.err.println("Can not parse value provided for "+argname+" argument ("+l.get(0)+"); true/false are allowed");
         System.exit(1);
-        return null;
+        return Boolean.valueOf(false); // This value isn't used because it is preceded by System.exit(1)
     }
 
 /* ##############################################
diff --git a/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java b/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java
index d7a3906928..9e10fd670b 100755
--- a/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java
@@ -329,7 +329,7 @@ private static List<SAMSequenceRecord> getSequencesOfName(Set<String> commonCont
      */
     private static class CompareSequenceRecordsByIndex implements Comparator<SAMSequenceRecord> {
         public int compare(SAMSequenceRecord x, SAMSequenceRecord y) {
-            return new Integer(x.getSequenceIndex()).compareTo(y.getSequenceIndex());
+            return Integer.valueOf(x.getSequenceIndex()).compareTo(y.getSequenceIndex());
         }
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java
index 17c145dbf5..a5b5eca6aa 100755
--- a/public/java/src/org/broadinstitute/sting/utils/Utils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java
@@ -32,7 +32,6 @@
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
-import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.text.TextFormattingUtils;
 
 import java.net.InetAddress;
@@ -564,7 +563,6 @@ public static <T extends Comparable<T>, V> String sortedString(Map<T,V> c) {
         List<T> t = new ArrayList<T>(c.keySet());
         Collections.sort(t);
 
-        List<V> l = new ArrayList<V>();
         List<String> pairs = new ArrayList<String>();
         for ( T k : t ) {
             pairs.add(k + "=" + c.get(k));
@@ -732,6 +730,40 @@ public static <T> int nCombinations(final List<List<T>> options) {
         }
     }
 
+    /**
+     * Make all permutations (ordered selections) of size n drawn from objects
+     *
+     * if objects = [A, B, C] and withReplacement = true
+     * if N = 1 => [[A], [B], [C]]
+     * if N = 2 => [[A, A], [B, A], [C, A], [A, B], [B, B], [C, B], [A, C], [B, C], [C, C]]
+     *
+     * @param objects the elements to draw from
+     * @param n the number of elements in each permutation; n <= 0 yields an empty list
+     * @param <T> the element type
+     * @param withReplacement if false, the resulting permutations will only contain unique objects from objects
+     * @return a new list holding one list per permutation
+     */
+    public static <T> List<List<T>> makePermutations(final List<T> objects, final int n, final boolean withReplacement) {
+        final List<List<T>> combinations = new ArrayList<List<T>>();
+
+        if ( n <= 0 )
+            ; // nothing to build: fall through and return the empty list
+        else if ( n == 1 ) {
+            for ( final T o : objects )
+                combinations.add(Collections.singletonList(o));
+        } else {
+            final List<List<T>> sub = makePermutations(objects, n - 1, withReplacement); // all permutations of size n - 1
+            for ( List<T> subI : sub ) {
+                for ( final T a : objects ) {
+                    if ( withReplacement || ! subI.contains(a) ) // without replacement, skip elements already present in subI
+                        combinations.add(Utils.cons(a, subI));
+                }
+            }
+        }
+
+        return combinations;
+    }
+
     /**
      * Convenience function that formats the novelty rate as a %.2f string
      *
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java
index 18276f932f..8e660350ff 100644
--- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java
@@ -90,11 +90,11 @@ public int compareTo( final ActiveRegion other ) {
     public void removeAll( final ArrayList<GATKSAMRecord> readsToRemove ) { reads.removeAll( readsToRemove ); }
 
     public boolean equalExceptReads(final ActiveRegion other) {
-        if ( ! activeRegionLoc.equals(other.activeRegionLoc) ) return false;
+        if ( activeRegionLoc.compareTo(other.activeRegionLoc) != 0 ) return false;
         if ( isActive != other.isActive ) return false;
         if ( genomeLocParser != other.genomeLocParser ) return false;
         if ( extension != other.extension ) return false;
-        if ( ! extendedLoc.equals(other.extendedLoc) ) return false;
+        if ( extendedLoc.compareTo(other.extendedLoc) != 0 ) return false;
         return true;
     }
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java
index 4333e471e2..73f3cc487d 100644
--- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java
@@ -31,7 +31,6 @@
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 
@@ -46,15 +45,14 @@ public class ActivityProfile {
     final GenomeLocParser parser;
     final boolean presetRegions;
     GenomeLoc regionStartLoc = null;
-    final List<Double> isActiveList;
-    private GenomeLoc lastLoc = null;
-    private static final int FILTER_SIZE = 65;
-    private static final Double[] GaussianKernel;
+    final List<ActivityProfileResult> isActiveList;
+    private static final int FILTER_SIZE = 80;
+    private static final double[] GaussianKernel;
 
     static {
-        GaussianKernel = new Double[2*FILTER_SIZE + 1];
+        GaussianKernel = new double[2*FILTER_SIZE + 1];
         for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) {
-            GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 40.0, iii);
+            GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii);
         }
     }
 
@@ -63,22 +61,20 @@ public class ActivityProfile {
     // todo -- add unit tests
     // TODO -- own preset regions
     public ActivityProfile(final GenomeLocParser parser, final boolean presetRegions) {
-        this(parser, presetRegions, new ArrayList<Double>(), null);
+        this(parser, presetRegions, new ArrayList<ActivityProfileResult>(), null);
     }
 
-    protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List<Double> isActiveList, final GenomeLoc regionStartLoc) {
+    protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List<ActivityProfileResult> isActiveList, final GenomeLoc regionStartLoc) {
         this.parser = parser;
         this.presetRegions = presetRegions;
         this.isActiveList = isActiveList;
         this.regionStartLoc = regionStartLoc;
     }
 
-    public void add(final GenomeLoc loc, final double score) {
+    public void add(final GenomeLoc loc, final ActivityProfileResult result) {
         if ( loc.size() != 1 )
             throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" );
-        if ( lastLoc != null && loc.getStart() != lastLoc.getStop() + 1 )
-            throw new ReviewedStingException("Bad add call to ActivityProfile: lastLoc added " + lastLoc + " and next is " + loc);
-        isActiveList.add(score);
+        isActiveList.add(result);
         if( regionStartLoc == null ) {
             regionStartLoc = loc;
         }
@@ -93,22 +89,43 @@ public int size() {
      * @return a new ActivityProfile that's the band-pass filtered version of this profile
      */
     public ActivityProfile bandPassFilter() {
-        final Double[] activeProbArray = isActiveList.toArray(new Double[isActiveList.size()]);
-        final Double[] filteredProbArray = new Double[activeProbArray.length];
+        final double[] activeProbArray = new double[isActiveList.size()]; // primitive copy of each entry's isActiveProb
+        int iii = 0;
+        for( final ActivityProfileResult result : isActiveList ) {
+            activeProbArray[iii++] = result.isActiveProb;
+        }
+        iii = 0;
+        for( final ActivityProfileResult result : isActiveList ) {
+            if( result.resultState.equals(ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups
+                final int numHQClips = result.resultValue.intValue();
+                for( int jjj = Math.max(0, iii - numHQClips); jjj < Math.min(activeProbArray.length, iii+numHQClips); jjj++ ) { // raise entries in [iii - numHQClips, iii + numHQClips) to at least this entry's probability
+                    activeProbArray[jjj] = Math.max(activeProbArray[jjj], activeProbArray[iii]);
+                }
+            }
+            iii++;
+        }
+        final double[] filteredProbArray = presetRegions ? activeProbArray : new double[activeProbArray.length]; // preset regions skip the filter below, so keep their probabilities instead of zeroing them
         if( !presetRegions ) {
-            for( int iii = 0; iii < activeProbArray.length; iii++ ) {
-                final Double[] kernel = (Double[]) ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii));
-                final Double[] activeProbSubArray = (Double[]) ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1));
+            for( iii = 0; iii < activeProbArray.length; iii++ ) {
+                final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii)); // kernel slice clipped to the part that overlaps the profile
+                final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1)); // window of up to FILTER_SIZE entries on each side of iii
                 filteredProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel);
             }
         }
-        return new ActivityProfile(parser, presetRegions, Arrays.asList(filteredProbArray), regionStartLoc);
+        iii = 0;
+        for( final double prob : filteredProbArray ) { // write the results back onto the ActivityProfileResult objects held by isActiveList
+            final ActivityProfileResult result = isActiveList.get(iii++);
+            result.isActiveProb = prob;
+            result.resultState = ActivityProfileResult.ActivityProfileResultState.NONE; // the soft-clip information was consumed above
+            result.resultValue = null;
+        }
+        return new ActivityProfile(parser, presetRegions, isActiveList, regionStartLoc); // the returned profile shares this profile's isActiveList
     }
 
     /**
      * Partition this profile into active regions
-     * @param activeRegionExtension
-     * @return
+     * @param activeRegionExtension the amount of margin overlap in the active region
+     * @return the list of active regions
      */
     public List<ActiveRegion> createActiveRegions( final int activeRegionExtension, final int maxRegionSize ) {
         final double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author
@@ -119,14 +136,14 @@ public List<ActiveRegion> createActiveRegions( final int activeRegionExtension,
             return Collections.emptyList();
         } else if( isActiveList.size() == 1 ) {
             // there's a single element, it's either active or inactive
-            boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD;
+            boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD;
             returnList.addAll(createActiveRegion(isActive, 0, 0, activeRegionExtension, maxRegionSize));
         } else {
             // there are 2+ elements, divide these up into regions
-            boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD;
+            boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD;
             int curStart = 0;
             for(int iii = 1; iii < isActiveList.size(); iii++ ) {
-                final boolean thisStatus = isActiveList.get(iii) > ACTIVE_PROB_THRESHOLD;
+                final boolean thisStatus = isActiveList.get(iii).isActiveProb > ACTIVE_PROB_THRESHOLD;
                 if( isActive != thisStatus ) {
                     returnList.addAll(createActiveRegion(isActive, curStart, iii - 1, activeRegionExtension, maxRegionSize));
                     isActive = thisStatus;
@@ -143,7 +160,7 @@ public List<ActiveRegion> createActiveRegions( final int activeRegionExtension,
      * @param isActive should the region be active?
      * @param curStart offset (0-based) from the start of this region
      * @param curEnd offset (0-based) from the start of this region
-     * @param activeRegionExtension
+     * @param activeRegionExtension the amount of margin overlap in the active region
      * @return a fully initialized ActiveRegion with the above properties
      */
     private final List<ActiveRegion> createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) {
@@ -160,8 +177,8 @@ private final List<ActiveRegion> createActiveRegion(final boolean isActive, fina
         int cutPoint = -1;
 
         final int size = curEnd - curStart + 1;
-        for( int iii = curStart + (int)(size*0.25); iii < curEnd - (int)(size*0.25); iii++ ) {
-            if( isActiveList.get(iii) < minProb ) { minProb = isActiveList.get(iii); cutPoint = iii; }
+        for( int iii = curStart + (int)(size*0.15); iii < curEnd - (int)(size*0.15); iii++ ) {
+            if( isActiveList.get(iii).isActiveProb < minProb ) { minProb = isActiveList.get(iii).isActiveProb; cutPoint = iii; }
         }
         final List<ActiveRegion> leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
         final List<ActiveRegion> rightList = createActiveRegion(isActive, cutPoint+1, curEnd, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java
new file mode 100644
index 0000000000..8dc29aa3c9
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java
@@ -0,0 +1,31 @@
+package org.broadinstitute.sting.utils.activeregion;
+
+/**
+ * A single activity-profile entry: an active probability plus an optional result state and value.
+ * User: rpoplin
+ * Date: 7/27/12
+ */
+
+public class ActivityProfileResult {
+    public double isActiveProb; // active probability carried by this entry
+    public ActivityProfileResultState resultState; // kind of extra information attached to this entry
+    public Number resultValue; // numeric payload that accompanies resultState; may be null
+
+    public enum ActivityProfileResultState {
+        NONE, // state assigned by the single-argument constructor
+        HIGH_QUALITY_SOFT_CLIPS // ActivityProfile.bandPassFilter() reads resultValue as an int count for this state
+    }
+
+    public ActivityProfileResult( final double isActiveProb ) { // entry with no extra information
+        this.isActiveProb = isActiveProb;
+        this.resultState = ActivityProfileResultState.NONE;
+        this.resultValue = null;
+    }
+
+    public ActivityProfileResult( final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) { // entry with an explicit state and value
+        this.isActiveProb = isActiveProb;
+        this.resultState = resultState;
+        this.resultValue = resultValue;
+    }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
index 1864522942..439a0d8edd 100644
--- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
+++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
@@ -551,7 +551,7 @@ private final Pair<Integer,Integer> calculateQueryRange(SAMRecord read) {
             switch (elt.getOperator()) {
                 case N:  return null; // cannot handle these
                 case H : case P : case D: break; // ignore pads, hard clips, and deletions
-                case I : case S: case M:
+                case I : case S: case M: case EQ: case X:
                     int prev = readI;
                     readI += elt.getLength();
                     if ( includeClippedBases || elt.getOperator() != CigarOperator.S) {
diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
index f24bbb6366..9a2cb68db6 100644
--- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
+++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
@@ -168,6 +168,28 @@ public PluginManager(Class<PluginType> pluginType, String pluginCategory, String
             String pluginName = getName(pluginClass);
             pluginsByName.put(pluginName, pluginClass);
         }
+
+        // sort the plugins so the order of elements is deterministic
+        sortPlugins(plugins);
+        sortPlugins(interfaces);
+    }
+
+    /**
+     * Sorts, in place, the list of plugins according to getName() on each element
+     *
+     * @param unsortedPlugins the list of plugin classes to sort; it is reordered in place
+     */
+    private final void sortPlugins(final List<Class<? extends PluginType>> unsortedPlugins) {
+        Collections.sort(unsortedPlugins, new ComparePluginsByName());
+    }
+
+    private final class ComparePluginsByName implements Comparator<Class<? extends PluginType>> { // orders plugin classes by the string getName() returns for each
+        @Override
+        public int compare(final Class<? extends PluginType> aClass, final Class<? extends PluginType> aClass1) {
+            String pluginName1 = getName(aClass);
+            String pluginName2 = getName(aClass1);
+            return pluginName1.compareTo(pluginName2); // String.compareTo ordering of the two names
+        }
     }
 
     /**
diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java
index a4383c3aed..08c50b982f 100644
--- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java
+++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java
@@ -4,7 +4,7 @@
 import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
-import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
+import org.broadinstitute.sting.utils.recalibration.EventType;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@@ -538,7 +538,7 @@ else if (cigarElement.getOperator() == CigarOperator.DELETION)
         return 0;
     }
 
-    private class CigarShift {
+    private static class CigarShift {
         private Cigar cigar;
         private int shiftFromStart;
         private int shiftFromEnd;
diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java
index ba9267222a..6392ce4ce1 100644
--- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java
+++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java
@@ -3,7 +3,7 @@
 import com.google.java.contract.Requires;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
-import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
+import org.broadinstitute.sting.utils.recalibration.EventType;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java
index 0b96546105..c221b8fba1 100644
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java
@@ -51,7 +51,11 @@
  */
 public final class BCF2Codec implements FeatureCodec<VariantContext> {
     final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
-    private final static boolean FORBID_SYMBOLICS = false;
+
+    private final static int ALLOWED_MAJOR_VERSION = 2;
+    private final static int MIN_MINOR_VERSION = 1;
+
+    private BCFVersion bcfVersion = null;
 
     private VCFHeader header = null;
 
@@ -108,18 +112,23 @@ public Feature decodeLoc( final PositionalBufferedStream inputStream ) {
 
     @Override
     public VariantContext decode( final PositionalBufferedStream inputStream ) {
-        recordNo++;
-        final VariantContextBuilder builder = new VariantContextBuilder();
-
-        final int sitesBlockSize = decoder.readBlockSize(inputStream);
-        final int genotypeBlockSize = decoder.readBlockSize(inputStream);
-        decoder.readNextBlock(sitesBlockSize, inputStream);
-        decodeSiteLoc(builder);
-        final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder);
-
-        decoder.readNextBlock(genotypeBlockSize, inputStream);
-        createLazyGenotypesDecoder(info, builder);
-        return builder.fullyDecoded(true).make();
+        try {
+            recordNo++;
+            final VariantContextBuilder builder = new VariantContextBuilder();
+
+            final int sitesBlockSize = decoder.readBlockSize(inputStream);
+            final int genotypeBlockSize = decoder.readBlockSize(inputStream);
+
+            decoder.readNextBlock(sitesBlockSize, inputStream);
+            decodeSiteLoc(builder);
+            final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder);
+
+            decoder.readNextBlock(genotypeBlockSize, inputStream);
+            createLazyGenotypesDecoder(info, builder);
+            return builder.fullyDecoded(true).make();
+        } catch ( IOException e ) {
+            throw new UserException.CouldNotReadInputFile("Failed to read BCF file", e);
+        }
     }
 
     @Override
@@ -131,10 +140,18 @@ public Class<VariantContext> getFeatureType() {
     public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream ) {
         try {
             // note that this reads the magic as well, and so does double duty
-            if ( ! BCF2Utils.startsWithBCF2Magic(inputStream) )
-                error("Input stream does not begin with BCF2 magic");
+            bcfVersion = BCFVersion.readBCFVersion(inputStream);
+            if ( bcfVersion == null )
+                error("Input stream does not contain a BCF encoded file; BCF magic header info not found");
+
+            if ( bcfVersion.getMajorVersion() != ALLOWED_MAJOR_VERSION )
+                error("BCF2Codec can only process BCF2 files, this file has major version " + bcfVersion.getMajorVersion());
+            if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION )
+                error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion());
 
-            final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream);
+            logger.debug("Parsing data stream with BCF version " + bcfVersion);
+
+            final int headerSizeInBytes = BCF2Type.INT32.read(inputStream);
 
             if ( headerSizeInBytes <= 0 || headerSizeInBytes > MAX_HEADER_SIZE) // no bigger than 8 MB
                 error("BCF2 header has invalid length: " + headerSizeInBytes + " must be >= 0 and < "+ MAX_HEADER_SIZE);
@@ -154,7 +171,6 @@ public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream
 
         // create the config offsets
         if ( ! header.getContigLines().isEmpty() ) {
-            logger.info("Found contig lines in BCF2 file, using those");
             contigNames.clear();
             for ( final VCFContigHeaderLine contig : header.getContigLines()) {
                 if ( contig.getID() == null || contig.getID().equals("") )
@@ -162,7 +178,7 @@ public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream
                 contigNames.add(contig.getID());
             }
         } else {
-            throw new UserException.MalformedBCF2("Didn't find any contig lines in BCF2 file header");
+            error("Didn't find any contig lines in BCF2 file header");
         }
 
         // create the string dictionary
@@ -187,7 +203,8 @@ public boolean canDecode( final String path ) {
         FileInputStream fis = null;
         try {
             fis = new FileInputStream(path);
-            return BCF2Utils.startsWithBCF2Magic(fis);
+            final BCFVersion version = BCFVersion.readBCFVersion(fis);
+            return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION;
         } catch ( FileNotFoundException e ) {
             return false;
         } catch ( IOException e ) {
@@ -196,7 +213,7 @@ public boolean canDecode( final String path ) {
             try {
                 if ( fis != null ) fis.close();
             } catch ( IOException e ) {
-                ; // do nothing
+                // do nothing
             }
         }
     }
@@ -221,7 +238,7 @@ public boolean canDecode( final String path ) {
      * @return
      */
     @Requires({"builder != null"})
-    private final void decodeSiteLoc(final VariantContextBuilder builder) {
+    private final void decodeSiteLoc(final VariantContextBuilder builder) throws IOException {
         final int contigOffset = decoder.decodeInt(BCF2Type.INT32);
         final String contig = lookupContigName(contigOffset);
         builder.chr(contig);
@@ -240,7 +257,7 @@ private final void decodeSiteLoc(final VariantContextBuilder builder) {
      */
     @Requires({"builder != null", "decoder != null"})
     @Ensures({"result != null", "result.isValid()"})
-    private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) {
+    private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) throws IOException {
         final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT);
         if ( qual != null ) {
             builder.log10PError(((Double)qual) / -10.0);
@@ -254,7 +271,7 @@ private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextB
         final int nSamples = nFormatSamples & 0x00FFFFF;
 
         if ( header.getNGenotypeSamples() != nSamples )
-            throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
+            error("GATK currently doesn't support reading BCF2 files with " +
                     "different numbers of samples per record.  Saw " + header.getNGenotypeSamples() +
                     " samples in header but have a record with " + nSamples + " samples");
 
@@ -296,7 +313,7 @@ public String toString() {
      * Decode the id field in this BCF2 file and store it in the builder
      * @param builder
      */
-    private void decodeID( final VariantContextBuilder builder ) {
+    private void decodeID( final VariantContextBuilder builder ) throws IOException {
         final String id = (String)decoder.decodeTypedValue();
 
         if ( id == null )
@@ -305,27 +322,6 @@ private void decodeID( final VariantContextBuilder builder ) {
             builder.id(id);
     }
 
-    /**
-     * Annoying routine that deals with allele clipping from the BCF2 encoding to the standard
-     * GATK encoding.
-     *
-     * @param position
-     * @param ref
-     * @param unclippedAlleles
-     * @return
-     */
-    @Requires({"position > 0", "ref != null && ref.length() > 0", "! unclippedAlleles.isEmpty()"})
-    @Ensures("result.size() == unclippedAlleles.size()")
-    protected List<Allele> clipAllelesIfNecessary(final int position,
-                                                  final String ref,
-                                                  final List<Allele> unclippedAlleles) {
-        // the last argument of 1 allows us to safely ignore the end, because we are
-        // ultimately going to use the end in the record itself
-        final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(position, ref, unclippedAlleles, 1);
-        if ( clipped.getError() != null ) error(clipped.getError());
-        return clipped.getClippedAlleles();
-    }
-
     /**
      * Decode the alleles from this BCF2 file and put the results in builder
      * @param builder
@@ -334,7 +330,7 @@ protected List<Allele> clipAllelesIfNecessary(final int position,
      * @return the alleles
      */
     @Requires("nAlleles > 0")
-    private List<Allele> decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) {
+    private List<Allele> decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) throws IOException {
         // TODO -- probably need inline decoder for efficiency here (no sense in going bytes -> string -> vector -> bytes
         List<Allele> alleles = new ArrayList<Allele>(nAlleles);
         String ref = null;
@@ -347,17 +343,12 @@ private List<Allele> decodeAlleles( final VariantContextBuilder builder, final i
             if ( isRef ) ref = alleleBases;
 
             alleles.add(allele);
-
-            if ( FORBID_SYMBOLICS && allele.isSymbolic() )
-                throw new ReviewedStingException("LIMITATION: GATK BCF2 codec does not yet support symbolic alleles");
         }
         assert ref != null;
 
-        alleles = clipAllelesIfNecessary(pos, ref, alleles);
         builder.alleles(alleles);
 
         assert ref.length() > 0;
-        builder.referenceBaseForIndel(ref.getBytes()[0]);
 
         return alleles;
     }
@@ -366,7 +357,7 @@ private List<Allele> decodeAlleles( final VariantContextBuilder builder, final i
      * Decode the filter field of this BCF2 file and store the result in the builder
      * @param builder
      */
-    private void decodeFilter( final VariantContextBuilder builder ) {
+    private void decodeFilter( final VariantContextBuilder builder ) throws IOException {
         final Object value = decoder.decodeTypedValue();
 
         if ( value == null )
@@ -393,7 +384,7 @@ private void decodeFilter( final VariantContextBuilder builder ) {
      * @param numInfoFields
      */
     @Requires("numInfoFields >= 0")
-    private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) {
+    private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) throws IOException {
         if ( numInfoFields == 0 )
             // fast path, don't bother doing any work if there are no fields
             return;
@@ -429,9 +420,8 @@ private void createLazyGenotypesDecoder( final SitesInfoForDecoding siteInfo,
             final LazyGenotypesContext.LazyParser lazyParser =
                     new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields, builders);
 
-            LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser,
-                    new LazyData(siteInfo.nFormatFields, decoder.getRecordBytes()),
-                    header.getNGenotypeSamples());
+            final LazyData lazyData = new LazyData(header, siteInfo.nFormatFields, decoder.getRecordBytes());
+            final LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, lazyData, header.getNGenotypeSamples());
 
             // did we resort the sample names?  If so, we need to load the genotype data
             if ( !header.samplesWereAlreadySorted() )
@@ -442,18 +432,20 @@ private void createLazyGenotypesDecoder( final SitesInfoForDecoding siteInfo,
     }
 
     public static class LazyData {
+        final public VCFHeader header;
         final public int nGenotypeFields;
         final public byte[] bytes;
 
         @Requires({"nGenotypeFields > 0", "bytes != null"})
-        public LazyData(final int nGenotypeFields, final byte[] bytes) {
+        public LazyData(final VCFHeader header, final int nGenotypeFields, final byte[] bytes) {
+            this.header = header;
             this.nGenotypeFields = nGenotypeFields;
             this.bytes = bytes;
         }
     }
 
     @Ensures("result != null")
-    private final String getDictionaryString() {
+    private final String getDictionaryString() throws IOException {
         return getDictionaryString((Integer) decoder.decodeTypedValue());
     }
 
@@ -501,7 +493,7 @@ protected BCF2GenotypeFieldDecoders.Decoder getGenotypeFieldDecoder(final String
         return gtFieldDecoders.getDecoder(field);
     }
 
-    private final void error(final String message) throws RuntimeException {
-        throw new UserException.MalformedBCF2(String.format("At record %d with position %d:", recordNo, pos, message));
+    private void error(final String message) throws RuntimeException {
+        throw new UserException.MalformedBCF2(String.format("%s, at record %d with position %d:", message, recordNo, pos));
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java
index a13be21c55..05ba2aa1f8 100644
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java
@@ -82,7 +82,7 @@ public void readNextBlock(final int blockSizeInBytes, final InputStream stream)
     public void skipNextBlock(final int blockSizeInBytes, final InputStream stream) {
         try {
             final int bytesRead = (int)stream.skip(blockSizeInBytes);
-            validateReadBytes(bytesRead, blockSizeInBytes);
+            validateReadBytes(bytesRead, 1, blockSizeInBytes);
         } catch ( IOException e ) {
             throw new UserException.CouldNotReadInputFile("I/O error while reading BCF2 file", e);
         }
@@ -129,18 +129,18 @@ public void setRecordBytes(final byte[] recordBytes) {
     //
     // ----------------------------------------------------------------------
 
-    public final Object decodeTypedValue() {
+    public final Object decodeTypedValue() throws IOException {
         final byte typeDescriptor = readTypeDescriptor();
         return decodeTypedValue(typeDescriptor);
     }
 
-    public final Object decodeTypedValue(final byte typeDescriptor) {
+    public final Object decodeTypedValue(final byte typeDescriptor) throws IOException {
         final int size = decodeNumberOfElements(typeDescriptor);
         return decodeTypedValue(typeDescriptor, size);
     }
 
     @Requires("size >= 0")
-    public final Object decodeTypedValue(final byte typeDescriptor, final int size) {
+    public final Object decodeTypedValue(final byte typeDescriptor, final int size) throws IOException {
         if ( size == 0 ) {
             // missing value => null in java
             return null;
@@ -162,7 +162,7 @@ public final Object decodeTypedValue(final byte typeDescriptor, final int size)
         }
     }
 
-    public final Object decodeSingleValue(final BCF2Type type) {
+    public final Object decodeSingleValue(final BCF2Type type) throws IOException {
         // TODO -- decodeTypedValue should integrate this routine
         final int value = decodeInt(type);
 
@@ -202,7 +202,7 @@ private final Object decodeLiteralString(final int size) {
                 return null;
             else {
                 final String s = new String(bytes, 0, goodLength);
-                return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s;
+                return BCF2Utils.isCollapsedString(s) ? BCF2Utils.explodeStringList(s) : s;
             }
         } catch ( IOException e ) {
             throw new ReviewedStingException("readByte failure", e);
@@ -210,7 +210,7 @@ private final Object decodeLiteralString(final int size) {
     }
 
     @Ensures("result >= 0")
-    public final int decodeNumberOfElements(final byte typeDescriptor) {
+    public final int decodeNumberOfElements(final byte typeDescriptor) throws IOException {
         if ( BCF2Utils.sizeIsOverflow(typeDescriptor) )
             // -1 ensures we explode immediately with a bad size if the result is missing
             return decodeInt(readTypeDescriptor(), -1);
@@ -228,15 +228,15 @@ public final int decodeNumberOfElements(final byte typeDescriptor) {
      * @return
      */
     @Requires("BCF2Utils.decodeSize(typeDescriptor) == 1")
-    public final int decodeInt(final byte typeDescriptor, final int missingValue) {
+    public final int decodeInt(final byte typeDescriptor, final int missingValue) throws IOException {
         final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
         final int i = decodeInt(type);
         return i == type.getMissingBytes() ? missingValue : i;
     }
 
     @Requires("type != null")
-    public final int decodeInt(final BCF2Type type) {
-        return BCF2Utils.readInt(type.getSizeInBytes(), recordStream);
+    public final int decodeInt(final BCF2Type type) throws IOException {
+        return type.read(recordStream);
     }
 
     /**
@@ -258,7 +258,7 @@ public final int decodeInt(final BCF2Type type) {
      * @return see description
      */
     @Requires({"type != null", "type.isIntegerType()", "size >= 0"})
-    public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) {
+    public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) throws IOException {
         if ( size == 0 ) {
             return null;
         } else {
@@ -290,12 +290,12 @@ public final int[] decodeIntArray(final int size, final BCF2Type type, int[] may
         }
     }
 
-    public final int[] decodeIntArray(final byte typeDescriptor, final int size) {
+    public final int[] decodeIntArray(final byte typeDescriptor, final int size) throws IOException {
         final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
         return decodeIntArray(size, type, null);
     }
 
-    public final double rawFloatToFloat(final int rawFloat) {
+    private double rawFloatToFloat(final int rawFloat) {
         return (double)Float.intBitsToFloat(rawFloat);
     }
 
@@ -311,22 +311,42 @@ public final double rawFloatToFloat(final int rawFloat) {
      * @param inputStream
      * @return
      */
-    public final int readBlockSize(final InputStream inputStream) {
-        return BCF2Utils.readInt(4, inputStream);
+    public final int readBlockSize(final InputStream inputStream) throws IOException {
+        return BCF2Type.INT32.read(inputStream);
     }
 
     /**
+     * Read all bytes for a BCF record block into a byte[], and return it
      *
-     * @param inputStream
-     * @return
+     * Calls read() on the stream repeatedly until the buffer is full or the stream reports EOF
+     *
+     * @param blockSizeInBytes number of bytes to read
+     * @param inputStream the stream to read from
+     * @return a non-null byte[] containing exactly blockSizeInBytes bytes from the inputStream
      */
-    private final static byte[] readRecordBytes(final int blockSizeInBytes, final InputStream inputStream) {
+    @Requires({"blockSizeInBytes >= 0", "inputStream != null"})
+    @Ensures("result != null")
+    private static byte[] readRecordBytes(final int blockSizeInBytes, final InputStream inputStream) {
         assert blockSizeInBytes >= 0;
 
         final byte[] record = new byte[blockSizeInBytes];
         try {
-            final int bytesRead = inputStream.read(record);
-            validateReadBytes(bytesRead, blockSizeInBytes);
+            int bytesRead = 0;
+            int nReadAttempts = 0; // keep track of how many times we've read
+            // because we might not read enough bytes from the file in a single go, do it in a loop until we get EOF
+            while ( bytesRead < blockSizeInBytes ) {
+                final int read1 = inputStream.read(record, bytesRead, blockSizeInBytes - bytesRead);
+                nReadAttempts++; // count every read() call so the warning and the error message report the real number
+                if ( read1 == -1 ) // EOF before the block was complete: bytesRead < blockSizeInBytes here, so this throws
+                    validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes);
+                else
+                    bytesRead += read1;
+            }
+
+            if ( nReadAttempts > 1 ) // TODO -- remove me
+                logger.warn("Required multiple read attempts to actually get the entire BCF2 block, unexpected behavior");
+
+            validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes);
         } catch ( IOException e ) {
             throw new UserException.CouldNotReadInputFile("I/O error while reading BCF2 file", e);
         }
@@ -334,18 +354,24 @@ private final static byte[] readRecordBytes(final int blockSizeInBytes, final In
         return record;
     }
 
-    private final static void validateReadBytes(final int actuallyRead, final int expected) {
+    /**
+     * Make sure we read the right number of bytes, or throw an error
+     *
+     * @param actuallyRead the number of bytes actually read
+     * @param nReadAttempts the number of read attempts reported by the caller; only used in the error message
+     * @param expected the number of bytes expected; a MalformedBCF2 error is thrown if actuallyRead is smaller
+     */
+    private static void validateReadBytes(final int actuallyRead, final int nReadAttempts, final int expected) {
         assert expected >= 0;
 
         if ( actuallyRead < expected ) {
-            throw new UserException.MalformedBCF2(String.format("Failed to read next complete record: %s",
-                    actuallyRead == -1 ?
-                            "premature end of input stream" :
-                            String.format("expected %d bytes but read only %d", expected, actuallyRead)));
+            throw new UserException.MalformedBCF2(
+                    String.format("Failed to read next complete record: expected %d bytes but read only %d after %d iterations",
+                            expected, actuallyRead, nReadAttempts));
         }
     }
 
-    public final byte readTypeDescriptor() {
+    public final byte readTypeDescriptor() throws IOException {
         return BCF2Utils.readByte(recordStream);
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java
index 0dadc49f9e..e4ae96262a 100644
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java
@@ -32,6 +32,7 @@
 import org.broadinstitute.sting.utils.variantcontext.Allele;
 import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
 
+import java.io.IOException;
 import java.util.*;
 
 /**
@@ -105,12 +106,12 @@ public void decode(final List<Allele> siteAlleles,
                            final BCF2Decoder decoder,
                            final byte typeDescriptor,
                            final int numElements,
-                           final GenotypeBuilder[] gbs);
+                           final GenotypeBuilder[] gbs) throws IOException;
     }
 
     private class GTDecoder implements Decoder {
         @Override
-        public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
+        public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
             if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && numElements == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
                 fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs);
             else {
@@ -135,7 +136,7 @@ public void decode(final List<Allele> siteAlleles, final String field, final BCF
         private final void fastBiallelicDiploidDecode(final List<Allele> siteAlleles,
                                                       final BCF2Decoder decoder,
                                                       final byte typeDescriptor,
-                                                      final GenotypeBuilder[] gbs) {
+                                                      final GenotypeBuilder[] gbs) throws IOException {
             final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
 
             final int nPossibleGenotypes = 3 * 3;
@@ -177,7 +178,7 @@ private final void generalDecode(final List<Allele> siteAlleles,
                                          final int ploidy,
                                          final BCF2Decoder decoder,
                                          final byte typeDescriptor,
-                                         final GenotypeBuilder[] gbs) {
+                                         final GenotypeBuilder[] gbs) throws IOException {
             final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
 
             // a single cache for the encoded genotypes, since we don't actually need this vector
@@ -216,7 +217,7 @@ private final Allele getAlleleFromEncoded(final List<Allele> siteAlleles, final
 
     private class DPDecoder implements Decoder {
         @Override
-        public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
+        public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
             for ( final GenotypeBuilder gb : gbs ) {
                 // the -1 is for missing
                 gb.DP(decoder.decodeInt(typeDescriptor, -1));
@@ -226,7 +227,7 @@ public void decode(final List<Allele> siteAlleles, final String field, final BCF
 
     private class GQDecoder implements Decoder {
         @Override
-        public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
+        public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
             for ( final GenotypeBuilder gb : gbs ) {
                 // the -1 is for missing
                 gb.GQ(decoder.decodeInt(typeDescriptor, -1));
@@ -236,7 +237,7 @@ public void decode(final List<Allele> siteAlleles, final String field, final BCF
 
     private class ADDecoder implements Decoder {
         @Override
-        public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
+        public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
             for ( final GenotypeBuilder gb : gbs ) {
                 gb.AD(decoder.decodeIntArray(typeDescriptor, numElements));
             }
@@ -245,7 +246,7 @@ public void decode(final List<Allele> siteAlleles, final String field, final BCF
 
     private class PLDecoder implements Decoder {
         @Override
-        public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
+        public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
             for ( final GenotypeBuilder gb : gbs ) {
                 gb.PL(decoder.decodeIntArray(typeDescriptor, numElements));
             }
@@ -254,7 +255,7 @@ public void decode(final List<Allele> siteAlleles, final String field, final BCF
 
     private class GenericDecoder implements Decoder {
         @Override
-        public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
+        public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
             for ( final GenotypeBuilder gb : gbs ) {
                 Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
                 if ( value != null ) { // don't add missing values
@@ -273,7 +274,7 @@ public void decode(final List<Allele> siteAlleles, final String field, final BCF
 
     private class FTDecoder implements Decoder {
         @Override
-        public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
+        public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
             for ( final GenotypeBuilder gb : gbs ) {
                 Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
                 assert value == null || value instanceof String;
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java
index 35fb2e97a3..46b1fa6c13 100644
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java
@@ -26,9 +26,11 @@
 
 import com.google.java.contract.Requires;
 import org.apache.log4j.Logger;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.variantcontext.*;
 
+import java.io.IOException;
 import java.util.*;
 
 /**
@@ -37,7 +39,7 @@
  * @author Mark DePristo
  * @since 5/12
  */
-class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
+public class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
     final protected static Logger logger = Logger.getLogger(BCF2LazyGenotypesDecoder.class);
 
     // the essential information for us to use to decode the genotypes data
@@ -61,36 +63,40 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
 
     @Override
     public LazyGenotypesContext.LazyData parse(final Object data) {
-        if ( logger.isDebugEnabled() )
-            logger.debug("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
+//        if ( logger.isDebugEnabled() )
+//            logger.debug("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
+        try {
 
-        // load our byte[] data into the decoder
-        final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes);
+            // load our byte[] data into the decoder
+            final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes);
 
-        for ( int i = 0; i < nSamples; i++ )
-            builders[i].reset(true);
+            for ( int i = 0; i < nSamples; i++ )
+                builders[i].reset(true);
 
-        for ( int i = 0; i < nFields; i++ ) {
-            // get the field name
-            final int offset = (Integer) decoder.decodeTypedValue();
-            final String field = codec.getDictionaryString(offset);
+            for ( int i = 0; i < nFields; i++ ) {
+                // get the field name
+                final int offset = (Integer) decoder.decodeTypedValue();
+                final String field = codec.getDictionaryString(offset);
 
-            // the type of each element
-            final byte typeDescriptor = decoder.readTypeDescriptor();
-            final int numElements = decoder.decodeNumberOfElements(typeDescriptor);
-            final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field);
-            try {
-                fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders);
-            } catch ( ClassCastException e ) {
-                throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
-                        + " inconsistent with the value observed in the decoded value");
+                // the type of each element
+                final byte typeDescriptor = decoder.readTypeDescriptor();
+                final int numElements = decoder.decodeNumberOfElements(typeDescriptor);
+                final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field);
+                try {
+                    fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders);
+                } catch ( ClassCastException e ) {
+                    throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
+                            + " inconsistent with the value observed in the decoded value");
+                }
             }
-        }
 
-        final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
-        for ( final GenotypeBuilder gb : builders )
-            genotypes.add(gb.make());
+            final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
+            for ( final GenotypeBuilder gb : builders )
+                genotypes.add(gb.make());
 
-        return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
+            return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
+        } catch ( IOException e ) {
+            throw new ReviewedStingException("Unexpected IOException parsing already read genotypes data block", e);
+        }
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java
index 49f375b250..1162a5d1e9 100644
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java
@@ -26,6 +26,9 @@
 
 import com.google.java.contract.Requires;
 
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.util.EnumSet;
 
 /**
@@ -35,12 +38,86 @@
  * @since 05/12
  */
 public enum BCF2Type {
-    MISSING(0, 0, 0x00),
-    INT8 (1, 1, 0xFFFFFF80,        -127,        127), // todo -- confirm range
-    INT16(2, 2, 0xFFFF8000,      -32767,      32767),
-    INT32(3, 4, 0x80000000, -2147483647, 2147483647),
-    FLOAT(5, 4, 0x7F800001),
-    CHAR (7, 1, 0x00000000);
+    // the actual values themselves
+    MISSING(0, 0, 0x00) {
+        @Override public int read(final InputStream in) throws IOException {
+            throw new IllegalArgumentException("Cannot read MISSING type");
+        }
+        @Override public void write(final int value, final OutputStream out) throws IOException {
+            throw new IllegalArgumentException("Cannot write MISSING type");
+        }
+    },
+
+    INT8 (1, 1, 0xFFFFFF80,        -127,        127) {
+        @Override
+        public int read(final InputStream in) throws IOException {
+            return BCF2Utils.readByte(in);
+        }
+
+        @Override
+        public void write(final int value, final OutputStream out) throws IOException {
+            out.write(0xFF & value);   // TODO -- do we need this operation?
+        }
+    },
+
+    INT16(2, 2, 0xFFFF8000,      -32767,      32767) {
+        @Override
+        public int read(final InputStream in) throws IOException {
+            final int b2 = BCF2Utils.readByte(in) & 0xFF;
+            final int b1 = BCF2Utils.readByte(in) & 0xFF;
+            return (short)((b1 << 8) | b2);
+        }
+
+        @Override
+        public void write(final int value, final OutputStream out) throws IOException {
+            // TODO -- optimization -- should we put this in a local buffer?
+            out.write((0x00FF & value));
+            out.write((0xFF00 & value) >> 8);
+        }
+    },
+
+    INT32(3, 4, 0x80000000, -2147483647, 2147483647) {
+        @Override
+        public int read(final InputStream in) throws IOException {
+            final int b4 = BCF2Utils.readByte(in) & 0xFF;
+            final int b3 = BCF2Utils.readByte(in) & 0xFF;
+            final int b2 = BCF2Utils.readByte(in) & 0xFF;
+            final int b1 = BCF2Utils.readByte(in) & 0xFF;
+            return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4);
+        }
+
+        @Override
+        public void write(final int value, final OutputStream out) throws IOException {
+            out.write((0x000000FF & value));
+            out.write((0x0000FF00 & value) >> 8);
+            out.write((0x00FF0000 & value) >> 16);
+            out.write((0xFF000000 & value) >> 24);
+        }
+    },
+
+    FLOAT(5, 4, 0x7F800001) {
+        @Override
+        public int read(final InputStream in) throws IOException {
+            return INT32.read(in);
+        }
+
+        @Override
+        public void write(final int value, final OutputStream out) throws IOException {
+            INT32.write(value, out);
+        }
+    },
+
+    CHAR (7, 1, 0x00000000) {
+        @Override
+        public int read(final InputStream in) throws IOException {
+            return INT8.read(in);
+        }
+
+        @Override
+        public void write(final int value, final OutputStream out) throws IOException {
+            INT8.write(value, out);
+        }
+    };
 
     private final int id;
     private final Object missingJavaValue;
@@ -48,10 +125,6 @@ public enum BCF2Type {
     private final int sizeInBytes;
     private final long minValue, maxValue;
 
-    BCF2Type(final int id) {
-        this(id, -1, 0, 0, 0);
-    }
-
     BCF2Type(final int id, final int sizeInBytes, final int missingBytes) {
         this(id, sizeInBytes, missingBytes, 0, 0);
     }
@@ -121,4 +194,25 @@ public boolean isMissingType() {
     public boolean isIntegerType() {
         return INTEGERS.contains(this);
     }
+
+    /**
+     * Read a value of this BCF2 type from the in stream as an int [32 bit] collection of bits
+     *
+     * For intX and char values this is just the int / byte value of the underlying data represented as a 32 bit int
+     * For a char the result must be converted to a char by (char)(byte)(0xFF & value)
+     * For floats it's necessary to subsequently convert this value to a float via Float.intBitsToFloat()
+     *
+     * @param in the stream to read from; this default throws "Not implemented" and each enum constant overrides it
+     * @return the bits read from the stream, as an int
+     * @throws IOException if reading from the underlying stream fails
+     */
+    @Requires("in != null")
+    public int read(final InputStream in) throws IOException {
+        throw new IllegalArgumentException("Not implemented");
+    }
+
+    @Requires("out != null")
+    public void write(final int value, final OutputStream out) throws IOException {
+        throw new IllegalArgumentException("Not implemented");
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java
index 43e933948d..2ac916db1b 100644
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java
@@ -41,8 +41,6 @@
  * @since 5/12
  */
 public final class BCF2Utils {
-    public static final byte[] MAGIC_HEADER_LINE = "BCF\2".getBytes();
-
     public static final int MAX_ALLELES_IN_GENOTYPES = 127;
 
     public static final int OVERFLOW_ELEMENT_MARKER = 15;
@@ -75,74 +73,54 @@ private BCF2Utils() {}
      */
     @Requires("header != null")
     @Ensures({"result != null", "new HashSet(result).size() == result.size()"})
-    public final static ArrayList<String> makeDictionary(final VCFHeader header) {
+    public static ArrayList<String> makeDictionary(final VCFHeader header) {
         final Set<String> seen = new HashSet<String>();
         final ArrayList<String> dict = new ArrayList<String>();
 
-        boolean sawPASS = false;
+        // special case the special PASS field which doesn't show up in the FILTER field definitions
+        seen.add(VCFConstants.PASSES_FILTERS_v4);
+        dict.add(VCFConstants.PASSES_FILTERS_v4);
+
         // set up the strings dictionary
         for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
             if ( line instanceof VCFIDHeaderLine && ! (line instanceof VCFContigHeaderLine) ) {
                 final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
                 if ( ! seen.contains(idLine.getID())) {
-                    sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4);
                     dict.add(idLine.getID());
                     seen.add(idLine.getID());
                 }
             }
         }
 
-
-        if ( ! sawPASS )
-            dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
-
         return dict;
     }
 
-    @Requires({"nElements >= 0", "type != null"})
-    public final static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
-        int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER);
-        byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
-        return typeByte;
+    @Requires({"nElements >= 0", "nElements <= OVERFLOW_ELEMENT_MARKER", "type != null"})
+    public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
+        return (byte)((0x0F & nElements) << 4 | (type.getID() & 0x0F));
     }
 
     @Ensures("result >= 0")
-    public final static int decodeSize(final byte typeDescriptor) {
+    public static int decodeSize(final byte typeDescriptor) {
         return (0xF0 & typeDescriptor) >> 4;
     }
 
     @Ensures("result >= 0")
-    public final static int decodeTypeID(final byte typeDescriptor) {
+    public static int decodeTypeID(final byte typeDescriptor) {
         return typeDescriptor & 0x0F;
     }
 
     @Ensures("result != null")
-    public final static BCF2Type decodeType(final byte typeDescriptor) {
+    public static BCF2Type decodeType(final byte typeDescriptor) {
         return ID_TO_ENUM[decodeTypeID(typeDescriptor)];
     }
 
-    public final static boolean sizeIsOverflow(final byte typeDescriptor) {
+    public static boolean sizeIsOverflow(final byte typeDescriptor) {
         return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
     }
 
-    @Requires("nElements >= 0")
-    public final static boolean willOverflow(final long nElements) {
-        return nElements > MAX_INLINE_ELEMENTS;
-    }
-
-    public final static boolean startsWithBCF2Magic(final InputStream stream) throws IOException {
-        final byte[] magicBytes = new byte[BCF2Utils.MAGIC_HEADER_LINE.length];
-        stream.read(magicBytes);
-        return Arrays.equals(magicBytes, BCF2Utils.MAGIC_HEADER_LINE);
-    }
-
-    public final static byte readByte(final InputStream stream) {
-        // TODO -- shouldn't be capturing error here
-        try {
-            return (byte)(stream.read() & 0xFF);
-        } catch ( IOException e ) {
-            throw new ReviewedStingException("readByte failure", e);
-        }
+    public static byte readByte(final InputStream stream) throws IOException {
+        return (byte)(stream.read() & 0xFF);
     }
 
     /**
@@ -153,17 +131,21 @@ public final static byte readByte(final InputStream stream) {
      * @param strings size > 1 list of strings
      * @return
      */
-    @Requires({"strings != null", "strings.size() > 1"})
+    @Requires({"strings != null"})
     @Ensures("result != null")
-    public static final String collapseStringList(final List<String> strings) {
-        final StringBuilder b = new StringBuilder();
-        for ( final String s : strings ) {
-            if ( s != null ) {
-                assert s.indexOf(",") == -1; // no commas in individual strings
-                b.append(",").append(s);
+    public static String collapseStringList(final List<String> strings) {
+        if ( strings.isEmpty() ) return "";
+        else if ( strings.size() == 1 ) return strings.get(0);
+        else {
+            final StringBuilder b = new StringBuilder();
+            for ( final String s : strings ) {
+                if ( s != null ) {
+                    assert s.indexOf(",") == -1; // no commas in individual strings
+                    b.append(",").append(s);
+                }
             }
+            return b.toString();
         }
-        return b.toString();
     }
 
     /**
@@ -177,15 +159,15 @@ public static final String collapseStringList(final List<String> strings) {
      */
     @Requires({"collapsed != null", "isCollapsedString(collapsed)"})
     @Ensures("result != null")
-    public static final List<String> exploreStringList(final String collapsed) {
+    public static List<String> explodeStringList(final String collapsed) {
         assert isCollapsedString(collapsed);
         final String[] exploded = collapsed.substring(1).split(",");
         return Arrays.asList(exploded);
     }
 
     @Requires("s != null")
-    public static final boolean isCollapsedString(final String s) {
-        return s.charAt(0) == ',';
+    public static boolean isCollapsedString(final String s) {
+        return s.length() > 0 && s.charAt(0) == ',';
     }
 
     /**
@@ -226,7 +208,7 @@ public static final File shadowBCF(final File vcfFile) {
     }
 
     @Ensures("result.isIntegerType()")
-    public final static BCF2Type determineIntegerType(final int value) {
+    public static BCF2Type determineIntegerType(final int value) {
         for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
             if ( potentialType.withinRange(value) )
                 return potentialType;
@@ -236,19 +218,19 @@ public final static BCF2Type determineIntegerType(final int value) {
     }
 
     @Ensures("result.isIntegerType()")
-    public final static BCF2Type determineIntegerType(final int[] values) {
-        // literally a copy of the code below, but there's no general way to unify lists and arrays in java
-        BCF2Type maxType = BCF2Type.INT8;
-        for ( final int value : values ) {
-            final BCF2Type type1 = determineIntegerType(value);
-            switch ( type1 ) {
-                case INT8: break;
-                case INT16: maxType = BCF2Type.INT16; break;
-                case INT32: return BCF2Type.INT32; // fast path for largest possible value
-                default: throw new ReviewedStingException("Unexpected integer type " + type1 );
-            }
+    public static BCF2Type determineIntegerType(final int[] values) {
+        // find the min and max values in the array
+        int max = 0, min = 0;
+        for ( final int v : values ) {
+            if ( v > max ) max = v;
+            if ( v < min ) min = v;
         }
-        return maxType;
+
+        final BCF2Type maxType = determineIntegerType(max);
+        final BCF2Type minType = determineIntegerType(min);
+
+        // INT8 < INT16 < INT32 so this returns the larger of the two
+        return maxType.compareTo(minType) >= 0 ? maxType : minType;
     }
 
     /**
@@ -262,7 +244,7 @@ public final static BCF2Type determineIntegerType(final int[] values) {
      */
     @Requires({"t1.isIntegerType()","t2.isIntegerType()"})
     @Ensures("result.isIntegerType()")
-    public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
+    public static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
         switch ( t1 ) {
             case INT8: return t2;
             case INT16: return t2 == BCF2Type.INT32 ? t2 : t1;
@@ -272,7 +254,7 @@ public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2
     }
 
     @Ensures("result.isIntegerType()")
-    public final static BCF2Type determineIntegerType(final List<Integer> values) {
+    public static BCF2Type determineIntegerType(final List<Integer> values) {
         BCF2Type maxType = BCF2Type.INT8;
         for ( final int value : values ) {
             final BCF2Type type1 = determineIntegerType(value);
@@ -297,56 +279,54 @@ public final static BCF2Type determineIntegerType(final List<Integer> values) {
      * @param o
      * @return
      */
-    public final static List<Object> toList(final Object o) {
+    public static List<Object> toList(final Object o) {
         if ( o == null ) return Collections.emptyList();
         else if ( o instanceof List ) return (List<Object>)o;
         else return Collections.singletonList(o);
     }
 
+    /**
+     * Are the elements and their order in the output and input headers consistent so that
+     * we can write out the raw genotypes block without decoding and recoding it?
+     *
+     * If the order of INFO, FILTER, or contig elements in the output header is different than
+     * in the input header we must decode the blocks using the input header and then recode them
+     * based on the new output order.
+     *
+     * If they are consistent, we can simply pass through the raw genotypes block bytes, which is
+     * a *huge* performance win for large blocks.
+     *
+     * Many common operations on BCF2 files (merging them for -nt, selecting a subset of records, etc)
+     * don't modify the ordering of the header fields and so can safely pass through the genotypes
+     * undecoded.  Some operations -- those that add filters or info fields -- can change the ordering
+     * of the header fields and so produce invalid BCF2 files if the genotypes aren't decoded
+     */
+    public static boolean headerLinesAreOrderedConsistently(final VCFHeader outputHeader, final VCFHeader genotypesBlockHeader) {
+        // first, we have to have the same samples in the same order
+        if ( ! nullAsEmpty(outputHeader.getSampleNamesInOrder()).equals(nullAsEmpty(genotypesBlockHeader.getSampleNamesInOrder())) )
+            return false;
+
+        final Iterator<? extends VCFIDHeaderLine> outputLinesIt = outputHeader.getIDHeaderLines().iterator();
+        final Iterator<? extends VCFIDHeaderLine> inputLinesIt = genotypesBlockHeader.getIDHeaderLines().iterator();
 
-    @Requires({"stream != null", "bytesForEachInt > 0"})
-    public final static int readInt(int bytesForEachInt, final InputStream stream) {
-        switch ( bytesForEachInt ) {
-            case 1: {
-                return (byte)(readByte(stream));
-            } case 2: {
-                final int b2 = readByte(stream) & 0xFF;
-                final int b1 = readByte(stream) & 0xFF;
-                return (short)((b1 << 8) | b2);
-            } case 4: {
-                final int b4 = readByte(stream) & 0xFF;
-                final int b3 = readByte(stream) & 0xFF;
-                final int b2 = readByte(stream) & 0xFF;
-                final int b1 = readByte(stream) & 0xFF;
-                return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4);
-            } default: throw new ReviewedStingException("Unexpected size during decoding");
+        while ( inputLinesIt.hasNext() ) {
+            if ( ! outputLinesIt.hasNext() ) // missing lines in output
+                return false;
+
+            final VCFIDHeaderLine outputLine = outputLinesIt.next();
+            final VCFIDHeaderLine inputLine = inputLinesIt.next();
+
+            if ( ! inputLine.getClass().equals(outputLine.getClass()) || ! inputLine.getID().equals(outputLine.getID()) )
+                return false;
         }
+
+        return true;
     }
 
-    public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
-        switch ( type.getSizeInBytes() ) {
-            case 1:
-                encodeStream.write(0xFF & value);
-                break;
-            case 2:
-                encodeStream.write((0x00FF & value));
-                encodeStream.write((0xFF00 & value) >> 8);
-                break;
-            case 4:
-                encodeStream.write((0x000000FF & value));
-                encodeStream.write((0x0000FF00 & value) >> 8);
-                encodeStream.write((0x00FF0000 & value) >> 16);
-                encodeStream.write((0xFF000000 & value) >> 24);
-                break;
-            default:
-                throw new ReviewedStingException("BUG: unexpected type size " + type);
-        }
-// general case for reference
-//        for ( int i = type.getSizeInBytes() - 1; i >= 0; i-- ) {
-//            final int shift = i * 8;
-//            int mask = 0xFF << shift;
-//            int byteValue = (mask & value) >> shift;
-//            encodeStream.write(byteValue);
-//        }
+    private static <T> List<T> nullAsEmpty(List<T> l) {
+        if ( l == null )
+            return Collections.emptyList();
+        else
+            return l;
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java
new file mode 100644
index 0000000000..742da7c0c9
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java
@@ -0,0 +1,80 @@
+package org.broadinstitute.sting.utils.codecs.bcf2;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Arrays;
+
+/**
+ * Simple holder for BCF version information
+ *
+ * User: depristo
+ * Date: 8/2/12
+ * Time: 2:16 PM
+ */
+public class BCFVersion {
+    /**
+     * BCF2 begins with the MAGIC info BCF_M_m where M is the major version (currently 2)
+     * and m is the minor version, currently 1
+     */
+    public static final byte[] MAGIC_HEADER_START = "BCF".getBytes();
+
+    final int majorVersion;
+    final int minorVersion;
+
+    public BCFVersion(int majorVersion, int minorVersion) {
+        this.majorVersion = majorVersion;
+        this.minorVersion = minorVersion;
+    }
+
+    /**
+     * @return the major version number of this BCF file
+     */
+    public int getMajorVersion() {
+        return majorVersion;
+    }
+
+    /**
+     * @return the minor version number of this BCF file
+     */
+    public int getMinorVersion() {
+        return minorVersion;
+    }
+
+    /**
+     * Return a new BCFVersion object describing the major and minor version of the BCF file in stream
+     *
+     * Note that stream must be at the very start of the file.  Up to MAGIC_HEADER_START.length bytes are consumed even when null is returned.
+     *
+     * @param stream the stream to read the magic header and the two version bytes from
+     * @return a BCFVersion object, or null if stream doesn't start with the BCF magic bytes
+     * @throws IOException if reading from stream fails
+     */
+    public static BCFVersion readBCFVersion(final InputStream stream) throws IOException {
+        final byte[] magicBytes = new byte[MAGIC_HEADER_START.length];
+        int filled = 0, n;
+        // a single read() may legally return fewer bytes than requested, so loop until full or EOF
+        while ( filled < magicBytes.length && (n = stream.read(magicBytes, filled, magicBytes.length - filled)) > 0 )
+            filled += n;
+        if ( filled < magicBytes.length || ! Arrays.equals(magicBytes, MAGIC_HEADER_START) )
+            return null;
+        // we're a BCF file: the next two bytes are the major and then the minor version
+        return new BCFVersion( stream.read(), stream.read() );
+    }
+
+    /**
+     * Write out the BCF magic information indicating this is a BCF file with corresponding major and minor versions
+     * @param out
+     * @throws IOException
+     */
+    public void write(final OutputStream out) throws IOException {
+        out.write(MAGIC_HEADER_START);
+        out.write(getMajorVersion() & 0xFF);
+        out.write(getMinorVersion() & 0xFF);
+    }
+
+    @Override
+    public String toString() {
+        return String.format("BCF%d.%d", getMajorVersion(), getMinorVersion());
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
index b3420514bf..043e5e1851 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
@@ -237,7 +237,12 @@ private VariantContext parseVCFLine(final String[] parts, final boolean includeG
         // parse out the required fields
         final String chr = getCachedString(parts[0]);
         builder.chr(chr);
-        int pos = Integer.valueOf(parts[1]);
+        int pos = -1;
+        try {
+            pos = Integer.valueOf(parts[1]);
+        } catch (NumberFormatException e) {
+            generateException(parts[1] + " is not a valid start position in the VCF format");
+        }
         builder.start(pos);
 
         if ( parts[2].length() == 0 )
@@ -256,9 +261,20 @@ else if ( parts[2].equals(VCFConstants.EMPTY_ID_FIELD) )
         final Map<String, Object> attrs = parseInfo(parts[7]);
         builder.attributes(attrs);
 
+        if ( attrs.containsKey(VCFConstants.END_KEY) ) {
+            // update stop with the end key if provided
+            try {
+                builder.stop(Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString()));
+            } catch (Exception e) {
+                generateException("the END value in the INFO field is not valid");
+            }
+        } else {
+            builder.stop(pos + ref.length() - 1);
+        }
+
         // get our alleles, filters, and setup an attribute map
-        final List<Allele> rawAlleles = parseAlleles(ref, alts, lineNo);
-        final List<Allele> alleles = updateBuilderAllelesAndStop(builder, ref, pos, rawAlleles, attrs);
+        final List<Allele> alleles = parseAlleles(ref, alts, lineNo);
+        builder.alleles(alleles);
 
         // do we have genotyping data
         if (parts.length > NUM_STANDARD_FIELDS && includeGenotypes) {
@@ -275,7 +291,6 @@ else if ( parts[2].equals(VCFConstants.EMPTY_ID_FIELD) )
 
         VariantContext vc = null;
         try {
-            builder.referenceBaseForIndel(ref.getBytes()[0]);
             vc = builder.make();
         } catch (Exception e) {
             generateException(e.getMessage());
@@ -284,31 +299,6 @@ else if ( parts[2].equals(VCFConstants.EMPTY_ID_FIELD) )
         return vc;
     }
 
-    private final List<Allele> updateBuilderAllelesAndStop(final VariantContextBuilder builder,
-                                                           final String ref,
-                                                           final int pos,
-                                                           final List<Allele> rawAlleles,
-                                                           final Map<String, Object> attrs) {
-        int endForSymbolicAlleles = pos; // by default we use the pos
-        if ( attrs.containsKey(VCFConstants.END_KEY) ) {
-            // update stop with the end key if provided
-            try {
-                endForSymbolicAlleles = Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString());
-            } catch (Exception e) {
-                generateException("the END value in the INFO field is not valid");
-            }
-        }
-
-        // find out our current location, and clip the alleles down to their minimum length
-        final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(pos, ref, rawAlleles, endForSymbolicAlleles);
-        if ( clipped.getError() != null )
-            generateException(clipped.getError(), lineNo);
-
-        builder.stop(clipped.getStop());
-        builder.alleles(clipped.getClippedAlleles());
-        return clipped.getClippedAlleles();
-    }
-
     /**
      * get the name of this codec
      * @return our set name
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipper.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipper.java
deleted file mode 100644
index 40ba23d9d5..0000000000
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipper.java
+++ /dev/null
@@ -1,434 +0,0 @@
-/*
- * Copyright (c) 2012, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.utils.codecs.vcf;
-
-import com.google.java.contract.Ensures;
-import com.google.java.contract.Invariant;
-import com.google.java.contract.Requires;
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-import org.broadinstitute.sting.utils.variantcontext.*;
-
-import java.util.*;
-
-/**
- * All of the gross allele clipping and padding routines in one place
- *
- * Having attempted to understand / fix / document this code myself
- * I can only conclude that this entire approach needs to be rethought.  This
- * code just doesn't work robustly with symbolic alleles, with multiple alleles,
- * requires a special "reference base for indels" stored in the VariantContext
- * whose correctness isn't enforced, and overall has strange special cases
- * all over the place.
- *
- * The reason this code is so complex is due to symbolics and multi-alleleic
- * variation, which frequently occur when combining variants from multiple
- * VCF files.
- *
- * TODO rethink this class, make it clean, and make it easy to create, mix, and write out alleles
- * TODO this code doesn't work with reverse clipped alleles (ATA / GTTA -> AT / GT)
- *
- * @author Mark DePristo
- * @since 6/12
- */
-public final class VCFAlleleClipper {
-    private VCFAlleleClipper() { }
-
-    /**
-     * Determine whether we should clip off the first base of all unclippped alleles or not
-     *
-     * Returns true if all of the alleles in unclippedAlleles share a common first base with
-     * ref0.  Ref0 should be the first base of the reference allele  UnclippedAlleles may
-     * contain the reference allele itself, or just the alternate alleles, it doesn't matter.
-     *
-     * The algorithm returns true if the first base should be clipped off, or false otherwise
-     *
-     * This algorithm works even in the presence of symbolic alleles, logically ignoring these
-     * values.  It
-     *
-     * @param unclippedAlleles list of unclipped alleles to assay
-     * @param ref0 the first base of the reference allele
-     * @return true if we should clip the first base of unclippedAlleles
-     */
-    @Requires("unclippedAlleles != null")
-    public static boolean shouldClipFirstBaseP(final List<Allele> unclippedAlleles,
-                                           final byte ref0) {
-        boolean allSymbolicAlt = true;
-
-        for ( final Allele a : unclippedAlleles ) {
-            if ( a.isSymbolic() ) {
-                continue;
-            }
-
-            // already know we aren't symbolic, so we only need to decide if we have only seen a ref
-            if ( ! a.isReference() )
-                allSymbolicAlt = false;
-
-            if ( a.length() < 1 || (a.getBases()[0] != ref0) ) {
-                return false;
-            }
-        }
-
-        // to reach here all alleles are consistent with clipping the first base matching ref0
-        // but we don't clip if all ALT alleles are symbolic
-        return ! allSymbolicAlt;
-    }
-
-    public static int computeReverseClipping(final List<Allele> unclippedAlleles,
-                                             final byte[] ref,
-                                             final int forwardClipping,
-                                             final boolean allowFullClip) {
-        int clipping = 0;
-        boolean stillClipping = true;
-
-        while ( stillClipping ) {
-            for ( final Allele a : unclippedAlleles ) {
-                if ( a.isSymbolic() )
-                    continue;
-
-                // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
-                // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
-                if ( a.length() - clipping == 0 )
-                    return clipping - (allowFullClip ? 0 : 1);
-
-                if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
-                    stillClipping = false;
-                }
-                else if ( ref.length == clipping ) {
-                    if ( allowFullClip )
-                        stillClipping = false;
-                    else
-                        return -1;
-                }
-                else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
-                    stillClipping = false;
-                }
-            }
-            if ( stillClipping )
-                clipping++;
-        }
-
-        return clipping;
-    }
-
-    /**
-     * Are the alleles describing a polymorphism substitution one base for another?
-     *
-     * @param alleles a list of alleles, must not be empty
-     * @return Return true if the length of any allele in alleles isn't 1
-     */
-    @Requires("!alleles.isEmpty()")
-    private static boolean isSingleNucleotideEvent(final List<Allele> alleles) {
-        for ( final Allele a : alleles ) {
-            if ( a.length() != 1 )
-                return false;
-        }
-        return true;
-    }
-
-    /**
-     * clip the alleles, based on the reference, returning a ClippedAlleles object describing what happened
-     *
-     * The ClippedAlleles object contains the implied stop position of the alleles, given the provided start
-     * position, after clipping.  It also contains the list of alleles, in the same order as the provided
-     * unclipped ones, that are the fully clipped version of the input alleles.  If an error occurs
-     * during this option the getError() function returns a string describing the problem (for use in parsers).
-     *
-     * The basic operation are:
-     *
-     * single allele
-     *      => stop == start and clipped == unclipped
-     * any number of single nucleotide events
-     *      => stop == start and clipped == unclipped
-     * two alleles, second being symbolic
-     *      => stop == start and clipped == unclipped
-     *      Note in this case that the STOP should be computed by other means (from END in VCF, for example)
-     *      Note that if there's more than two alleles and the second is a symbolic the code produces an error
-     * Any other case:
-     *      The alleles are trimmed of any sequence shared at the end of the alleles.  If N bases
-     *      are common then the alleles will all be at least N bases shorter.
-     *      The stop position returned is the start position + the length of the
-     *      reverse trimmed only reference allele - 1.
-     *      If the alleles all share a single common starting sequence (just one base is considered)
-     *      then the alleles have this leading common base removed as well.
-     *
-     * TODO This code is gross and brittle and needs to be rethought from scratch
-     *
-     * @param start the unadjusted start position (pre-clipping)
-     * @param ref the reference string
-     * @param unclippedAlleles the list of unclipped alleles, including the reference allele
-     * @return the new reference end position of this event
-     */
-    @Requires({"start > 0", "ref != null && ref.length() > 0", "!unclippedAlleles.isEmpty()"})
-    @Ensures("result != null")
-    public static ClippedAlleles clipAlleles(final int start,
-                                             final String ref,
-                                             final List<Allele> unclippedAlleles,
-                                             final int endForSymbolicAllele ) {
-        // no variation or single nucleotide events are by definition fully clipped
-        if ( unclippedAlleles.size() == 1 || isSingleNucleotideEvent(unclippedAlleles) )
-            return new ClippedAlleles(start, unclippedAlleles, null);
-
-        // we've got to sort out the clipping by looking at the alleles themselves
-        final byte firstRefBase = (byte) ref.charAt(0);
-        final boolean firstBaseIsClipped = shouldClipFirstBaseP(unclippedAlleles, firstRefBase);
-        final int forwardClipping = firstBaseIsClipped ? 1 : 0;
-        final int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false);
-        final boolean needsClipping = forwardClipping > 0 || reverseClipping > 0;
-
-        if ( reverseClipping == -1 )
-            return new ClippedAlleles("computeReverseClipping failed due to bad alleles");
-
-        boolean sawSymbolic = false;
-        List<Allele> clippedAlleles;
-        if ( ! needsClipping ) {
-            // there's nothing to clip, so clippedAlleles are the original alleles
-            clippedAlleles = unclippedAlleles;
-        } else {
-            clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
-            for ( final Allele a : unclippedAlleles ) {
-                if ( a.isSymbolic() ) {
-                    sawSymbolic = true;
-                    clippedAlleles.add(a);
-                } else {
-                    final byte[] allele = Arrays.copyOfRange(a.getBases(), forwardClipping, a.getBases().length - reverseClipping);
-                    if ( !Allele.acceptableAlleleBases(allele) )
-                        return new ClippedAlleles("Unparsable vcf record with bad allele [" + allele + "]");
-                    clippedAlleles.add(Allele.create(allele, a.isReference()));
-                }
-            }
-        }
-
-        int stop = VariantContextUtils.computeEndFromAlleles(clippedAlleles, start, endForSymbolicAllele);
-
-        // TODO
-        // TODO
-        // TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1
-        // TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES.  NEEDS TO BE FIXED
-        // TODO
-        // TODO
-        if ( needsClipping && ! sawSymbolic && ! clippedAlleles.get(0).isNull() ) stop++;
-        // TODO
-        // TODO
-        // TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1
-        // TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES.  NEEDS TO BE FIXED
-        // TODO
-        // TODO
-
-        final Byte refBaseForIndel = firstBaseIsClipped ? firstRefBase : null;
-        return new ClippedAlleles(stop, clippedAlleles, refBaseForIndel);
-    }
-
-    /**
-     * Returns true if the alleles in inputVC should have reference bases added for padding
-     *
-     * We need to pad a VC with a common base if the length of the reference allele is
-     * less than the length of the VariantContext. This happens because the position of
-     * e.g. an indel is always one before the actual event (as per VCF convention).
-     *
-     * @param inputVC the VC to evaluate, cannot be null
-     * @return true if
-     */
-    public static boolean needsPadding(final VariantContext inputVC) {
-        // biallelic sites with only symbolic never need padding
-        if ( inputVC.isBiallelic() && inputVC.getAlternateAllele(0).isSymbolic() )
-            return false;
-
-        final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1;
-        final int referenceLength = inputVC.getReference().length();
-
-        if ( referenceLength == recordLength )
-            return false;
-        else if ( referenceLength == recordLength - 1 )
-            return true;
-        else if ( !inputVC.hasSymbolicAlleles() )
-            throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
-                    " in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
-        else if ( inputVC.isMixed() && inputVC.hasSymbolicAlleles() )
-            throw new IllegalArgumentException("GATK infrastructure limitation prevents needsPadding from working properly with VariantContexts containing a mixture of symbolic and concrete alleles at " + inputVC);
-        return false;
-    }
-
-    public static Allele padAllele(final VariantContext vc, final Allele allele) {
-        assert needsPadding(vc);
-
-        if ( allele.isSymbolic() )
-            return allele;
-        else {
-            // get bases for current allele and create a new one with trimmed bases
-            final StringBuilder sb = new StringBuilder();
-            sb.append((char)vc.getReferenceBaseForIndel().byteValue());
-            sb.append(allele.getDisplayString());
-            final String newBases = sb.toString();
-            return Allele.create(newBases, allele.isReference());
-        }
-    }
-
-    public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC) {
-        final boolean padVC = needsPadding(inputVC);
-
-        // nothing to do if we don't need to pad bases
-        if ( padVC ) {
-            if ( !inputVC.hasReferenceBaseForIndel() )
-                throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
-
-            final ArrayList<Allele> alleles = new ArrayList<Allele>(inputVC.getNAlleles());
-            final Map<Allele, Allele> unpaddedToPadded = inputVC.hasGenotypes() ? new HashMap<Allele, Allele>(inputVC.getNAlleles()) : null;
-
-            boolean paddedAtLeastOne = false;
-            for (final Allele a : inputVC.getAlleles()) {
-                final Allele padded = padAllele(inputVC, a);
-                paddedAtLeastOne = paddedAtLeastOne || padded != a;
-                alleles.add(padded);
-                if ( unpaddedToPadded != null ) unpaddedToPadded.put(a, padded); // conditional to avoid making unnecessary make
-            }
-
-            if ( ! paddedAtLeastOne )
-                throw new ReviewedStingException("VC was supposed to need padding but no allele was actually changed at location " + inputVC.getChr() + ":" + inputVC.getStart() + " with allele " + inputVC.getAlleles());
-
-            final VariantContextBuilder vcb = new VariantContextBuilder(inputVC);
-            vcb.alleles(alleles);
-
-            // the position of the inputVC is one further, if it doesn't contain symbolic alleles
-            vcb.computeEndFromAlleles(alleles, inputVC.getStart(), inputVC.getEnd());
-
-            if ( inputVC.hasGenotypes() ) {
-                assert unpaddedToPadded != null;
-
-                // now we can recreate new genotypes with trimmed alleles
-                final GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
-                for (final Genotype g : inputVC.getGenotypes() ) {
-                    final List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
-                    for (final Allele a : g.getAlleles()) {
-                        newGenotypeAlleles.add( a.isCalled() ? unpaddedToPadded.get(a) : Allele.NO_CALL);
-                    }
-                    genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make());
-                }
-                vcb.genotypes(genotypes);
-            }
-
-            return vcb.make();
-        }
-        else
-            return inputVC;
-
-    }
-
-    public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
-        // see if we need to trim common reference base from all alleles
-
-        final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, true);
-        if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
-            return inputVC;
-
-        final List<Allele> alleles = new ArrayList<Allele>();
-        final GenotypesContext genotypes = GenotypesContext.create();
-        final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
-
-        for (final Allele a : inputVC.getAlleles()) {
-            if (a.isSymbolic()) {
-                alleles.add(a);
-                originalToTrimmedAlleleMap.put(a, a);
-            } else {
-                // get bases for current allele and create a new one with trimmed bases
-                final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
-                final Allele trimmedAllele = Allele.create(newBases, a.isReference());
-                alleles.add(trimmedAllele);
-                originalToTrimmedAlleleMap.put(a, trimmedAllele);
-            }
-        }
-
-        // now we can recreate new genotypes with trimmed alleles
-        for ( final Genotype genotype : inputVC.getGenotypes() ) {
-            final List<Allele> originalAlleles = genotype.getAlleles();
-            final List<Allele> trimmedAlleles = new ArrayList<Allele>();
-            for ( final Allele a : originalAlleles ) {
-                if ( a.isCalled() )
-                    trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
-                else
-                    trimmedAlleles.add(Allele.NO_CALL);
-            }
-            genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
-        }
-
-        return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() + (inputVC.isMixed() ? -1 : 0)).alleles(alleles).genotypes(genotypes).make();
-    }
-
-    @Invariant("stop != -1 || error != null") // we're either an error or a meaningful result but not both
-    public static class ClippedAlleles {
-        private final int stop;
-        private final List<Allele> clippedAlleles;
-        private final Byte refBaseForIndel;
-        private final String error;
-
-        @Requires({"stop > 0", "clippedAlleles != null"})
-        private ClippedAlleles(final int stop, final List<Allele> clippedAlleles, final Byte refBaseForIndel) {
-            this.stop = stop;
-            this.clippedAlleles = clippedAlleles;
-            this.error = null;
-            this.refBaseForIndel = refBaseForIndel;
-        }
-
-        @Requires("error != null")
-        private ClippedAlleles(final String error) {
-            this.stop = -1;
-            this.clippedAlleles = null;
-            this.refBaseForIndel = null;
-            this.error = error;
-        }
-
-        /**
-         * Get an error if it occurred
-         * @return the error message, or null if no error occurred
-         */
-        public String getError() {
-            return error;
-        }
-
-        /**
-         * Get the stop position to use after the clipping as been applied, given the
-         * provided position to clipAlleles
-         * @return
-         */
-        public int getStop() {
-            return stop;
-        }
-
-        /**
-         * Get the clipped alleles themselves
-         * @return the clipped alleles in the order of the input unclipped alleles
-         */
-        public List<Allele> getClippedAlleles() {
-            return clippedAlleles;
-        }
-
-        /**
-         * Returns the reference base we should use for indels, or null if none is appropriate
-         * @return
-         */
-        public Byte getRefBaseForIndel() {
-            return refBaseForIndel;
-        }
-    }
-}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java
index da5b18831f..4df1efee79 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java
@@ -49,13 +49,6 @@ public class VCFCodec extends AbstractVCFCodec {
     // Our aim is to read in the records and convert to VariantContext as quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
     public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4";
 
-    /**
-     * A VCF header the contains master info/filter/format records that we use to 'fill in'
-     * any missing records from our input VCF header.  This allows us to repair headers on
-     * the fly
-     */
-    private VCFHeader headerForRepairs = null;
-
     /**
      * @param reader the line reader to take header lines from
      * @return the number of header lines
@@ -88,8 +81,6 @@ else if (line.startsWith(VCFHeader.HEADER_INDICATOR)) {
                     }
                     headerStrings.add(line);
                     super.parseHeaderFromLines(headerStrings, version);
-                    if ( headerForRepairs != null )
-                        this.header = repairHeader(this.header, headerForRepairs);
                     return this.header;
                 }
                 else {
@@ -103,24 +94,6 @@ else if (line.startsWith(VCFHeader.HEADER_INDICATOR)) {
         throw new TribbleException.InvalidHeader("We never saw the required CHROM header line (starting with one #) for the input VCF file");
     }
 
-    private final VCFHeader repairHeader(final VCFHeader readHeader, final VCFHeader masterHeader) {
-        final Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(Arrays.asList(readHeader, masterHeader), log);
-        return new VCFHeader(lines, readHeader.getGenotypeSamples());
-    }
-
-    /**
-     * Tells this VCFCodec to repair the incoming header files with the information in masterHeader
-     *
-     * @param headerForRepairs
-     */
-    public void setHeaderForRepairs(final VCFHeader headerForRepairs) {
-        if ( headerForRepairs != null )
-            log.info("Using master VCF header to repair missing files from incoming VCFs");
-        this.headerForRepairs = headerForRepairs;
-    }
-
-
-
     /**
      * parse the filter string, first checking to see if we already have parsed it in a previous attempt
      *
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java
index 8790a000df..dac58eb10f 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java
@@ -36,6 +36,8 @@ public final class VCFConstants {
     public static final String MLE_ALLELE_COUNT_KEY = "MLEAC";
     public static final String ALLELE_FREQUENCY_KEY = "AF";
     public static final String MLE_ALLELE_FREQUENCY_KEY = "MLEAF";
+    public static final String MLE_PER_SAMPLE_ALLELE_COUNT_KEY = "MLPSAC";
+    public static final String MLE_PER_SAMPLE_ALLELE_FRACTION_KEY = "MLPSAF";
     public static final String ALLELE_NUMBER_KEY = "AN";
     public static final String RMS_BASE_QUALITY_KEY = "BQ";
     public static final String CIGAR_KEY = "CIGAR";
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFContigHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFContigHeaderLine.java
index d5d76cab7d..35cc75af23 100644
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFContigHeaderLine.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFContigHeaderLine.java
@@ -47,8 +47,8 @@ public VCFContigHeaderLine(final String line, final VCFHeaderVersion version, fi
         this.contigIndex = contigIndex;
     }
 
-    public VCFContigHeaderLine(final String key, final Map<String, String> mapping, int contigIndex) {
-        super(key, mapping, null);
+    public VCFContigHeaderLine(final Map<String, String> mapping, int contigIndex) {
+        super(VCFHeader.CONTIG_KEY, mapping, null);
         this.contigIndex = contigIndex;
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java
index 7a93295834..2663e848f0 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java
@@ -53,10 +53,10 @@ public enum HEADER_FIELDS {
 
     // the associated meta data
     private final Set<VCFHeaderLine> mMetaData = new LinkedHashSet<VCFHeaderLine>();
-    private final Map<String, VCFInfoHeaderLine> mInfoMetaData = new HashMap<String, VCFInfoHeaderLine>();
-    private final Map<String, VCFFormatHeaderLine> mFormatMetaData = new HashMap<String, VCFFormatHeaderLine>();
-    private final Map<String, VCFFilterHeaderLine> mFilterMetaData = new HashMap<String, VCFFilterHeaderLine>();
-    private final Map<String, VCFHeaderLine> mOtherMetaData = new HashMap<String, VCFHeaderLine>();
+    private final Map<String, VCFInfoHeaderLine> mInfoMetaData = new LinkedHashMap<String, VCFInfoHeaderLine>();
+    private final Map<String, VCFFormatHeaderLine> mFormatMetaData = new LinkedHashMap<String, VCFFormatHeaderLine>();
+    private final Map<String, VCFFilterHeaderLine> mFilterMetaData = new LinkedHashMap<String, VCFFilterHeaderLine>();
+    private final Map<String, VCFHeaderLine> mOtherMetaData = new LinkedHashMap<String, VCFHeaderLine>();
     private final List<VCFContigHeaderLine> contigMetaData = new ArrayList<VCFContigHeaderLine>();
 
     // the list of auxillary tags
@@ -101,6 +101,15 @@ public VCFHeader(Set<VCFHeaderLine> metaData) {
         loadMetaDataMaps();
     }
 
+    /**
+     * Creates a shallow copy of the meta data in VCF header toCopy
+     * NOTE(review): only toCopy.mMetaData is forwarded; genotype samples do not appear to be copied -- confirm
+     * @param toCopy the header whose meta data lines are handed to the meta-data-only constructor
+     */
+    public VCFHeader(final VCFHeader toCopy) {
+        this(toCopy.mMetaData);
+    }
+
     /**
      * create a VCF header, given a list of meta data and auxillary tags
      *
@@ -153,12 +162,39 @@ public void addMetaDataLine(VCFHeaderLine headerLine) {
     }
 
     /**
-     * @return all of the VCF header lines of the ##contig form in order, or an empty set if none were present
+     * @return all of the VCF header lines of the ##contig form in order, or an empty list if none were present
      */
     public List<VCFContigHeaderLine> getContigLines() {
         return Collections.unmodifiableList(contigMetaData);
     }
 
+
+    /**
+     * @return a newly built list of all of the VCF FILTER lines in their original file order, or an empty list if none were present
+     */
+    public List<VCFFilterHeaderLine> getFilterLines() {
+        final List<VCFFilterHeaderLine> filters = new ArrayList<VCFFilterHeaderLine>();
+        for ( VCFHeaderLine line : mMetaData ) {
+            if ( line instanceof VCFFilterHeaderLine )  {
+                filters.add((VCFFilterHeaderLine)line);
+            }
+        }
+        return filters;
+    }
+
+    /**
+     * @return a newly built list of all header lines that are VCFIDHeaderLine instances, in the order they are stored in the meta data, or an empty list if none were present
+     */
+    public List<VCFIDHeaderLine> getIDHeaderLines() {
+        final List<VCFIDHeaderLine> filters = new ArrayList<VCFIDHeaderLine>();
+        for ( VCFHeaderLine line : mMetaData ) {
+            if ( line instanceof VCFIDHeaderLine )  {
+                filters.add((VCFIDHeaderLine)line);
+            }
+        }
+        return filters;
+    }
+
     /**
      * check our metadata for a VCF version tag, and throw an exception if the version is out of date
      * or the version is not present
@@ -299,10 +335,16 @@ public int getColumnCount() {
         return HEADER_FIELDS.values().length + (hasGenotypingData() ? mGenotypeSampleNames.size() + 1 : 0);
     }
 
+    /**
+     * Returns the INFO HeaderLines in their original ordering
+     */
     public Collection<VCFInfoHeaderLine> getInfoHeaderLines() {
         return mInfoMetaData.values();
     }
 
+    /**
+     * Returns the FORMAT HeaderLines in their original ordering
+     */
     public Collection<VCFFormatHeaderLine> getFormatHeaderLines() {
         return mFormatMetaData.values();
     }
@@ -390,4 +432,13 @@ public ArrayList<String> getSampleNamesInOrder() {
     public HashMap<String, Integer> getSampleNameToOffset() {
         return sampleNameToOffset;
     }
+
+    @Override
+    public String toString() {
+        final StringBuilder b = new StringBuilder();
+        b.append("[VCFHeader:");
+        for ( final VCFHeaderLine line : mMetaData )
+            b.append("\n\t").append(line);
+        return b.append("\n]").toString();
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLine.java
index 162c34d80b..9b5886c658 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLine.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLine.java
@@ -38,8 +38,8 @@
  *         A class representing a key=value entry in the VCF header
  */
 public class VCFHeaderLine implements Comparable {
-    protected static boolean ALLOW_UNBOUND_DESCRIPTIONS = true;
-    protected static String UNBOUND_DESCRIPTION = "Not provided in original VCF header";
+    protected static final boolean ALLOW_UNBOUND_DESCRIPTIONS = true;
+    protected static final String UNBOUND_DESCRIPTION = "Not provided in original VCF header";
 
     private String mKey = null;
     private String mValue = null;
@@ -53,7 +53,7 @@ public class VCFHeaderLine implements Comparable {
      */
     public VCFHeaderLine(String key, String value) {
         if ( key == null )
-            throw new IllegalArgumentException("VCFHeaderLine: key cannot be null: key = " + key);
+            throw new IllegalArgumentException("VCFHeaderLine: key cannot be null");
         mKey = key;
         mValue = value;
     }
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java
index f80b0eae4e..be87e7306e 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java
@@ -27,6 +27,7 @@
 
 import net.sf.samtools.SAMSequenceDictionary;
 import net.sf.samtools.SAMSequenceRecord;
+import org.apache.commons.io.FilenameUtils;
 import org.apache.log4j.Logger;
 import org.broad.tribble.Feature;
 import org.broadinstitute.sting.commandline.RodBinding;
@@ -165,12 +166,13 @@ public static Set<VCFHeaderLine> smartMergeHeaders(Collection<VCFHeader> headers
 
                 if ( map.containsKey(key) ) {
                     VCFHeaderLine other = map.get(key);
-                    if ( line.equals(other) )
-                        continue;
-                    else if ( ! line.getClass().equals(other.getClass()) )
+                    if ( line.equals(other) ) {
+                        // continue;
+                    } else if ( ! line.getClass().equals(other.getClass()) ) {
                         throw new IllegalStateException("Incompatible header types: " + line + " " + other );
-                    else if ( line instanceof VCFFilterHeaderLine ) {
-                        String lineName = ((VCFFilterHeaderLine) line).getID();                                                                                                         String otherName = ((VCFFilterHeaderLine) other).getID();
+                    } else if ( line instanceof VCFFilterHeaderLine ) {
+                        String lineName = ((VCFFilterHeaderLine) line).getID();
+                        String otherName = ((VCFFilterHeaderLine) other).getID();
                         if ( ! lineName.equals(otherName) )
                             throw new IllegalStateException("Incompatible header types: " + line + " " + other );
                     } else if ( line instanceof VCFCompoundHeaderLine ) {
@@ -198,7 +200,7 @@ else if ( line instanceof VCFFilterHeaderLine ) {
                                 throw new IllegalStateException("Incompatible header types, collision between these two types: " + line + " " + other );
                             }
                         }
-                        if ( ! compLine.getDescription().equals(compOther) )
+                        if ( ! compLine.getDescription().equals(compOther.getDescription()) )
                             conflictWarner.warn(line, "Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine);
                     } else {
                         // we are not equal, but we're not anything special either
@@ -235,7 +237,7 @@ public static String rsIDOfFirstRealVariant(List<VariantContext> VCs, VariantCon
      * @param header the header to update
      * @param engine the GATK engine containing command line arguments and the master sequence dictionary
      */
-    public final static VCFHeader withUpdatedContigs(final VCFHeader header, final GenomeAnalysisEngine engine) {
+    public static VCFHeader withUpdatedContigs(final VCFHeader header, final GenomeAnalysisEngine engine) {
         return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary());
     }
 
@@ -246,11 +248,15 @@ public final static VCFHeader withUpdatedContigs(final VCFHeader header, final G
      * @param referenceFile the file path to the reference sequence used to generate this vcf
      * @param refDict the SAM formatted reference sequence dictionary
      */
-    public final static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) {
+    public static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) {
         return new VCFHeader(withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict), oldHeader.getGenotypeSamples());
     }
 
-    public final static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict) {
+    public static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict) {
+        return withUpdatedContigsAsLines(oldLines, referenceFile, refDict, false);
+    }
+
+    public static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict, boolean referenceNameOnly) {
         final Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>(oldLines.size());
 
         for ( final VCFHeaderLine line : oldLines ) {
@@ -264,17 +270,24 @@ public final static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHe
         for ( final VCFHeaderLine contigLine : makeContigHeaderLines(refDict, referenceFile) )
             lines.add(contigLine);
 
-        lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, "file://" + referenceFile.getAbsolutePath()));
+        String referenceValue;
+        if (referenceFile != null) {
+            if (referenceNameOnly)
+                referenceValue = FilenameUtils.getBaseName(referenceFile.getName());
+            else
+                referenceValue = "file://" + referenceFile.getAbsolutePath();
+            lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, referenceValue));
+        }
         return lines;
     }
 
     /**
      * Create VCFHeaderLines for each refDict entry, and optionally the assembly if referenceFile != null
-     * @param refDict
+     * @param refDict reference dictionary
      * @param referenceFile for assembly name.  May be null
-     * @return
+     * @return list of vcf contig header lines
      */
-    public final static List<VCFContigHeaderLine> makeContigHeaderLines(final SAMSequenceDictionary refDict,
+    public static List<VCFContigHeaderLine> makeContigHeaderLines(final SAMSequenceDictionary refDict,
                                                                   final File referenceFile) {
         final List<VCFContigHeaderLine> lines = new ArrayList<VCFContigHeaderLine>();
         final String assembly = referenceFile != null ? getReferenceAssembly(referenceFile.getName()) : null;
@@ -283,15 +296,15 @@ public final static List<VCFContigHeaderLine> makeContigHeaderLines(final SAMSeq
         return lines;
     }
 
-    private final static VCFContigHeaderLine makeContigHeaderLine(final SAMSequenceRecord contig, final String assembly) {
+    private static VCFContigHeaderLine makeContigHeaderLine(final SAMSequenceRecord contig, final String assembly) {
         final Map<String, String> map = new LinkedHashMap<String, String>(3);
         map.put("ID", contig.getSequenceName());
         map.put("length", String.valueOf(contig.getSequenceLength()));
         if ( assembly != null ) map.put("assembly", assembly);
-        return new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, map, contig.getSequenceIndex());
+        return new VCFContigHeaderLine(map, contig.getSequenceIndex());
     }
 
-    private final static String getReferenceAssembly(final String refPath) {
+    private static String getReferenceAssembly(final String refPath) {
         // This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot
         String assembly = null;
         if (refPath.contains("b37") || refPath.contains("v37"))
diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
index 52da313626..3130469e5a 100755
--- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
+++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
@@ -27,6 +27,7 @@
 import net.sf.samtools.SAMFileHeader;
 import net.sf.samtools.SAMRecord;
 import net.sf.samtools.SAMSequenceDictionary;
+import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
@@ -339,6 +340,17 @@ public CouldNotCreateReferenceIndexFile(File f, String message, Exception e) {
         }
     }
 
+    public static class CouldNotCreateReferenceFAIorDictForGzippedRef extends UserException { // user error: FAI/DICT files cannot be created for .gz references
+        public CouldNotCreateReferenceFAIorDictForGzippedRef(final File f) { // NOTE(review): f is not used in the message below; the text always says reference.fasta.gz
+            super("Although the GATK can process .gz reference sequences, it currently cannot create FAI " +
+                    "or DICT files for them.  In order to use the GATK with reference.fasta.gz you will need to " +
+                    "create .dict and .fai files for reference.fasta.gz and name them reference.fasta.gz.fai and " +
+                    "reference.dict.  Potentially the easiest way to do this is to uncompress reference.fasta, " +
+                    "run the GATK to create the .dict and .fai files, and copy them to the appropriate location. " +
+                    "Sorry for the inconvenience.");
+        }
+    }
+
     public static class CouldNotCreateReferenceIndexFileBecauseOfLock extends UserException.CouldNotCreateReferenceIndexFile {
         public CouldNotCreateReferenceIndexFileBecauseOfLock(File f) {
             super(f, "could not be written because an exclusive file lock could not be obtained. " +
@@ -351,8 +363,8 @@ public CouldNotCreateReferenceIndexFileBecauseOfLock(File f) {
     public static class UnreadableKeyException extends UserException {
         public UnreadableKeyException ( File f, Exception e ) {
             super(String.format("Key file %s cannot be read (possibly the key file is corrupt?). Error was: %s. " +
-                                "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.",
-                                f.getAbsolutePath(), getMessage(e)));
+                                "Please see %s for help.",
+                                f.getAbsolutePath(), getMessage(e), GATKRunReport.PHONE_HOME_DOCS_URL));
         }
 
         public UnreadableKeyException ( String message, Exception e ) {
@@ -361,8 +373,8 @@ public UnreadableKeyException ( String message, Exception e ) {
 
         public UnreadableKeyException ( String message ) {
             super(String.format("Key file cannot be read (possibly the key file is corrupt?): %s. " +
-                                "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.",
-                                message));
+                                "Please see %s for help.",
+                                message, GATKRunReport.PHONE_HOME_DOCS_URL));
         }
     }
 
@@ -370,9 +382,8 @@ public static class KeySignatureVerificationException extends UserException {
         public KeySignatureVerificationException ( File f ) {
             super(String.format("The signature in key file %s failed cryptographic verification. " +
                                 "If this key was valid in the past, it's likely been revoked. " +
-                                "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " +
-                                "for help.",
-                                f.getAbsolutePath()));
+                                "Please see %s for help.",
+                                f.getAbsolutePath(), GATKRunReport.PHONE_HOME_DOCS_URL));
         }
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java
index 44b586bcd2..48706543a3 100644
--- a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java
+++ b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java
@@ -41,6 +41,8 @@
  * Thread-safe!  Uses a lock object to protect write and access to the cache.
  */
 public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
+    protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(CachingIndexedFastaSequenceFile.class);
+
     /** global enable flag */
     private static final boolean USE_CACHE = true;
 
@@ -125,7 +127,7 @@ public CachingIndexedFastaSequenceFile(final File file, long cacheSize ) throws
     public void printEfficiency() {
         // comment out to disable tracking
         if ( (cacheHits + cacheMisses) % PRINT_FREQUENCY == 0 ) {
-            System.out.printf("### CachingIndexedFastaReader: hits=%d misses=%d efficiency %.6f%%%n", cacheHits, cacheMisses, calcEfficiency());
+            logger.info(String.format("### CachingIndexedFastaReader: hits=%d misses=%d efficiency %.6f%%", cacheHits, cacheMisses, calcEfficiency())); // no %n: the logger terminates the line itself
         }
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java
index c6eec24f1a..2f31c154c9 100644
--- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java
@@ -4,7 +4,7 @@
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMRecord;
-import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
+import org.broadinstitute.sting.utils.recalibration.EventType;
 import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
@@ -134,17 +134,36 @@ public final static List<GATKSAMRecord> mergeOverlappingPairedFragments( List<GA
 
         GATKSAMRecord firstRead = overlappingPair.get(0);
         GATKSAMRecord secondRead = overlappingPair.get(1);
-        if( !(secondRead.getUnclippedStart() <= firstRead.getUnclippedEnd() && secondRead.getUnclippedStart() >= firstRead.getUnclippedStart() && secondRead.getUnclippedEnd() >= firstRead.getUnclippedEnd()) ) {
+   /*
+        System.out.println("read 0 unclipped start:"+overlappingPair.get(0).getUnclippedStart());
+        System.out.println("read 0 unclipped end:"+overlappingPair.get(0).getUnclippedEnd());
+        System.out.println("read 1 unclipped start:"+overlappingPair.get(1).getUnclippedStart());
+        System.out.println("read 1 unclipped end:"+overlappingPair.get(1).getUnclippedEnd());
+        System.out.println("read 0 start:"+overlappingPair.get(0).getAlignmentStart());
+        System.out.println("read 0 end:"+overlappingPair.get(0).getAlignmentEnd());
+        System.out.println("read 1 start:"+overlappingPair.get(1).getAlignmentStart());
+        System.out.println("read 1 end:"+overlappingPair.get(1).getAlignmentEnd());
+     */
+        if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) {
             firstRead = overlappingPair.get(1); // swap them
             secondRead = overlappingPair.get(0);
         }
-        if( !(secondRead.getUnclippedStart() <= firstRead.getUnclippedEnd() && secondRead.getUnclippedStart() >= firstRead.getUnclippedStart() && secondRead.getUnclippedEnd() >= firstRead.getUnclippedEnd()) ) {
+        if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) {
             return overlappingPair; // can't merge them, yet:  AAAAAAAAAAA-BBBBBBBBBBB-AAAAAAAAAAAAAA, B is contained entirely inside A
         }
         if( firstRead.getCigarString().contains("I") || firstRead.getCigarString().contains("D") || secondRead.getCigarString().contains("I") || secondRead.getCigarString().contains("D") ) {
             return overlappingPair; // fragments contain indels so don't merge them
         }
 
+/*        // check for inconsistent start positions between unclipped/soft alignment starts
+        if (secondRead.getAlignmentStart() >= firstRead.getAlignmentStart() && secondRead.getUnclippedStart() < firstRead.getUnclippedStart())
+            return overlappingPair;
+        if (secondRead.getAlignmentStart() <= firstRead.getAlignmentStart() && secondRead.getUnclippedStart() > firstRead.getUnclippedStart())
+            return overlappingPair;
+
+        if (secondRead.getUnclippedStart() < firstRead.getAlignmentEnd() && secondRead.getAlignmentStart() >= firstRead.getAlignmentEnd())
+            return overlappingPair;
+  */
         final Pair<Integer, Boolean> pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getSoftStart());
 
         final int firstReadStop = ( pair.getSecond() ? pair.getFirst() + 1 : pair.getFirst() );
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java
index 388e7ce450..1dfc4ecc0f 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java
@@ -135,6 +135,7 @@ private static String httpPost(String data, String URL) {
                 System.out.println(line);
             }
 
+            br.close();
             httpClient.getConnectionManager().shutdown();
             return output;
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java
index 69d2e7c9e9..ab5181b453 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java
@@ -584,20 +584,39 @@ protected Map<String, Object> docForArgument(FieldDoc fieldDoc, ArgumentSource s
      * @return
      */
     @Requires("enumClass.isEnum()")
-    private List<Map<String, Object>> docForEnumArgument(Class enumClass) {
-        ClassDoc doc = this.getDoclet().getClassDocForClass(enumClass);
-        if (doc == null) //  || ! doc.isEnum() )
-            throw new RuntimeException("Tried to get docs for enum " + enumClass + " but got instead: " + doc);
-
-        List<Map<String, Object>> bindings = new ArrayList<Map<String, Object>>();
-        for (final FieldDoc field : doc.fields(false)) {
-            bindings.add(
-                    new HashMap<String, Object>() {{
-                        put("name", field.name());
-                        put("summary", field.commentText());
-                    }});
+    private List<Map<String, Object>> docForEnumArgument(final Class enumClass) {
+        final ClassDoc doc = this.getDoclet().getClassDocForClass(enumClass);
+        if ( doc == null )
+            throw new RuntimeException("Tried to get docs for enum " + enumClass + " but got null instead");
+
+        final Set<String> enumConstantFieldNames = enumConstantsNames(enumClass);
+
+        final List<Map<String, Object>> bindings = new ArrayList<Map<String, Object>>();
+        for (final FieldDoc fieldDoc : doc.fields(false)) {
+            if (enumConstantFieldNames.contains(fieldDoc.name()) )
+                bindings.add(
+                        new HashMap<String, Object>() {{
+                            put("name", fieldDoc.name());
+                            put("summary", fieldDoc.commentText());
+                        }});
         }
 
         return bindings;
     }
+
+    /**
+     * Returns the names of the fields of enumClass, as listed by getFields(), that reflection reports as enum constants
+     * @param enumClass the class whose fields are examined
+     * @return a non-null set of the names of the fields that are enum constants
+     */
+    private Set<String> enumConstantsNames(final Class enumClass) {
+        final Set<String> enumConstantFieldNames = new HashSet<String>();
+
+        for ( final Field field : enumClass.getFields() ) {
+            if ( field.isEnumConstant() )
+                enumConstantFieldNames.add(field.getName());
+        }
+
+        return enumConstantFieldNames;
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java
index 6ee4af2888..85e9f362d8 100644
--- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java
@@ -681,8 +681,8 @@ public static List<GenomeLoc> getFlankingIntervals(final GenomeLocParser parser,
 
         LinkedHashMap<String, List<GenomeLoc>> locsByContig = splitByContig(sorted);
         List<GenomeLoc> expanded = new ArrayList<GenomeLoc>();
-        for (String contig: locsByContig.keySet()) {
-            List<GenomeLoc> contigLocs = locsByContig.get(contig);
+        for (Map.Entry<String, List<GenomeLoc>> contig: locsByContig.entrySet()) {
+            List<GenomeLoc> contigLocs = contig.getValue();
             int contigLocsSize = contigLocs.size();
 
             GenomeLoc startLoc, stopLoc;
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
index e71cd01bec..3d986f6669 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
@@ -155,7 +155,7 @@ protected void calculateAbstractSize() {
 
     protected void addPileupToCumulativeStats(AbstractReadBackedPileup<RBP, PE> pileup) {
         size += pileup.getNumberOfElements();
-        abstractSize += pileup.depthOfCoverage();
+        abstractSize = pileup.depthOfCoverage() + (abstractSize == -1 ? 0 : abstractSize);
         nDeletions += pileup.getNumberOfDeletions();
         nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads();
     }
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java
index e5cd9f4d59..8cba5ec232 100755
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java
@@ -3,6 +3,7 @@
 import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
 import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@@ -220,7 +221,7 @@ public int getRepresentativeCount() {
             if (isDeletion() && (offset + 1 >= read.getReadLength()) )  // deletion in the end of the read
                 throw new UserException.MalformedBAM(read, String.format("Adjacent I/D events in read %s -- cigar: %s", read.getReadName(), read.getCigarString()));
 
-            representativeCount = (isDeletion()) ? Math.round((read.getReducedCount(offset) + read.getReducedCount(offset + 1)) / 2) : read.getReducedCount(offset);
+            representativeCount = (isDeletion()) ? MathUtils.fastRound((read.getReducedCount(offset) + read.getReducedCount(offset + 1)) / 2.0) : read.getReducedCount(offset);
         }
         return representativeCount;
     }
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
index b5f7ad0462..c09eb00633 100644
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
@@ -27,7 +27,7 @@
 
 import net.sf.samtools.SAMTag;
 import net.sf.samtools.SAMUtils;
-import org.broadinstitute.sting.gatk.walkers.bqsr.*;
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
@@ -103,7 +103,7 @@ public void recalibrateRead(final GATKSAMRecord read) {
             }
         }
 
-        RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates);                                  // compute all covariates for the read
+        RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);                                  // compute all covariates for the read
         for (final EventType errorModel : EventType.values()) {                                                         // recalibrate all three quality strings
             if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
                 read.setBaseQualities(null, errorModel);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java
similarity index 96%
rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java
rename to public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java
index 2650f0f8d4..1c84518eb9 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java
@@ -1,4 +1,4 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;
 
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java
index 62edd5fac5..a5a3104a01 100644
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java
@@ -223,7 +223,7 @@ public boolean hasFixedQual() {
 
         @Override
         public int compareTo(final QualInterval qualInterval) {
-            return new Integer(this.qStart).compareTo(qualInterval.qStart);
+            return Integer.valueOf(this.qStart).compareTo(qualInterval.qStart);
         }
 
         /**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java
similarity index 78%
rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java
rename to public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java
index fb3aef949b..2b67d12a98 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java
@@ -1,11 +1,9 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;
 
 import org.broadinstitute.sting.gatk.report.GATKReportTable;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
-import org.broadinstitute.sting.utils.recalibration.QualQuantizer;
-import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
 
 import java.util.Arrays;
 import java.util.List;
@@ -41,7 +39,7 @@ public QuantizationInfo(final RecalibrationTables recalibrationTables, final int
         for (final RecalDatum value : qualTable.getAllValues()) {
             final RecalDatum datum = value;
             final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality());                                 // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
-            qualHistogram[empiricalQual] += datum.numObservations;                                                      // add the number of observations for every key
+            qualHistogram[empiricalQual] += datum.getNumObservations();                                                      // add the number of observations for every key
         }
         empiricalQualCounts = Arrays.asList(qualHistogram);                                                             // histogram with the number of observations of the empirical qualities
         quantizeQualityScores(quantizationLevels);
@@ -70,15 +68,15 @@ public int getQuantizationLevels() {
     }
 
     public GATKReportTable generateReportTable() {
-        GATKReportTable quantizedTable = new GATKReportTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
-        quantizedTable.addColumn(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
-        quantizedTable.addColumn(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
-        quantizedTable.addColumn(RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
+        GATKReportTable quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
+        quantizedTable.addColumn(RecalUtils.QUALITY_SCORE_COLUMN_NAME);
+        quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
+        quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
 
         for (int qual = 0; qual <= QualityUtils.MAX_QUAL_SCORE; qual++) {
-            quantizedTable.set(qual, RecalDataManager.QUALITY_SCORE_COLUMN_NAME, qual);
-            quantizedTable.set(qual, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual));
-            quantizedTable.set(qual, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual));
+            quantizedTable.set(qual, RecalUtils.QUALITY_SCORE_COLUMN_NAME, qual);
+            quantizedTable.set(qual, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual));
+            quantizedTable.set(qual, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual));
         }
         return quantizedTable;
     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java
similarity index 97%
rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java
rename to public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java
index 5e907237d6..c86bd4deba 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java
@@ -1,4 +1,4 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;
 
 /**
  * The object temporarily held by a read that describes all of it's covariates.
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java
new file mode 100755
index 0000000000..8c8815b54d
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java
@@ -0,0 +1,305 @@
+package org.broadinstitute.sting.utils.recalibration;
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Invariant;
+import com.google.java.contract.Requires;
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.QualityUtils;
+
+import java.util.Random;
+
+/**
+ * An individual piece of recalibration data. Each bin counts up the number of observations and the number
+ * of reference mismatches seen for that combination of covariates.
+ *
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: Nov 3, 2009
+ */
+@Invariant({
+        "estimatedQReported >= 0.0",
+        "! Double.isNaN(estimatedQReported)",
+        "! Double.isInfinite(estimatedQReported)",
+        "empiricalQuality >= 0.0 || empiricalQuality == UNINITIALIZED",
+        "! Double.isNaN(empiricalQuality)",
+        "! Double.isInfinite(empiricalQuality)",
+        "numObservations >= 0",
+        "numMismatches >= 0",
+        "numMismatches <= numObservations"
+})
+public class RecalDatum {
+    private static final double UNINITIALIZED = -1.0;
+
+    /**
+     * estimated reported quality score based on combined data's individual q-reporteds and number of observations
+     */
+    private double estimatedQReported;
+
+    /**
+     * the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
+     */
+    private double empiricalQuality;
+
+    /**
+     * number of bases seen in total
+     */
+    private long numObservations;
+
+    /**
+     * number of bases seen that didn't match the reference
+     */
+    private long numMismatches;
+
+    /**
+     * used when calculating empirical qualities to avoid division by zero
+     */
+    private static final int SMOOTHING_CONSTANT = 1;
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // constructors
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    /**
+     * Create a new RecalDatum with the given observation and mismatch counts and a reported quality
+     *
+     * @param _numObservations number of bases seen in total; must be >= 0
+     * @param _numMismatches   number of bases seen that didn't match the reference; must be >= 0
+     * @param reportedQuality  initial value of the estimated reported quality; must be >= 0
+     */
+    public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
+        if ( _numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
+        if ( _numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
+        if ( reportedQuality < 0 ) throw new IllegalArgumentException("reportedQuality < 0");
+
+        numObservations = _numObservations;
+        numMismatches = _numMismatches;
+        estimatedQReported = reportedQuality;
+        empiricalQuality = UNINITIALIZED; // computed lazily on the first getEmpiricalQuality() call
+    }
+
+    /**
+     * Copy constructor: initializes this datum with the counts and both quality values of copy
+     * @param copy the datum to duplicate; it is only read
+     */
+    public RecalDatum(final RecalDatum copy) {
+        this.numObservations = copy.getNumObservations();
+        this.numMismatches = copy.getNumMismatches();
+        this.estimatedQReported = copy.estimatedQReported;
+        this.empiricalQuality = copy.empiricalQuality;
+    }
+
+    /**
+     * Add in all of the data from other into this object, updating the reported quality from the expected
+     * error rate implied by the two reported qualities, and invalidating the cached empirical quality
+     *
+     * @param other the datum to fold into this one; it is only read
+     */
+    public synchronized void combine(final RecalDatum other) {
+        final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
+        increment(other.getNumObservations(), other.getNumMismatches());
+        if ( getNumObservations() > 0 ) estimatedQReported = -10 * Math.log10(sumErrors / getNumObservations()); // with 0 observations this would be 0/0 => NaN, so keep the current value
+        empiricalQuality = UNINITIALIZED;
+    }
+
+    public synchronized void setEstimatedQReported(final double estimatedQReported) {
+        if ( estimatedQReported < 0 ) throw new IllegalArgumentException("estimatedQReported < 0");
+        if ( Double.isInfinite(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is infinite");
+        if ( Double.isNaN(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is NaN");
+
+        this.estimatedQReported = estimatedQReported;
+    }
+
+    public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) { // both bounds are exclusive and must be > 0 (Random.nextInt)
+        final Random random = new Random();
+        final int nObservations = random.nextInt(maxObservations);
+        final int nErrors = Math.min(random.nextInt(maxErrors), nObservations); // clamp: the class invariant requires numMismatches <= numObservations
+        final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
+        return new RecalDatum(nObservations, nErrors, (byte)qual);
+    }
+
+    public final double getEstimatedQReported() {
+        return estimatedQReported;
+    }
+    public final byte getEstimatedQReportedAsByte() {
+        return (byte)(int)(Math.round(getEstimatedQReported()));
+    }
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // Empirical quality score -- derived from the num mismatches and observations
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    /**
+     * Returns the smoothed error rate (in real space) of this datum, or 0 if there are no observations
+     * @return 0.0 when numObservations == 0, otherwise (N errors + SMOOTHING_CONSTANT) / (N obs + 2 * SMOOTHING_CONSTANT)
+     */
+    @Ensures("result >= 0.0")
+    public double getEmpiricalErrorRate() {
+        if ( numObservations == 0 )
+            return 0.0;
+        else {
+            // add SMOOTHING_CONSTANT pseudo-errors to the mismatch count
+            final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
+            // smoothing is one error and one non-error observation, for example
+            final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT);
+            return doubleMismatches / doubleObservations;
+        }
+    }
+
+    public synchronized void setEmpiricalQuality(final double empiricalQuality) {
+        if ( empiricalQuality < 0 ) throw new IllegalArgumentException("empiricalQuality < 0");
+        if ( Double.isInfinite(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is infinite");
+        if ( Double.isNaN(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is NaN");
+
+        this.empiricalQuality = empiricalQuality;
+    }
+
+    public final double getEmpiricalQuality() {
+        if (empiricalQuality == UNINITIALIZED)
+            calcEmpiricalQuality();
+        return empiricalQuality;
+    }
+
+    public final byte getEmpiricalQualityAsByte() {
+        return (byte)(Math.round(getEmpiricalQuality()));
+    }
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // string representations
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    @Override
+    public String toString() {
+        return String.format("%d,%d,%d", getNumObservations(), getNumMismatches(), (byte) Math.floor(getEmpiricalQuality()));
+    }
+
+    public String stringForCSV() {
+        return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
+    }
+
+//    /**
+//     * We don't compare the estimated quality reported because it may be different when read from
+//     * report tables.
+//     *
+//     * @param o the other recal datum
+//     * @return true if the two recal datums have the same number of observations, errors and empirical quality.
+//     */
+//    @Override
+//    public boolean equals(Object o) {
+//        if (!(o instanceof RecalDatum))
+//            return false;
+//        RecalDatum other = (RecalDatum) o;
+//        return super.equals(o) &&
+//               MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
+//    }
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // increment methods
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    public long getNumObservations() {
+        return numObservations;
+    }
+
+    public synchronized void setNumObservations(final long numObservations) {
+        if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
+        this.numObservations = numObservations;
+        empiricalQuality = UNINITIALIZED;
+    }
+
+    public long getNumMismatches() {
+        return numMismatches;
+    }
+
+    @Requires({"numMismatches >= 0"})
+    public synchronized void setNumMismatches(final long numMismatches) {
+        if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
+        this.numMismatches = numMismatches;
+        empiricalQuality = UNINITIALIZED;
+    }
+
+    @Requires({"by >= 0"})
+    public synchronized void incrementNumObservations(final long by) {
+        numObservations += by;
+        empiricalQuality = UNINITIALIZED;
+    }
+
+    @Requires({"by >= 0"})
+    public synchronized void incrementNumMismatches(final long by) {
+        numMismatches += by;
+        empiricalQuality = UNINITIALIZED;
+    }
+
+    @Requires({"incObservations >= 0", "incMismatches >= 0"})
+    @Ensures({"numObservations == old(numObservations) + incObservations", "numMismatches == old(numMismatches) + incMismatches"})
+    public synchronized void increment(final long incObservations, final long incMismatches) {
+        incrementNumObservations(incObservations);
+        incrementNumMismatches(incMismatches);
+    }
+
+    @Ensures({"numObservations == old(numObservations) + 1", "numMismatches >= old(numMismatches)"})
+    public synchronized void increment(final boolean isError) {
+        incrementNumObservations(1);
+        if ( isError )
+            incrementNumMismatches(1);
+    }
+
+    // -------------------------------------------------------------------------------------
+    //
+    // Private implementation helper functions
+    //
+    // -------------------------------------------------------------------------------------
+
+    /**
+     * Calculate and cache the phred-scaled empirical quality from the smoothed error rate, capped at MAX_RECALIBRATED_Q_SCORE
+     */
+    @Requires("empiricalQuality == UNINITIALIZED")
+    @Ensures("empiricalQuality != UNINITIALIZED")
+    private synchronized final void calcEmpiricalQuality() {
+        final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate()); // an error rate of 0.0 (no observations) gives +Infinity here; the cap below bounds it
+        empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
+    }
+
+    /**
+     * calculate the expected number of errors given the estimated Q reported and the number of observations
+     * in this datum.
+     *
+     * @return a positive (potentially fractional) estimate of the number of errors
+     */
+    @Ensures("result >= 0.0")
+    private double calcExpectedErrors() {
+        return (double) getNumObservations() * QualityUtils.qualToErrorProb(estimatedQReported);
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java
new file mode 100644
index 0000000000..41e96222c0
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java
@@ -0,0 +1,531 @@
+package org.broadinstitute.sting.utils.recalibration;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+import org.apache.commons.math.MathException;
+import org.apache.commons.math.stat.inference.ChiSquareTestImpl;
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.utils.collections.Pair;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.Set;
+
+/**
+ * A tree of recal datum, where each contains a set of sub datum representing sub-states of the higher level one
+ *
+ * @author Mark DePristo
+ * @since 07/27/12
+ */
+public class RecalDatumNode<T extends RecalDatum> {
+    private final static double SMALLEST_CHI2_PVALUE = 1e-300;
+    protected static final Logger logger = Logger.getLogger(RecalDatumNode.class);
+
+    /**
+     * fixedPenalty is this value if it's considered fixed
+     */
+    private final static double UNINITIALIZED = Double.NEGATIVE_INFINITY;
+
+    private final T recalDatum;
+    private double fixedPenalty = UNINITIALIZED;
+    private final Set<RecalDatumNode<T>> subnodes;
+
+    @Requires({"recalDatum != null"})
+    public RecalDatumNode(final T recalDatum) {
+        this(recalDatum, new HashSet<RecalDatumNode<T>>());
+    }
+
+    @Override
+    public String toString() {
+        return recalDatum.toString();
+    }
+
+    @Requires({"recalDatum != null", "subnodes != null"})
+    public RecalDatumNode(final T recalDatum, final Set<RecalDatumNode<T>> subnodes) {
+        this(recalDatum, UNINITIALIZED, subnodes);
+    }
+
+    @Requires({"recalDatum != null"})
+    protected RecalDatumNode(final T recalDatum, final double fixedPenalty) {
+        this(recalDatum, fixedPenalty, new HashSet<RecalDatumNode<T>>());
+    }
+
+    @Requires({"recalDatum != null", "subnodes != null"})
+    protected RecalDatumNode(final T recalDatum, final double fixedPenalty, final Set<RecalDatumNode<T>> subnodes) {
+        this.recalDatum = recalDatum;
+        this.fixedPenalty = fixedPenalty;
+        this.subnodes = new HashSet<RecalDatumNode<T>>(subnodes);
+    }
+
+    /**
+     * Get the recal data associated with this node
+     * @return the datum supplied at construction (non-null per the contract)
+     */
+    @Ensures("result != null")
+    public T getRecalDatum() {
+        return recalDatum;
+    }
+
+    /**
+     * The set of immediate subnodes of this node.  This is the live internal set, so changes to it change this node.
+     * @return the (possibly empty) set of subnodes
+     */
+    @Ensures("result != null")
+    public Set<RecalDatumNode<T>> getSubnodes() {
+        return subnodes;
+    }
+
+    /**
+     * Return the fixed penalty, if set, or else the calculated penalty for this node
+     * @return fixedPenalty when it differs from UNINITIALIZED, otherwise a fresh calcPenalty() result (which is not stored)
+     */
+    public double getPenalty() {
+        if ( fixedPenalty != UNINITIALIZED )
+            return fixedPenalty;
+        else
+            return calcPenalty();
+    }
+
+    /**
+     * Set the fixed penalty for this node to a fresh calculation from calcPenalty
+     *
+     * This is important in the case where you want to compute the penalty from a full
+     * tree and then chop the tree up afterwards while considering the previous penalties.
+     * If you don't call this function then manipulating the tree may result in the
+     * penalty functions changing with changes in the tree.
+     *
+     * @param doEntireTree recurse into all subnodes?
+     * @return the fixed penalty for this node
+     */
+    public double calcAndSetFixedPenalty(final boolean doEntireTree) {
+        fixedPenalty = calcPenalty();
+        if ( doEntireTree )
+            for ( final RecalDatumNode<T> sub : subnodes )
+                sub.calcAndSetFixedPenalty(doEntireTree);
+        return fixedPenalty;
+    }
+
+    /**
+     * Add node to the set of subnodes of this node
+     * @param sub the node to add as an immediate subnode; must not be null
+     */
+    @Requires("sub != null")
+    public void addSubnode(final RecalDatumNode<T> sub) {
+        subnodes.add(sub);
+    }
+
+    /**
+     * Is this a leaf node (i.e., has no subnodes)?
+     * @return true if this node has no subnodes
+     */
+    public boolean isLeaf() {
+        return subnodes.isEmpty();
+    }
+
+    /**
+     * Is this node immediately above only leaf nodes?
+     *
+     * @return true if every immediate subnode is a leaf; also true for a leaf itself, since it has no subnodes
+     */
+    public boolean isAboveOnlyLeaves() {
+        for ( final RecalDatumNode<T> sub : subnodes )
+            if ( ! sub.isLeaf() )
+                return false;
+        return true;
+    }
+
+    /**
+     * What's the immediate number of subnodes from this node?
+     * @return the number of immediate subnodes (0 for a leaf)
+     */
+    @Ensures("result >= 0")
+    public int getNumSubnodes() {
+        return subnodes.size();
+    }
+
+    /**
+     * Total penalty is the sum of leaf node penalties
+     *
+     * This algorithm assumes that penalties have been fixed before pruning, as leaf nodes by
+     * definition have 0 penalty unless they represent a pruned tree with underlying -- but now
+     * pruned -- subtrees
+     *
+     * @return getPenalty() if this node is a leaf, otherwise the sum of totalPenalty() over its subnodes
+     */
+    public double totalPenalty() {
+        if ( isLeaf() )
+            return getPenalty();
+        else {
+            double sum = 0.0;
+            for ( final RecalDatumNode<T> sub : subnodes )
+                sum += sub.totalPenalty();
+            return sum;
+        }
+    }
+
+    /**
+     * The maximum penalty among all nodes reachable from this one, or among leaves only if leafOnly is true
+     * @return the largest penalty found
+     */
+    public double maxPenalty(final boolean leafOnly) {
+        double max = ! leafOnly || isLeaf() ? getPenalty() : Double.NEGATIVE_INFINITY; // not Double.MIN_VALUE: that is the smallest positive double
+        for ( final RecalDatumNode<T> sub : subnodes )
+            max = Math.max(max, sub.maxPenalty(leafOnly));
+        return max;
+    }
+
+    /**
+     * The minimum penalty among all nodes reachable from this one, or among leaves only if leafOnly is true
+     * @return the smallest penalty found
+     */
+    public double minPenalty(final boolean leafOnly) {
+        double min = ! leafOnly || isLeaf() ? getPenalty() : Double.MAX_VALUE;
+        for ( final RecalDatumNode<T> sub : subnodes )
+            min = Math.min(min, sub.minPenalty(leafOnly));
+        return min;
+    }
+
+    /**
+     * What's the longest branch from this node to any leaf?  Includes this node
+     * @return the number of nodes on the longest path from this node down to a leaf (1 for a leaf)
+     */
+    public int maxDepth() {
+        int subMax = 0;
+        for ( final RecalDatumNode<T> sub : subnodes )
+            subMax = Math.max(subMax, sub.maxDepth());
+        return subMax + 1;
+    }
+
+    /**
+     * What's the shortest branch from this node to any leaf?  Includes this node
+     * @return the number of nodes on the shortest path from this node down to a leaf (1 for a leaf)
+     */
+    @Ensures("result > 0")
+    public int minDepth() {
+        if ( isLeaf() )
+            return 1;
+        else {
+            int subMin = Integer.MAX_VALUE;
+            for ( final RecalDatumNode<T> sub : subnodes )
+                subMin = Math.min(subMin, sub.minDepth());
+            return subMin + 1;
+        }
+    }
+
+    /**
+     * Return the number of nodes, including this one, reachable from this node
+     * @return 1 plus the sum of size() over all subnodes
+     */
+    @Ensures("result > 0")
+    public int size() {
+        int size = 1;
+        for ( final RecalDatumNode<T> sub : subnodes )
+            size += sub.size();
+        return size;
+    }
+
+    /**
+     * Count the number of leaf nodes reachable from this node
+     *
+     * @return 1 if this node is a leaf, otherwise the sum of numLeaves() over its subnodes
+     */
+    @Ensures("result >= 0")
+    public int numLeaves() {
+        if ( isLeaf() )
+            return 1;
+        else {
+            int size = 0;
+            for ( final RecalDatumNode<T> sub : subnodes )
+                size += sub.numLeaves();
+            return size;
+        }
+    }
+
+    /**
+     * Calculate the phred-scaled p-value for a chi^2 test for independence among subnodes of this node.
+     *
+     * The chi^2 value indicates the degree of independence of the implied error rates among the
+     * immediate subnodes
+     *
+     * @return the phred-scaled chi2 p-value, or 0.0 for leaves, free-to-merge nodes and nodes with a single subnode
+     */
+    private double calcPenalty() {
+        if ( isLeaf() || freeToMerge() )
+            return 0.0;
+        else if ( subnodes.size() == 1 )
+            // only one value, so it's free to merge away
+            return 0.0;
+        else {
+            final long[][] counts = new long[subnodes.size()][2];
+
+            int i = 0;
+            for ( final RecalDatumNode<T> subnode : subnodes ) {
+                // add pseudocounts to help avoid all zeros => NaN; NOTE(review): originally labelled "yates correction" -- confirm intent
+                counts[i][0] = subnode.getRecalDatum().getNumMismatches() + 1;
+                counts[i][1] = subnode.getRecalDatum().getNumObservations() + 2; // NOTE(review): total observations, not matches -- confirm this is the intended table
+                i++;
+            }
+
+            try {
+                final double chi2PValue = new ChiSquareTestImpl().chiSquareTest(counts);
+                final double penalty = -10 * Math.log10(Math.max(chi2PValue, SMALLEST_CHI2_PVALUE));
+
+                // make sure things are reasonable and fail early if not
+                if (Double.isInfinite(penalty) || Double.isNaN(penalty))
+                    throw new ReviewedStingException("chi2 value is " + chi2PValue + " at " + getRecalDatum());
+
+                return penalty;
+            } catch ( MathException e ) {
+                throw new ReviewedStingException("Failed in calculating chi2 value", e);
+            }
+        }
+    }
+
+    /**
+     * Is this node free to merge because its rounded Q score is the same as that of all its immediate subnodes
+     * @return true for a leaf, or when every immediate subnode has the same byte-rounded empirical quality as this node
+     */
+    private boolean freeToMerge() {
+        if ( isLeaf() ) // leaves are free to merge
+            return true;
+        else {
+            final byte myQual = getRecalDatum().getEmpiricalQualityAsByte();
+            for ( final RecalDatumNode<T> sub : subnodes )
+                if ( sub.getRecalDatum().getEmpiricalQualityAsByte() != myQual )
+                    return false;
+            return true;
+        }
+    }
+
+    /**
+     * Calculate the penalty of this interval, given the overall error rate for the interval
+     *
+     * If the globalErrorRate is e, this value is:
+     *
+     * sum_i |log10(e_i) - log10(e)| * nObservations_i
+     *
+     * where the index i ranges over all leaves of the tree accessible from this interval
+     * (found recursively from subnodes as necessary)
+     *
+     * @param globalErrorRate overall error rate in real space against which we calculate the penalty
+     * @return the cost of approximating the bins in this interval with the globalErrorRate (0.0 if that rate is 0.0)
+     */
+    @Requires("globalErrorRate >= 0.0")
+    @Ensures("result >= 0.0")
+    private double calcPenaltyLog10(final double globalErrorRate) {
+        if ( globalErrorRate == 0.0 ) // there were no observations, so there's no penalty
+            return 0.0;
+
+        if ( isLeaf() ) {
+            // this is a leaf node, so it contributes its own term of the sum directly
+            return (Math.abs(Math.log10(recalDatum.getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * recalDatum.getNumObservations();
+            // TODO -- how can we generalize this calculation?
+//            if ( this.qEnd <= minInterestingQual )
+//                // It's free to merge up quality scores below the smallest interesting one
+//                return 0;
+//            else {
+//                return (Math.abs(Math.log10(getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * getNumObservations();
+//            }
+        } else {
+            double sum = 0;
+            for ( final RecalDatumNode<T> hrd : subnodes)
+                sum += hrd.calcPenaltyLog10(globalErrorRate);
+            return sum;
+        }
+    }
+
+    /**
+     * Return a freshly allocated tree prunes to have no more than maxDepth from the root to any leaf
+     *
+     * @param maxDepth
+     * @return
+     */
+    public RecalDatumNode<T> pruneToDepth(final int maxDepth) {
+        if ( maxDepth < 1 )
+            throw new IllegalArgumentException("maxDepth < 1");
+        else {
+            final Set<RecalDatumNode<T>> subPruned = new HashSet<RecalDatumNode<T>>(getNumSubnodes());
+            if ( maxDepth > 1 )
+                for ( final RecalDatumNode<T> sub : subnodes )
+                    subPruned.add(sub.pruneToDepth(maxDepth - 1));
+            return new RecalDatumNode<T>(getRecalDatum(), fixedPenalty, subPruned);
+        }
+    }
+
+    /**
+     * Return a tree whose size() is no more than maxElements, removing nodes in order of increasing penalty
+     *
+     * Note that nodes must have fixed penalties or this algorithm will fail.
+     *
+     * @param maxElements the largest size() the returned tree may have
+     * @return this node if its size() is already <= maxElements, otherwise the pruned tree
+     */
+    public RecalDatumNode<T> pruneByPenalty(final int maxElements) {
+        RecalDatumNode<T> pruned = this;
+
+        // keep removing the lowest penalty element until the tree is small enough
+        while ( pruned.size() > maxElements )
+            pruned = pruned.removeLowestPenaltyNode();
+
+        // our size is at or below the target, so we are good, return
+        // (nothing was removed if the tree was already small enough)
+        return pruned;
+    }
+
+    /**
+     * Return a tree where mergable nodes are merged, lowest penalty first, while that penalty is <= maxPenalty
+     *
+     * Note that nodes must have fixed penalties or this algorithm will fail.
+     *
+     * @param maxPenaltyIn the maximum penalty we are allowed to incur for a merge
+     * @param applyBonferroniCorrection if true, we will adjust penalty by the phred-scaled bonferroni correction
+     *                                  for the size of the initial tree.  That is, if there are 10 nodes in the
+     *                                  tree and maxPenalty is 20 we will actually enforce 10^-2 / 10 = 10^-3 = 30
+     *                                  penalty for multiple testing
+     * @return the pruned tree, which is this node itself when nothing could be merged
+     */
+    public RecalDatumNode<T> pruneToNoMoreThanPenalty(final double maxPenaltyIn, final boolean applyBonferroniCorrection) {
+        final double maxPenalty;
+        if ( applyBonferroniCorrection ) {
+            final long initialSize = this.size(); // computed once, and only when the correction is requested
+            final double bonferroniCorrection = 10 * Math.log10(initialSize);
+            maxPenalty = maxPenaltyIn + bonferroniCorrection;
+            logger.info(String.format("Applying Bonferroni correction for %d nodes = %.2f to initial penalty %.2f for total " +
+                    "corrected max penalty of %.2f", initialSize, bonferroniCorrection, maxPenaltyIn, maxPenalty));
+        } else {
+            maxPenalty = maxPenaltyIn;
+        }
+
+        RecalDatumNode<T> root = this;
+        while ( true ) {
+            final Pair<RecalDatumNode<T>, Double> minPenaltyNode = root.getMinPenaltyAboveLeafNode();
+            if ( minPenaltyNode == null ) {
+                if ( logger.isDebugEnabled() ) logger.debug("Stopping because no candidates could be found");
+                break;
+            }
+            if ( minPenaltyNode.getSecond() > maxPenalty ) {
+                if ( logger.isDebugEnabled() ) logger.debug("Stopping because node " + minPenaltyNode.getFirst() + " has penalty " + minPenaltyNode.getSecond() + " > max " + maxPenalty);
+                break;
+            }
+            // remove the lowest penalty element, and continue
+            if ( logger.isDebugEnabled() ) logger.debug("Removing node " + minPenaltyNode.getFirst() + " with penalty " + minPenaltyNode.getSecond());
+            root = root.removeLowestPenaltyNode();
+        }
+
+        // no more candidates exist with penalty <= maxPenalty
+        return root;
+    }
+
+
+    /**
+     * Find the lowest penalty above-leaf node in the tree (which may be this node itself), and return a
+     * freshly allocated tree in which that node has lost its subnodes and so become a leaf
+     *
+     * @return the new tree, never null
+     * @throws IllegalStateException if there is no above-leaf node to remove, or no node was removed
+     */
+    private RecalDatumNode<T> removeLowestPenaltyNode() {
+        final Pair<RecalDatumNode<T>, Double> nodeToRemove = getMinPenaltyAboveLeafNode();
+        if ( nodeToRemove == null ) // e.g. this node is a leaf: fail clearly rather than with an NPE below
+            throw new IllegalStateException("No above-leaf node available to remove from " + this);
+        if ( logger.isDebugEnabled() )
+            logger.debug("Removing " + nodeToRemove.getFirst() + " with penalty " + nodeToRemove.getSecond());
+
+        final Pair<RecalDatumNode<T>, Boolean> result = removeNode(nodeToRemove.getFirst());
+        if ( ! result.getSecond() )
+            throw new IllegalStateException("Never removed any node!");
+        final RecalDatumNode<T> oneRemoved = result.getFirst();
+        if ( oneRemoved == null )
+            throw new IllegalStateException("Removed our root node, wow, didn't expect that");
+        return oneRemoved;
+    }
+
+    /**
+     * Search this tree for the node with the lowest penalty among those whose subnodes are all leaves
+     *
+     * @return a pair of that node and its penalty, or null if no such node exists
+     */
+    private Pair<RecalDatumNode<T>, Double> getMinPenaltyAboveLeafNode() {
+        if ( isLeaf() )
+            return null; // not allowed to remove leafs directly
+
+        if ( isAboveOnlyLeaves() )
+            return new Pair<RecalDatumNode<T>, Double>(this, getPenalty()); // only nodes above all leaves are candidates
+
+        // otherwise recurse, keeping the candidate with the min penalty over all subnodes
+        Pair<RecalDatumNode<T>, Double> best = null;
+        for ( final RecalDatumNode<T> child : subnodes ) {
+            final Pair<RecalDatumNode<T>, Double> candidate = child.getMinPenaltyAboveLeafNode();
+            if ( candidate == null )
+                continue;
+            // strict < means the first candidate seen wins ties
+            if ( best == null || candidate.getSecond() < best.getSecond() )
+                best = candidate;
+        }
+        return best;
+    }
+
+    /**
+     * Return a rebuilt copy of this tree in which nodeToRemove has been replaced by a node
+     * with the same datum and fixed penalty but no subnodes
+     *
+     * @param nodeToRemove the node (matched by identity) to strip of its subnodes; must not be a leaf
+     * @return the new tree, paired with true if nodeToRemove was found somewhere in this tree
+     */
+    private Pair<RecalDatumNode<T>, Boolean> removeNode(final RecalDatumNode<T> nodeToRemove) {
+        if ( this == nodeToRemove ) {
+            if ( isLeaf() )
+                throw new IllegalStateException("Trying to remove a leaf node from the tree! " + this + " " + nodeToRemove);
+            // this is the node being removed: rebuild it with the same datum and penalty but no subnodes
+            final RecalDatumNode<T> stripped = new RecalDatumNode<T>(getRecalDatum(), fixedPenalty);
+            return new Pair<RecalDatumNode<T>, Boolean>(stripped, true);
+        }
+
+        // rebuild our subnodes, searching one branch at a time
+        // until nodeToRemove has been found in one of them
+        final Set<RecalDatumNode<T>> rebuilt = new HashSet<RecalDatumNode<T>>(getNumSubnodes());
+        boolean found = false;
+
+        for ( final RecalDatumNode<T> child : subnodes ) {
+            if ( ! found ) {
+                // not found yet, so search (and rebuild) this branch
+                final Pair<RecalDatumNode<T>, Boolean> attempt = child.removeNode(nodeToRemove);
+                found = attempt.getSecond();
+                rebuilt.add(attempt.getFirst());
+            } else {
+                // already found elsewhere, so this branch is reused unchanged
+                rebuilt.add(child);
+            }
+        }
+
+        // a new node for this level, whether or not anything below it changed
+        final RecalDatumNode<T> copy = new RecalDatumNode<T>(getRecalDatum(), fixedPenalty, rebuilt);
+        return new Pair<RecalDatumNode<T>, Boolean>(copy, found);
+    }
+
+    /**
+     * Collect the data held by every leaf node of this tree
+     *
+     * @return a newly created collection holding the datum of each leaf
+     */
+    public Collection<T> getAllLeaves() {
+        final LinkedList<T> leaves = new LinkedList<T>();
+        getAllLeavesRec(leaves);
+        return leaves;
+    }
+
+    /**
+     * Recursive worker for getAllLeaves()
+     *
+     * @param list the list that receives the datum of each leaf at or below this node
+     */
+    private void getAllLeavesRec(final LinkedList<T> list) {
+        if ( isLeaf() ) {
+            list.add(getRecalDatum());
+            return;
+        }
+        for ( final RecalDatumNode<T> child : subnodes )
+            child.getAllLeavesRec(list);
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java
similarity index 87%
rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
rename to public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java
index f40a62d53c..2d05877af9 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java
@@ -23,11 +23,13 @@
  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;
 
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.gatk.report.GATKReport;
 import org.broadinstitute.sting.gatk.report.GATKReportTable;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.utils.recalibration.covariates.*;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.R.RScriptExecutor;
 import org.broadinstitute.sting.utils.Utils;
@@ -39,7 +41,6 @@
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.io.Resource;
-import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
@@ -59,7 +60,7 @@
  * This class holds the parsing methods that are shared between CountCovariates and TableRecalibration.
  */
 
-public class RecalDataManager {
+public class RecalUtils {
     public final static String ARGUMENT_REPORT_TABLE_TITLE = "Arguments";
     public final static String QUANTIZED_REPORT_TABLE_TITLE = "Quantized";
     public final static String READGROUP_REPORT_TABLE_TITLE = "RecalTable0";
@@ -85,13 +86,108 @@ public class RecalDataManager {
 
     private static final String SCRIPT_FILE = "BQSR.R";
 
-    private static final Pair<String, String> covariateValue     = new Pair<String, String>(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s");
-    private static final Pair<String, String> covariateName      = new Pair<String, String>(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s");
-    private static final Pair<String, String> eventType          = new Pair<String, String>(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s");
-    private static final Pair<String, String> empiricalQuality   = new Pair<String, String>(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
-    private static final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
-    private static final Pair<String, String> nObservations      = new Pair<String, String>(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
-    private static final Pair<String, String> nErrors            = new Pair<String, String>(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d");
+    private static final Pair<String, String> covariateValue     = new Pair<String, String>(RecalUtils.COVARIATE_VALUE_COLUMN_NAME, "%s");
+    private static final Pair<String, String> covariateName      = new Pair<String, String>(RecalUtils.COVARIATE_NAME_COLUMN_NAME, "%s");
+    private static final Pair<String, String> eventType          = new Pair<String, String>(RecalUtils.EVENT_TYPE_COLUMN_NAME, "%s");
+    private static final Pair<String, String> empiricalQuality   = new Pair<String, String>(RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
+    private static final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
+    private static final Pair<String, String> nObservations      = new Pair<String, String>(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
+    private static final Pair<String, String> nErrors            = new Pair<String, String>(RecalUtils.NUMBER_ERRORS_COLUMN_NAME, "%d");
+
+    /**
+     * Generates two lists : required covariates and optional covariates based on the user's requests.
+     *
+     * Performs the following tasks in order:
+     *  1. Adds all required covariates in order
+     *  2. Adds all the standard covariates, unless the user asked not to use them (DO_NOT_USE_STANDARD_COVARIATES)
+     *  3. Adds all covariates requested by the user that were not already added by the two previous steps
+     *
+     * @param argumentCollection the argument collection object for the recalibration walker
+     * @return a pair of ordered lists : required covariates (first) and optional covariates (second)
+     */
+    public static Pair<ArrayList<Covariate>, ArrayList<Covariate>> initializeCovariates(RecalibrationArgumentCollection argumentCollection) {
+        final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
+        final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
+        final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
+
+        final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses);                   // add the required covariates
+        ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
+        if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES)
+            optionalCovariates = addStandardCovariatesToList(standardClasses);                                          // add the standard covariates unless the user disabled them
+
+        if (argumentCollection.COVARIATES != null) {                                                                    // parse the -cov arguments that were provided, skipping over the ones already specified
+            for (String requestedCovariateString : argumentCollection.COVARIATES) {
+                boolean foundClass = false;
+                for (Class<? extends Covariate> covClass : covariateClasses) {
+                    if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) {                          // -cov argument matches the class name for an implementing class
+                        foundClass = true;
+                        if (!requiredClasses.contains(covClass) &&
+                                (argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
+                            try {
+                                final Covariate covariate = covClass.newInstance();                                     // now that we've found a matching class, try to instantiate it
+                                optionalCovariates.add(covariate);
+                            } catch (Exception e) {
+                                throw new DynamicClassResolutionException(covClass, e);
+                            }
+                        }
+                    }
+                }
+
+                if (!foundClass) {
+                    throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates.");
+                }
+            }
+        }
+        return new Pair<ArrayList<Covariate>, ArrayList<Covariate>>(requiredCovariates, optionalCovariates);
+    }
+
+    /**
+     * Adds the required covariates to a covariate list
+     *
+     * Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand.
+     *
+     * @param classes list of classes to add to the covariate list
+     * @return the covariate list
+     */
+    private static ArrayList<Covariate> addRequiredCovariatesToList(List<Class<? extends RequiredCovariate>> classes) {
+        ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
+        if (classes.size() != 2)
+            throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected");
+
+        dest.add(new ReadGroupCovariate());                                                                             // enforce the order with RG first and QS next.
+        dest.add(new QualityScoreCovariate());
+        return dest;
+    }
+
+    /**
+     * Instantiates every standard covariate class and collects the instances in a list
+     *
+     * @param classes list of classes to add to the covariate list
+     * @return the covariate list, with one instance per entry of classes in iteration order
+     */
+    private static ArrayList<Covariate> addStandardCovariatesToList(List<Class<? extends StandardCovariate>> classes) {
+        final ArrayList<Covariate> standardCovariates = new ArrayList<Covariate>(classes.size());
+        for (final Class<?> standardClass : classes) {
+            try {
+                // instantiate reflectively; any failure is reported against the offending class
+                standardCovariates.add((Covariate) standardClass.newInstance());
+            } catch (Exception e) {
+                throw new DynamicClassResolutionException(standardClass, e);
+            }
+        }
+        return standardCovariates;
+    }
+
+    /** Logs, at info level, the simple class name of every Covariate plugin found, one per line. */
+    public static void listAvailableCovariates(Logger logger) {
+        final List<Class<? extends Covariate>> available = new PluginManager<Covariate>(Covariate.class).getPlugins();
+
+        logger.info("Available covariates:");
+        for (final Class<?> covariateClass : available) {
+            logger.info(covariateClass.getSimpleName());
+        }
+        logger.info(""); // empty message to terminate the listing
+    }
 
 
     public enum SOLID_RECAL_MODE {
@@ -152,64 +248,6 @@ public static SOLID_NOCALL_STRATEGY nocallStrategyFromString(String nocallStrate
         }
     }
 
-    /**
-     * Generates two lists : required covariates and optional covariates based on the user's requests.
-     *
-     * Performs the following tasks in order:
-     *  1. Adds all requierd covariates in order
-     *  2. Check if the user asked to use the standard covariates and adds them all if that's the case
-     *  3. Adds all covariates requested by the user that were not already added by the two previous steps
-     *
-     * @param argumentCollection the argument collection object for the recalibration walker
-     * @return a pair of ordered lists : required covariates (first) and optional covariates (second)
-     */
-    public static Pair<ArrayList<Covariate>, ArrayList<Covariate>> initializeCovariates(RecalibrationArgumentCollection argumentCollection) {
-        final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
-        final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
-        final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
-
-        final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses);                   // add the required covariates
-        ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
-        if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES)
-            optionalCovariates = addStandardCovariatesToList(standardClasses);                                          // add the standard covariates if -standard was specified by the user
-
-        if (argumentCollection.COVARIATES != null) {                                                                    // parse the -cov arguments that were provided, skipping over the ones already specified
-            for (String requestedCovariateString : argumentCollection.COVARIATES) {
-                boolean foundClass = false;
-                for (Class<? extends Covariate> covClass : covariateClasses) {
-                    if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) {                          // -cov argument matches the class name for an implementing class
-                        foundClass = true;
-                        if (!requiredClasses.contains(covClass) &&
-                                (argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
-                            try {
-                                final Covariate covariate = covClass.newInstance();                                     // now that we've found a matching class, try to instantiate it
-                                optionalCovariates.add(covariate);
-                            } catch (Exception e) {
-                                throw new DynamicClassResolutionException(covClass, e);
-                            }
-                        }
-                    }
-                }
-
-                if (!foundClass) {
-                    throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates.");
-                }
-            }
-        }
-        return new Pair<ArrayList<Covariate>, ArrayList<Covariate>>(requiredCovariates, optionalCovariates);
-    }
-
-    public static void listAvailableCovariates(Logger logger) {
-        // Get a list of all available covariates
-        final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
-
-        // Print and exit if that's what was requested
-        logger.info("Available covariates:");
-        for (Class<?> covClass : covariateClasses)
-            logger.info(covClass.getSimpleName());
-        logger.info("");
-    }
-
     private static List<GATKReportTable> generateReportTables(final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
         List<GATKReportTable> result = new LinkedList<GATKReportTable>();
         int reportTableIndex = 0;
@@ -272,8 +310,8 @@ private static List<GATKReportTable> generateReportTables(final RecalibrationTab
                 reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
                 if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
                     reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported());   // we only add the estimated Q reported in the RG table
-                reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.numObservations);
-                reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.numMismatches);
+                reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getNumObservations());
+                reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.getNumMismatches());
 
                 rowIndex++;
             }
@@ -288,9 +326,23 @@ private static String parseCovariateName(final Covariate covariate) {
     }
 
     public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
-        outputRecalibrationReport(RAC.generateReportTable(), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
+        outputRecalibrationReport(RAC.generateReportTable(covariateNames(requestedCovariates)), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
+    }
+
+    /**
+     * Build a human-readable list of the simple class names of the covariates in use
+     *
+     * @param requestedCovariates the covariates to name; must be non-null with no null elements
+     * @return the simple class names joined with "," in array order
+     */
+    public static String covariateNames(final Covariate[] requestedCovariates) {
+        final List<String> simpleNames = new ArrayList<String>(requestedCovariates.length);
+        for ( int i = 0; i < requestedCovariates.length; i++ )
+            simpleNames.add(requestedCovariates[i].getClass().getSimpleName());
+        return Utils.join(",", simpleNames);
     }
 
+
     public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
         outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
     }
@@ -314,14 +366,15 @@ private static Pair<PrintStream, File> initializeRecalibrationPlot(File filename
         return new Pair<PrintStream, File>(deltaTableStream, deltaTableFileName);
     }
 
-    private static void outputRecalibrationPlot(Pair<PrintStream, File> files, boolean keepIntermediates) {
+    private static void outputRecalibrationPlot(final File gatkReportFilename, Pair<PrintStream, File> files, boolean keepIntermediates) {
         final File csvFileName = files.getSecond();
         final File plotFileName = new File(csvFileName + ".pdf");
         files.getFirst().close();
 
         final RScriptExecutor executor = new RScriptExecutor();
-        executor.addScript(new Resource(SCRIPT_FILE, RecalDataManager.class));
+        executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class));
         executor.addArgs(csvFileName.getAbsolutePath());
+        executor.addArgs(gatkReportFilename.getAbsolutePath());
         executor.addArgs(plotFileName.getAbsolutePath());
         executor.exec();
 
@@ -334,14 +387,14 @@ private static void outputRecalibrationPlot(Pair<PrintStream, File> files, boole
     public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
         final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
         writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, true);
-        outputRecalibrationPlot(files, keepIntermediates);
+        outputRecalibrationPlot(filename, files, keepIntermediates);
     }
 
     public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
         final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
         writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", requestedCovariates, true);
         writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, false);
-        outputRecalibrationPlot(files, keepIntermediates);
+        outputRecalibrationPlot(filename, files, keepIntermediates);
     }
 
     private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) {
@@ -471,46 +524,71 @@ public static void parsePlatformForRead(final GATKSAMRecord read, final Recalibr
 
     /**
      * Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are 
-     * inconsistent with the color space. If there is no call in the color space, this method returns true meaning
+     * inconsistent with the color space. If there is a no-call in the color space and the strategy is LEAVE_READ_UNRECALIBRATED or PURGE_READ, this method returns false, meaning
      * this read should be skipped
      *
      * @param strategy the strategy used for SOLID no calls
      * @param read     The SAMRecord to parse
-     * @return whether or not this read should be skipped   
+     * @return true if this read is consistent or false if this read should be skipped
      */
     public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
-        if (ReadUtils.isSOLiDRead(read)) {                                                                              // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
-            if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) {                            // Haven't calculated the inconsistency array yet for this read
-                final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
-                if (attr != null) {
-                    byte[] colorSpace;
-                    if (attr instanceof String)
-                        colorSpace = ((String) attr).getBytes();
-                    else
-                        throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
-                    
-                    byte[] readBases = read.getReadBases();                                                             // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
-                    if (read.getReadNegativeStrandFlag())
-                        readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
-
-                    final byte[] inconsistency = new byte[readBases.length];
-                    int i;
-                    byte prevBase = colorSpace[0];                                                                      // The sentinel
-                    for (i = 0; i < readBases.length; i++) {
-                        final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]);
-                        inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1);
-                        prevBase = readBases[i];
+        if (!ReadUtils.isSOLiDRead(read))                                                                               // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
+            return true;
+
+        if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) {                                      // Haven't calculated the inconsistency array yet for this read
+            final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG);
+            if (attr != null) {
+                byte[] colorSpace;
+                if (attr instanceof String)
+                    colorSpace = ((String) attr).getBytes();
+                else
+                    throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
+
+                final boolean badColor = hasNoCallInColorSpace(colorSpace);
+                if (badColor) {
+                    if (strategy == SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED) {
+                        return false; // can't recalibrate a SOLiD read with no calls in the color space, and the user wants to skip over them
+                    }
+                    else if (strategy == SOLID_NOCALL_STRATEGY.PURGE_READ) {
+                        read.setReadFailsVendorQualityCheckFlag(true);
+                        return false;
                     }
-                    read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
                 }
-                else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION)                                             // if the strategy calls for an exception, throw it
-                    throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
 
-                else
-                    return true;                                                                                       // otherwise, just skip the read
+                byte[] readBases = read.getReadBases();                                                                 // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
+                if (read.getReadNegativeStrandFlag())
+                    readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
+
+                final byte[] inconsistency = new byte[readBases.length];
+                int i;
+                byte prevBase = colorSpace[0];                                                                          // The sentinel
+                for (i = 0; i < readBases.length; i++) {
+                    final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]);
+                    inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1);
+                    prevBase = readBases[i];
+                }
+                read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
+            }
+            else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION)                                                 // if the strategy calls for an exception, throw it
+                throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
+
+            else
+                return false;                                                                                           // otherwise, just skip the read
+        }
+
+        return true;
+    }
+
+    private static boolean hasNoCallInColorSpace(final byte[] colorSpace) {
+        final int length = colorSpace.length;
+        for (int i = 1; i < length; i++) {  // skip the sentinel
+            final byte color = colorSpace[i];
+            if (color != (byte) '0' && color != (byte) '1' && color != (byte) '2' && color != (byte) '3') {
+                return true; // There is a bad color in this SOLiD read
             }
         }
-        return false;
+
+        return false; // There aren't any color no calls in this SOLiD read
     }
 
     /**
@@ -545,7 +623,7 @@ private static byte getNextBaseFromColor(GATKSAMRecord read, final byte prevBase
      * @return Returns true if the base was inconsistent with the color space
      */
     public static boolean isColorSpaceConsistent(final GATKSAMRecord read, final int offset) {
-        final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG);
+        final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG);
         if (attr != null) {
             final byte[] inconsistency = (byte[]) attr;
             // NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
@@ -691,40 +769,4 @@ private static byte performColorThree(byte base) {
     }
 
 
-    /**
-     * Adds the required covariates to a covariate list
-     *
-     * Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand.
-     *
-     * @param classes list of classes to add to the covariate list
-     * @return the covariate list
-     */
-    private static ArrayList<Covariate> addRequiredCovariatesToList(List<Class<? extends RequiredCovariate>> classes) {
-        ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
-        if (classes.size() != 2)
-            throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected");
-
-        dest.add(new ReadGroupCovariate());                                                                             // enforce the order with RG first and QS next.
-        dest.add(new QualityScoreCovariate());
-        return dest;
-    }
-
-    /**
-     * Adds the standard covariates to a covariate list
-     *
-     * @param classes list of classes to add to the covariate list
-     * @return the covariate list
-     */
-    private static ArrayList<Covariate> addStandardCovariatesToList(List<Class<? extends StandardCovariate>> classes) {
-        ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
-        for (Class<?> covClass : classes) {
-            try {
-                final Covariate covariate = (Covariate) covClass.newInstance();
-                dest.add(covariate);
-            } catch (Exception e) {
-                throw new DynamicClassResolutionException(covClass, e);
-            }
-        }
-        return dest;
-    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java
similarity index 82%
rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java
rename to public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java
index e69cf4d69e..e6ab9e38bf 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java
@@ -1,11 +1,12 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;
 
 import org.broadinstitute.sting.gatk.report.GATKReport;
 import org.broadinstitute.sting.gatk.report.GATKReportTable;
+import org.broadinstitute.sting.gatk.walkers.bqsr.*;
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
 import org.broadinstitute.sting.utils.collections.Pair;
-import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
 
 import java.io.File;
 import java.io.PrintStream;
@@ -33,13 +34,13 @@ public class RecalibrationReport {
     public RecalibrationReport(final File RECAL_FILE) {
         final GATKReport report = new GATKReport(RECAL_FILE);
 
-        argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE);
+        argumentTable = report.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE);
         RAC = initializeArgumentCollectionTable(argumentTable);
 
-        GATKReportTable quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE);
+        GATKReportTable quantizedTable = report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE);
         quantizationInfo = initializeQuantizationTable(quantizedTable);
 
-        Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC);       // initialize the required and optional covariates
+        Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC);       // initialize the required and optional covariates
         ArrayList<Covariate> requiredCovariates = covariates.getFirst();
         ArrayList<Covariate> optionalCovariates = covariates.getSecond();
         requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
@@ -57,13 +58,13 @@ public RecalibrationReport(final File RECAL_FILE) {
         for (Covariate cov : requestedCovariates)
             cov.initialize(RAC);                                                                                        // initialize any covariate member variables using the shared argument collection
 
-        recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE)));
+        recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE)));
 
-        parseReadGroupTable(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE));
+        parseReadGroupTable(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE));
 
-        parseQualityScoreTable(report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE));
+        parseQualityScoreTable(report.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE));
 
-        parseAllCovariatesTable(report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables);
+        parseAllCovariatesTable(report.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables);
 
     }
 
@@ -85,7 +86,7 @@ protected RecalibrationReport(final QuantizationInfo quantizationInfo, final Rec
     private int countReadGroups(final GATKReportTable reportTable) {
         Set<String> readGroups = new HashSet<String>();
         for ( int i = 0; i < reportTable.getNumRows(); i++ )
-            readGroups.add(reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME).toString());
+            readGroups.add(reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME).toString());
         return readGroups.size();
     }
 
@@ -139,17 +140,17 @@ public Covariate[] getRequestedCovariates() {
 \     */
     private void parseAllCovariatesTable(final GATKReportTable reportTable, final RecalibrationTables recalibrationTables) {
         for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
-            final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
+            final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
             tempCOVarray[0] = requestedCovariates[0].keyFromValue(rg);
-            final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
+            final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME);
             tempCOVarray[1] = requestedCovariates[1].keyFromValue(qual);
 
-            final String covName = (String)reportTable.get(i, RecalDataManager.COVARIATE_NAME_COLUMN_NAME);
+            final String covName = (String)reportTable.get(i, RecalUtils.COVARIATE_NAME_COLUMN_NAME);
             final int covIndex = optionalCovariateIndexes.get(covName);
-            final Object covValue = reportTable.get(i, RecalDataManager.COVARIATE_VALUE_COLUMN_NAME);
+            final Object covValue = reportTable.get(i, RecalUtils.COVARIATE_VALUE_COLUMN_NAME);
             tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue);
 
-            final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
+            final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
             tempCOVarray[3] = event.index;
 
             recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
@@ -164,11 +165,11 @@ private void parseAllCovariatesTable(final GATKReportTable reportTable, final Re
      */
     private void parseQualityScoreTable(final GATKReportTable reportTable, final NestedIntegerArray<RecalDatum> qualTable) {
         for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
-            final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
+            final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
             tempQUALarray[0] = requestedCovariates[0].keyFromValue(rg);
-            final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
+            final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME);
             tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual);
-            final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
+            final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
             tempQUALarray[2] = event.index;
 
             qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray);
@@ -183,9 +184,9 @@ private void parseQualityScoreTable(final GATKReportTable reportTable, final Nes
      */
     private void parseReadGroupTable(final GATKReportTable reportTable, final NestedIntegerArray<RecalDatum> rgTable) {
         for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
-            final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
+            final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
             tempRGarray[0] = requestedCovariates[0].keyFromValue(rg);
-            final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
+            final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
             tempRGarray[1] = event.index;
 
             rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray);
@@ -193,13 +194,13 @@ private void parseReadGroupTable(final GATKReportTable reportTable, final Nested
     }
 
     private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) {
-        final long nObservations = (Long) reportTable.get(row, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
-        final long nErrors = (Long) reportTable.get(row, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
-        final double empiricalQuality = (Double) reportTable.get(row, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);
+        final long nObservations = (Long) reportTable.get(row, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME);
+        final long nErrors = (Long) reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
+        final double empiricalQuality = (Double) reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME);
 
         final double estimatedQReported = hasEstimatedQReportedColumn ?                                                 // the estimatedQreported column only exists in the ReadGroup table
-                (Double) reportTable.get(row, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) :                      // we get it if we are in the read group table
-                Byte.parseByte((String) reportTable.get(row, RecalDataManager.QUALITY_SCORE_COLUMN_NAME));              // or we use the reported quality if we are in any other table
+                (Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) :                      // we get it if we are in the read group table
+                Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME));              // or we use the reported quality if we are in any other table
 
         final RecalDatum datum = new RecalDatum(nObservations, nErrors, (byte)1);
         datum.setEstimatedQReported(estimatedQReported);
@@ -218,8 +219,8 @@ private QuantizationInfo initializeQuantizationTable(GATKReportTable table) {
         final Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1];
         for ( int i = 0; i < table.getNumRows(); i++ ) {
             final byte originalQual = (byte)i;
-            final Object quantizedObject = table.get(i, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
-            final Object countObject = table.get(i, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
+            final Object quantizedObject = table.get(i, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
+            final Object countObject = table.get(i, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
             final byte quantizedQual = Byte.parseByte(quantizedObject.toString());
             final long quantizedCount = Long.parseLong(countObject.toString());
             quals[originalQual] = quantizedQual;
@@ -239,7 +240,7 @@ private RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKRe
 
         for ( int i = 0; i < table.getNumRows(); i++ ) {
             final String argument = table.get(i, "Argument").toString();
-            Object value = table.get(i, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
+            Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
             if (value.equals("null"))
                 value = null;                                                                                           // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
 
@@ -250,10 +251,10 @@ else if (argument.equals("standard_covs"))
                 RAC.DO_NOT_USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value);
 
             else if (argument.equals("solid_recal_mode"))
-                RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value);
+                RAC.SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.recalModeFromString((String) value);
 
             else if (argument.equals("solid_nocall_strategy"))
-                RAC.SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value);
+                RAC.SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value);
 
             else if (argument.equals("mismatches_context_size"))
                 RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value);
@@ -307,7 +308,7 @@ public void calculateQuantizedQualities() {
     }
 
     public void output(PrintStream output) {
-        RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output);
+        RecalUtils.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output);
     }
 
     public RecalibrationArgumentCollection getRAC() {
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java
index 0416b5eb94..f37e69c9ad 100644
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java
@@ -25,9 +25,7 @@
 
 package org.broadinstitute.sting.utils.recalibration;
 
-import org.broadinstitute.sting.gatk.walkers.bqsr.Covariate;
-import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
-import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDatum;
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
 
 /**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/BinaryTagCovariate.java
similarity index 89%
rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java
rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/BinaryTagCovariate.java
index a89586c2c8..cebdebf9dd 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/BinaryTagCovariate.java
@@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration.covariates;
 
+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java
similarity index 95%
rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java
index 5fe8809fbc..5709442459 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java
@@ -23,8 +23,11 @@
  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration.covariates;
 
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
@@ -41,6 +44,7 @@
  */
 
 public class ContextCovariate implements StandardCovariate {
+    private final static Logger logger = Logger.getLogger(ContextCovariate.class);
 
     private int mismatchesContextSize;
     private int indelsContextSize;
@@ -61,6 +65,9 @@ public class ContextCovariate implements StandardCovariate {
     public void initialize(final RecalibrationArgumentCollection RAC) {
         mismatchesContextSize = RAC.MISMATCHES_CONTEXT_SIZE;
         indelsContextSize = RAC.INDELS_CONTEXT_SIZE;
+
+        logger.info("\t\tContext sizes: base substitution model " + mismatchesContextSize + ", indel substitution model " + indelsContextSize);
+
         if (mismatchesContextSize > MAX_DNA_CONTEXT)
             throw new UserException.BadArgumentValue("mismatches_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, mismatchesContextSize));
         if (indelsContextSize > MAX_DNA_CONTEXT)
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/Covariate.java
similarity index 94%
rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java
rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/Covariate.java
index 1ad5346fa4..c613135bbe 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/Covariate.java
@@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration.covariates;
 
+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 /*
@@ -89,8 +91,3 @@ public interface Covariate {
     public int maximumKeyValue();
 }
 
-interface RequiredCovariate extends Covariate {}
-
-interface StandardCovariate extends Covariate {}
-
-interface ExperimentalCovariate extends Covariate {}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java
similarity index 97%
rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java
rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java
index f0ff8f2bdc..cdf12d2848 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java
@@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration.covariates;
 
+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.NGSPlatform;
 import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -49,7 +51,7 @@ public class CycleCovariate implements StandardCovariate {
 
     private static final int MAXIMUM_CYCLE_VALUE = 1000;
     private static final int CUSHION_FOR_INDELS = 4;
-    private static String default_platform = null;
+    private String default_platform = null;
 
     private static final EnumSet<NGSPlatform> DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS);
     private static final EnumSet<NGSPlatform> FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT);
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java
new file mode 100644
index 0000000000..72df2a4103
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java
@@ -0,0 +1,9 @@
+package org.broadinstitute.sting.utils.recalibration.covariates;
+
+/**
+ * Marker interface for covariates classified as experimental.
+ * <p>
+ * Declares no members of its own; everything is inherited from {@link Covariate}.
+ * </p>
+ */
+public interface ExperimentalCovariate extends Covariate {}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java
similarity index 92%
rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java
rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java
index dd7060ff85..3ef8ee9310 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java
@@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration.covariates;
 
+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java
similarity index 94%
rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java
rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java
index f04d27b7a6..85568dac95 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java
@@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration.covariates;
 
+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java
new file mode 100644
index 0000000000..50755dbcf4
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java
@@ -0,0 +1,12 @@
+package org.broadinstitute.sting.utils.recalibration.covariates;
+
+/**
+ * Marker interface that tags a {@link Covariate} implementation as required.
+ * <p>
+ * It declares no members of its own: everything an implementing class must provide is
+ * inherited from {@link Covariate}.
+ * </p>
+ * NOTE(review): presumably required covariates are always used, whatever covariates the user
+ * selects -- confirm against the code that looks for this interface.
+ */
+public interface RequiredCovariate extends Covariate {}
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java
new file mode 100644
index 0000000000..444954f255
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java
@@ -0,0 +1,12 @@
+package org.broadinstitute.sting.utils.recalibration.covariates;
+
+/**
+ * Marker interface that tags a {@link Covariate} implementation as standard.
+ * <p>
+ * It declares no members of its own: everything an implementing class must provide is
+ * inherited from {@link Covariate}.
+ * </p>
+ * NOTE(review): presumably standard covariates form the set that is used when the user does
+ * not pick covariates explicitly -- confirm against the code that looks for this interface.
+ */
+public interface StandardCovariate extends Covariate {}
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java
index e5e747c2d5..4f1e66ba2b 100644
--- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java
@@ -35,10 +35,10 @@
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
+import org.broadinstitute.sting.utils.recalibration.EventType;
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.BitSet;
 
 
 public class AlignmentUtils {
@@ -70,9 +70,18 @@ public static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int ref
             if (readIdx > endOnRead) break;
 
             CigarElement ce = c.getCigarElement(i);
+            final int elementLength = ce.getLength();
             switch (ce.getOperator()) {
+                case X:
+                    mc.numMismatches += elementLength;
+                    for (int j = 0; j < elementLength; j++)
+                        mc.mismatchQualities += r.getBaseQualities()[readIdx+j];
+                case EQ:
+                    refIndex += elementLength;
+                    readIdx += elementLength;
+                break;
                 case M:
-                    for (int j = 0; j < ce.getLength(); j++, refIndex++, readIdx++) {
+                    for (int j = 0; j < elementLength; j++, refIndex++, readIdx++) {
                         if (refIndex >= refSeq.length)
                             continue;
                         if (readIdx < startOnRead) continue;
@@ -91,11 +100,11 @@ public static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int ref
                     break;
                 case I:
                 case S:
-                    readIdx += ce.getLength();
+                    readIdx += elementLength;
                     break;
                 case D:
                 case N:
-                    refIndex += ce.getLength();
+                    refIndex += elementLength;
                     break;
                 case H:
                 case P:
@@ -163,6 +172,8 @@ public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, b
             CigarElement ce = c.getCigarElement(i);
             int cigarElementLength = ce.getLength();
             switch (ce.getOperator()) {
+                case EQ:
+                case X:
                 case M:
                     for (int j = 0; j < cigarElementLength; j++, readIndex++, currentPos++) {
                         // are we past the ref window?
@@ -203,111 +214,6 @@ public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, b
         return sum;
     }
 
-    /**
-     * Returns the number of mismatches in the pileup element within the given reference context.
-     *
-     * @param read          the SAMRecord
-     * @param ref           the reference context
-     * @param maxMismatches the maximum number of surrounding mismatches we tolerate to consider a base good
-     * @param windowSize    window size (on each side) to test
-     * @return a bitset representing which bases are good
-     */
-    public static BitSet mismatchesInRefWindow(SAMRecord read, ReferenceContext ref, int maxMismatches, int windowSize) {
-        // first determine the positions with mismatches
-        int readLength = read.getReadLength();
-        BitSet mismatches = new BitSet(readLength);
-
-        // it's possible we aren't starting at the beginning of a read,
-        //  and we don't need to look at any of the previous context outside our window
-        //  (although we do need future context)
-        int readStartPos = Math.max(read.getAlignmentStart(), ref.getLocus().getStart() - windowSize);
-        int currentReadPos = read.getAlignmentStart();
-
-        byte[] refBases = ref.getBases();
-        int refIndex = readStartPos - ref.getWindow().getStart();
-        if (refIndex < 0) {
-            throw new IllegalStateException("When calculating mismatches, we somehow don't have enough previous reference context for read " + read.getReadName() + " at position " + ref.getLocus());
-        }
-
-        byte[] readBases = read.getReadBases();
-        int readIndex = 0;
-
-        Cigar c = read.getCigar();
-
-        for (int i = 0; i < c.numCigarElements(); i++) {
-            CigarElement ce = c.getCigarElement(i);
-            int cigarElementLength = ce.getLength();
-            switch (ce.getOperator()) {
-                case M:
-                    for (int j = 0; j < cigarElementLength; j++, readIndex++) {
-                        // skip over unwanted bases
-                        if (currentReadPos++ < readStartPos)
-                            continue;
-
-                        // this is possible if reads extend beyond the contig end
-                        if (refIndex >= refBases.length)
-                            break;
-
-                        byte refChr = refBases[refIndex];
-                        byte readChr = readBases[readIndex];
-                        if (readChr != refChr)
-                            mismatches.set(readIndex);
-
-                        refIndex++;
-                    }
-                    break;
-                case I:
-                case S:
-                    readIndex += cigarElementLength;
-                    break;
-                case D:
-                case N:
-                    if (currentReadPos >= readStartPos)
-                        refIndex += cigarElementLength;
-                    currentReadPos += cigarElementLength;
-                    break;
-                case H:
-                case P:
-                    break;
-            }
-        }
-
-        // all bits are set to false by default
-        BitSet result = new BitSet(readLength);
-
-        int currentPos = 0, leftPos = 0, rightPos;
-        int mismatchCount = 0;
-
-        // calculate how many mismatches exist in the windows to the left/right
-        for (rightPos = 1; rightPos <= windowSize && rightPos < readLength; rightPos++) {
-            if (mismatches.get(rightPos))
-                mismatchCount++;
-        }
-        if (mismatchCount <= maxMismatches)
-            result.set(currentPos);
-
-        // now, traverse over the read positions
-        while (currentPos < readLength) {
-            // add a new rightmost position
-            if (rightPos < readLength && mismatches.get(rightPos++))
-                mismatchCount++;
-            // re-penalize the previous position
-            if (mismatches.get(currentPos++))
-                mismatchCount++;
-            // don't penalize the current position
-            if (mismatches.get(currentPos))
-                mismatchCount--;
-            // subtract the leftmost position
-            if (leftPos < currentPos - windowSize && mismatches.get(leftPos++))
-                mismatchCount--;
-
-            if (mismatchCount <= maxMismatches)
-                result.set(currentPos);
-        }
-
-        return result;
-    }
-
     /**
      * Returns number of alignment blocks (continuous stretches of aligned bases) in the specified alignment.
      * This method follows closely the SAMRecord::getAlignmentBlocks() implemented in samtools library, but
@@ -366,43 +272,40 @@ public static int getNumHardClippedBases(final SAMRecord r) {
         return n;
     }
 
-    public static byte[] alignmentToByteArray(final Cigar cigar, final byte[] read, final byte[] ref) {
+    public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) {
 
-        final byte[] alignment = new byte[read.length];
-        int refPos = 0;
+        int numHQSoftClips = 0;
         int alignPos = 0;
+        final Cigar cigar = read.getCigar();
+        final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION );
 
-        for (int iii = 0; iii < cigar.numCigarElements(); iii++) {
+        for( int iii = 0; iii < cigar.numCigarElements(); iii++ ) {
 
             final CigarElement ce = cigar.getCigarElement(iii);
             final int elementLength = ce.getLength();
 
-            switch (ce.getOperator()) {
-                case I:
+            switch( ce.getOperator() ) {
                 case S:
-                    for (int jjj = 0; jjj < elementLength; jjj++) {
-                        alignment[alignPos++] = '+';
+                    for( int jjj = 0; jjj < elementLength; jjj++ ) {
+                        if( qual[alignPos++] > qualThreshold ) { numHQSoftClips++; }
                     }
                     break;
-                case D:
-                case N:
-                    refPos += elementLength;
-                    break;
                 case M:
-                    for (int jjj = 0; jjj < elementLength; jjj++) {
-                        alignment[alignPos] = ref[refPos];
-                        alignPos++;
-                        refPos++;
-                    }
+                case I:
+                case EQ:
+                case X:
+                    alignPos += elementLength;
                     break;
                 case H:
                 case P:
+                case D:
+                case N:
                     break;
                 default:
                     throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
             }
         }
-        return alignment;
+        return numHQSoftClips;
     }
 
     public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) {
@@ -441,7 +344,6 @@ public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offs
                     }
                     break;
                 case D:
-                case N:
                     if (!isDeletion) {
                         alignmentPos += elementLength;
                     } else {
@@ -454,6 +356,8 @@ public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offs
                     }
                     break;
                 case M:
+                case EQ:
+                case X:
                     if (pos + elementLength - 1 >= pileupOffset) {
                         return alignmentPos + (pileupOffset - pos);
                     } else {
@@ -463,6 +367,7 @@ public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offs
                     break;
                 case H:
                 case P:
+                case N:
                     break;
                 default:
                     throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
@@ -481,16 +386,15 @@ public static byte[] readToAlignmentByteArray(final Cigar cigar, final byte[] re
             final int elementLength = ce.getLength();
 
             switch (ce.getOperator()) {
-                case I:
-                case S:
-                    break;
                 case D:
                 case N:
-                    alignmentLength += elementLength;
-                    break;
                 case M:
+                case EQ:
+                case X:
                     alignmentLength += elementLength;
                     break;
+                case I:
+                case S:
                 case H:
                 case P:
                     break;
@@ -533,6 +437,8 @@ public static byte[] readToAlignmentByteArray(final Cigar cigar, final byte[] re
                     }
                     break;
                 case M:
+                case EQ:
+                case X:
                     for (int jjj = 0; jjj < elementLength; jjj++) {
                         alignment[alignPos] = read[readPos];
                         alignPos++;
@@ -766,6 +672,8 @@ private static byte[] createIndelString(final Cigar cigar, final int indexOfInde
 
             switch (ce.getOperator()) {
                 case M:
+                case EQ:
+                case X:
                     readIndex += length;
                     refIndex += length;
                     totalRefBases += length;
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java
index df1ff2a0eb..849a7ddeea 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java
@@ -13,7 +13,7 @@
  */
 public class GATKSAMReadGroupRecord extends SAMReadGroupRecord {
 
-    public static String LANE_TAG = "LN";
+    public static final String LANE_TAG = "LN";
     
     // the SAMReadGroupRecord data we're caching
     private String mSample = null;
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
index 659615cf4f..c9b3a2df86 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
@@ -25,7 +25,7 @@
 package org.broadinstitute.sting.utils.sam;
 
 import net.sf.samtools.*;
-import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
+import org.broadinstitute.sting.utils.recalibration.EventType;
 import org.broadinstitute.sting.utils.NGSPlatform;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
index 6b9ba79b4a..bd908727f8 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
@@ -53,8 +53,26 @@ public class ReadUtils {
     private ReadUtils() {
     }
 
-    private static int DEFAULT_ADAPTOR_SIZE = 100;
-    public static int CLIPPING_GOAL_NOT_REACHED = -1;
+    private static final int DEFAULT_ADAPTOR_SIZE = 100;
+    public static final int CLIPPING_GOAL_NOT_REACHED = -1;
+
+    /**
+     * Mean of the values in a read's reduced read counts array
+     *
+     * @param read the read to inspect
+     * @return 1 if read.isReducedRead() is false; otherwise the sum of read.getReducedReadCounts()
+     *         divided by its length, rounded to the nearest int
+     */
+    public static int getMeanRepresentativeReadCount(GATKSAMRecord read) {
+        if (read.isReducedRead()) {
+            final byte[] reducedCounts = read.getReducedReadCounts();
+            final double meanCount = (double) MathUtils.sum(reducedCounts) / reducedCounts.length;
+            return (int) Math.round(meanCount);
+        }
+
+        // not a reduced read: the count is 1
+        return 1;
+    }
 
     /**
      * A marker to tell which end of the read has been clipped
diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactory.java b/public/java/src/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactory.java
new file mode 100644
index 0000000000..39d5c14978
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactory.java
@@ -0,0 +1,318 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package org.broadinstitute.sting.utils.threading;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Invariant;
+import org.apache.log4j.Logger;
+import org.apache.log4j.Priority;
+import org.broadinstitute.sting.utils.AutoFormattingTime;
+
+import java.lang.management.ManagementFactory;
+import java.lang.management.ThreadInfo;
+import java.lang.management.ThreadMXBean;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.EnumMap;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ThreadFactory;
+
+/**
+ * A ThreadFactory whose threads report how long they spent running, blocked and waiting
+ *
+ * Every thread created here is a TrackingThread.  When its runnable finishes, the thread's
+ * blocked and waited times are read from a ThreadMXBean (via ThreadInfo) and added to the
+ * per-state totals kept in times, and countDownLatch is decremented so that callers of
+ * waitForAllThreadsToComplete() are released once nThreadsToCreate threads have reported.
+ *
+ * User: depristo
+ * Date: 8/14/12
+ * Time: 8:47 AM
+ */
+@Invariant({
+        "activeThreads.size() <= nThreadsToCreate",
+        "countDownLatch.getCount() <= nThreadsToCreate",
+        "nThreadsToCreated <= nThreadsToCreate"
+})
+public class StateMonitoringThreadFactory implements ThreadFactory  {
+    protected static final boolean DEBUG = false;
+    private static final Logger logger = Logger.getLogger(StateMonitoringThreadFactory.class);
+    public static final List<Thread.State> TRACKED_STATES = Arrays.asList(Thread.State.BLOCKED, Thread.State.RUNNABLE, Thread.State.WAITING);
+
+    // todo -- it would be nice to not have to specify upfront the number of threads.
+    // todo -- can we dynamically increment countDownLatch? It seems not...
+    final int nThreadsToCreate;
+    final List<Thread> activeThreads;
+    final EnumMap<Thread.State, Long> times = new EnumMap<Thread.State, Long>(Thread.State.class);
+
+    // number of threads handed out by newThread() so far (i.e., threads created, despite the name)
+    int nThreadsToCreated = 0;
+
+    /**
+     * The bean used to get the thread info about blocked and waiting times
+     */
+    final ThreadMXBean bean;
+
+    /**
+     * Counts down the number of threads whose runtime info hasn't been incorporated into
+     * times.  Counts down from nThreadsToCreate to 0, at which point any code waiting
+     * on the final times is freed to run.
+     */
+    final CountDownLatch countDownLatch;
+
+    /**
+     * Instead of RUNNABLE we want to print running.  This map goes from Thread.State names to human readable ones
+     */
+    final static EnumMap<Thread.State, String> PRETTY_NAMES = new EnumMap<Thread.State, String>(Thread.State.class);
+    static {
+        PRETTY_NAMES.put(Thread.State.RUNNABLE, "running");
+        PRETTY_NAMES.put(Thread.State.BLOCKED,  "blocked");
+        PRETTY_NAMES.put(Thread.State.WAITING,  "waiting");
+    }
+
+    /**
+     * Create a new factory generating threads whose runtime and contention
+     * behavior is tracked in this factory.
+     *
+     * @param nThreadsToCreate the number of threads we will create in the factory before it's considered complete
+     *                         // TODO -- remove argument when we figure out how to implement this capability
+     * @throws IllegalArgumentException if nThreadsToCreate is not positive
+     */
+    public StateMonitoringThreadFactory(final int nThreadsToCreate) {
+        if ( nThreadsToCreate <= 0 ) throw new IllegalArgumentException("nThreadsToCreate <= 0: " + nThreadsToCreate);
+
+        this.nThreadsToCreate = nThreadsToCreate;
+        activeThreads = new ArrayList<Thread>(nThreadsToCreate);
+
+        // initialize times to 0
+        for ( final Thread.State state : Thread.State.values() )
+            times.put(state, 0L);
+
+        // get the bean, and start tracking
+        bean = ManagementFactory.getThreadMXBean();
+        if ( bean.isThreadContentionMonitoringSupported() ) {
+            bean.setThreadContentionMonitoringEnabled(true);
+        } else {
+            logger.warn("Thread contention monitoring not supported, we cannot track GATK multi-threaded efficiency");
+            //bean.setThreadCpuTimeEnabled(true);
+        }
+
+        countDownLatch = new CountDownLatch(nThreadsToCreate);
+    }
+
+    /**
+     * Get the time spent in state across all threads created by this factory
+     *
+     * @param state one of the TRACKED_STATES
+     * @return the time in milliseconds
+     */
+    @Ensures({"result >= 0", "TRACKED_STATES.contains(state)"})
+    public synchronized long getStateTime(final Thread.State state) {
+        return times.get(state);
+    }
+
+    /**
+     * Get the total time spent in all states across all threads created by this factory
+     *
+     * @return the time in milliseconds
+     */
+    @Ensures({"result >= 0"})
+    public synchronized long getTotalTime() {
+        long total = 0;
+        for ( final long time : times.values() )
+            total += time;
+        return total;
+    }
+
+    /**
+     * Get the fraction of time spent in state across all threads created by this factory
+     *
+     * @param state one of the TRACKED_STATES
+     * @return the fraction (0.0-1.0) of time spent in state over all state times of all threads
+     */
+    @Ensures({"result >= 0.0", "result <= 1.0", "TRACKED_STATES.contains(state)"})
+    public synchronized double getStateFraction(final Thread.State state) {
+        return getStateTime(state) / (1.0 * Math.max(getTotalTime(), 1));
+    }
+
+    /**
+     * How many threads have been created by this factory so far?
+     * Synchronized because newThread() updates the counter under the factory lock.
+     * @return the number of threads handed out by newThread() so far
+     */
+    @Ensures("result >= 0")
+    public synchronized int getNThreadsCreated() {
+        return nThreadsToCreated;
+    }
+
+    /**
+     * Block until countDownLatch reaches zero, i.e. until nThreadsToCreate threads have finished and reported
+     *
+     * @throws InterruptedException if the calling thread is interrupted while waiting
+     */
+    public void waitForAllThreadsToComplete() throws InterruptedException {
+        countDownLatch.await();
+    }
+
+    @Override
+    public synchronized String toString() {
+        final StringBuilder b = new StringBuilder();
+
+        b.append("total ").append(getTotalTime()).append(" ");
+        for ( final Thread.State state : TRACKED_STATES ) {
+            b.append(state).append(" ").append(getStateTime(state)).append(" ");
+        }
+
+        return b.toString();
+    }
+
+    /**
+     * Print usage information about threads from this factory to logger
+     * with the INFO priority
+     *
+     * @param logger the logger to write the usage information to
+     */
+    public synchronized void printUsageInformation(final Logger logger) {
+        printUsageInformation(logger, Priority.INFO);
+    }
+
+    /**
+     * Print usage information about threads from this factory to logger
+     * with the provided priority
+     *
+     * @param logger the logger to write the usage information to
+     * @param priority the log4j priority to log each line with
+     */
+    public synchronized void printUsageInformation(final Logger logger, final Priority priority) {
+        logger.log(priority, "Number of activeThreads used: " + getNThreadsCreated());
+        logger.log(priority, "Total runtime " + new AutoFormattingTime(getTotalTime() / 1000.0));
+        for ( final Thread.State state : TRACKED_STATES ) {
+            logger.log(priority, String.format("  Fraction of time spent %s is %.2f (%s)",
+                    prettyName(state), getStateFraction(state), new AutoFormattingTime(getStateTime(state) / 1000.0)));
+        }
+        logger.log(priority, String.format("Efficiency of multi-threading: %.2f%% of time spent doing productive work",
+                getStateFraction(Thread.State.RUNNABLE) * 100));
+    }
+
+    /** Look up the human readable name of state in PRETTY_NAMES (null if the state has no entry) */
+    private String prettyName(final Thread.State state) {
+        return PRETTY_NAMES.get(state);
+    }
+
+    /**
+     * Create a new thread from this factory
+     *
+     * @param runnable the code the new thread will run
+     * @return a new, not yet started, thread that reports its timing to this factory when runnable completes
+     * @throws IllegalStateException if activeThreads already holds nThreadsToCreate unfinished threads
+     */
+    @Override
+    @Ensures({
+            "activeThreads.size() > old(activeThreads.size())",
+            "activeThreads.contains(result)",
+            "nThreadsToCreated == old(nThreadsToCreated) + 1"
+    })
+    public synchronized Thread newThread(final Runnable runnable) {
+        if ( activeThreads.size() >= nThreadsToCreate)
+            throw new IllegalStateException("Attempting to create more activeThreads than allowed by constructor argument nThreadsToCreate " + nThreadsToCreate);
+
+        nThreadsToCreated++;
+        final Thread myThread = new TrackingThread(runnable);
+        activeThreads.add(myThread);
+        return myThread;
+    }
+
+    /**
+     * Update the information about completed thread that ran for runtime in milliseconds
+     *
+     * This method updates all of the key timing and tracking information in the factory so that
+     * thread can be retired.  After this call the factory shouldn't have a pointer to the thread any longer
+     *
+     * @param thread the thread that has finished running
+     * @param runtimeInMilliseconds the wall-clock time thread spent in run(), in milliseconds
+     */
+    @Ensures({
+            "activeThreads.size() < old(activeThreads.size())",
+            "! activeThreads.contains(thread)",
+            "getTotalTime() >= old(getTotalTime())",
+            "countDownLatch.getCount() < old(countDownLatch.getCount())"
+    })
+    private synchronized void threadIsDone(final Thread thread, final long runtimeInMilliseconds) {
+        if ( DEBUG ) logger.warn("  Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName());
+        if ( DEBUG ) logger.warn("UpdateThreadInfo called");
+
+        final ThreadInfo info = bean.getThreadInfo(thread.getId());
+        if ( info != null ) {
+            if ( DEBUG ) logger.warn("Updating thread total runtime " + runtimeInMilliseconds + " of which blocked " + info.getBlockedTime() + " and waiting " + info.getWaitedTime());
+            // getBlockedTime() and getWaitedTime() return -1 when thread contention monitoring is
+            // disabled, so clamp at 0 to keep the accumulated times (and their contracts) non-negative
+            final long blockedTime = Math.max(info.getBlockedTime(), 0L);
+            final long waitedTime = Math.max(info.getWaitedTime(), 0L);
+            incTimes(Thread.State.BLOCKED, blockedTime);
+            incTimes(Thread.State.WAITING, waitedTime);
+            incTimes(Thread.State.RUNNABLE, Math.max(runtimeInMilliseconds - waitedTime - blockedTime, 0L));
+        }
+
+        // remove the thread from the list of active threads
+        if ( ! activeThreads.remove(thread) )
+            throw new IllegalStateException("Thread " + thread + " not in list of active activeThreads");
+
+        // one less thread is live for those blocking on all threads to be complete
+        countDownLatch.countDown();
+        if ( DEBUG ) logger.warn("  -> Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName());
+    }
+
+    /**
+     * Helper function that adds by to the accumulated time for state
+     *
+     * @param state the thread state whose accumulated time should be increased
+     * @param by the amount of time to add, in milliseconds
+     */
+    private synchronized void incTimes(final Thread.State state, final long by) {
+        times.put(state, times.get(state) + by);
+    }
+
+    /**
+     * A wrapper around Thread that tracks the runtime of the thread and calls threadIsDone() when complete
+     */
+    private class TrackingThread extends Thread {
+        private TrackingThread(Runnable runnable) {
+            super(runnable);
+        }
+
+        @Override
+        public void run() {
+            final long startTime = System.currentTimeMillis();
+            try {
+                super.run();
+            } finally {
+                // report even if the runnable throws; otherwise countDownLatch is never decremented
+                // and waitForAllThreadsToComplete() would block forever
+                final long endTime = System.currentTimeMillis();
+                threadIsDone(this, endTime - startTime);
+            }
+        }
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/package-info.java b/public/java/src/org/broadinstitute/sting/utils/threading/package-info.java
index dc350920e0..d72dad4714 100644
--- a/public/java/src/org/broadinstitute/sting/utils/threading/package-info.java
+++ b/public/java/src/org/broadinstitute/sting/utils/threading/package-info.java
@@ -1,4 +1,4 @@
 /**
- * Provides tools for managing threads, thread pools, and parallelization in general.
+ * Provides tools for managing threads, thread pools, and parallelization in general.
  */
 package org.broadinstitute.sting.utils.threading;
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java
index 2e1770581b..2c312678e6 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java
@@ -1,9 +1,9 @@
 package org.broadinstitute.sting.utils.variantcontext;
 
-import java.util.ArrayList;
+import org.broadinstitute.sting.utils.BaseUtils;
+
 import java.util.Arrays;
 import java.util.Collection;
-import java.util.List;
 
 /**
  * Immutable representation of an allele
@@ -77,32 +77,36 @@ public class Allele implements Comparable<Allele> {
     private static final byte[] EMPTY_ALLELE_BASES = new byte[0];
 
     private boolean isRef = false;
-    private boolean isNull = false;
     private boolean isNoCall = false;
     private boolean isSymbolic = false;
 
     private byte[] bases = null;
 
-    public final static String NULL_ALLELE_STRING = "-";
     public final static String NO_CALL_STRING = ".";
     /** A generic static NO_CALL allele for use */
 
     // no public way to create an allele
     private Allele(byte[] bases, boolean isRef) {
-        // standardize our representation of null allele and bases
+        // null alleles are no longer allowed
         if ( wouldBeNullAllele(bases) ) {
-            bases = EMPTY_ALLELE_BASES;
-            isNull = true;
-        } else if ( wouldBeNoCallAllele(bases) ) {
-            bases = EMPTY_ALLELE_BASES;
+            throw new IllegalArgumentException("Null alleles are not supported");
+        }
+
+        // no-calls are represented as no bases
+        if ( wouldBeNoCallAllele(bases) ) {
+            this.bases = EMPTY_ALLELE_BASES;
             isNoCall = true;
             if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
-        } else if ( wouldBeSymbolicAllele(bases) ) {
+            return;
+        }
+
+        if ( wouldBeSymbolicAllele(bases) ) {
             isSymbolic = true;
             if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele");
         }
-//        else
-//            bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance
+        else {
+            bases = BaseUtils.convertToUpperCase(bases);
+        }
 
         this.isRef = isRef;
         this.bases = bases;
@@ -126,8 +130,6 @@ private Allele(String bases, boolean isRef) {
     private final static Allele ALT_T = new Allele("T", false);
     private final static Allele REF_N = new Allele("N", true);
     private final static Allele ALT_N = new Allele("N", false);
-    private final static Allele REF_NULL = new Allele(NULL_ALLELE_STRING, true);
-    private final static Allele ALT_NULL = new Allele(NULL_ALLELE_STRING, false);
     public final static Allele NO_CALL = new Allele(NO_CALL_STRING, false);
 
     // ---------------------------------------------------------------------------------------------------------
@@ -154,7 +156,6 @@ public static Allele create(byte[] bases, boolean isRef) {
                 case '.':
                     if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
                     return NO_CALL;
-                case '-': return isRef ? REF_NULL : ALT_NULL;
                 case 'A': case 'a' : return isRef ? REF_A : ALT_A;
                 case 'C': case 'c' : return isRef ? REF_C : ALT_C;
                 case 'G': case 'g' : return isRef ? REF_G : ALT_G;
@@ -179,14 +180,9 @@ public static Allele create(byte base) {
     public static Allele extend(Allele left, byte[] right) {
         if (left.isSymbolic())
             throw new IllegalArgumentException("Cannot extend a symbolic allele");
-        byte[] bases = null;
-        if ( left.length() == 0 )
-            bases = right;
-        else {
-            bases = new byte[left.length() + right.length];
-            System.arraycopy(left.getBases(), 0, bases, 0, left.length());
-            System.arraycopy(right, 0, bases, left.length(), right.length);
-        }
+        byte[] bases = new byte[left.length() + right.length];
+        System.arraycopy(left.getBases(), 0, bases, 0, left.length());
+        System.arraycopy(right, 0, bases, left.length(), right.length);
 
         return create(bases, left.isReference());
     }
@@ -242,7 +238,10 @@ public static boolean acceptableAlleleBases(byte[] bases) {
     }
     
     public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAcceptable) {
-        if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) )
+        if ( wouldBeNullAllele(bases) )
+            return false;
+
+        if ( wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) )
             return true;
 
         for (byte base :  bases ) {
@@ -299,11 +298,6 @@ public static Allele create(byte[] bases) {
     //
     // ---------------------------------------------------------------------------------------------------------
 
-    //Returns true if this is the null allele
-    public boolean isNull()             { return isNull; }
-    // Returns true if this is not the null allele
-    public boolean isNonNull()          { return ! isNull(); }
-
     // Returns true if this is the NO_CALL allele
     public boolean isNoCall()           { return isNoCall; }
     // Returns true if this is not the NO_CALL allele
@@ -319,7 +313,7 @@ public static Allele create(byte[] bases) {
 
     // Returns a nice string representation of this object
     public String toString() {
-        return (isNull() ? NULL_ALLELE_STRING : ( isNoCall() ? NO_CALL_STRING : getDisplayString() )) + (isReference() ? "*" : "");
+        return ( isNoCall() ? NO_CALL_STRING : getDisplayString() ) + (isReference() ? "*" : "");
     }
 
     /**
@@ -384,27 +378,27 @@ public int hashCode() {
      * @return true if this and other are equal
      */
     public boolean equals(Allele other, boolean ignoreRefState) {
-        return this == other || (isRef == other.isRef || ignoreRefState) && isNull == other.isNull && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases));
+        return this == other || (isRef == other.isRef || ignoreRefState) && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases));
     }
 
     /**
      * @param test  bases to test against
      *
-     * @return  true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
+     * @return  true if this Allele contains the same bases as test, regardless of its reference status; handles NO_CALL alleles
      */
     public boolean basesMatch(byte[] test) { return !isSymbolic && (bases == test || Arrays.equals(bases, test)); }
 
     /**
      * @param test  bases to test against
      *
-     * @return  true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
+     * @return  true if this Allele contains the same bases as test, regardless of its reference status; handles NO_CALL alleles
      */
     public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); }
 
     /**
      * @param test  allele to test against
      *
-     * @return  true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
+     * @return  true if this Allele contains the same bases as test, regardless of its reference status; handles NO_CALL alleles
      */
     public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); }
 
@@ -421,10 +415,6 @@ public int length() {
     //
     // ---------------------------------------------------------------------------------------------------------
 
-    public static Allele getMatchingAllele(Collection<Allele> allAlleles, String alleleBases) {
-        return getMatchingAllele(allAlleles, alleleBases.getBytes());
-    }
-
     public static Allele getMatchingAllele(Collection<Allele> allAlleles, byte[] alleleBases) {
         for ( Allele a : allAlleles ) {
             if ( a.basesMatch(alleleBases) ) {
@@ -438,26 +428,6 @@ public static Allele getMatchingAllele(Collection<Allele> allAlleles, byte[] all
             return null;    // couldn't find anything
     }
 
-    public static List<Allele> resolveAlleles(List<Allele> possibleAlleles, List<String> alleleStrings) {
-        List<Allele> myAlleles = new ArrayList<Allele>(alleleStrings.size());
-
-        for ( String alleleString : alleleStrings ) {
-            Allele allele = getMatchingAllele(possibleAlleles, alleleString);
-
-            if ( allele == null ) {
-                if ( Allele.wouldBeNoCallAllele(alleleString.getBytes()) ) {
-                    allele = create(alleleString);
-                } else {
-                    throw new IllegalArgumentException("Allele " + alleleString + " not present in the list of alleles " + possibleAlleles);
-                }
-            }
-
-            myAlleles.add(allele);
-        }
-
-        return myAlleles;
-    }
-
     public int compareTo(Allele other) {
         if ( isReference() && other.isNonReference() )
             return -1;
@@ -468,9 +438,6 @@ else if ( isNonReference() && other.isReference() )
     }
 
     public static boolean oneIsPrefixOfOther(Allele a1, Allele a2) {
-        if ( a1.isNull() || a2.isNull() )
-            return true;
-
         if ( a2.length() >= a1.length() )
             return firstIsPrefixOfSecond(a1, a2);
         else
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java
index fb0d7140da..127f916778 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java
@@ -216,6 +216,7 @@ public double getAttributeAsDouble(String key, double defaultValue) {
         Object x = getAttribute(key);
         if ( x == null ) return defaultValue;
         if ( x instanceof Double ) return (Double)x;
+        if ( x instanceof Integer ) return (Integer)x;
         return Double.valueOf((String)x); // throws an exception if this isn't a string
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java
index d528bf0e43..4a7df9da44 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java
@@ -172,7 +172,7 @@ public Map<String, Object> getExtendedAttributes() {
      * @param values
      * @return
      */
-    private final static boolean validADorPLField(final int[] values) {
+    private static boolean validADorPLField(final int[] values) {
         if ( values != null )
             for ( int v : values )
                 if ( v < 0 )
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java
index e3bef6bc5c..0ee32fa2e2 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java
@@ -53,8 +53,6 @@
  */
 @Invariant({"alleles != null"})
 public final class GenotypeBuilder {
-    public static boolean MAKE_FAST_BY_DEFAULT = true;
-
     private String sampleName = null;
     private List<Allele> alleles = Collections.emptyList();
 
@@ -67,8 +65,6 @@ public final class GenotypeBuilder {
     private String filters = null;
     private int initialAttributeMapSize = 5;
 
-    private boolean useFast = MAKE_FAST_BY_DEFAULT;
-
     private final static Map<String, Object> NO_ATTRIBUTES =
             Collections.unmodifiableMap(new HashMap<String, Object>(0));
 
@@ -78,31 +74,22 @@ public final class GenotypeBuilder {
     //
     // -----------------------------------------------------------------
 
-    public final static Genotype create(final String sampleName, final List<Allele> alleles) {
+    public static Genotype create(final String sampleName, final List<Allele> alleles) {
         return new GenotypeBuilder(sampleName, alleles).make();
     }
 
-    public final static Genotype create(final String sampleName,
+    public static Genotype create(final String sampleName,
                                         final List<Allele> alleles,
                                         final Map<String, Object> attributes) {
         return new GenotypeBuilder(sampleName, alleles).attributes(attributes).make();
     }
 
-    protected final static Genotype create(final String sampleName,
+    protected static Genotype create(final String sampleName,
                                            final List<Allele> alleles,
                                            final double[] gls) {
         return new GenotypeBuilder(sampleName, alleles).PL(gls).make();
     }
 
-    public final static Genotype create(final String sampleName,
-                                        final List<Allele> alleles,
-                                        final double log10Perror,
-                                        final Map<String, Object> attributes) {
-        return new GenotypeBuilder(sampleName, alleles)
-                .GQ(log10Perror == SlowGenotype.NO_LOG10_PERROR ? -1 : (int)(log10Perror * -10))
-                .attributes(attributes).make();
-    }
-
     /**
      * Create a empty builder.  Both a sampleName and alleles must be provided
      * before trying to make a Genotype from this builder.
@@ -182,23 +169,8 @@ public final void reset(final boolean keepSampleName) {
      */
     @Ensures({"result != null"})
     public Genotype make() {
-        if ( useFast ) {
-            final Map<String, Object> ea = extendedAttributes == null ? NO_ATTRIBUTES : extendedAttributes;
-            return new FastGenotype(sampleName, alleles, isPhased, GQ, DP, AD, PL, filters, ea);
-        } else {
-            final Map<String, Object> attributes = new LinkedHashMap<String, Object>();
-            if ( extendedAttributes != null ) attributes.putAll(extendedAttributes);
-            final double log10PError = GQ == -1 ? SlowGenotype.NO_LOG10_PERROR : (GQ == 0 ? 0 : GQ / -10.0);
-            if ( DP != -1 ) attributes.put(VCFConstants.DEPTH_KEY, DP);
-            if ( AD != null ) attributes.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, AD);
-            final double[] log10likelihoods = PL != null ? GenotypeLikelihoods.fromPLs(PL).getAsVector() : null;
-            return new SlowGenotype(sampleName, alleles, log10PError, filters, attributes, isPhased, log10likelihoods);
-        }
-    }
-
-    public GenotypeBuilder useFast(boolean useFast) {
-        this.useFast = useFast;
-        return this;
+        final Map<String, Object> ea = extendedAttributes == null ? NO_ATTRIBUTES : extendedAttributes;
+        return new FastGenotype(sampleName, alleles, isPhased, GQ, DP, AD, PL, filters, ea);
     }
 
     /**
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java
index d644eda7de..7b4256b709 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java
@@ -401,7 +401,7 @@ public static GenotypeLikelihoodsAllelePair getAllelePair(final int PLindex) {
     }
 
     // An index conversion from the deprecated PL ordering to the new VCF-based ordering for up to 3 alternate alleles
-    protected static int[] PLindexConversion = new int[]{0, 1, 3, 6, 2, 4, 7, 5, 8, 9};
+    protected static final int[] PLindexConversion = new int[]{0, 1, 3, 6, 2, 4, 7, 5, 8, 9};
 
     /**
      * get the allele index pair for the given PL using the deprecated PL ordering:
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/SlowGenotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/SlowGenotype.java
deleted file mode 100755
index c3f027484f..0000000000
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/SlowGenotype.java
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (c) 2012, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.utils.variantcontext;
-
-
-import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-
-import java.util.*;
-
-/**
- * This class encompasses all the basic information about a genotype.  It is immutable.
- *
- * @author Mark DePristo
- */
-@Deprecated
-public class SlowGenotype extends Genotype {
-    protected CommonInfo commonInfo;
-    public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
-    protected List<Allele> alleles = null;
-    protected boolean isPhased = false;
-
-    protected SlowGenotype(final String sampleName,
-                           final List<Allele> alleles,
-                           final double log10PError,
-                           final String filters,
-                           final Map<String, Object> attributes,
-                           final boolean isPhased,
-                           final double[] log10Likelihoods) {
-        super(sampleName, filters);
-
-        if ( alleles == null || alleles.isEmpty() )
-            this.alleles = Collections.emptyList();
-        else
-            this.alleles = Collections.unmodifiableList(alleles);
-        commonInfo = new CommonInfo(sampleName, log10PError, Collections.<String>emptySet(), attributes);
-        if ( log10Likelihoods != null )
-            commonInfo.putAttribute(VCFConstants.GENOTYPE_PL_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
-        this.isPhased = isPhased;
-        validate();
-    }
-
-    @Override public List<Allele> getAlleles() {
-        return alleles;
-    }
-
-    @Override public Allele getAllele(int i) {
-        if ( getType() == GenotypeType.UNAVAILABLE )
-            throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
-        return alleles.get(i);
-    }
-
-    @Override public boolean isPhased() { return isPhased; }
-
-    //
-    // Useful methods for getting genotype likelihoods for a genotype object, if present
-    //
-    @Override public boolean hasLikelihoods() {
-        return (commonInfo.hasAttribute(VCFConstants.GENOTYPE_PL_KEY) && !commonInfo.getAttribute(VCFConstants.GENOTYPE_PL_KEY).equals(VCFConstants.MISSING_VALUE_v4)) ||
-                (commonInfo.hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && !commonInfo.getAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4));
-    }
-
-    @Override public GenotypeLikelihoods getLikelihoods() {
-        GenotypeLikelihoods x = getLikelihoods(VCFConstants.GENOTYPE_PL_KEY, true);
-        if ( x != null )
-            return x;
-        else {
-            x = getLikelihoods(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, false);
-            return x;
-        }
-    }
-
-    private GenotypeLikelihoods getLikelihoods(String key, boolean asPL) {
-        Object x = commonInfo.getAttribute(key);
-        if ( x instanceof String ) {
-            if ( asPL )
-                return GenotypeLikelihoods.fromPLField((String)x);
-            else
-                return GenotypeLikelihoods.fromGLField((String)x);
-        }
-        else if ( x instanceof GenotypeLikelihoods ) return (GenotypeLikelihoods)x;
-        else return null;
-    }
-
-    private final void validate() {
-        if ( alleles.size() == 0) return;
-
-        for ( Allele allele : alleles ) {
-            if ( allele == null )
-                throw new IllegalArgumentException("BUG: allele cannot be null in Genotype");
-        }
-    }
-
-    // ---------------------------------------------------------------------------------------------------------
-    // 
-    // get routines to access context info fields
-    //
-    // ---------------------------------------------------------------------------------------------------------
-    @Override public boolean hasLog10PError()     { return commonInfo.hasLog10PError(); }
-    @Override public double getLog10PError()      { return commonInfo.getLog10PError(); }
-
-    @Override
-    public boolean hasExtendedAttribute(String key)     { return commonInfo.hasAttribute(key); }
-
-    @Override
-    public Object getExtendedAttribute(String key)      { return commonInfo.getAttribute(key); }
-
-    @Override
-    public Object getExtendedAttribute(String key, Object defaultValue) {
-        return commonInfo.getAttribute(key, defaultValue); 
-    }
-
-//    public String getAttributeAsString(String key, String defaultValue)   { return commonInfo.getAttributeAsString(key, defaultValue); }
-//    public int getAttributeAsInt(String key, int defaultValue)            { return commonInfo.getAttributeAsInt(key, defaultValue); }
-//    public double getAttributeAsDouble(String key, double  defaultValue)  { return commonInfo.getAttributeAsDouble(key, defaultValue); }
-//    public boolean getAttributeAsBoolean(String key, boolean  defaultValue)  { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
-
-    @Override
-    public int[] getPL() {
-        return hasPL() ? getLikelihoods().getAsPLs() : null;
-    }
-
-    @Override
-    public boolean hasPL() {
-        return hasLikelihoods();
-    }
-
-    @Override
-    public int getDP() {
-        return commonInfo.getAttributeAsInt(VCFConstants.DEPTH_KEY, -1);
-    }
-
-    @Override
-    public boolean hasDP() {
-        return commonInfo.hasAttribute(VCFConstants.DEPTH_KEY);
-    }
-
-    @Override
-    public int[] getAD() {
-        if ( hasAD() ) {
-            return (int[])commonInfo.getAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
-        } else
-            return null;
-    }
-
-    @Override
-    public boolean hasAD() {
-        return commonInfo.hasAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
-    }
-
-    @Override
-    public int getGQ() {
-        if ( commonInfo.hasLog10PError() )
-            return (int)Math.round(commonInfo.getPhredScaledQual());
-        else
-            return -1;
-    }
-
-    @Override
-    public boolean hasGQ() {
-        return hasLog10PError();
-    }
-
-    @Override
-    public Map<String, Object> getExtendedAttributes() {
-        final Map<String, Object> ea = new LinkedHashMap<String, Object>(commonInfo.getAttributes());
-        for ( final String primary : FastGenotype.PRIMARY_KEYS )
-            ea.remove(primary);
-        return ea;
-    }
-}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
index dcdd95d007..1fe6b86522 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
@@ -188,8 +188,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
     @Deprecated // ID is no longer stored in the attributes map
     private final static String ID_KEY = "ID";
 
-    private final Byte REFERENCE_BASE_FOR_INDEL;
-
     public final static Set<String> PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet<String>());
 
     /** The location of this VariantContext */
@@ -228,7 +226,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
     // ---------------------------------------------------------------------------------------------------------
 
     public enum Validation {
-        REF_PADDING,
         ALLELES,
         GENOTYPES
     }
@@ -250,7 +247,7 @@ protected VariantContext(VariantContext other) {
         this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd(),
                 other.getAlleles(), other.getGenotypes(), other.getLog10PError(),
                 other.getFiltersMaybeNull(),
-                other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL,
+                other.getAttributes(),
                 other.fullyDecoded, NO_VALIDATION);
     }
 
@@ -266,7 +263,6 @@ protected VariantContext(VariantContext other) {
      * @param log10PError  qual
      * @param filters         filters: use null for unfiltered and empty set for passes filters
      * @param attributes      attributes
-     * @param referenceBaseForIndel   padded reference base
      * @param validationToPerform     set of validation steps to take
      */
     protected VariantContext(final String source,
@@ -279,7 +275,6 @@ protected VariantContext(final String source,
                              final double log10PError,
                              final Set<String> filters,
                              final Map<String, Object> attributes,
-                             final Byte referenceBaseForIndel,
                              final boolean fullyDecoded,
                              final EnumSet<Validation> validationToPerform ) {
         if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
@@ -292,7 +287,6 @@ protected VariantContext(final String source,
         this.ID = ID.equals(VCFConstants.EMPTY_ID_FIELD) ? VCFConstants.EMPTY_ID_FIELD : ID;
 
         this.commonInfo = new CommonInfo(source, log10PError, filters, attributes);
-        REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel;
 
         // todo -- remove me when this check is no longer necessary
         if ( this.commonInfo.hasAttribute(ID_KEY) )
@@ -340,11 +334,14 @@ protected VariantContext(final String source,
      * in this VC is returned as the set of alleles in the subContext, even if
      * some of those alleles aren't in the samples
      *
-     * @param sampleNames
-     * @return
+     * WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING.
+     *
+     * @param sampleNames    the sample names
+     * @param rederiveAllelesFromGenotypes if true, restricts the returned alleles to just those in use by the samples; true should be the default
+     * @return new VariantContext subsetting to just the given samples
      */
     public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
-        if ( sampleNames.containsAll(getSampleNames()) ) {
+        if ( sampleNames.containsAll(getSampleNames()) && ! rederiveAllelesFromGenotypes ) {
             return this; // fast path when you don't have any work to do
         } else {
             VariantContextBuilder builder = new VariantContextBuilder(this);
@@ -360,8 +357,18 @@ public VariantContext subContextFromSamples(Set<String> sampleNames, final boole
         }
     }
 
+    /**
+     * Delegates to {@link #subContextFromSamples(java.util.Set, boolean)} with rederiveAllelesFromGenotypes = true
+     *
+     * @param sampleNames    the sample names, passed through unchanged to the two-argument overload
+     * @return whatever the two-argument overload returns for these samples
+     */
+    public VariantContext subContextFromSamples(final Set<String> sampleNames) {
+        return subContextFromSamples(sampleNames, true);
+    }
+
     public VariantContext subContextFromSample(String sampleName) {
-        return subContextFromSamples(Collections.singleton(sampleName), true);
+        return subContextFromSamples(Collections.singleton(sampleName));
     }
 
     /**
@@ -501,7 +508,7 @@ public Type getType() {
      */
     public boolean isSimpleInsertion() {
         // can't just call !isSimpleDeletion() because of complex indels
-        return getType() == Type.INDEL && getReference().isNull() && isBiallelic();
+        return getType() == Type.INDEL && isBiallelic() && getReference().length() == 1;
     }
 
     /**
@@ -509,7 +516,7 @@ public boolean isSimpleInsertion() {
      */
     public boolean isSimpleDeletion() {
         // can't just call !isSimpleInsertion() because of complex indels
-        return getType() == Type.INDEL && getAlternateAllele(0).isNull() && isBiallelic();
+        return getType() == Type.INDEL && isBiallelic() && getAlternateAllele(0).length() == 1;
     }
 
     /**
@@ -553,22 +560,6 @@ public String getID() {
         return ID;
     }
 
-    public boolean hasReferenceBaseForIndel() {
-        return REFERENCE_BASE_FOR_INDEL != null;
-    }
-
-    // the indel base that gets stripped off for indels
-    public Byte getReferenceBaseForIndel() {
-        return REFERENCE_BASE_FOR_INDEL;
-    }
-
-    public String getAlleleStringWithRefPadding(final Allele allele) {
-        if ( VCFAlleleClipper.needsPadding(this) )
-            return VCFAlleleClipper.padAllele(this, allele).getDisplayString();
-        else
-            return allele.getDisplayString();
-    }
-
 
     // ---------------------------------------------------------------------------------------------------------
     //
@@ -808,8 +799,8 @@ public Iterable<Genotype> getGenotypesOrderedBy(Iterable<String> sampleOrdering)
      * Returns a map from sampleName -> Genotype for the genotype associated with sampleName.  Returns a map
      * for consistency with the multi-get function.
      *
-     * @param sampleName
-     * @return
+     * @param sampleName   the sample name
+     * @return mapping from sample name to genotype
      * @throws IllegalArgumentException if sampleName isn't bound to a genotype
      */
     public GenotypesContext getGenotypes(String sampleName) {
@@ -823,7 +814,7 @@ public GenotypesContext getGenotypes(String sampleName) {
      * For testing convenience only
      *
      * @param sampleNames a unique list of sample names
-     * @return
+     * @return subsetting genotypes context
      * @throws IllegalArgumentException if sampleName isn't bound to a genotype
      */
     protected GenotypesContext getGenotypes(Collection<String> sampleNames) {
@@ -1011,13 +1002,13 @@ public int getMixedCount() {
     /**
      * Run all extra-strict validation tests on a Variant Context object
      *
-     * @param reference        the true reference allele
-     * @param paddedRefBase    the reference base used for padding indels
-     * @param rsIDs            the true dbSNP IDs
+     * @param reportedReference   the reported reference allele
+     * @param observedReference   the actual reference allele
+     * @param rsIDs               the true dbSNP IDs
      */
-    public void extraStrictValidation(Allele reference, Byte paddedRefBase, Set<String> rsIDs) {
+    public void extraStrictValidation(final Allele reportedReference, final Allele observedReference, final Set<String> rsIDs) {
         // validate the reference
-        validateReferenceBases(reference, paddedRefBase);
+        validateReferenceBases(reportedReference, observedReference);
 
         // validate the RS IDs
         validateRSIDs(rsIDs);
@@ -1032,18 +1023,9 @@ public void extraStrictValidation(Allele reference, Byte paddedRefBase, Set<Stri
         //checkReferenceTrack();
     }
 
-    public void validateReferenceBases(Allele reference, Byte paddedRefBase) {
-        if ( reference == null )
-            return;
-
-        // don't validate if we're a complex event
-        if ( !isComplexIndel() && !reference.isNull() && !reference.basesMatch(getReference()) ) {
-            throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString()));
-        }
-
-        // we also need to validate the padding base for simple indels
-        if ( hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase) ) {
-            throw new TribbleException.InternalCodecException(String.format("the padded REF base is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), (char)paddedRefBase.byteValue(), (char)getReferenceBaseForIndel().byteValue()));
+    public void validateReferenceBases(final Allele reportedReference, final Allele observedReference) {
+        if ( reportedReference != null && !reportedReference.basesMatch(observedReference) ) {
+            throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), observedReference.getBaseString(), reportedReference.getBaseString()));
         }
     }
 
@@ -1135,7 +1117,6 @@ private boolean validate(final EnumSet<Validation> validationToPerform) {
         for (final Validation val : validationToPerform ) {
             switch (val) {
                 case ALLELES: validateAlleles(); break;
-                case REF_PADDING: validateReferencePadding(); break;
                 case GENOTYPES: validateGenotypes(); break;
                 default: throw new IllegalArgumentException("Unexpected validation mode " + val);
             }
@@ -1151,8 +1132,7 @@ private void validateStop() {
         if ( hasAttribute(VCFConstants.END_KEY) ) {
             final int end = getAttributeAsInt(VCFConstants.END_KEY, -1);
             assert end != -1;
-            if ( end != getEnd() && end != getEnd() + 1 ) {
-                // the end is allowed to 1 bigger because of the padding
+            if ( end != getEnd() ) {
                 final String message = "Badly formed variant context at location " + getChr() + ":"
                         + getStart() + "; getEnd() was " + getEnd()
                         + " but this VariantContext contains an END key with value " + end;
@@ -1161,23 +1141,19 @@ private void validateStop() {
                 else
                     throw new ReviewedStingException(message);
             }
+        } else {
+            final long length = (stop - start) + 1;
+            if ( ! hasSymbolicAlleles() && length != getReference().length() ) {
+                throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
+            }
         }
     }
 
-    private void validateReferencePadding() {
-        if ( hasSymbolicAlleles() ) // symbolic alleles don't need padding...
-            return;
-
-        boolean needsPadding = (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed
+    private void validateAlleles() {
 
-        if ( needsPadding && !hasReferenceBaseForIndel() )
-            throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":" + getStart() + "; no padded reference base was provided.");
-    }
+        boolean alreadySeenRef = false;
 
-    private void validateAlleles() {
-        // check alleles
-        boolean alreadySeenRef = false, alreadySeenNull = false;
-        for ( Allele allele : alleles ) {
+        for ( final Allele allele : alleles ) {
             // make sure there's only one reference allele
             if ( allele.isReference() ) {
                 if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this);
@@ -1187,26 +1163,11 @@ private void validateAlleles() {
             if ( allele.isNoCall() ) {
                 throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this);
             }
-
-            // make sure there's only one null allele
-            if ( allele.isNull() ) {
-                if ( alreadySeenNull ) throw new IllegalArgumentException("BUG: Received two null alleles in VariantContext " + alleles + " this=" + this);
-                alreadySeenNull = true;
-            }
         }
 
         // make sure there's one reference allele
         if ( ! alreadySeenRef )
             throw new IllegalArgumentException("No reference allele found in VariantContext");
-
-//        if ( getType() == Type.INDEL ) {
-//            if ( getReference().length() != (getLocation().size()-1) ) {
-        long length = (stop - start) + 1;
-        if ( ! hasSymbolicAlleles()
-                && ((getReference().isNull() && length != 1 )
-                    || (getReference().isNonNull() && (length - getReference().length()  > 1)))) {
-            throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
-        }
     }
 
     private void validateGenotypes() {
@@ -1390,7 +1351,7 @@ private final void fullyDecodeInfo(final VariantContextBuilder builder, final VC
     private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object> attributes,
                                                             final VCFHeader header,
                                                             final boolean lenientDecoding) {
-        final Map<String, Object> newAttributes = new HashMap<String, Object>(attributes.size());
+        final Map<String, Object> newAttributes = new HashMap<String, Object>(10);
 
         for ( final Map.Entry<String, Object> attr : attributes.entrySet() ) {
             final String field = attr.getKey();
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java
index f2375f6f9e..d8ab4bd236 100644
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java
@@ -25,9 +25,6 @@
 package org.broadinstitute.sting.utils.variantcontext;
 
 import com.google.java.contract.*;
-import org.broad.tribble.Feature;
-import org.broad.tribble.TribbleException;
-import org.broad.tribble.util.ParsingUtils;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@@ -74,7 +71,6 @@ public class VariantContextBuilder {
     private Set<String> filters = null;
     private Map<String, Object> attributes = null;
     private boolean attributesCanBeModified = false;
-    private Byte referenceBaseForIndel = null;
 
     /** enum of what must be validated */
     final private EnumSet<VariantContext.Validation> toValidate = EnumSet.noneOf(VariantContext.Validation.class);
@@ -117,7 +113,6 @@ public VariantContextBuilder(VariantContext parent) {
         this.genotypes = parent.genotypes;
         this.ID = parent.getID();
         this.log10PError = parent.getLog10PError();
-        this.referenceBaseForIndel = parent.getReferenceBaseForIndel();
         this.source = parent.getSource();
         this.start = parent.getStart();
         this.stop = parent.getEnd();
@@ -132,7 +127,6 @@ public VariantContextBuilder(VariantContextBuilder parent) {
         this.genotypes = parent.genotypes;
         this.ID = parent.ID;
         this.log10PError = parent.log10PError;
-        this.referenceBaseForIndel = parent.referenceBaseForIndel;
         this.source = parent.source;
         this.start = parent.start;
         this.stop = parent.stop;
@@ -362,21 +356,6 @@ public VariantContextBuilder log10PError(final double log10PError) {
         return this;
     }
 
-    /**
-     * Tells us that the resulting VariantContext should use this byte for the reference base
-     * Null means no refBase is available
-     * @param referenceBaseForIndel
-     */
-    public VariantContextBuilder referenceBaseForIndel(final Byte referenceBaseForIndel) {
-        this.referenceBaseForIndel = referenceBaseForIndel;
-        toValidate.add(VariantContext.Validation.REF_PADDING);
-        return this;
-    }
-
-    public VariantContextBuilder referenceBaseForIndel(final String referenceBaseForIndel) {
-        return referenceBaseForIndel(referenceBaseForIndel.getBytes()[0]);
-    }
-
     /**
      * Tells us that the resulting VariantContext should have source field set to source
      * @param source
@@ -401,7 +380,6 @@ public VariantContextBuilder loc(final String contig, final long start, final lo
         this.start = start;
         this.stop = stop;
         toValidate.add(VariantContext.Validation.ALLELES);
-        toValidate.add(VariantContext.Validation.REF_PADDING);
         return this;
     }
 
@@ -416,7 +394,6 @@ public VariantContextBuilder loc(final GenomeLoc loc) {
         this.start = loc.getStart();
         this.stop = loc.getStop();
         toValidate.add(VariantContext.Validation.ALLELES);
-        toValidate.add(VariantContext.Validation.REF_PADDING);
         return this;
     }
 
@@ -440,7 +417,6 @@ public VariantContextBuilder chr(final String contig) {
     public VariantContextBuilder start(final long start) {
         this.start = start;
         toValidate.add(VariantContext.Validation.ALLELES);
-        toValidate.add(VariantContext.Validation.REF_PADDING);
         return this;
     }
 
@@ -517,6 +493,6 @@ public VariantContextBuilder fullyDecoded(boolean isFullyDecoded) {
     public VariantContext make() {
         return new VariantContext(source, ID, contig, start, stop, alleles,
                 genotypes, log10PError, filters, attributes,
-                referenceBaseForIndel, fullyDecoded, toValidate);
+                fullyDecoded, toValidate);
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
index d7e0729800..d7e4a71358 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
@@ -64,9 +64,9 @@ public class VariantContextUtils {
      * Ensures that VC contains all of the samples in allSamples by adding missing samples to
      * the resulting VC with default diploid ./. genotypes
      *
-     * @param vc
-     * @param allSamples
-     * @return
+     * @param vc            the VariantContext
+     * @param allSamples    all of the samples needed
+     * @return a new VariantContext with missing samples added
      */
     public static VariantContext addMissingSamples(final VariantContext vc, final Set<String> allSamples) {
         // TODO -- what's the fastest way to do this calculation?
@@ -376,9 +376,9 @@ private final static Map<String, Object> subsetAttributes(final CommonInfo igc,
 
     /**
      * @deprecated use variant context builder version instead
-     * @param vc
-     * @param keysToPreserve
-     * @return
+     * @param vc                  the variant context
+     * @param keysToPreserve      the keys to preserve
+     * @return a pruned version of the original variant context
      */
     @Deprecated
     public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) {
@@ -486,14 +486,13 @@ public static VariantContext simpleMerge(final GenomeLocParser genomeLocParser,
         if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE )
             verifyUniqueSampleNames(unsortedVCs);
 
-        final List<VariantContext> prepaddedVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
+        final List<VariantContext> preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
         // Make sure all variant contexts are padded with reference base in case of indels if necessary
         final List<VariantContext> VCs = new ArrayList<VariantContext>();
 
-        for (final VariantContext vc : prepaddedVCs) {
-            // also a reasonable place to remove filtered calls, if needed
+        for (final VariantContext vc : preFilteredVCs) {
             if ( ! filteredAreUncalled || vc.isNotFiltered() )
-                VCs.add(VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc));
+                VCs.add(vc);
         }
         if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
             return null;
@@ -505,8 +504,8 @@ public static VariantContext simpleMerge(final GenomeLocParser genomeLocParser,
         Byte referenceBaseForIndel = null;
 
         final Set<Allele> alleles = new LinkedHashSet<Allele>();
-        final Set<String> filters = new TreeSet<String>();
-        final Map<String, Object> attributes = new TreeMap<String, Object>();
+        final Set<String> filters = new HashSet<String>();
+        final Map<String, Object> attributes = new LinkedHashMap<String, Object>();
         final Set<String> inconsistentAttributes = new HashSet<String>();
         final Set<String> variantSources = new HashSet<String>(); // contains the set of sources we found in our set of VCs that are variant
         final Set<String> rsIDs = new LinkedHashSet<String>(1); // most of the time there's one id
@@ -514,8 +513,8 @@ public static VariantContext simpleMerge(final GenomeLocParser genomeLocParser,
         GenomeLoc loc = getLocation(genomeLocParser,first);
         int depth = 0;
         int maxAC = -1;
-        final Map<String, Object> attributesWithMaxAC = new TreeMap<String, Object>();
-        double log10PError = 1;
+        final Map<String, Object> attributesWithMaxAC = new LinkedHashMap<String, Object>();
+        double log10PError = CommonInfo.NO_LOG10_PERROR;
         VariantContext vcWithMaxAC = null;
         GenotypesContext genotypes = GenotypesContext.create();
 
@@ -543,13 +542,12 @@ public static VariantContext simpleMerge(final GenomeLocParser genomeLocParser,
 
             mergeGenotypes(genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY);
 
-            log10PError = Math.min(log10PError, vc.isVariant() ? vc.getLog10PError() : 1);
+            // We always take the QUAL of the first VC with a non-MISSING qual for the combined value
+            if ( log10PError == CommonInfo.NO_LOG10_PERROR )
+                log10PError =  vc.getLog10PError();
 
             filters.addAll(vc.getFilters());
 
-            if ( referenceBaseForIndel == null )
-                referenceBaseForIndel = vc.getReferenceBaseForIndel();
-
             //
             // add attributes
             //
@@ -660,11 +658,11 @@ else if ( variantSources.isEmpty() )    // everyone was reference
         builder.alleles(alleles);
         builder.genotypes(genotypes);
         builder.log10PError(log10PError);
-        builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes);
-        builder.referenceBaseForIndel(referenceBaseForIndel);
+        builder.filters(filters.isEmpty() ? filters : new TreeSet<String>(filters));
+        builder.attributes(new TreeMap<String, Object>(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes));
 
         // Trim the padded bases of all alleles if necessary
-        final VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make());
+        final VariantContext merged = builder.make();
         if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
         return merged;
     }
@@ -700,73 +698,6 @@ public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
         return true;
     }
 
-    private static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
-        // see if we need to trim common reference base from all alleles
-        boolean trimVC;
-
-        // We need to trim common reference base from all alleles in all genotypes if a ref base is common to all alleles
-        Allele refAllele = inputVC.getReference();
-        if (!inputVC.isVariant())
-            trimVC = false;
-        else if (refAllele.isNull())
-            trimVC = false;
-        else {
-            trimVC = VCFAlleleClipper.shouldClipFirstBaseP(inputVC.getAlternateAlleles(), (byte) inputVC.getReference().getDisplayString().charAt(0));
-         }
-
-        // nothing to do if we don't need to trim bases
-        if (trimVC) {
-            List<Allele> alleles = new ArrayList<Allele>();
-            GenotypesContext genotypes = GenotypesContext.create();
-
-            Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
-
-            for (final Allele a : inputVC.getAlleles()) {
-                if (a.isSymbolic()) {
-                    alleles.add(a);
-                    originalToTrimmedAlleleMap.put(a, a);
-                } else {
-                    // get bases for current allele and create a new one with trimmed bases
-                    byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
-                    Allele trimmedAllele = Allele.create(newBases, a.isReference());
-                    alleles.add(trimmedAllele);
-                    originalToTrimmedAlleleMap.put(a, trimmedAllele);
-                }
-            }
-
-            // detect case where we're trimming bases but resulting vc doesn't have any null allele. In that case, we keep original representation
-            // example: mixed records such as {TA*,TGA,TG}
-            boolean hasNullAlleles = false;
-
-            for (final Allele a: originalToTrimmedAlleleMap.values()) {
-                if (a.isNull())
-                    hasNullAlleles = true;
-             }
-
-             if (!hasNullAlleles)
-               return inputVC;
-           // now we can recreate new genotypes with trimmed alleles
-            for ( final Genotype genotype : inputVC.getGenotypes() ) {
-
-                List<Allele> originalAlleles = genotype.getAlleles();
-                List<Allele> trimmedAlleles = new ArrayList<Allele>();
-                for ( final Allele a : originalAlleles ) {
-                    if ( a.isCalled() )
-                        trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
-                    else
-                        trimmedAlleles.add(Allele.NO_CALL);
-                }
-                genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
-
-            }
-
-            final VariantContextBuilder builder = new VariantContextBuilder(inputVC);
-            return builder.alleles(alleles).genotypes(genotypes).referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])).make();
-        }
-
-        return inputVC;
-    }
-
     public static GenotypesContext stripPLs(GenotypesContext genotypes) {
         GenotypesContext newGs = GenotypesContext.create(genotypes.size());
 
@@ -801,7 +732,7 @@ public static Map<VariantContext.Type, List<VariantContext>> separateVariantCont
                         vcList.remove(k);
                         // avoid having empty lists
                         if (vcList.size() == 0)
-                            mappedVCs.remove(vcList);
+                            mappedVCs.remove(type);
                         if ( !mappedVCs.containsKey(vc.getType()) )
                             mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
                         mappedVCs.get(vc.getType()).add(otherVC);
@@ -819,7 +750,7 @@ else if (allelesAreSubset(vc,otherVC)) {
                 if ( !mappedVCs.containsKey(vc.getType()) )
                     mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
                 mappedVCs.get(vc.getType()).add(vc);
-                }
+            }
         }
 
         return mappedVCs;
@@ -881,10 +812,10 @@ static private AlleleMapper resolveIncompatibleAlleles(Allele refAllele, Variant
             //
             // refAllele: ACGTGA
             // myRef:     ACGT
-            // myAlt:     -
+            // myAlt:     A
             //
             // We need to remap all of the alleles in vc to include the extra GA so that
-            // myRef => refAllele and myAlt => GA
+            // myRef => refAllele and myAlt => AGA
             //
 
             Allele myRef = vc.getReference();
@@ -979,7 +910,7 @@ public static VariantContext reverseComplement(VariantContext vc) {
         HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
         for ( Allele originalAllele : vc.getAlleles() ) {
             Allele newAllele;
-            if ( originalAllele.isNoCall() || originalAllele.isNull() )
+            if ( originalAllele.isNoCall() )
                 newAllele = originalAllele;
             else
                 newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference());
@@ -1235,13 +1166,14 @@ public static Pair<List<Integer>,byte[]> getNumTandemRepeatUnits(final VariantCo
         if ( ! vc.isIndel() ) // only indels are tandem repeats
             return null;
 
-        final Allele ref = vc.getReference();
+        final Allele refAllele = vc.getReference();
+        final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());
 
         byte[] repeatUnit = null;
         final ArrayList<Integer> lengths = new ArrayList<Integer>();
 
         for ( final Allele allele : vc.getAlternateAlleles() ) {
-            Pair<int[],byte[]> result = getNumTandemRepeatUnits(ref.getBases(), allele.getBases(), refBasesStartingAtVCWithoutPad.getBytes());
+            Pair<int[],byte[]> result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes());
 
             final int[] repetitionCount = result.first;
             // repetition count = 0 means allele is not a tandem expansion of context
@@ -1256,7 +1188,7 @@ public static Pair<List<Integer>,byte[]> getNumTandemRepeatUnits(final VariantCo
             repeatUnit = result.second;
             if (VERBOSE) {
                 System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad);
-                System.out.println("Ref:"+ref.toString()+" Count:" + String.valueOf(repetitionCount[0]));
+                System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0]));
                 System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1]));
                 System.out.println("RU:"+new String(repeatUnit));
             }
@@ -1405,4 +1337,113 @@ public static int computeEndFromAlleles(final List<Allele> alleles, final int st
             return start + Math.max(ref.length() - 1, 0);
         }
     }
+
+    /**
+     * Reports whether some allele is empty, or would become empty once the leading bases common to all
+     * alleles are clipped (alleles are scanned in list order, one base position at a time).
+     *
+     * @param alleles the allele strings to inspect
+     * @return true if a padding base is required; false otherwise, including for an empty list
+     */
+    public static boolean requiresPaddingBase(final List<String> alleles) {
+        // no alleles: nothing can be clipped to null, and the scan below would otherwise never terminate
+        if ( alleles.isEmpty() )
+            return false;
+        for ( final String allele : alleles ) {
+            if ( allele.isEmpty() )
+                return true;
+        }
+        for ( int clipping = 0; ; clipping++ ) {
+            Character currentBase = null; // base seen at this position, taken from the first allele
+            for ( final String allele : alleles ) {
+                if ( allele.length() == clipping )
+                    return true; // this allele is fully consumed by the shared prefix
+                final char myBase = allele.charAt(clipping);
+                if ( currentBase == null )
+                    currentBase = myBase;
+                else if ( currentBase != myBase )
+                    return false; // alleles diverge at this position, so clipping stops here
+            }
+        }
+    }
+
+    /**
+     * Clips the trailing bases shared by every non-symbolic allele of a VariantContext.
+     *
+     * TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed
+     *
+     * @param inputVC the VariantContext whose alleles should be reverse-trimmed
+     * @return inputVC itself when nothing can be trimmed, otherwise a new VariantContext with trimmed alleles, genotypes and stop
+     */
+    public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
+        final List<Allele> originalAlleles = inputVC.getAlleles();
+        final byte[] refBases = inputVC.getReference().getDisplayString().getBytes();
+        final int trimExtent = computeReverseClipping(originalAlleles, refBases, 0, false);
+        if ( trimExtent <= 0 || originalAlleles.size() <= 1 )
+            return inputVC;
+
+        // map every original allele to its trimmed counterpart; symbolic alleles are carried over untouched
+        final List<Allele> trimmed = new ArrayList<Allele>();
+        final Map<Allele, Allele> trimmedByOriginal = new HashMap<Allele, Allele>();
+        for ( final Allele original : originalAlleles ) {
+            final Allele replacement = original.isSymbolic()
+                    ? original
+                    : Allele.create(Arrays.copyOfRange(original.getBases(), 0, original.length() - trimExtent), original.isReference());
+            trimmed.add(replacement);
+            trimmedByOriginal.put(original, replacement);
+        }
+
+        // rebuild each genotype on top of the trimmed alleles; anything not called becomes a no-call
+        final GenotypesContext genotypes = GenotypesContext.create();
+        for ( final Genotype genotype : inputVC.getGenotypes() ) {
+            final List<Allele> remapped = new ArrayList<Allele>();
+            for ( final Allele called : genotype.getAlleles() )
+                remapped.add(called.isCalled() ? trimmedByOriginal.get(called) : Allele.NO_CALL);
+            genotypes.add(new GenotypeBuilder(genotype).alleles(remapped).make());
+        }
+
+        // the stop is recomputed from the length of the first trimmed allele (presumably the reference - confirm)
+        return new VariantContextBuilder(inputVC)
+                .stop(inputVC.getStart() + trimmed.get(0).length() - 1)
+                .alleles(trimmed)
+                .genotypes(genotypes)
+                .make();
+    }
+
+    /** Computes how many bases can be clipped from the end of the alleles: the number of trailing positions at which every non-symbolic allele matches ref, subject to the limits noted on the parameters. */
+    public static int computeReverseClipping(final List<Allele> unclippedAlleles, // alleles to examine; symbolic alleles are skipped
+                                             final byte[] ref, // bases that each allele's trailing bases are compared against
+                                             final int forwardClipping, // clipping stops once an allele has no more than this many bases left
+                                             final boolean allowFullClip) { // if false, an allele is never clipped away entirely and exhausting ref yields -1
+        int clipping = 0;
+        boolean stillClipping = true;
+
+        while ( stillClipping ) {
+            for ( final Allele a : unclippedAlleles ) {
+                if ( a.isSymbolic() )
+                    continue;
+                // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
+                // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
+                if ( a.length() - clipping == 0 )
+                    return clipping - (allowFullClip ? 0 : 1);
+                // note: clearing stillClipping does not leave the for loop, so later alleles in this pass can still hit the returns
+                if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
+                    stillClipping = false;
+                }
+                else if ( ref.length == clipping ) {
+                    if ( allowFullClip )
+                        stillClipping = false;
+                    else
+                        return -1;
+                }
+                else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
+                    stillClipping = false;
+                }
+            }
+            if ( stillClipping )
+                clipping++;
+        }
+
+        return clipping;
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java
index 2c1d99546b..22acc47879 100644
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java
@@ -124,7 +124,7 @@ public final void encodeTypedFloat(final double d) throws IOException {
     @Ensures("encodeStream.size() > old(encodeStream.size())")
     public final void encodeTyped(List<? extends Object> v, final BCF2Type type) throws IOException {
         if ( type == BCF2Type.CHAR && v.size() != 0 ) {
-            final String s = v.size() > 1 ? BCF2Utils.collapseStringList((List<String>) v) : (String)v.get(0);
+            final String s = BCF2Utils.collapseStringList((List<String>) v);
             v = stringToBytes(s);
         }
 
@@ -191,9 +191,12 @@ public final void encodeRawFloat(final double value) throws IOException {
     @Requires("size >= 0")
     @Ensures("encodeStream.size() > old(encodeStream.size())")
     public final void encodeType(final int size, final BCF2Type type) throws IOException {
-        final byte typeByte = BCF2Utils.encodeTypeDescriptor(size, type);
-        encodeStream.write(typeByte);
-        if ( BCF2Utils.willOverflow(size) ) {
+        if ( size <= BCF2Utils.MAX_INLINE_ELEMENTS ) {
+            final int typeByte = BCF2Utils.encodeTypeDescriptor(size, type);
+            encodeStream.write(typeByte);
+        } else {
+            final int typeByte = BCF2Utils.encodeTypeDescriptor(BCF2Utils.OVERFLOW_ELEMENT_MARKER, type);
+            encodeStream.write(typeByte);
             // write in the overflow size
             encodeTypedInt(size);
         }
@@ -201,12 +204,12 @@ public final void encodeType(final int size, final BCF2Type type) throws IOExcep
 
     @Ensures("encodeStream.size() > old(encodeStream.size())")
     public final void encodeRawInt(final int value, final BCF2Type type) throws IOException {
-        BCF2Utils.encodeRawBytes(value, type, encodeStream);
+        type.write(value, encodeStream);
     }
 
     @Ensures("encodeStream.size() > old(encodeStream.size())")
     public final void encodeRawBytes(final int value, final BCF2Type type) throws IOException {
-        BCF2Utils.encodeRawBytes(value, type, encodeStream);
+        type.write(value, encodeStream);
     }
 
     // --------------------------------------------------------------------------------
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java
index ddeb4d2842..a91eb216d1 100644
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java
@@ -335,7 +335,6 @@ private String javaStringToBCF2String(final Object value) {
             else if (value instanceof List) {
                 final List<String> l = (List<String>)value;
                 if ( l.isEmpty() ) return "";
-                else if ( l.size() == 1 ) return (String)l.get(0);
                 else return BCF2Utils.collapseStringList(l);
             } else
                 return (String)value;
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java
index 219daf315a..7b8224568c 100644
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java
@@ -76,7 +76,7 @@ private final <T> void add(final Map<String, T> map, final String field, final T
         if ( map.containsKey(field) )
             throw new ReviewedStingException("BUG: field " + field + " already seen in VCFHeader while building BCF2 field encoders");
         map.put(field, writer);
-        logger.info(writer);
+        if ( logger.isDebugEnabled() ) logger.debug(writer);
     }
 
     // -----------------------------------------------------------------
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java
index df2008e8e0..e4c64b26bb 100644
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java
@@ -31,6 +31,7 @@
 import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
 import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
 import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
+import org.broadinstitute.sting.utils.codecs.bcf2.BCFVersion;
 import org.broadinstitute.sting.utils.codecs.vcf.*;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -83,14 +84,9 @@
  * @since 06/12
  */
 class BCF2Writer extends IndexingVariantContextWriter {
-    /**
-     * If true, we will write out the undecoded raw bytes for a genotypes block, if it
-     * is found in the input VC.  This can be very dangerous as the genotype encoding
-     * depends on the exact ordering of the header.
-     *
-     * TODO -- enable when the new smart VCF header code is created by Eric Banks
-     */
-    private final static boolean WRITE_UNDECODED_GENOTYPE_BLOCK = false;
+    public static final int MAJOR_VERSION = 2;
+    public static final int MINOR_VERSION = 1;
+
     final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
     final private static boolean ALLOW_MISSING_CONTIG_LINES = false;
 
@@ -104,6 +100,13 @@ class BCF2Writer extends IndexingVariantContextWriter {
     private final BCF2Encoder encoder = new BCF2Encoder(); // initialized after the header arrives
     final BCF2FieldWriterManager fieldManager = new BCF2FieldWriterManager();
 
+    /**
+     * cached results for whether we can write out raw genotypes data.
+     */
+    private VCFHeader lastVCFHeaderOfUnparsedGenotypes = null;
+    private boolean canPassOnUnparsedGenotypeDataForLastVCFHeader = false;
+
+
     public BCF2Writer(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, final boolean doNotWriteGenotypes) {
         super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing);
         this.outputStream = getOutputStream();
@@ -153,8 +156,8 @@ public void writeHeader(VCFHeader header) {
             writer.close();
 
             final byte[] headerBytes = capture.toByteArray();
-            outputStream.write(BCF2Utils.MAGIC_HEADER_LINE);
-            BCF2Utils.encodeRawBytes(headerBytes.length, BCF2Type.INT32, outputStream);
+            new BCFVersion(MAJOR_VERSION, MINOR_VERSION).write(outputStream);
+            BCF2Type.INT32.write(headerBytes.length, outputStream);
             outputStream.write(headerBytes);
         } catch (IOException e) {
             throw new UserException.CouldNotCreateOutputFile("BCF2 stream", "Got IOException while trying to write BCF2 header", e);
@@ -243,13 +246,39 @@ private byte[] buildSitesData( VariantContext vc ) throws IOException {
         return encoder.getRecordBytes();
     }
 
+
+    /**
+     * Can we safely pass on the raw (undecoded) genotypes bytes of an input VC?
+     *
+     * The answer is cached per header instance: when lazyData.header is the same object as
+     * lastVCFHeaderOfUnparsedGenotypes we return the previous result.  Otherwise we use BCF2Utils to
+     * compare our header with the VC header (expensive) and cache the new result.
+     *
+     * @param lazyData the undecoded genotypes data; its header field is compared against this writer's header
+     * @return the cached or freshly computed result of BCF2Utils.headerLinesAreOrderedConsistently
+     */
+    private boolean canSafelyWriteRawGenotypesBytes(final BCF2Codec.LazyData lazyData) {
+        if ( lazyData.header != lastVCFHeaderOfUnparsedGenotypes ) {
+            // cache miss: a different header instance than last time, so recompute and remember it
+            canPassOnUnparsedGenotypeDataForLastVCFHeader = BCF2Utils.headerLinesAreOrderedConsistently(this.header,lazyData.header);
+            lastVCFHeaderOfUnparsedGenotypes = lazyData.header;
+        }
+
+        return canPassOnUnparsedGenotypeDataForLastVCFHeader;
+    }
+
     private BCF2Codec.LazyData getLazyData(final VariantContext vc) {
         if ( vc.getGenotypes().isLazyWithData() ) {
-                LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes();
-            if ( WRITE_UNDECODED_GENOTYPE_BLOCK && lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData )
+            final LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes();
+
+            if ( lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData &&
+                    canSafelyWriteRawGenotypesBytes((BCF2Codec.LazyData) lgc.getUnparsedGenotypeData())) {
+                //logger.info("Passing on raw BCF2 genotypes data");
                 return (BCF2Codec.LazyData)lgc.getUnparsedGenotypeData();
-            else
+            } else {
+                //logger.info("Decoding raw BCF2 genotypes data");
                 lgc.decode(); // WARNING -- required to avoid keeping around bad lazy data for too long
+            }
         }
 
         return null;
@@ -274,10 +303,7 @@ private void buildID( VariantContext vc ) throws IOException {
     }
 
     private void buildAlleles( VariantContext vc ) throws IOException {
-        final boolean needsPadding = VCFAlleleClipper.needsPadding(vc);
         for ( Allele allele : vc.getAlleles() ) {
-            if ( needsPadding )
-                allele = VCFAlleleClipper.padAllele(vc, allele);
             final byte[] s = allele.getDisplayBases();
             if ( s == null )
                 throw new ReviewedStingException("BUG: BCF2Writer encountered null padded allele" + allele);
@@ -358,8 +384,8 @@ private final void errorUnexpectedFieldToWrite(final VariantContext vc, final St
      */
     @Requires({"infoBlock.length > 0", "genotypesBlock.length >= 0"})
     private void writeBlock(final byte[] infoBlock, final byte[] genotypesBlock) throws IOException {
-        BCF2Utils.encodeRawBytes(infoBlock.length, BCF2Type.INT32, outputStream);
-        BCF2Utils.encodeRawBytes(genotypesBlock.length, BCF2Type.INT32, outputStream);
+        BCF2Type.INT32.write(infoBlock.length, outputStream);
+        BCF2Type.INT32.write(genotypesBlock.length, outputStream);
         outputStream.write(infoBlock);
         outputStream.write(genotypesBlock);
     }
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java
index 4548e026ea..db74f22630 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java
@@ -162,7 +162,6 @@ public void add(VariantContext vc) {
             vc = new VariantContextBuilder(vc).noGenotypes().make();
 
         try {
-            vc = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
             super.add(vc);
 
             Map<Allele, String> alleleMap = buildAlleleMap(vc);
@@ -564,6 +563,6 @@ private final void fieldIsMissingFromHeaderError(final VariantContext vc, final
                     + " at " + vc.getChr() + ":" + vc.getStart()
                     + " but this key isn't defined in the VCFHeader.  The GATK now requires all VCFs to have"
                     + " complete VCF headers by default.  This error can be disabled with the engine argument"
-                    + " -U LENIENT_VCF_PROCESSING or repair the VCF file header using repairVCFHeader");
+                    + " -U LENIENT_VCF_PROCESSING");
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriterFactory.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriterFactory.java
index f23166a026..035aff7d69 100644
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriterFactory.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriterFactory.java
@@ -84,6 +84,16 @@ public static VariantContextWriter create(final File location,
         }
     }
 
+    /**
+     * Should we output a BCF file based solely on the name of the file at location?
+     *
+     * @param location the output file; may be null, in which case the answer is false
+     * @return true if location is non-null and its name contains ".bcf" (no Options are considered)
+     */
+    public static boolean isBCFOutput(final File location) {
+        return isBCFOutput(location, EnumSet.noneOf(Options.class));
+    }
+
     public static boolean isBCFOutput(final File location, final EnumSet<Options> options) {
         return options.contains(Options.FORCE_BCF) || (location != null && location.getName().contains(".bcf"));
     }
diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java
index af48918566..76e25a3c08 100755
--- a/public/java/test/org/broadinstitute/sting/BaseTest.java
+++ b/public/java/test/org/broadinstitute/sting/BaseTest.java
@@ -282,12 +282,12 @@ public static void log(final String message) {
     private static final double DEFAULT_FLOAT_TOLERANCE = 1e-1;
 
     public static final void assertEqualsDoubleSmart(final Object actual, final Double expected) {
-        Assert.assertTrue(actual instanceof Double);
+        Assert.assertTrue(actual instanceof Double, "Not a double");
         assertEqualsDoubleSmart((double)(Double)actual, (double)expected);
     }
 
     public static final void assertEqualsDoubleSmart(final Object actual, final Double expected, final double tolerance) {
-        Assert.assertTrue(actual instanceof Double);
+        Assert.assertTrue(actual instanceof Double, "Not a double");
         assertEqualsDoubleSmart((double)(Double)actual, (double)expected, tolerance);
     }
 
@@ -303,13 +303,13 @@ public static final <T> void assertEqualsSet(final Set<T> actual, final Set<T> e
 
     public static final void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance) {
         if ( Double.isNaN(expected) ) // NaN == NaN => false unfortunately
-            Assert.assertTrue(Double.isNaN(actual));
+            Assert.assertTrue(Double.isNaN(actual), "expected is nan, actual is not");
         else if ( Double.isInfinite(expected) ) // NaN == NaN => false unfortunately
-            Assert.assertTrue(Double.isInfinite(actual));
+            Assert.assertTrue(Double.isInfinite(actual), "expected is infinite, actual is not");
         else {
             final double delta = Math.abs(actual - expected);
             final double ratio = Math.abs(actual / expected - 1.0);
-            Assert.assertTrue(delta < tolerance || ratio < tolerance);
+            Assert.assertTrue(delta < tolerance || ratio < tolerance, "expected = " + expected + " actual = " + actual + " not within tolerance " + tolerance);
         }
     }
 }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java
index 1c5dab254a..f2c5463175 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java
@@ -24,9 +24,12 @@
 
 package org.broadinstitute.sting.gatk.datasources.reads;
 
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
 import static org.testng.Assert.fail;
 import net.sf.picard.reference.IndexedFastaSequenceFile;
 import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMProgramRecord;
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.commandline.Tags;
@@ -36,6 +39,7 @@
 import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.baq.BAQ;
 import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.testng.annotations.AfterMethod;
@@ -143,4 +147,73 @@ public void testLinearBreakIterateAll() {
             fail("testLinearBreakIterateAll: We Should get a UserException.CouldNotReadInputFile exception");
         }
     }
+
+    /** Test that header program records are kept by default and cleared only when removeProgramRecords is true */
+    @Test
+    public void testRemoveProgramRecords() {
+        logger.warn("Executing testRemoveProgramRecords");
+
+        // setup the data: a single BAM reader, reused by all three data sources below
+        readers.add(new SAMReaderID(new File(b37GoodBAM),new Tags()));
+
+        // case 1 - use defaults: the shorter constructor, which takes no removeProgramRecords argument
+        SAMDataSource data = new SAMDataSource(readers,
+                new ThreadAllocation(),
+                null,
+                genomeLocParser,
+                false,
+                SAMFileReader.ValidationStringency.SILENT,
+                null,
+                null,
+                new ValidationExclusion(),
+                new ArrayList<ReadFilter>(),
+                false);
+
+        List<SAMProgramRecord> defaultProgramRecords = data.getHeader().getProgramRecords();
+        assertTrue(defaultProgramRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor");
+
+        // case 2 - longer constructor with removeProgramRecords = false: records must equal the default ones
+        boolean removeProgramRecords = false;
+        data = new SAMDataSource(readers,
+                new ThreadAllocation(),
+                null,
+                genomeLocParser,
+                false,
+                SAMFileReader.ValidationStringency.SILENT,
+                null,
+                null,
+                new ValidationExclusion(),
+                new ArrayList<ReadFilter>(),
+                false,
+                BAQ.CalculationMode.OFF,
+                BAQ.QualityMode.DONT_MODIFY,
+                null, // no BAQ
+                null, // no BQSR
+                (byte) -1,
+                removeProgramRecords);
+
+        List<SAMProgramRecord> dontRemoveProgramRecords = data.getHeader().getProgramRecords();
+        assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false");
+
+        // case 3 - same arguments with removeProgramRecords = true: the header must have no program records
+        removeProgramRecords = true;
+        data = new SAMDataSource(readers,
+                new ThreadAllocation(),
+                null,
+                genomeLocParser,
+                false,
+                SAMFileReader.ValidationStringency.SILENT,
+                null,
+                null,
+                new ValidationExclusion(),
+                new ArrayList<ReadFilter>(),
+                false,
+                BAQ.CalculationMode.OFF,
+                BAQ.QualityMode.DONT_MODIFY,
+                null, // no BAQ
+                null, // no BQSR
+                (byte) -1,
+                removeProgramRecords);
+
+        List<SAMProgramRecord> doRemoveProgramRecords = data.getHeader().getProgramRecords();
+        assertTrue(doRemoveProgramRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true");
+    }
 }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/TheoreticalMinimaBenchmark.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/TheoreticalMinimaBenchmark.java
index 8e67c9efce..1abca54872 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/TheoreticalMinimaBenchmark.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/TheoreticalMinimaBenchmark.java
@@ -94,7 +94,7 @@ public void timeIterateOverCigarString(int reps) {
                     int elementSize = cigarElement.getLength();
                     while(elementSize > 0) {
                         switch(cigarElement.getOperator()) {
-                            case M: matchMismatches++; break;
+                            case M: case EQ: case X: matchMismatches++; break;
                             case I: insertions++; break;
                             case D: deletions++; break;
                             default: others++; break;
diff --git a/public/java/test/org/broadinstitute/sting/gatk/filters/BadCigarFilterUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/filters/BadCigarFilterUnitTest.java
index 333d35641e..ff918db68c 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/filters/BadCigarFilterUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/filters/BadCigarFilterUnitTest.java
@@ -1,11 +1,14 @@
 package org.broadinstitute.sting.gatk.filters;
 
-import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import net.sf.samtools.Cigar;
+import org.broadinstitute.sting.utils.clipping.ReadClipperTestUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
+import java.util.List;
+
 /**
  * Checks that the Bad Cigar filter works for all kinds of wonky cigars
  *
@@ -14,6 +17,29 @@
  */
 public class BadCigarFilterUnitTest {
 
+    public static final String[] BAD_CIGAR_LIST = {
+            "2D4M",               // starting with multiple deletions
+            "4M2D",               // ending with multiple deletions
+            "3M1I1D",             // adjacent indels AND ends in deletion
+            "1M1I1D2M",           // adjacent indels I->D
+            "1M1D2I1M",           // adjacent indels D->I
+            "1M1I2M1D",           // ends in single deletion with insertion in the middle
+            "4M1D",               // ends in single deletion
+            "1D4M",               // starts with single deletion
+            "2M1D1D2M",           // adjacent D's
+            "1M1I1I1M",           // adjacent I's
+            "1H1D4M",             // starting with deletion after H
+            "1S1D3M",             // starting with deletion after S
+            "1H1S1D3M",           // starting with deletion after HS
+            "4M1D1H",             // ending with deletion before H
+            "3M1D1S",             // ending with deletion before S
+            "3M1D1S1H",           // ending with deletion before HS
+            "10M2H10M",           // H in the middle
+            "10M2S10M",           // S in the middle
+            "1H1S10M2S10M1S1H",    // deceiving S in the middle
+            "1H1S10M2H10M1S1H"    // deceiving H in the middle
+    };
+
     BadCigarFilter filter;
 
     @BeforeClass
@@ -21,40 +47,20 @@ public void init() {
         filter = new BadCigarFilter();
     }
 
-    @Test
+    @Test(enabled = true)
     public void testWonkyCigars () {
-        byte[] bases = {'A', 'A', 'A', 'A'};
-        byte[] quals = {30, 30, 30, 30};
-        GATKSAMRecord read;
-                                                                                                                        // starting with multiple deletions
-        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "2D4M");
-        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
-
-        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "4M2D");                                           // ending with multiple deletions
-        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
-
-        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "3M1I1D");                                         // adjacent indels AND ends in deletion
-        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
-
-        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1I1D2M");                                       // adjacent indels I->D
-        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
-
-        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1D2I1M");                                       // adjacent indels D->I
-        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
-
-        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1I2M1D");                                       // ends in single deletion with insertion in the middle
-        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
-
-        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "4M1D");                                           // ends in single deletion
-        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
-
-        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1D4M");                                           // starts with single deletion
-        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
-
-        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "2M1D1D2M");                                       // adjacent D's
-        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
+        for (String cigarString : BAD_CIGAR_LIST) {
+            GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigarString);
+            Assert.assertTrue(filter.filterOut(read), read.getCigarString());
+        }
+    }
 
-        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1I1I1M");                                       // adjacent I's
-        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
+    @Test(enabled = true)
+    public void testGoodCigars() {
+        List<Cigar> cigarList = ReadClipperTestUtils.generateCigarList(10);
+        for (Cigar cigar : cigarList) {
+            GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
+            Assert.assertFalse(filter.filterOut(read), read.getCigarString());
+        }
     }
 }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java
index 218548b00a..dc908c323c 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java
@@ -41,6 +41,46 @@ private final LocusIteratorByState makeLTBS(List<SAMRecord> reads, ReadPropertie
         return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
     }
 
+    @Test
+    public void testXandEQOperators() {
+        final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
+        final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'};
+        // All four reads get the same (0,1,10) placement arguments and a 10-base CIGAR; only the operator spelling differs.
+        // create a test version of the Reads object
+        ReadProperties readAttributes = createTestReadProperties();
+        // r1: all-A bases, plain 10M
+        SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header,"r1",0,1,10);
+        r1.setReadBases(bases1);
+        r1.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
+        r1.setCigarString("10M");
+        // r2: CIGAR spelled entirely with = and X; exactly one quality per base (10), matching bases2
+        SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header,"r2",0,1,10);
+        r2.setReadBases(bases2);
+        r2.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
+        r2.setCigarString("3=1X5=1X");
+        // r3: CIGAR mixes =, X and M; exactly one quality per base (10), matching bases2
+        SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header,"r3",0,1,10);
+        r3.setReadBases(bases2);
+        r3.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
+        r3.setCigarString("3=1X5M1X");
+        // r4: same bases as r2/r3 but plain 10M
+        SAMRecord r4  = ArtificialSAMUtils.createArtificialRead(header,"r4",0,1,10);
+        r4.setReadBases(bases2);
+        r4.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
+        r4.setCigarString("10M");
+
+        List<SAMRecord> reads = Arrays.asList(r1, r2, r3, r4);
+
+        // create the iterator by state with the fake reads and fake records
+        li = makeLTBS(reads,readAttributes);
+        // every locus the iterator yields must include all four reads, whichever operators their CIGARs use
+        while (li.hasNext()) {
+            AlignmentContext context = li.next();
+            ReadBackedPileup pileup = context.getBasePileup();
+            Assert.assertEquals(pileup.depthOfCoverage(), 4);
+        }
+    }
+
     @Test
     public void testIndelsInRegularPileup() {
         final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java
index 8e9f2533f5..f1ffbe80f9 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java
@@ -2,6 +2,7 @@
 
 import org.broadinstitute.sting.gatk.report.GATKReport;
 import org.broadinstitute.sting.gatk.report.GATKReportTable;
+import org.broadinstitute.sting.utils.recalibration.RecalUtils;
 import org.testng.Assert;
 import org.testng.annotations.Test;
 
@@ -33,15 +34,15 @@ public void testCombineSimilarFiles() {
         for (GATKReportTable originalTable : originalReport.getTables()) {
             GATKReportTable calculatedTable = calculatedReport.getTable(originalTable.getTableName());
             List<String> columnsToTest = new LinkedList<String>();
-            columnsToTest.add(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
-            columnsToTest.add(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
-            if (originalTable.getTableName().equals(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE)) {                    // these tables must be IDENTICAL
-                columnsToTest.add(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
+            columnsToTest.add(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME);
+            columnsToTest.add(RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
+            if (originalTable.getTableName().equals(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE)) {                    // these tables must be IDENTICAL
+                columnsToTest.add(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
                 testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 1);
             }
             
-            else if (originalTable.getTableName().equals(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE)) {
-                columnsToTest.add(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
+            else if (originalTable.getTableName().equals(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE)) {
+                columnsToTest.add(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
                 testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 2);
             }
             
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java
index 1c5db4262e..4611f3a405 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java
@@ -26,7 +26,7 @@ public void testIntervals() {
         WalkerTestSpec spec2 = new WalkerTestSpec(
                 "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
                  1,
-                 Arrays.asList("0567b32ebdc26604ddf2a390de4579ac"));
+                 Arrays.asList("ef481be9962e21d09847b8a1d4a4ff65"));
         executeTest("testFastaAlternateReferenceIndels", spec2);
 
         WalkerTestSpec spec3 = new WalkerTestSpec(
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java
index 256f93473f..f7f7999be2 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java
@@ -46,8 +46,9 @@
 
 
 public class ArtificialReadPileupTestProvider {
+    final String refBases = "ACAGAGCTGACCCTCCCTCCCCTCTCCCAGTGCAACAGCACGGGCGGCGACTGCTTTTACCGAGGCTACACGTCAGGCGTGGCGGCTGTCCAGGACTGGTACCACTTCCACTATGTGGATCTCTGCTGAGGACCAGGAAAGCCAGCACCCGCAGAGACTCTTCCCCAGTGCTCCATACGATCACCATTCTCTGCAGAAGGTCAGACGTCACTGGTGGCCCCCCAGCCTCCTCAGCAGGGAAGGATACTGTCCCGCAGATGAGATGAGCGAGAGCCGCCAGACCCACGTGACGCTGCACGACATCGACCCTCAGGCCTTGGACCAGCTGGTGCAGTTTGCCTACACGGCTGAGATTGTGGTGGGCGAGGGC";
     final int contigStart = 1;
-    final int contigStop = 10;
+    final int contigStop = refBases.length();
     final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop - contigStart + 1);
 //    final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
     final String artificialContig = "chr1";
@@ -57,16 +58,18 @@ public class ArtificialReadPileupTestProvider {
     final int artificialMappingQuality = 60;
     Map<String, SAMReadGroupRecord> sample2RG = new HashMap<String, SAMReadGroupRecord>();
     List<SAMReadGroupRecord> sampleRGs;
-
-    final String refBases = "AGGATACTGT";
     List<String> sampleNames = new ArrayList<String>();
     private String sampleName(int i) { return sampleNames.get(i); }
     private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
-    public final int offset = 5;
+    public final int locStart = 105; // start position where we desire artificial variant
+    private final int readLength = 10; // desired read length in pileup
+    public final int readOffset = 4;
+    private final int readStart = locStart - readOffset;
     public final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
-    public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset);
-    public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
-    public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
+    public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,locStart,locStart);
+    public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,locStart-100,locStart+100);
+    public final String windowBases = refBases.substring(locStart-100-1,locStart+100);
+    public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,windowBases.getBytes());
 
     byte BASE_QUAL = 50;
 
@@ -90,7 +93,7 @@ public List<String> getSampleNames() {
         return sampleNames;
     }
     public byte getRefByte() {
-        return refBases.substring(offset,offset+1).getBytes()[0];
+        return referenceContext.getBase();
     }
 
     public ReferenceContext getReferenceContext()   { return referenceContext;}
@@ -99,43 +102,34 @@ public byte getRefByte() {
     public Map<String,AlignmentContext> getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele) {
         return getAlignmentContextFromAlleles(eventLength, altBases, numReadsPerAllele, false, BASE_QUAL);
     }
-    public Map<String,AlignmentContext> getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele,
-                                                                       boolean addBaseErrors, int phredScaledBaseErrorRate) {
-        //    RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
-
-
-        ArrayList<Allele> vcAlleles = new ArrayList<Allele>();
-        Allele refAllele, altAllele;
-        if (eventLength == 0)  {// SNP case
-            refAllele =Allele.create(refBases.substring(offset,offset+1),true);
-            altAllele = Allele.create(altBases.substring(0,1), false);
+    public Map<String,AlignmentContext> getAlignmentContextFromAlleles(final int eventLength,
+                                                                       final String altBases,
+                                                                       final int[] numReadsPerAllele,
+                                                                       final boolean addBaseErrors,
+                                                                       final int phredScaledBaseErrorRate) {
+        final String refChar = new String(new byte[]{referenceContext.getBase()});
+
+        String refAllele, altAllele;
+        if (eventLength == 0)  {
+            // SNP case
+            refAllele = refChar;
+            altAllele = altBases.substring(0,1);
 
         } else if (eventLength>0){
             // insertion
-            refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
-            altAllele = Allele.create(altBases.substring(0,eventLength), false);
+            refAllele = refChar;
+            altAllele = refChar+altBases/*.substring(0,eventLength)*/;
         }
         else {
             // deletion
-            refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true);
-            altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
+            refAllele = new String(referenceContext.getForwardBases()).substring(0,Math.abs(eventLength)+1);
+            altAllele = refChar;
         }
-        int stop = loc.getStart();
-        vcAlleles.add(refAllele);
-        vcAlleles.add(altAllele);
-
-        final VariantContextBuilder builder = new VariantContextBuilder().source("");
-        builder.loc(loc.getContig(), loc.getStart(), stop);
-        builder.alleles(vcAlleles);
-        builder.referenceBaseForIndel(referenceContext.getBase());
-        builder.noGenotypes();
-
-        final VariantContext vc = builder.make();
 
         Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
 
         for (String sample: sampleNames) {
-            AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate));
+            AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc, refAllele, altAllele, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate));
             contexts.put(sample,context);
 
         }
@@ -149,73 +143,79 @@ private SAMReadGroupRecord createRG(String name) {
         rg.setSample(name);
         return rg;
     }
-    private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases,
+
+    private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, String refAllele, String altAllele, String altBases,
                                                     int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) {
         List<PileupElement> pileupElements = new ArrayList<PileupElement>();
-        int readStart = contigStart;
-        int offset = (contigStop-contigStart+1)/2;
-        int refAlleleLength = 0;
+        final int refAlleleLength = refAllele.length();
+
+        pileupElements.addAll(createPileupElements(refAllele, loc, numReadsPerAllele[0], sample, readStart, altBases, addErrors, phredScaledErrorRate, refAlleleLength, true));
+        pileupElements.addAll(createPileupElements(altAllele, loc, numReadsPerAllele[1], sample, readStart, altBases, addErrors, phredScaledErrorRate, refAlleleLength, false));
+        return new ReadBackedPileupImpl(loc,pileupElements);
+    }
+
+    private List<PileupElement> createPileupElements(String allele, GenomeLoc loc, int numReadsPerAllele, String sample, int readStart, String altBases, boolean addErrors, int phredScaledErrorRate, int refAlleleLength, boolean isReference) {
+
+        int alleleLength = allele.length();
+        List<PileupElement> pileupElements = new ArrayList<PileupElement>();
+
         int readCounter = 0;
-        int alleleCounter = 0;
-        for (Allele allele: vc.getAlleles()) {
-            if (allele.isReference())
-                refAlleleLength = allele.getBases().length;
-
-            int alleleLength = allele.getBases().length;
-
-            for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) {
-                byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
-                if (addErrors)
-                    addBaseErrors(readBases, phredScaledErrorRate);
-
-                byte[] readQuals = new byte[readBases.length];
-                Arrays.fill(readQuals, (byte)phredScaledErrorRate);
-
-                GATKSAMRecord read = new GATKSAMRecord(header);
-                read.setBaseQualities(readQuals);
-                read.setReadBases(readBases);
-                read.setReadName(artificialReadName+readCounter++);
-
-                boolean isBeforeDeletion = false, isBeforeInsertion = false;
-                if (allele.isReference())
-                    read.setCigarString(readBases.length + "M");
-                else {
-                    isBeforeDeletion = alleleLength<refAlleleLength;
-                    isBeforeInsertion = alleleLength>refAlleleLength;
-                    if (isBeforeDeletion || isBeforeInsertion)
-                        read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
-                            (readBases.length-offset)+"M");
-                    else // SNP case
-                        read.setCigarString(readBases.length+"M");
-                }
-
-                int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
-                read.setReadPairedFlag(false);
-                read.setAlignmentStart(readStart);
-                read.setMappingQuality(artificialMappingQuality);
-                read.setReferenceName(loc.getContig());
-                read.setReadNegativeStrandFlag(false);
-                read.setAttribute("RG", sampleRG(sample).getReadGroupId());
-
-
-                pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
+        for ( int d = 0; d < numReadsPerAllele; d++ ) {
+            byte[] readBases = trueHaplotype(allele, refAlleleLength, readLength);
+            if (addErrors)
+                addBaseErrors(readBases, phredScaledErrorRate);
+
+            byte[] readQuals = new byte[readBases.length];
+            Arrays.fill(readQuals, (byte)phredScaledErrorRate);
+
+            GATKSAMRecord read = new GATKSAMRecord(header);
+            read.setBaseQualities(readQuals);
+            read.setReadBases(readBases);
+            read.setReadName(artificialReadName+readCounter++);
+
+            boolean isBeforeDeletion = alleleLength<refAlleleLength;
+            boolean isBeforeInsertion = alleleLength>refAlleleLength;
+
+            int eventLength = alleleLength - refAlleleLength;
+            if (isReference)
+                read.setCigarString(readBases.length + "M");
+            else {
+                if (isBeforeDeletion || isBeforeInsertion)
+                    read.setCigarString((readOffset+1)+"M"+ Math.abs(eventLength) + (isBeforeDeletion?"D":"I") +
+                            (readBases.length-(readOffset+1)-(isBeforeInsertion?eventLength:0))+"M"); // M and I ops must sum to the read length
+                else // SNP case
+                    read.setCigarString(readBases.length+"M");
             }
-            alleleCounter++;
+
+            read.setReadPairedFlag(false);
+            read.setAlignmentStart(readStart);
+            read.setMappingQuality(artificialMappingQuality);
+            read.setReferenceName(loc.getContig());
+            read.setReadNegativeStrandFlag(false);
+            read.setAttribute("RG", sampleRG(sample).getReadGroupId());
+
+
+            pileupElements.add(new PileupElement(read,readOffset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases,Math.abs(eventLength)));
         }
 
-        return new ReadBackedPileupImpl(loc,pileupElements);
+        return pileupElements;
     }
 
-    private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) {
+    /**
+     * Create haplotype with desired allele and reference context
+     * @param allele                             Desired allele string
+     * @param refAlleleLength                    Length of reference allele.
+     * @param desiredLength                      Desired haplotype length
+     * @return                                   Byte array holding the haplotype formed by prefix + allele bases + postfix
+     */
+    private byte[] trueHaplotype(final String allele, final int refAlleleLength, final int desiredLength) {
         // create haplotype based on a particular allele
-        String prefix = refBases.substring(offset);
-        String alleleBases = new String(allele.getBases());
-        String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
-
-        return (prefix+alleleBases+postfix).getBytes();
-
+        final int startIdx= locStart - readOffset-1;
 
+        final String prefix = refBases.substring(startIdx, locStart-1);
+        final String postfix = refBases.substring(locStart+refAlleleLength-1,startIdx + desiredLength);
 
+        return (prefix+allele+postfix).getBytes();
     }
 
     private void addBaseErrors(final byte[] readBases, final int phredScaledErrorRate) {
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java
index c7ef51d0cb..85528f58b6 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java
@@ -45,7 +45,6 @@
  */
 public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
     
-    final String refBases = "AGGATACTGT";
     final int nSamples = 1;
     final int[] numReadsPerAllele = new int[]{10,10};
     final String SAMPLE_PREFIX = "sample";
@@ -65,21 +64,19 @@ public void before() {
     @Test
     public void testBasicConsensusCounts() {
         // 4 inserted bases, min cnt = 10
-        String altBases = "CCTCCTGAGA";
+        String altBases = "CCTC";
         int eventLength = 4;
         List<Allele> alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
         
         Assert.assertEquals(alleles.size(),2);
-        Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength));
+        Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength));
 
 
-
-        //altBases = "CCTCMTGAGA";
-
+        // test deletions
         eventLength = 3;
         alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases);
         Assert.assertEquals(alleles.size(),2);
-        Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
+        Assert.assertEquals(alleles.get(0).getBaseString().substring(1,eventLength), new String(pileupProvider.getReferenceContext().getForwardBases()).substring(1,eventLength));
 
         // same with min Reads = 11
         alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases);
@@ -92,14 +89,14 @@ public void testBasicConsensusCounts() {
         Assert.assertEquals(alleles.size(),0);
 
         // test N's in insertions
-        altBases = "CCTCNTGAGA";
+        altBases = "CCTC";
         eventLength = 4;
         alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
 
         Assert.assertEquals(alleles.size(),2);
-        Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength));
+        Assert.assertEquals(alleles.get(1).getBaseString().substring(1,eventLength+1), altBases);
 
-        altBases = "CCTCNTGAGA";
+        altBases = "CCTCN";
         eventLength = 5;
         alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
 
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index f35eb4404c..7b6e1ee967 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -355,6 +355,19 @@ public void testGGAwithNoEvidenceInReads() {
         executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
     }
 
+    @Test
+    public void testBaseIndelQualityScores() {
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                baseCommandIndelsb37 +
+                        " -I " + privateTestDir + "NA12878.100kb.BQSRv2.example.bam" +
+                        " -o %s" +
+                        " -L 20:10,000,000-10,100,000",
+                1,
+                Arrays.asList("b3c923ed9efa04b85fc18a9b45c8d2a6"));
+
+        executeTest("test UG with base indel quality scores", spec);
+    }
+
     // --------------------------------------------------------------------------------------------------------------
     //
     // testing SnpEff
@@ -373,13 +386,13 @@ public void testSnpEffAnnotationRequestedWithoutRodBinding() {
 
     // --------------------------------------------------------------------------------------------------------------
     //
-    // testing SnpEff
+    // testing MinIndelFraction
     //
     // --------------------------------------------------------------------------------------------------------------
 
     final static String assessMinIndelFraction = baseCommandIndelsb37 + " -I " + validationDataLocation
             + "978604.bam -L 1:978,586-978,626 -o %s --sites_only -rf Sample -goodSM 7377 -goodSM 22-0022 -goodSM 134 -goodSM 344029-53 -goodSM 14030";
-    
+
     @Test
     public void testMinIndelFraction0() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@@ -403,4 +416,18 @@ public void testMinIndelFraction100() {
                 Arrays.asList("3f07efb768e08650a7ce333edd4f9a52"));
         executeTest("test minIndelFraction 1.0", spec);
     }
+
+    // --------------------------------------------------------------------------------------------------------------
+    //
+    // testing Ns in CIGAR
+    //
+    // --------------------------------------------------------------------------------------------------------------
+
+    @Test
+    public void testNsInCigar() {
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1,
+                Arrays.asList("22c9fd65ce3298bd7fbf400c9c209f29"));
+        executeTest("test calling on reads with Ns in CIGAR", spec);
+    }
 }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java
index 7a849a8193..80eda5ed9e 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java
@@ -23,7 +23,7 @@ public void testWikiExample() {
         testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
         testArgs += " --virtualPrimerSize 30";
         WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
-                Arrays.asList("27f9450afa132888a8994167f0035fd7"));
+                Arrays.asList("240d99b58f73985fb114abe9044c0271"));
         executeTest("Test probes", spec);
     }
 
@@ -36,7 +36,7 @@ public void testWikiExampleNoBWA() {
         testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
         testArgs += " --virtualPrimerSize 30 --doNotUseBWA";
         WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
-                Arrays.asList("f2611ff1d9cd5bedaad003251fed8bc1"));
+                Arrays.asList("6e7789445e29d91979a21e78d3d53295"));
         executeTest("Test probes", spec);
     }
 
@@ -49,7 +49,7 @@ public void testWikiExampleMonoFilter() {
         testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
         testArgs += " --virtualPrimerSize 30 --filterMonomorphic";
         WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
-                Arrays.asList("77b3f30e38fedad812125bdf6cf3255f"));
+                Arrays.asList("18d7236208db603e143b40db06ef2aca"));
         executeTest("Test probes", spec);
     }
 
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
index d9a91c4c26..c92d6d4cfb 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
@@ -34,7 +34,7 @@
 import java.util.List;
 
 public class VariantEvalIntegrationTest extends WalkerTest {
-    private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/";
+    private static String variantEvalTestDataRoot = privateTestDir + "VariantEval/";
     private static String fundamentalTestVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
     private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.vcf";
     private static String fundamentalTestSNPsWithMLEVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.withMLE.vcf";
@@ -122,7 +122,7 @@ public void testFundamentalsCountVariantsSNPsAndIndelsWithNovelty() {
                         "-o %s"
                 ),
                 1,
-                Arrays.asList("e62a3bd9914d48e2bb2fb4f5dfc5ebc0")
+                Arrays.asList("40abbc9be663aed8ee1158f832463ca8")
         );
         executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
     }
@@ -144,7 +144,7 @@ public void testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter() {
                         "-o %s"
                 ),
                 1,
-                Arrays.asList("087a2d9943c53e7f49663667c3305c7e")
+                Arrays.asList("106a0e8753e839c0a2c030eb4b165fa9")
         );
         executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
     }
@@ -585,6 +585,21 @@ public void testStandardIndelEval() {
         executeTest("testStandardIndelEval", spec);
     }
 
+    @Test
+    public void testBadACValue() {
+        WalkerTestSpec spec = new WalkerTestSpec(
+                buildCommandLine(
+                        "-T VariantEval",
+                        "-R " + b37KGReference,
+                        "-eval " + privateTestDir + "vcfexample.withBadAC.vcf",
+                        "-noST -ST AlleleCount",
+                        "-noEV -EV VariantSummary"
+                ),
+                0,
+                UserException.class);
+        executeTest("testBadACValue", spec);
+    }
+
 
     @Test()
     public void testIncompatibleEvalAndStrat() {
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
index 74d071a908..b780bcd00c 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
@@ -13,7 +13,7 @@ private static class VRTest {
         String recalMD5;
         String cutVCFMD5;
         public VRTest(String inVCF, String tranchesMD5, String recalMD5, String cutVCFMD5) {
-            this.inVCF = validationDataLocation + inVCF;
+            this.inVCF = inVCF;
             this.tranchesMD5 = tranchesMD5;
             this.recalMD5 = recalMD5;
             this.cutVCFMD5 = cutVCFMD5;
@@ -25,7 +25,7 @@ public String toString() {
         }
     }
 
-    VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
+    VRTest lowPass = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf",
             "f360ce3eb2b0b887301be917a9843e2b",  // tranches
             "287fea5ea066bf3fdd71f5ce9b58eab3",  // recal file
             "356b9570817b9389da71fbe991d8b2f5"); // cut VCF
@@ -74,14 +74,65 @@ public void testApplyRecalibration(VRTest params) {
         executeTest("testApplyRecalibration-"+params.inVCF, spec);
     }
 
+    VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf",
+            "a8ce3cd3dccafdf7d580bcce7d660a9a",  // tranches
+            "74c10fc15f9739a938b7138909fbde04",  // recal file
+            "62fda105e14b619a1c263855cf56af1d"); // cut VCF
+
+    @DataProvider(name = "VRBCFTest")
+    public Object[][] createVRBCFTest() {
+        return new Object[][]{ {bcfTest} };
+        //return new Object[][]{ {yriTrio}, {lowPass} }; // Add hg19 chr20 trio calls here
+    }
+
+    @Test(dataProvider = "VRBCFTest")
+    public void testVariantRecalibratorWithBCF(VRTest params) {
+        //System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                "-R " + b37KGReference +
+                        " -resource:known=true,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
+                        " -resource:truth=true,training=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
+                        " -resource:training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
+                        " -T VariantRecalibrator" +
+                        " -input " + params.inVCF +
+                        " -L 20:10,000,000-20,000,000" +
+                        " --no_cmdline_in_header" +
+                        " -an AC " + // integer value
+                        " -an QD -an ReadPosRankSum -an FS -an InbreedingCoeff " + // floats value
+                        " -mG 2 "+
+                        " -recalFile %s" +
+                        " -tranchesFile %s",
+                2,
+                Arrays.asList("bcf", "txt"),
+                Arrays.asList(params.recalMD5, params.tranchesMD5));
+        executeTest("testVariantRecalibrator-"+params.inVCF, spec).getFirst();
+    }
+
+    @Test(dataProvider = "VRBCFTest", dependsOnMethods="testVariantRecalibratorWithBCF")
+    public void testApplyRecalibrationWithBCF(VRTest params) {
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                "-R " + b37KGReference +
+                        " -T ApplyRecalibration" +
+                        " -L 20:10,000,000-20,000,000" +
+                        " --no_cmdline_in_header" +
+                        " -input " + params.inVCF +
+                        " -U LENIENT_VCF_PROCESSING -o %s" +
+                        " -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) +
+                        " -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
+                Arrays.asList(params.cutVCFMD5));
+        spec.disableShadowBCF();
+        executeTest("testApplyRecalibration-"+params.inVCF, spec);
+    }
+
+
     VRTest indelUnfiltered = new VRTest(
-            "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as .
+            validationDataLocation + "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as .
             "b7589cd098dc153ec64c02dcff2838e4",  // tranches
             "a04a9001f62eff43d363f4d63769f3ee",  // recal file
             "64f576881e21323dd4078262604717a2"); // cut VCF
 
     VRTest indelFiltered = new VRTest(
-            "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS
+            validationDataLocation + "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS
             "b7589cd098dc153ec64c02dcff2838e4",  // tranches
             "a04a9001f62eff43d363f4d63769f3ee",  // recal file
             "af22c55d91394c56a222fd40d6d54781"); // cut VCF
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java
index bbee99ba67..c32d77f824 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java
@@ -45,12 +45,16 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
     // TODO TODO TODO TODO TODO TODO TODO TODO
     //
     private static String baseTestString(String args) {
-        return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -U LENIENT_VCF_PROCESSING -R " + b36KGReference + args;
+        return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -R " + b36KGReference + args;
+        //return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -U LENIENT_VCF_PROCESSING -R " + b36KGReference + args;
     }
 
-    private void cvExecuteTest(final String name, final WalkerTestSpec spec) {
+    private void cvExecuteTest(final String name, final WalkerTestSpec spec, final boolean parallel) {
         spec.disableShadowBCF();
-        executeTest(name, spec);
+        if ( parallel )
+            executeTestParallel(name, spec);
+        else
+            executeTest(name, spec);
     }
 
     public void test1InOut(String file, String md5) {
@@ -62,15 +66,19 @@ public void test1InOut(String file, String md5, String args) {
                  baseTestString(" -priority v1 -V:v1 " + validationDataLocation + file + args),
                  1,
                  Arrays.asList(md5));
-         cvExecuteTest("testInOut1--" + file, spec);
+         cvExecuteTest("testInOut1--" + file, spec, true);
     }
 
     public void combine2(String file1, String file2, String args, String md5) {
+        combine2(file1, file2, args, md5, true);
+    }
+
+    public void combine2(String file1, String file2, String args, String md5, final boolean parallel) {
          WalkerTestSpec spec = new WalkerTestSpec(
                  baseTestString(" -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 "+ validationDataLocation + file2 + args),
                  1,
                  Arrays.asList(md5));
-         cvExecuteTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
+         cvExecuteTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec, parallel);
     }
 
     public void combineSites(String args, String md5) {
@@ -82,7 +90,7 @@ public void combineSites(String args, String md5) {
                         + " -V:hm3 " + validationDataLocation + file2 + args,
                 1,
                 Arrays.asList(md5));
-        cvExecuteTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
+        cvExecuteTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec, true);
     }
 
     public void combinePLs(String file1, String file2, String md5) {
@@ -90,26 +98,29 @@ public void combinePLs(String file1, String file2, String md5) {
                  "-T CombineVariants --no_cmdline_in_header -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + privateTestDir + file1 + " -V:v2 " + privateTestDir + file2,
                  1,
                  Arrays.asList(md5));
-         cvExecuteTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
+         cvExecuteTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec, true);
     }
 
-    @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "6469fce8a5cd5a0f77e5ac5d9e9e192b"); }
-    @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "a4cedaa83d54e34cafc3ac4b80acf5b4", " -setKey foo"); }
-    @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); }
+    @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "6469fce8a5cd5a0f77e5ac5d9e9e192b", " -U LENIENT_VCF_PROCESSING"); }
+    @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "a4cedaa83d54e34cafc3ac4b80acf5b4", " -setKey foo -U LENIENT_VCF_PROCESSING"); }
+    @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null -U LENIENT_VCF_PROCESSING"); }
     @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format
 
-    @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ef2d249ea4b25311966e038aac05c661"); }
-    @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "cdb448aaa92ca5a9e393d875b42581b3"); }
+    @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "909c6dc74eeb5ab86f8e74073eb0c1d6"); }
+    @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "381875b3280ba56eef0152e56f64f68d"); }
 
     @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); }
 
     @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format
     @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format
-    @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "91f6087e6e2bf3df4d1c9700eaff958b"); }
+    @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "629656bfef7713c23f3a593523503b2f"); }
 
-    @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a9be239ab5e03e7e97caef58a3841dd2"); }
+    @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e54d0dcf14f90d5c8e58b45191dd0219"); }
 
-    @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); }
+    @Test public void uniqueSNPs() {
+        // NOTE(review): the input VCF is malformed (DB=0) and parallel execution reportedly repairs it, changing the md5s; yet parallel=true is passed below -- confirm whether this should be false or this note is stale
+        combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "e5ea6ac3905bd9eeea1a2ef5d2cb5af7", true);
+    }
 
     @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "def52bcd3942bbe39cd7ebe845c4f206"); }
     @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "5f61145949180bf2a0cd342d8e064860"); }
@@ -122,11 +133,12 @@ public void combinePLs(String file1, String file2, String md5) {
                         " -V:denovoInfo "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" +
                         " -setKey centerSet" +
                         " -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" +
+                        " -U LENIENT_VCF_PROCESSING" +
                         " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
                         " -genotypeMergeOptions UNIQUIFY -L 1"),
                 1,
-                Arrays.asList("3039cfff7abee6aa7fbbafec66a1b019"));
-        cvExecuteTest("threeWayWithRefs", spec);
+                Arrays.asList("e5f0e7a80cd392172ebf5ddb06b91a00"));
+        cvExecuteTest("threeWayWithRefs", spec, true);
     }
 
     // complex examples with filtering, indels, and multiple alleles
@@ -139,13 +151,13 @@ public void combineComplexSites(String args, String md5) {
                         + " -V:two " + privateTestDir + file2 + args,
                 1,
                 Arrays.asList(md5));
-        cvExecuteTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
+        cvExecuteTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec, true);
     }
 
-    @Test public void complexTestFull() { combineComplexSites("", "151a4970367dd3e73ba3e7f3c2f874f6"); }
-    @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "c0625e092b878b3d3eb1703c48e216b7"); }
-    @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "6978329d6a1033ac16f83b49072c679b"); }
-    @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "6978329d6a1033ac16f83b49072c679b"); }
+    @Test public void complexTestFull() { combineComplexSites("", "9d989053826ffe5bef7c4e05ac51bcca"); }
+    @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "4f38d9fd30a7ae83e2a7dec265a28772"); }
+    @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "46bbbbb8fc9ae6467a4f8fe35b8d7d14"); }
+    @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "46bbbbb8fc9ae6467a4f8fe35b8d7d14"); }
 
     @Test
     public void combineDBSNPDuplicateSites() {
@@ -153,6 +165,6 @@ public void combineDBSNPDuplicateSites() {
                  "-T CombineVariants --no_cmdline_in_header -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
                  1,
                  Arrays.asList("aa926eae333208dc1f41fe69dc95d7a6"));
-         cvExecuteTest("combineDBSNPDuplicateSites:", spec);
+         cvExecuteTest("combineDBSNPDuplicateSites:", spec, true);
     }
 }
\ No newline at end of file
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java
index e25d654659..bde597fbef 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java
@@ -34,7 +34,7 @@ public void testRepeatedLineSelection() {
         WalkerTestSpec spec = new WalkerTestSpec(
                 baseTestString(" -sn A -sn B -sn C --variant " + testfile),
                 1,
-                Arrays.asList("3d98a024bf3aecbd282843e0af89d0e6")
+                Arrays.asList("125d1c9fa111cd38dfa2ff3900f16b57")
         );
 
         executeTest("testRepeatedLineSelection--" + testfile, spec);
@@ -49,7 +49,7 @@ public void testDiscordance() {
                         + b37hapmapGenotypes + " -disc " + testFile
                         + " -o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING",
                 1,
-                Arrays.asList("54289033d35d32b8ebbb38c51fbb614c")
+                Arrays.asList("c0b937edb6a8b6392d477511d4f1ebcf")
         );
         spec.disableShadowBCF();
 
@@ -135,7 +135,7 @@ public void testUsingDbsnpName() {
         WalkerTestSpec spec = new WalkerTestSpec(
                 "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s --no_cmdline_in_header",
                 1,
-                Arrays.asList("d12ae1617deb38f5ed712dc326935b9a")
+                Arrays.asList("a554459c9ccafb9812ff6d8c06c11726")
         );
 
         executeTest("testUsingDbsnpName--" + testFile, spec);
@@ -148,12 +148,38 @@ public void testRegenotype() {
         WalkerTestSpec spec = new WalkerTestSpec(
                 "-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
                 1,
-                Arrays.asList("c22ad8864d9951403672a24c20d6c3c2")
+                Arrays.asList("52cb2f150559ca1457e9df7ec153dbb4")
         );
 
         executeTest("testRegenotype--" + testFile, spec);
     }
 
+    @Test
+    public void testRemoveMLE() {
+        String testFile = privateTestDir + "vcfexample.withMLE.vcf";
+
+        WalkerTestSpec spec = new WalkerTestSpec(
+                "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
+                1,
+                Arrays.asList("a554459c9ccafb9812ff6d8c06c11726")
+        );
+
+        executeTest("testRemoveMLE--" + testFile, spec);
+    }
+
+    @Test
+    public void testRemoveMLEAndRegenotype() {
+        String testFile = privateTestDir + "vcfexample.withMLE.vcf";
+
+        WalkerTestSpec spec = new WalkerTestSpec(
+                "-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
+                1,
+                Arrays.asList("52cb2f150559ca1457e9df7ec153dbb4")
+        );
+
+        executeTest("testRemoveMLEAndRegenotype--" + testFile, spec);
+    }
+
     @Test
     public void testMultipleRecordsAtOnePosition() {
         String testFile = privateTestDir + "selectVariants.onePosition.vcf";
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java
index 3277f5060f..6a3d755d7d 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java
@@ -125,4 +125,14 @@ public void testBadAllele2() {
         executeTest("test bad ref allele in deletion", spec);
     }
 
+    @Test
+    public void testComplexEvents() {
+        WalkerTestSpec spec = new WalkerTestSpec(
+                baseTestString("complexEvents.vcf", "ALL"),
+                0,
+                Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")
+        );
+
+        executeTest("test validating complex events", spec);
+    }
 }
diff --git a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java
index ec08d97c5d..ddffb6e4cb 100644
--- a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java
@@ -31,6 +31,8 @@
 import net.sf.samtools.CigarOperator;
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
@@ -53,11 +55,11 @@ public void testSimpleInsertionAllele() {
         h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
         final Cigar h1Cigar = new Cigar(h1CigarList);
         String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA";
-        basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases);
-        h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA";
-        basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases);
+        basicInsertTest("A", "AACTT", 0, h1Cigar, bases, h1bases);
+        h1bases = "ACTGGTCAACTTACTGGTCAACTGGTCAACTGGTCA";
+        basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
         h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA";
-        basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases);
+        basicInsertTest("A", "AACTT", 16, h1Cigar, bases, h1bases);
     }
 
     @Test
@@ -68,11 +70,11 @@ public void testSimpleDeletionAllele() {
         h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
         final Cigar h1Cigar = new Cigar(h1CigarList);
         String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA";
-        basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases);
-        h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA";
-        basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases);
+        basicInsertTest("ACTGG", "A", 0, h1Cigar, bases, h1bases);
+        h1bases = "ACTGGTCAGTCAACTGGTCAACTGGTCA";
+        basicInsertTest("AACTG", "A", 7, h1Cigar, bases, h1bases);
         h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA";
-        basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases);
+        basicInsertTest("ACTGG", "A", 16, h1Cigar, bases, h1bases);
     }
 
     @Test
@@ -102,11 +104,11 @@ public void testComplexInsertionAllele() {
         h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
         final Cigar h1Cigar = new Cigar(h1CigarList);
         String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
-        basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases);
+        basicInsertTest("A", "AACTT", 0, h1Cigar, bases, h1bases);
         h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC";
-        basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases);
+        basicInsertTest("C", "CACTT", 6, h1Cigar, bases, h1bases);
         h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC";
-        basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases);
+        basicInsertTest("G", "GACTT", 16, h1Cigar, bases, h1bases);
     }
 
     @Test
@@ -120,12 +122,12 @@ public void testComplexDeletionAllele() {
         h1CigarList.add(new CigarElement(3, CigarOperator.D));
         h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
         final Cigar h1Cigar = new Cigar(h1CigarList);
-        String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
-        basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases);
-        h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC";
-        basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases);
+        String h1bases = "A" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
+        basicInsertTest("ATCG", "A", 0, h1Cigar, bases, h1bases);
+        h1bases = "ATCG" + "CCGGCCGGCC" + "ATAAAG" + "AGGGGGA" + "AGGC";
+        basicInsertTest("CGATC", "AAA", 6, h1Cigar, bases, h1bases);
         h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC";
-        basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases);
+        basicInsertTest("GGGGG", "G", 16, h1Cigar, bases, h1bases);
     }
 
     @Test
@@ -148,13 +150,16 @@ public void testComplexSNPAllele() {
     }
 
     private void basicInsertTest(String ref, String alt, int loc, Cigar cigar, String hap, String newHap) {
-        final int INDEL_PADDING_BASE = (ref.length() == alt.length() ? 0 : 1);
         final Haplotype h = new Haplotype(hap.getBytes());
         final Allele h1refAllele = Allele.create(ref, true);
         final Allele h1altAllele = Allele.create(alt, false);
+        final ArrayList<Allele> alleles = new ArrayList<Allele>();
+        alleles.add(h1refAllele);
+        alleles.add(h1altAllele);
+        final VariantContext vc = new VariantContextBuilder().alleles(alleles).loc("1", loc, loc + h1refAllele.getBases().length - 1).make();
         h.setAlignmentStartHapwrtRef(0);
         h.setCigar(cigar);
-        final Haplotype h1 = h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE);
+        final Haplotype h1 = h.insertAllele(vc.getReference(), vc.getAlternateAllele(0), loc);
         final Haplotype h1expected = new Haplotype(newHap.getBytes());
         Assert.assertEquals(h1, h1expected);
     }
diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java
index 282f19d8a2..f7c564c741 100644
--- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java
@@ -123,12 +123,12 @@ public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) {
         for ( int i = 0; i < cfg.probs.size(); i++ ) {
             double p = cfg.probs.get(i);
             GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i);
-            profile.add(loc, p);
+            profile.add(loc, new ActivityProfileResult(p));
         }
         Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() ));
 
         Assert.assertEquals(profile.size(), cfg.probs.size());
-        Assert.assertEquals(profile.isActiveList, cfg.probs);
+        assertProbsAreEqual(profile.isActiveList, cfg.probs);
 
         assertRegionsAreEqual(profile.createActiveRegions(0, 100), cfg.expectedRegions);
     }
@@ -140,5 +140,12 @@ private void assertRegionsAreEqual(List<ActiveRegion> actual, List<ActiveRegion>
         }
     }
 
+    private void assertProbsAreEqual(List<ActivityProfileResult> actual, List<Double> expected) {
+        Assert.assertEquals(actual.size(), expected.size());
+        for ( int i = 0; i < actual.size(); i++ ) {
+            Assert.assertEquals(actual.get(i).isActiveProb, expected.get(i));
+        }
+    }
+
     // todo -- test extensions
 }
\ No newline at end of file
diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java
index baa2f6218f..208c14fbd0 100644
--- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java
+++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java
@@ -4,6 +4,7 @@
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.testng.Assert;
@@ -37,17 +38,22 @@ public static GATKSAMRecord makeReadFromCigar(Cigar cigar) {
         return ArtificialSAMUtils.createArtificialRead(Utils.arrayFromArrayWithLength(BASES, cigar.getReadLength()), Utils.arrayFromArrayWithLength(QUALS, cigar.getReadLength()), cigar.toString());
     }
 
-    /**
-     * This function generates every valid permutation of cigar strings with a given length.
-     *
-     * A valid cigar object obeys the following rules:
-     *  - No Hard/Soft clips in the middle of the read
-     *  - No deletions in the beginning / end of the read
-     *  - No repeated adjacent element (e.g. 1M2M -> this should be 3M)
-     *
-     * @param maximumLength the maximum number of elements in the cigar
-     * @return a list with all valid Cigar objects
-     */
+    public static GATKSAMRecord makeReadFromCigar(String cigarString) {
+        return makeReadFromCigar(cigarFromString(cigarString));
+    }
+
+    /**
+     * This function generates every valid permutation of cigar strings with a given length.
+     *
+     * A valid cigar object obeys the following rules:
+     *  - No Hard/Soft clips in the middle of the read
+     *  - No deletions in the beginning / end of the read
+     *  - No repeated adjacent element (e.g. 1M2M -> this should be 3M)
+     *  - No consecutive I/D elements
+     *
+     * @param maximumLength the maximum number of elements in the cigar
+     * @return a list with all valid Cigar objects
+     */
     public static List<Cigar> generateCigarList(int maximumLength) {
         int numCigarElements = cigarElements.length;
         LinkedList<Cigar> cigarList = new LinkedList<Cigar>();
@@ -137,7 +143,10 @@ private static Cigar combineAdjacentCigarElements(Cigar rawCigar) {
         CigarElement lastElement = null;
         int lastElementLength = 0;
         for (CigarElement cigarElement : rawCigar.getCigarElements()) {
-            if (lastElement != null && lastElement.getOperator() == cigarElement.getOperator())
+            if (lastElement != null &&
+                    ((lastElement.getOperator() == cigarElement.getOperator()) ||
+                     (lastElement.getOperator() == CigarOperator.I && cigarElement.getOperator() == CigarOperator.D) ||
+                     (lastElement.getOperator() == CigarOperator.D && cigarElement.getOperator() == CigarOperator.I)))
                 lastElementLength += cigarElement.getLength();
             else
             {
@@ -191,7 +200,7 @@ public static Cigar invertCigar (Cigar cigar) {
     /**
      * Checks whether or not the read has any cigar element that is not H or S
      *
-     * @param read
+     * @param read the read
      * @return true if it has any M, I or D, false otherwise
      */
     public static boolean readHasNonClippedBases(GATKSAMRecord read) {
@@ -201,5 +210,79 @@ public static boolean readHasNonClippedBases(GATKSAMRecord read) {
         return false;
     }
 
+    /**
+     * Parses a textual cigar (e.g. "10M2I5M") into a Cigar; operator letters may be upper or lower case.
+     *
+     * @param cigarString the cigar text: a decimal length followed by an operator character, repeated
+     * @return a Cigar holding one element per operator character, in the order encountered
+     * @throws ReviewedStingException if a non-digit character is not a recognized cigar operator
+     */
+    public static Cigar cigarFromString(String cigarString) {
+        Cigar cigar = new Cigar();
+
+        int number = 0;
+        for (int i = 0; i < cigarString.length(); i++) {
+            char x = cigarString.charAt(i);
+
+            if (x >= '0' && x <= '9') {
+                number = number * 10 + (x - '0');
+            }
+            else {
+                CigarElement e;
+                switch (x) {
+                    case 'M':
+                    case 'm':
+                        e = new CigarElement(number, CigarOperator.M);
+                    break;
+
+                    case 'I':
+                    case 'i':
+                        e = new CigarElement(number, CigarOperator.I);
+                    break;
+
+                    case 'D':
+                    case 'd':
+                        e = new CigarElement(number, CigarOperator.D);
+                    break;
+
+                    case 'S':
+                    case 's':
+                        e = new CigarElement(number, CigarOperator.S);
+                    break;
+
+                    case 'N':
+                    case 'n':
+                        e = new CigarElement(number, CigarOperator.N);
+                    break;
+
+                    case 'H':
+                    case 'h':
+                        e = new CigarElement(number, CigarOperator.H);
+                    break;
+
+                    case 'P':
+                    case 'p':
+                        e = new CigarElement(number, CigarOperator.P);
+                    break;
+
+                    case '=':
+                        e = new CigarElement(number, CigarOperator.EQ);
+                    break;
+
+                    case 'X':
+                    case 'x':
+                        e = new CigarElement(number, CigarOperator.X);
+                    break;
+
+                    default:
+                        throw new ReviewedStingException("Unrecognized cigar operator: " + x + " (number: " + number + ")");
+                }
+                cigar.add(e);
+                number = 0; // start the next element's length from scratch instead of appending to this one
+            }
+        }
+        return cigar;
+    }
+
 
 }
diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java
index a0feef186e..77050c069f 100644
--- a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java
@@ -351,7 +351,7 @@ public Object[][] listOfStringsProvider() {
     public void testEncodingListOfString(List<String> strings, String expected) throws IOException {
         final String collapsed = BCF2Utils.collapseStringList(strings);
         Assert.assertEquals(collapsed, expected);
-        Assert.assertEquals(BCF2Utils.exploreStringList(collapsed), strings);
+        Assert.assertEquals(BCF2Utils.explodeStringList(collapsed), strings);
     }
 
     // -----------------------------------------------------------------
@@ -537,11 +537,11 @@ private final byte[] encodeRecord(final List<BCF2TypedValue> toEncode) throws IO
         return record;
     }
 
-    private final void decodeRecord(final List<BCF2TypedValue> toEncode, final byte[] record) {
+    private final void decodeRecord(final List<BCF2TypedValue> toEncode, final byte[] record) throws IOException {
         decodeRecord(toEncode, new BCF2Decoder(record));
     }
 
-    private final void decodeRecord(final List<BCF2TypedValue> toEncode, final BCF2Decoder decoder) {
+    private final void decodeRecord(final List<BCF2TypedValue> toEncode, final BCF2Decoder decoder) throws IOException {
         for ( final BCF2TypedValue tv : toEncode ) {
             Assert.assertFalse(decoder.blockIsFullyDecoded());
             final Object decoded = decoder.decodeTypedValue();
diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2UtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2UtilsUnitTest.java
new file mode 100644
index 0000000000..ae76a374a1
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2UtilsUnitTest.java
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.codecs.bcf2;
+
+import org.broad.tribble.readers.PositionalBufferedStream;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.codecs.vcf.*;
+
+import java.io.*;
+import java.util.*;
+import org.testng.Assert;
+import org.testng.annotations.BeforeSuite;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for BCF2Utils
+ */
+public final class BCF2UtilsUnitTest extends BaseTest {
+    @DataProvider(name = "CollapseExpandTest")
+    public Object[][] makeCollapseExpandTest() {
+        List<Object[]> tests = new ArrayList<Object[]>();
+        tests.add(new Object[]{Arrays.asList("A"), "A", false});
+        tests.add(new Object[]{Arrays.asList("A", "B"), ",A,B", true});
+        tests.add(new Object[]{Arrays.asList("AB"), "AB", false});
+        tests.add(new Object[]{Arrays.asList("AB", "C"), ",AB,C", true});
+        tests.add(new Object[]{Arrays.asList(), "", false});
+        return tests.toArray(new Object[][]{});
+    }
+
+    @Test(dataProvider = "CollapseExpandTest")
+    public void testCollapseExpandTest(final List<String> in, final String expectedCollapsed, final boolean isCollapsed) {
+        final String actualCollapsed = BCF2Utils.collapseStringList(in);
+        Assert.assertEquals(actualCollapsed, expectedCollapsed);
+        Assert.assertEquals(BCF2Utils.isCollapsedString(actualCollapsed), isCollapsed);
+        if ( isCollapsed )
+            Assert.assertEquals(BCF2Utils.explodeStringList(actualCollapsed), in);
+    }
+
+    @DataProvider(name = "HeaderOrderTestProvider")
+    public Object[][] makeHeaderOrderTestProvider() {
+        final List<VCFHeaderLine> inputLines = new ArrayList<VCFHeaderLine>();
+        final List<VCFHeaderLine> extraLines = new ArrayList<VCFHeaderLine>();
+
+        int counter = 0;
+        inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
+        inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
+        inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
+        inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
+        inputLines.add(new VCFInfoHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        inputLines.add(new VCFInfoHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        inputLines.add(new VCFFormatHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        inputLines.add(new VCFFormatHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        final int inputLineCounter = counter;
+        final VCFHeader inputHeader = new VCFHeader(new LinkedHashSet<VCFHeaderLine>(inputLines));
+
+        extraLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
+        extraLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
+        extraLines.add(new VCFInfoHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        extraLines.add(new VCFFormatHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        extraLines.add(new VCFHeaderLine("x", "misc"));
+        extraLines.add(new VCFHeaderLine("y", "misc"));
+
+        List<Object[]> tests = new ArrayList<Object[]>();
+        for ( final int extrasToTake : Arrays.asList(0, 1, 2, 3) ) {
+            final List<VCFHeaderLine> empty = Collections.emptyList();
+            final List<List<VCFHeaderLine>> permutations = extrasToTake == 0
+                    ? Collections.singletonList(empty)
+                    : Utils.makePermutations(extraLines, extrasToTake, false);
+            for ( final List<VCFHeaderLine> permutation : permutations ) {
+                for ( int i = -1; i < inputLines.size(); i++ ) {
+                    final List<VCFHeaderLine> allLines = new ArrayList<VCFHeaderLine>(inputLines);
+                    if ( i >= 0 )
+                        allLines.remove(i);
+                    allLines.addAll(permutation);
+                    final VCFHeader testHeader = new VCFHeader(new LinkedHashSet<VCFHeaderLine>(allLines));
+                    final boolean expectedConsistent = expectedConsistent(testHeader, inputLineCounter);
+                    tests.add(new Object[]{inputHeader, testHeader, expectedConsistent});
+                }
+            }
+        }
+
+        // sample name tests
+        final List<List<String>> sampleNameTests = Arrays.asList(
+                new ArrayList<String>(),
+                Arrays.asList("A"),
+                Arrays.asList("A", "B"),
+                Arrays.asList("A", "B", "C"));
+        for ( final List<String> inSamples : sampleNameTests ) {
+            for ( final List<String> testSamples : sampleNameTests ) {
+                final VCFHeader inputHeaderWithSamples = new VCFHeader(inputHeader.getMetaDataInInputOrder(), inSamples);
+
+                final List<List<String>> permutations = testSamples.isEmpty()
+                        ? Collections.singletonList(testSamples)
+                        : Utils.makePermutations(testSamples, testSamples.size(), false);
+                for ( final List<String> testSamplesPermutation : permutations ) {
+                    final VCFHeader testHeaderWithSamples = new VCFHeader(inputHeader.getMetaDataInInputOrder(), testSamplesPermutation);
+                    final boolean expectedConsistent = testSamples.equals(inSamples);
+                    tests.add(new Object[]{inputHeaderWithSamples, testHeaderWithSamples, expectedConsistent});
+                }
+            }
+        }
+
+        return tests.toArray(new Object[][]{});
+    }
+
+    private static boolean expectedConsistent(final VCFHeader combinationHeader, final int minCounterForInputLines) {
+        final List<Integer> ids = new ArrayList<Integer>();
+        for ( final VCFHeaderLine line : combinationHeader.getMetaDataInInputOrder() ) {
+            if ( line instanceof VCFIDHeaderLine ) {
+                ids.add(Integer.valueOf(((VCFIDHeaderLine) line).getID()));
+            }
+        }
+
+        // consistent only if the header's ids begin with 0 .. minCounterForInputLines - 1, in that order
+        for ( int i = 0; i < minCounterForInputLines; i++ )
+            if ( i >= ids.size() || ids.get(i) != i )
+                return false;
+
+        return true;
+    }
+
+    //
+    // Test to make sure that we detect correctly the case where we can preserve the genotypes data in a BCF2
+    // even when the header file is slightly different
+    //
+    @Test(dataProvider = "HeaderOrderTestProvider")
+    public void testHeaderOrder(final VCFHeader inputHeader, final VCFHeader testHeader, final boolean expectedConsistent) {
+        final boolean actualOrderConsistency = BCF2Utils.headerLinesAreOrderedConsistently(testHeader, inputHeader);
+        Assert.assertEquals(actualOrderConsistency, expectedConsistent);
+    }
+}
diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java
deleted file mode 100644
index 8cd051e018..0000000000
--- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright (c) 2012, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-package org.broadinstitute.sting.utils.codecs.vcf;
-
-import com.google.java.contract.Requires;
-import org.broadinstitute.sting.BaseTest;
-import org.broadinstitute.sting.utils.variantcontext.*;
-import org.testng.Assert;
-import org.testng.SkipException;
-import org.testng.annotations.DataProvider;
-import org.testng.annotations.Test;
-
-import java.util.*;
-
-public class VCFAlleleClipperUnitTest extends BaseTest {
-    // --------------------------------------------------------------------------------
-    //
-    // Test allele clipping
-    //
-    // --------------------------------------------------------------------------------
-
-    private class ClipAllelesTest extends TestDataProvider {
-        final int position;
-        final int stop;
-        final String ref;
-        List<Allele> inputs;
-        List<Allele> expected;
-
-        @Requires("arg.length % 2 == 0")
-        private ClipAllelesTest(final int position, final int stop, final String ... arg) {
-            super(ClipAllelesTest.class);
-            this.position = position;
-            this.stop = stop;
-            this.ref = arg[0];
-
-            int n = arg.length / 2;
-            inputs = new ArrayList<Allele>(n);
-            expected = new ArrayList<Allele>(n);
-
-            for ( int i = 0; i < n; i++ ) {
-                final boolean ref = i % n == 0;
-                inputs.add(Allele.create(arg[i], ref));
-            }
-            for ( int i = n; i < arg.length; i++ ) {
-                final boolean ref = i % n == 0;
-                expected.add(Allele.create(arg[i], ref));
-            }
-        }
-
-        public boolean isClipped() {
-            for ( int i = 0; i < inputs.size(); i++ ) {
-                if ( inputs.get(i).length() != expected.get(i).length() )
-                    return true;
-            }
-
-            return false;
-        }
-
-        public String toString() {
-            return String.format("ClipAllelesTest input=%s expected=%s", inputs, expected);
-        }
-    }
-    @DataProvider(name = "ClipAllelesTest")
-    public Object[][] makeClipAllelesTest() {
-        // do no harm
-        new ClipAllelesTest(10, 10, "A", "A");
-        new ClipAllelesTest(10, 10, "A", "C", "A", "C");
-        new ClipAllelesTest(10, 10, "A", "C", "G", "A", "C", "G");
-
-        // insertions
-        new ClipAllelesTest(10, 10, "A", "AA", "-", "A");
-        new ClipAllelesTest(10, 10, "A", "AAA", "-", "AA");
-        new ClipAllelesTest(10, 10, "A", "AG", "-", "G");
-
-        // deletions
-        new ClipAllelesTest(10, 11, "AA",  "A", "A",  "-");
-        new ClipAllelesTest(10, 12, "AAA", "A", "AA", "-");
-        new ClipAllelesTest(10, 11, "AG",  "A", "G",  "-");
-        new ClipAllelesTest(10, 12, "AGG", "A", "GG", "-");
-
-        // multi-allelic insertion and deletions
-        new ClipAllelesTest(10, 11, "AA",  "A", "AAA", "A",  "-", "AA");
-        new ClipAllelesTest(10, 11, "AA",  "A", "AAG", "A",  "-", "AG");
-        new ClipAllelesTest(10, 10, "A",  "AA", "AAA", "-",  "A", "AA");
-        new ClipAllelesTest(10, 10, "A",  "AA", "ACA", "-",  "A", "CA");
-        new ClipAllelesTest(10, 12, "ACG", "ATC", "AGG", "CG",  "TC", "GG");
-        new ClipAllelesTest(10, 11, "AC", "AT", "AG", "C",  "T", "G");
-
-        // cannot be clipped
-        new ClipAllelesTest(10, 11, "AC", "CT", "AG", "AC",  "CT", "AG");
-        new ClipAllelesTest(10, 11, "AC", "CT", "GG", "AC",  "CT", "GG");
-
-        // symbolic
-        new ClipAllelesTest(10, 100, "A", "<DEL>", "A", "<DEL>");
-        new ClipAllelesTest(50, 50, "G", "G]22:60]", "G", "G]22:60]");
-        new ClipAllelesTest(51, 51, "T", "]22:55]T", "T", "]22:55]T");
-        new ClipAllelesTest(52, 52, "C", "C[22:51[", "C", "C[22:51[");
-        new ClipAllelesTest(60, 60, "A", "A]22:50]", "A", "A]22:50]");
-
-        // symbolic with alleles that should be clipped
-        new ClipAllelesTest(10, 100, "A", "<DEL>", "AA", "-", "<DEL>", "A");
-        new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "A", "<DEL>", "-");
-        new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "AAA", "A", "<DEL>", "-", "AA");
-        new ClipAllelesTest(10, 100, "AG", "<DEL>", "A", "AGA", "G", "<DEL>", "-", "GA");
-        new ClipAllelesTest(10, 100, "G", "<DEL>", "A", "G", "<DEL>", "A");
-
-        // clipping from both ends
-        //
-        // TODO -- THIS CODE IS BROKEN BECAUSE CLIPPING DOES WORK WITH ALLELES CLIPPED FROM THE END
-        //
-//        new ClipAllelesTest(10, 10, "ATA",   "ATTA",   "-",  "T");
-//        new ClipAllelesTest(10, 10, "ATAA",  "ATTAA",  "-",  "T");
-//        new ClipAllelesTest(10, 10, "ATAAG", "ATTAAG", "-",  "T");
-//        new ClipAllelesTest(10, 11, "GTA",   "ATTA",   "G",  "AT");
-//        new ClipAllelesTest(10, 11, "GTAA",  "ATTAA",  "G",  "AT");
-//        new ClipAllelesTest(10, 11, "GTAAG", "ATTAAG", "G",  "AT");
-
-        // complex substitutions
-        new ClipAllelesTest(10, 10, "A", "GA", "A", "GA");
-
-        return ClipAllelesTest.getTests(ClipAllelesTest.class);
-    }
-
-    @Test(dataProvider = "ClipAllelesTest")
-    public void testClipAllelesTest(ClipAllelesTest cfg) {
-        final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
-        Assert.assertNull(clipped.getError(), "Unexpected error occurred");
-        Assert.assertEquals(clipped.getStop(), cfg.stop, "Clipped alleles stop");
-        Assert.assertEquals(clipped.getClippedAlleles(), cfg.expected, "Clipped alleles");
-    }
-
-    @Test(dataProvider = "ClipAllelesTest", dependsOnMethods = "testClipAllelesTest")
-    public void testPaddingAllelesInVC(final ClipAllelesTest cfg) {
-        final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
-        final VariantContext vc = new VariantContextBuilder("x", "1", cfg.position, cfg.stop, clipped.getClippedAlleles())
-                .referenceBaseForIndel(clipped.getRefBaseForIndel()).make();
-
-        if ( vc.isMixed() && vc.hasSymbolicAlleles() )
-            throw new SkipException("GATK cannot handle mixed variant contexts with symbolic and concrete alleles.  Remove this check when allele clipping and padding is generalized");
-
-        Assert.assertEquals(VCFAlleleClipper.needsPadding(vc), cfg.isClipped(), "needPadding method");
-
-        if ( cfg.isClipped() ) {
-            // TODO
-            // TODO note that the GATK currently uses a broken approach to the clipped alleles, so the expected stop is
-            // TODO actually the original stop, as the original stop is +1 its true size.
-            // TODO
-            final int expectedStop = vc.getEnd(); //  + (vc.hasSymbolicAlleles() ? 0 : 1);
-
-            final VariantContext padded = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
-            Assert.assertEquals(padded.getStart(),   vc.getStart(),   "padded VC start");
-            Assert.assertEquals(padded.getAlleles(), cfg.inputs,      "padded VC alleles == original unclipped alleles");
-            Assert.assertEquals(padded.getEnd(),     expectedStop,    "padded VC end should be clipped VC + 1 (added a base to ref allele)");
-            Assert.assertFalse(VCFAlleleClipper.needsPadding(padded), "padded VC shouldn't need padding again");
-        }
-    }
-
-    // --------------------------------------------------------------------------------
-    //
-    // basic allele clipping test
-    //
-    // --------------------------------------------------------------------------------
-
-    private class ReverseClippingPositionTestProvider extends TestDataProvider {
-        final String ref;
-        final List<Allele> alleles = new ArrayList<Allele>();
-        final int expectedClip;
-
-        private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) {
-            super(ReverseClippingPositionTestProvider.class);
-            this.ref = ref;
-            for ( final String allele : alleles )
-                this.alleles.add(Allele.create(allele));
-            this.expectedClip = expectedClip;
-        }
-
-        @Override
-        public String toString() {
-            return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
-        }
-    }
-
-    @DataProvider(name = "ReverseClippingPositionTestProvider")
-    public Object[][] makeReverseClippingPositionTestProvider() {
-        // pair clipping
-        new ReverseClippingPositionTestProvider(0, "ATT", "CCG");
-        new ReverseClippingPositionTestProvider(1, "ATT", "CCT");
-        new ReverseClippingPositionTestProvider(2, "ATT", "CTT");
-        new ReverseClippingPositionTestProvider(2, "ATT", "ATT");  // cannot completely clip allele
-
-        // triplets
-        new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG");
-        new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
-        new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
-
-        return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
-    }
-
-
-    @Test(dataProvider = "ReverseClippingPositionTestProvider")
-    public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
-        int result = VCFAlleleClipper.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
-        Assert.assertEquals(result, cfg.expectedClip);
-    }
-}
diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java
index e9b845d598..b2a4ac2dad 100644
--- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java
@@ -39,6 +39,17 @@ public void testReadingAndWritingBreakpointAlleles() {
         executeTest("Test reading and writing breakpoint VCF", spec1);
     }
 
+    @Test(enabled = true)
+    public void testReadingLowerCaseBases() {
+        // runs SelectVariants on lowercaseBases.vcf through the walker test harness
+        final String lowerCaseVCF = privateTestDir + "lowercaseBases.vcf";
+        final String commonArgs = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
+        final String commandLine = commonArgs + "-T SelectVariants -V " + lowerCaseVCF;
+
+        final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, Arrays.asList("e0e308a25e56bde1c664139bb44ed19d"));
+        executeTest("Test reading VCF with lower-case bases", spec);
+    }
+
     @Test(enabled = true)
     public void testReadingAndWriting1000GSVs() {
         String testVCF = privateTestDir + "1000G_SVs.chr1.vcf";
@@ -46,7 +57,7 @@ public void testReadingAndWriting1000GSVs() {
         String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
 
         String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
-        WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(""));
+        WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("bdab26dd7648a806dbab01f64db2bdab"));
         executeTest("Test reading and writing 1000G Phase I SVs", spec1);
     }
 
@@ -57,7 +68,7 @@ public void testReadingAndWritingSamtools() {
         String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
 
         String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
-        WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0f82ac11852e7f958c1a0ce52398c2ae"));
+        WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("38697c195e7abf18d95dcc16c8e6d284"));
         executeTest("Test reading and writing samtools vcf", spec1);
     }
 
@@ -66,7 +77,7 @@ public void testWritingSamtoolsWExBCFExample() {
         String testVCF = privateTestDir + "ex2.vcf";
         String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
         String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
-        WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("9773d6a121cfcb18d090965bc520f120"));
+        WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e8f721ce81e4fdadba13c5291027057f"));
         executeTest("Test writing samtools WEx BCF example", spec1);
     }
 
@@ -81,7 +92,7 @@ public void testReadingSamtoolsWExBCFExample() {
 
     //
     //
-    // Tests to ensure that -U LENIENT_VCF_PROCESS and header repairs are working
+    // Tests to ensure that -U LENIENT_VCF_PROCESSING is working
     //
     //
 
@@ -95,11 +106,6 @@ public void testPassingOnVCFWithoutHeadersWithLenientProcessing() {
         runVCFWithoutHeaders("-U LENIENT_VCF_PROCESSING", "6de8cb7457154dd355aa55befb943f88", null, true);
     }
 
-    @Test
-    public void testPassingOnVCFWithoutHeadersRepairingHeaders() {
-        runVCFWithoutHeaders("-repairVCFHeader " + privateTestDir + "vcfexample2.justHeader.vcf", "ff61e9cad6653c7f93d82d391f7ecdcb", null, false);
-    }
-
     private void runVCFWithoutHeaders(final String moreArgs, final String expectedMD5, final Class expectedException, final boolean disableBCF) {
         final String testVCF = privateTestDir + "vcfexample2.noHeader.vcf";
         final String baseCommand = "-R " + b37KGReference
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java
similarity index 89%
rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java
rename to public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java
index 553b7e237a..2556448ada 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java
@@ -1,5 +1,8 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;
 
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.utils.recalibration.covariates.ContextCovariate;
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java
similarity index 90%
rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java
rename to public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java
index 3fa1e916de..c3d93b2cbd 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java
@@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;
 
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.utils.recalibration.covariates.CycleCovariate;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java
new file mode 100644
index 0000000000..0ff2eaf03a
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// our package
+package org.broadinstitute.sting.utils.recalibration;
+
+
+// the imports for unit testing.
+
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.QualityUtils;
+import org.broadinstitute.sting.utils.Utils;
+import org.testng.Assert;
+import org.testng.annotations.BeforeSuite;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+
+public class QualQuantizerUnitTest extends BaseTest {
+    @BeforeSuite
+    public void before() {
+        // empty body: this hook performs no suite-level setup
+    }
+
+    // --------------------------------------------------------------------------------
+    //
+    // merge case Provider
+    //
+    // --------------------------------------------------------------------------------
+
+    // Holds two intervals (built with bounds 10 and 11) plus the values expected once they are merged
+    private class QualIntervalTestProvider extends TestDataProvider {
+        final QualQuantizer.QualInterval left, right;
+        int exError, exTotal, exQual;
+        double exErrorRate;
+        private QualIntervalTestProvider(int leftE, int leftN, int rightE, int rightN, int exError, int exTotal) {
+            super(QualIntervalTestProvider.class);
+            final QualQuantizer quantizer = new QualQuantizer(0);
+            this.left = quantizer.new QualInterval(10, 10, leftN, leftE, 0);
+            this.right = quantizer.new QualInterval(11, 11, rightN, rightE, 0);
+            this.exError = exError;
+            this.exTotal = exTotal;
+            // expected error rate: 1 is added to both the summed errors and the summed observations
+            final int errorsPlusOne = leftE + rightE + 1, observationsPlusOne = leftN + rightN + 1;
+            this.exErrorRate = errorsPlusOne / (1.0 * observationsPlusOne);
+            this.exQual = QualityUtils.probToQual(1-this.exErrorRate, 0);
+        }
+    }
+
+    @DataProvider(name = "QualIntervalTestProvider")
+    public Object[][] makeQualIntervalTestProvider() {
+        // each row: leftE, leftN, rightE, rightN, expected errors, expected observations
+        final int[][] cases = {
+                {10, 100, 10, 1000, 20, 1100}, {0, 100, 10, 900, 10, 1000}, {10, 900, 0, 100, 10, 1000},
+                {0, 0, 10, 100, 10, 100}, {1, 10, 9, 90, 10, 100},
+                {1, 10, 9, 100000, 10, 100010}, {1, 10, 9, 1000000, 10, 1000010}
+        };
+        for ( final int[] c : cases )
+            new QualIntervalTestProvider(c[0], c[1], c[2], c[3], c[4], c[5]);
+        return QualIntervalTestProvider.getTests(QualIntervalTestProvider.class);
+    }
+
+    @Test(dataProvider = "QualIntervalTestProvider")
+    public void testQualInterval(QualIntervalTestProvider cfg) {
+        final QualQuantizer.QualInterval combined = cfg.left.merge(cfg.right); // merged interval under test
+        Assert.assertEquals(combined.nErrors, cfg.exError);             // error count
+        Assert.assertEquals(combined.nObservations, cfg.exTotal);       // observation count
+        Assert.assertEquals(combined.getErrorRate(), cfg.exErrorRate);  // error rate
+        Assert.assertEquals(combined.getQual(), cfg.exQual);            // quality
+    }
+
+    @Test
+    public void testMinInterestingQual() {
+        // the merge is expected to be penalty-free only while q+1 (bound of the upper interval) is <= minQual
+        for ( int q = 0; q < 15; q++ ) {
+            for ( int minQual = 0; minQual <= 10; minQual++ ) {
+                final QualQuantizer quantizer = new QualQuantizer(minQual);
+                final QualQuantizer.QualInterval lower = quantizer.new QualInterval(q, q, 100, 10, 0);
+                final QualQuantizer.QualInterval upper = quantizer.new QualInterval(q+1, q+1, 1000, 100, 0);
+
+                final QualQuantizer.QualInterval merged = lower.merge(upper);
+                if ( q + 1 > minQual )
+                    Assert.assertTrue(merged.getPenalty() > 0.0);
+                else
+                    Assert.assertEquals(merged.getPenalty(), 0.0);
+            }
+        }
+    }
+
+
+    // --------------------------------------------------------------------------------
+    //
+    // High-level case Provider
+    //
+    // --------------------------------------------------------------------------------
+
+    // One quantization scenario: per-quality observation counts, a level count, and the expected mapping
+    private class QuantizerTestProvider extends TestDataProvider {
+        final List<Long> nObservationsPerQual = new ArrayList<Long>();
+        final List<Integer> expectedMap;
+        final int nLevels;
+
+        private QuantizerTestProvider(final List<Integer> nObservationsPerQual, final int nLevels, final List<Integer> expectedMap) {
+            super(QuantizerTestProvider.class);
+            this.nLevels = nLevels;
+            this.expectedMap = expectedMap;
+            for ( final Integer count : nObservationsPerQual )
+                this.nObservationsPerQual.add(count.longValue());
+        }
+
+        @Override
+        public String toString() {
+            final String counts = Utils.join(",", nObservationsPerQual);
+            return String.format("QQTest nLevels=%d nObs=[%s] map=[%s]", nLevels, counts, Utils.join(",", expectedMap));
+        }
+    }
+
+    @DataProvider(name = "QuantizerTestProvider")
+    public Object[][] makeQuantizerTestProvider() {
+        // every observation sits at index 2
+        final List<Integer> onlyQ2 = Arrays.asList(0, 0, 1000, 0, 0);
+        new QuantizerTestProvider(onlyQ2, 5, Arrays.asList(0, 1, 2, 3, 4));
+        new QuantizerTestProvider(onlyQ2, 1, Arrays.asList(2, 2, 2, 2, 2));
+
+        // two levels, observations at indices 2 and 4; only the count at index 3 differs between the cases
+        new QuantizerTestProvider(Arrays.asList(0, 0, 1000, 0,  1000), 2, Arrays.asList(2, 2, 2, 2, 4));
+        new QuantizerTestProvider(Arrays.asList(0, 0, 1000, 1,  1000), 2, Arrays.asList(2, 2, 2, 4, 4));
+        new QuantizerTestProvider(Arrays.asList(0, 0, 1000, 10, 1000), 2, Arrays.asList(2, 2, 2, 2, 4));
+        return QuantizerTestProvider.getTests(QuantizerTestProvider.class);
+    }
+
+    @Test(dataProvider = "QuantizerTestProvider", enabled = true)
+    public void testQuantizer(QuantizerTestProvider cfg) {
+        // quantize the configured counts, then compare the resulting map entry by entry
+        final QualQuantizer quantizer = new QualQuantizer(cfg.nObservationsPerQual, cfg.nLevels, 0);
+        logger.warn("cfg: " + cfg);
+        int qual = 0;
+        for ( final int expectedQual : cfg.expectedMap ) {
+            final int observedQual = quantizer.originalToQuantizedMap.get(qual++);
+            Assert.assertEquals(observedQual, expectedQual);
+        }
+    }
+}
\ No newline at end of file
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java
similarity index 92%
rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java
rename to public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java
index 37994cf121..dac26cb536 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java
@@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;
 
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.utils.recalibration.covariates.*;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
@@ -41,7 +43,7 @@ public void testCovariateGeneration() {
         requestedCovariates[2] = coCov;
         requestedCovariates[3] = cyCov;
 
-        ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
+        ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);
 
         // check that the length is correct
         Assert.assertEquals(rc.getMismatchesKeySet().length, length);
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java
similarity index 88%
rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java
rename to public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java
index a835083535..78a74d2596 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java
@@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;
 
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.utils.recalibration.covariates.ReadGroupCovariate;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java
new file mode 100644
index 0000000000..33985e0ac4
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// our package
+package org.broadinstitute.sting.utils.recalibration;
+
+
+// the imports for unit testing.
+
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.QualityUtils;
+import org.broadinstitute.sting.utils.Utils;
+import org.testng.Assert;
+import org.testng.annotations.BeforeSuite;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+
+public class RecalDatumUnitTest extends BaseTest {
+
+    // --------------------------------------------------------------------------------
+    //
+    // RecalDatum case Provider
+    //
+    // --------------------------------------------------------------------------------
+
+    // Expected mismatch/observation counts and reported quality for one RecalDatum scenario
+    private class RecalDatumTestProvider extends TestDataProvider {
+        int exError, exTotal, reportedQual;
+        private RecalDatumTestProvider(int E, int N, int reportedQual) {
+            super(RecalDatumTestProvider.class);
+            this.reportedQual = reportedQual;
+            this.exTotal = N;
+            this.exError = E;
+        }
+
+        // expected error rate: (errors + 1) / (observations + 2)
+        public double getErrorRate() {
+            final double smoothedTotal = 1.0 * (exTotal + 2);
+            return (exError + 1) / smoothedTotal;
+        }
+
+        public double getErrorRatePhredScaled() {
+            return QualityUtils.phredScaleErrorRate(getErrorRate());
+        }
+
+        public int getReportedQual() { return reportedQual; }
+
+        public RecalDatum makeRecalDatum() {
+            final byte reportedQualAsByte = (byte) getReportedQual();
+            return new RecalDatum(exTotal, exError, reportedQualAsByte);
+        }
+
+        @Override
+        public String toString() {
+            return String.format("exError=%d, exTotal=%d, reportedQual=%d", exError, exTotal, reportedQual);
+        }
+    }
+
+    @DataProvider(name = "RecalDatumTestProvider")
+    public Object[][] makeRecalDatumTestProvider() {
+        final int[] errorCounts = {1, 10, 100, 1000, 10000}, totals = {10, 100, 1000, 10000, 100000, 1000000};
+        for ( final int E : errorCounts )
+            for ( final int N : totals )
+                for ( int reportedQual = 10; E <= N && reportedQual <= 20; reportedQual += 10 ) // skips E > N
+                    new RecalDatumTestProvider(E, N, reportedQual);
+        return RecalDatumTestProvider.getTests(RecalDatumTestProvider.class);
+    }
+
+    @Test(dataProvider = "RecalDatumTestProvider")
+    public void testRecalDatumBasics(RecalDatumTestProvider cfg) {
+        // a freshly built datum must reflect the counts and quality it was built from
+        assertBasicFeaturesOfRecalDatum(cfg.makeRecalDatum(), cfg);
+    }
+
+    // Checks counts, reported quality (skipped when the expectation is -1), empirical quality and error rate
+    private static void assertBasicFeaturesOfRecalDatum(final RecalDatum datum, final RecalDatumTestProvider cfg) {
+        Assert.assertEquals(datum.getNumMismatches(), cfg.exError);
+        Assert.assertEquals(datum.getNumObservations(), cfg.exTotal);
+        if ( cfg.getReportedQual() != -1 ) Assert.assertEquals(datum.getEstimatedQReportedAsByte(), cfg.getReportedQual());
+        BaseTest.assertEqualsDoubleSmart(datum.getEmpiricalQuality(), cfg.getErrorRatePhredScaled());
+        BaseTest.assertEqualsDoubleSmart(datum.getEmpiricalErrorRate(), cfg.getErrorRate());
+    }
+
+    @Test(dataProvider = "RecalDatumTestProvider")
+    public void testRecalDatumCopyAndCombine(RecalDatumTestProvider cfg) {
+        final RecalDatum original = cfg.makeRecalDatum();
+        final RecalDatum duplicate = new RecalDatum(original);
+        assertBasicFeaturesOfRecalDatum(duplicate, cfg); // the copy must match its source
+        // combining the copy with its source should double both counts and keep the reported quality
+        final RecalDatumTestProvider doubled = new RecalDatumTestProvider(2 * cfg.exError, 2 * cfg.exTotal, cfg.reportedQual);
+        duplicate.combine(original);
+        assertBasicFeaturesOfRecalDatum(duplicate, doubled);
+    }
+
+    @Test(dataProvider = "RecalDatumTestProvider")
+    public void testRecalDatumModification(RecalDatumTestProvider cfg) {
+        RecalDatum datum = cfg.makeRecalDatum();
+        datum.setEmpiricalQuality(10.1); // an explicitly set empirical quality must read back unchanged
+        Assert.assertEquals(datum.getEmpiricalQuality(), 10.1);
+        // an estimated reported quality of 10.1 must read back as 10.1 and as the byte value 10
+        datum.setEstimatedQReported(10.1);
+        Assert.assertEquals(datum.getEstimatedQReported(), 10.1);
+        Assert.assertEquals(datum.getEstimatedQReportedAsByte(), 10);
+        // override the observation count on a fresh datum; cfg is mutated so it remains the expectation
+        datum = cfg.makeRecalDatum();
+        cfg.exTotal = 100000;
+        datum.setNumObservations(cfg.exTotal);
+        assertBasicFeaturesOfRecalDatum(datum, cfg);
+        // override the mismatch count (cfg still carries the observation count assigned above)
+        datum = cfg.makeRecalDatum();
+        cfg.exError = 1000;
+        datum.setNumMismatches(cfg.exError);
+        assertBasicFeaturesOfRecalDatum(datum, cfg);
+        // increment(true) is expected to add one mismatch and one observation
+        datum = cfg.makeRecalDatum();
+        datum.increment(true);
+        cfg.exError++;
+        cfg.exTotal++;
+        assertBasicFeaturesOfRecalDatum(datum, cfg);
+        // increment(10, 5) is expected to add 10 observations and 5 mismatches
+        datum = cfg.makeRecalDatum();
+        datum.increment(10, 5);
+        cfg.exError += 5;
+        cfg.exTotal += 10;
+        assertBasicFeaturesOfRecalDatum(datum, cfg); // NOTE(review): cfg stays mutated after this test; confirm provider instances are not reused elsewhere
+    }
+}
\ No newline at end of file
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java
similarity index 94%
rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java
rename to public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java
index e4a77c016a..485da243f8 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java
@@ -1,9 +1,10 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;
 
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.utils.recalibration.covariates.*;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
@@ -72,7 +73,7 @@ public void testOutput() {
 
         final int expectedKeys = expectedNumberOfKeys(4, length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE);
         int nKeys = 0;                                                                                                  // keep track of how many keys were produced
-        final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
+        final ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);
 
         final RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates);
         final NestedIntegerArray<RecalDatum> rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
@@ -97,7 +98,7 @@ public void testOutput() {
         }
         Assert.assertEquals(nKeys, expectedKeys);
 
-        final RecalibrationReport report = new RecalibrationReport(quantizationInfo, recalibrationTables, RAC.generateReportTable(), RAC);
+        final RecalibrationReport report = new RecalibrationReport(quantizationInfo, recalibrationTables, RAC.generateReportTable("ignore"), RAC);
 
         File output = new File("RecalibrationReportUnitTestOutuput.grp");
         PrintStream out;
diff --git a/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java
new file mode 100755
index 0000000000..5a606c50ef
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java
@@ -0,0 +1,175 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package org.broadinstitute.sting.utils.threading;
+
+import org.apache.log4j.Priority;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.*;
+
+/**
+ * Unit tests checking how StateMonitoringThreadFactory divides thread run time among its tracked thread states.
+ */
+public class StateMonitoringThreadFactoryUnitTest extends BaseTest {
+    // the duration of the tests -- 100 ms is tolerable given the number of tests we are doing
+    private final static long THREAD_TARGET_DURATION_IN_MILLISECOND = 100;
+    final static Object GLOBAL_LOCK = new Object(); // held by testStateTest so that BLOCKED workers have a monitor to block on
+
+    private class StateTest extends TestDataProvider { // one test case: the state each worker thread should be put into
+        private final double TOLERANCE = 0.1; // absolute slack allowed around each state's expected fraction
+
+        final List<Thread.State> statesForThreads;
+
+        public StateTest(final List<Thread.State> statesForThreads) {
+            super(StateTest.class);
+            this.statesForThreads = statesForThreads;
+            setName("StateTest " + Utils.join(",", statesForThreads));
+        }
+
+        public List<Thread.State> getStatesForThreads() {
+            return statesForThreads;
+        }
+
+        public int getNStates() { return statesForThreads.size(); }
+
+        public double maxStateFraction(final Thread.State state) { return fraction(state) + TOLERANCE; }
+        public double minStateFraction(final Thread.State state) { return fraction(state) - TOLERANCE; }
+
+        private double fraction(final Thread.State state) { // share of the requested threads that were asked to be in state
+            return Collections.frequency(statesForThreads, state) / (1.0 * statesForThreads.size());
+        }
+    }
+
+    /**
+     * Test helper threading class that puts the thread into RUNNABLE, BLOCKED, or WAITING state as
+     * requested for input argument
+     */
+    private static class StateTestThread implements Callable<Double> {
+        private final Thread.State stateToImplement;
+
+        private StateTestThread(final Thread.State stateToImplement) {
+            if ( ! StateMonitoringThreadFactory.TRACKED_STATES.contains(stateToImplement) )
+                throw new IllegalArgumentException("Unexpected state " + stateToImplement);
+            this.stateToImplement = stateToImplement;
+        }
+
+        @Override
+        public Double call() throws Exception {
+            switch ( stateToImplement ) {
+                case RUNNABLE:
+                    // do some work until we get to THREAD_TARGET_DURATION_IN_MILLISECOND
+                    double sum = 0.0;
+                    final long startTime = System.currentTimeMillis();
+                    for ( int i = 1; System.currentTimeMillis() - startTime < (THREAD_TARGET_DURATION_IN_MILLISECOND - 1); i++ ) {
+                        sum += Math.log10(i);
+                    }
+                    return sum;
+                case WAITING: // NOTE(review): Thread.sleep reports TIMED_WAITING, not WAITING -- confirm the factory counts it as WAITING
+                    Thread.sleep(THREAD_TARGET_DURATION_IN_MILLISECOND);
+                    return 0.0;
+                case BLOCKED:
+                    if ( StateMonitoringThreadFactory.DEBUG ) logger.warn("Blocking...");
+                    synchronized (GLOBAL_LOCK) {
+                        // the GLOBAL_LOCK must be held by the unit test itself for this to properly block
+                        if ( StateMonitoringThreadFactory.DEBUG ) logger.warn("  ... done blocking");
+                    }
+                    return 0.0;
+                default:
+                    throw new ReviewedStingException("Unexpected thread test state " + stateToImplement);
+            }
+        }
+    }
+
+    @DataProvider(name = "StateTest")
+    public Object[][] createStateTest() {
+        for ( final int nThreads : Arrays.asList(1, 2, 3, 4) ) {
+            for (final List<Thread.State> states : Utils.makePermutations(StateMonitoringThreadFactory.TRACKED_STATES, nThreads, true) ) {
+                // the instance is not kept here; the TestDataProvider constructor presumably registers it for getTests()
+                new StateTest(states);
+            }
+        }
+
+        return StateTest.getTests(StateTest.class);
+    }
+
+    @Test(enabled = false, dataProvider = "StateTest")
+    public void testStateTest(final StateTest test) throws InterruptedException {
+        // one pool thread per requested state, all created through the factory under test
+        final StateMonitoringThreadFactory factory = new StateMonitoringThreadFactory(test.getNStates());
+        final ExecutorService threadPool = Executors.newFixedThreadPool(test.getNStates(), factory);
+
+        logger.warn("Running " + test);
+        synchronized (GLOBAL_LOCK) {
+            //logger.warn("  Have lock");
+            for ( final Thread.State threadToRunState : test.getStatesForThreads() )
+                threadPool.submit(new StateTestThread(threadToRunState));
+
+            // the lock is held while the workers are submitted and through the sleep below, so the
+            // BLOCKED workers stay blocked on GLOBAL_LOCK for their allotted time
+            threadPool.shutdown();
+            Thread.sleep(THREAD_TARGET_DURATION_IN_MILLISECOND);
+        }
+        //logger.warn("  Releasing lock");
+        threadPool.awaitTermination(10, TimeUnit.SECONDS);
+        //logger.warn("  done awaiting termination");
+        //logger.warn("  waiting for all activeThreads to complete");
+        factory.waitForAllThreadsToComplete();
+        //logger.warn("  done waiting for activeThreads");
+
+        // make sure we counted everything properly
+        final long totalTime = factory.getTotalTime();
+        final long minTime = (long)(THREAD_TARGET_DURATION_IN_MILLISECOND * 0.5) * test.getNStates();
+        final long maxTime = (long)(THREAD_TARGET_DURATION_IN_MILLISECOND * 1.5) * test.getNStates();
+        //logger.warn("Testing total time");
+        Assert.assertTrue(totalTime >= minTime, "Factory results not properly accumulated: totalTime = " + totalTime + " < minTime = " + minTime);
+        Assert.assertTrue(totalTime <= maxTime, "Factory results not properly accumulated: totalTime = " + totalTime + " > maxTime = " + maxTime);
+
+        for (final Thread.State state : StateMonitoringThreadFactory.TRACKED_STATES ) {
+            final double min = test.minStateFraction(state);
+            final double max = test.maxStateFraction(state);
+            final double obs = factory.getStateFraction(state);
+//            logger.warn("  Checking " + state
+//                    + " min " + String.format("%.2f", min)
+//                    + " max " + String.format("%.2f", max)
+//                    + " obs " + String.format("%.2f", obs)
+//                    + " factor = " + factory);
+            Assert.assertTrue(obs >= min, "Too little time spent in state " + state + " obs " + obs + " min " + min);
+            Assert.assertTrue(obs <= max, "Too much time spent in state " + state + " obs " + obs + " max " + max);
+        }
+
+        // we actually ran the expected number of activeThreads
+        Assert.assertEquals(factory.getNThreadsCreated(), test.getNStates());
+
+        // should be called to ensure we don't format / NPE on output
+        factory.printUsageInformation(logger, Priority.INFO);
+    }
+}
\ No newline at end of file
diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java
index ed9805d19e..65398c373e 100755
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java
@@ -37,8 +37,6 @@
 //    public Allele(byte[] bases, boolean isRef) {
 //    public Allele(boolean isRef) {
 //    public Allele(String bases, boolean isRef) {
-//    public boolean isNullAllele()       { return length() == 0; }
-//    public boolean isNonNullAllele()    { return ! isNullAllele(); }
 //    public boolean isReference()        { return isRef; }
 //    public boolean isNonReference()     { return ! isReference(); }
 //    public byte[] getBases() { return bases; }
@@ -49,13 +47,10 @@
  * Basic unit test for RecalData
  */
 public class AlleleUnitTest {
-    Allele ARef, del, delRef, A, T, ATIns, ATCIns, NoCall;
+    Allele ARef, A, T, ATIns, ATCIns, NoCall;
     
     @BeforeSuite
     public void before() {
-        del = Allele.create("-");
-        delRef = Allele.create("-", true);
-
         A = Allele.create("A");
         ARef = Allele.create("A", true);
         T = Allele.create("T");
@@ -72,8 +67,6 @@ public void testCreatingSNPAlleles() {
         Assert.assertFalse(A.isReference());
         Assert.assertTrue(A.basesMatch("A"));
         Assert.assertEquals(A.length(), 1);
-        Assert.assertTrue(A.isNonNull());
-        Assert.assertFalse(A.isNull());
 
         Assert.assertTrue(ARef.isReference());
         Assert.assertFalse(ARef.isNonReference());
@@ -92,8 +85,8 @@ public void testCreatingNoCallAlleles() {
         Assert.assertFalse(NoCall.isReference());
         Assert.assertFalse(NoCall.basesMatch("."));
         Assert.assertEquals(NoCall.length(), 0);
-        Assert.assertTrue(NoCall.isNonNull());
-        Assert.assertFalse(NoCall.isNull());
+        Assert.assertTrue(NoCall.isNoCall());
+        Assert.assertFalse(NoCall.isCalled());
     }
 
 
@@ -103,16 +96,6 @@ public void testCreatingIndelAlleles() {
         Assert.assertEquals(ATCIns.length(), 3);
         Assert.assertEquals(ATIns.getBases(), "AT".getBytes());
         Assert.assertEquals(ATCIns.getBases(), "ATC".getBytes());
-
-        Assert.assertTrue(del.isNonReference());
-        Assert.assertFalse(delRef.isNonReference());
-        Assert.assertFalse(del.isReference());
-        Assert.assertTrue(delRef.isReference());
-        Assert.assertFalse(del.basesMatch("-"));
-        Assert.assertTrue(del.basesMatch(""));
-        Assert.assertEquals(del.length(), 0);
-        Assert.assertFalse(del.isNonNull());
-        Assert.assertTrue(del.isNull());
     }
 
 
@@ -128,18 +111,6 @@ public void testConstructors1() {
         Assert.assertFalse(a1.equals(a4));
     }
 
-    @Test
-    public void testDelConstructors() {
-        Allele a1 = Allele.create("-");
-        Allele a2 = Allele.create("-".getBytes());
-        Allele a3 = Allele.create("");
-        Allele a4 = Allele.create("", true);
-
-        Assert.assertTrue(a1.equals(a2));
-        Assert.assertTrue(a1.equals(a3));
-        Assert.assertFalse(a1.equals(a4));
-    }
-
     @Test
     public void testInsConstructors() {
         Allele a1 = Allele.create("AC");
@@ -156,7 +127,6 @@ public void testInsConstructors() {
     public void testEquals() {
         Assert.assertTrue(ARef.basesMatch(A));
         Assert.assertFalse(ARef.equals(A));
-        Assert.assertFalse(ARef.equals(del));
         Assert.assertFalse(ARef.equals(ATIns));
         Assert.assertFalse(ARef.equals(ATCIns));
 
@@ -164,11 +134,6 @@ public void testEquals() {
         Assert.assertFalse(T.basesMatch(A));
         Assert.assertFalse(T.equals(A));
 
-        Assert.assertTrue(del.basesMatch(del));
-        Assert.assertTrue(del.basesMatch(delRef));
-        Assert.assertTrue(del.equals(del));
-        Assert.assertFalse(del.equals(delRef));
-
         Assert.assertTrue(ATIns.equals(ATIns));
         Assert.assertFalse(ATIns.equals(ATCIns));
         Assert.assertTrue(ATIns.basesMatch("AT"));
@@ -209,7 +174,6 @@ public void testBadConstructorArgs5() {
     public void testExtend() {
         Assert.assertEquals("AT", Allele.extend(Allele.create("A"), "T".getBytes()).toString());
         Assert.assertEquals("ATA", Allele.extend(Allele.create("A"), "TA".getBytes()).toString());
-        Assert.assertEquals("A", Allele.extend(Allele.create("-"), "A".getBytes()).toString());
         Assert.assertEquals("A", Allele.extend(Allele.NO_CALL, "A".getBytes()).toString());
         Assert.assertEquals("ATCGA", Allele.extend(Allele.create("AT"), "CGA".getBytes()).toString());
         Assert.assertEquals("ATCGA", Allele.extend(Allele.create("ATC"), "GA".getBytes()).toString());
diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java
index 7c522eadfb..0e5522e3a9 100644
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java
@@ -152,7 +152,7 @@ public void run(final VariantContext vc) {
                     public void run(final VariantContext vc) {
                         if ( samples == null )
                             samples = new HashSet<String>(new ArrayList<String>(vc.getSampleNames()).subList(0, nSamplesToTake));
-                        VariantContext sub = vc.subContextFromSamples(samples, true);
+                        VariantContext sub = vc.subContextFromSamples(samples);
                         sub.getNSamples();
                     }
                 };
diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java
index 1a0e8e39da..26e2dbfbc4 100644
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java
@@ -197,7 +197,7 @@ private static void createSyntheticHeader() {
         addHeaderLine(metaData, "FT", 1, VCFHeaderLineType.String);
 
         // prep the header
-        metaData.add(new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, Collections.singletonMap("ID", "1"), 0));
+        metaData.add(new VCFContigHeaderLine(Collections.singletonMap("ID", "1"), 0));
 
         metaData.add(new VCFFilterHeaderLine("FILTER1"));
         metaData.add(new VCFFilterHeaderLine("FILTER2"));
@@ -225,10 +225,10 @@ private static void makeSyntheticTests() {
         add(builder());
         add(builder().alleles("A"));
         add(builder().alleles("A", "C", "T"));
-        add(builder().alleles("-", "C").referenceBaseForIndel("A"));
-        add(builder().alleles("-", "CAGT").referenceBaseForIndel("A"));
-        add(builder().loc("1", 10, 11).alleles("C", "-").referenceBaseForIndel("A"));
-        add(builder().loc("1", 10, 13).alleles("CGT", "-").referenceBaseForIndel("A"));
+        add(builder().alleles("A", "AC"));
+        add(builder().alleles("A", "ACAGT"));
+        add(builder().loc("1", 10, 11).alleles("AC", "A"));
+        add(builder().loc("1", 10, 13).alleles("ACGT", "A"));
 
         // make sure filters work
         add(builder().unfiltered());
@@ -302,8 +302,8 @@ private static void addGenotypesToTestData() {
 
         sites.add(builder().alleles("A").make());
         sites.add(builder().alleles("A", "C", "T").make());
-        sites.add(builder().alleles("-", "C").referenceBaseForIndel("A").make());
-        sites.add(builder().alleles("-", "CAGT").referenceBaseForIndel("A").make());
+        sites.add(builder().alleles("A", "AC").make());
+        sites.add(builder().alleles("A", "ACAGT").make());
 
         for ( VariantContext site : sites ) {
             addGenotypes(site);
@@ -888,20 +888,8 @@ public static void addComplexGenotypesTest() {
         }
     }
 
-    private static final List<List<Allele>> makeAllGenotypes(final List<Allele> alleles, final int highestPloidy) {
-        final List<List<Allele>> combinations = new ArrayList<List<Allele>>();
-        if ( highestPloidy == 1 ) {
-            for ( final Allele a : alleles )
-                combinations.add(Collections.singletonList(a));
-        } else {
-            final List<List<Allele>> sub = makeAllGenotypes(alleles, highestPloidy - 1);
-            for ( List<Allele> subI : sub ) {
-                for ( final Allele a : alleles ) {
-                    combinations.add(Utils.cons(a, subI));
-                }
-            }
-        }
-        return combinations;
+    private static List<List<Allele>> makeAllGenotypes(final List<Allele> alleles, final int highestPloidy) {
+        return Utils.makePermutations(alleles, highestPloidy, true);
     }
 
     public static void assertEquals(final VCFHeader actual, final VCFHeader expected) {
diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java
index 1d290118f1..272166c68c 100755
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java
@@ -28,27 +28,22 @@ public class VariantContextUnitTest extends BaseTest {
     int snpLocStart = 10;
     int snpLocStop = 10;
 
-    // - / ATC [ref] from 20-23
+    // - / ATC [ref] from 20-22
     String delLoc = "chr1";
     int delLocStart = 20;
-    int delLocStop = 23;
+    int delLocStop = 22;
 
     // - [ref] / ATC from 20-20
     String insLoc = "chr1";
     int insLocStart = 20;
     int insLocStop = 20;
 
-    // - / A / T / ATC [ref] from 20-23
-    String mixedLoc = "chr1";
-    int mixedLocStart = 20;
-    int mixedLocStop = 23;
-
     VariantContextBuilder basicBuilder, snpBuilder, insBuilder;
 
     @BeforeSuite
     public void before() {
-        del = Allele.create("-");
-        delRef = Allele.create("-", true);
+        del = Allele.create("A");
+        delRef = Allele.create("A", true);
 
         A = Allele.create("A");
         C = Allele.create("C");
@@ -62,9 +57,9 @@ public void before() {
 
     @BeforeMethod
     public void beforeTest() {
-        basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A');
-        snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A');
-        insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)).referenceBaseForIndel((byte)'A');
+        basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T));
+        snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T));
+        insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC));
     }
 
     @Test
@@ -213,7 +208,7 @@ public void testCreatingRefVariantContext() {
     @Test
     public void testCreatingDeletionVariantContext() {
         List<Allele> alleles = Arrays.asList(ATCref, del);
-        VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make();
+        VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
 
         Assert.assertEquals(vc.getChr(), delLoc);
         Assert.assertEquals(vc.getStart(), delLocStart);
@@ -240,8 +235,8 @@ public void testCreatingDeletionVariantContext() {
     @Test
     public void testMatchingAlleles() {
         List<Allele> alleles = Arrays.asList(ATCref, del);
-        VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make();
-        VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).referenceBaseForIndel((byte)'A').make();
+        VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
+        VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).make();
 
         Assert.assertTrue(vc.hasSameAllelesAs(vc2));
         Assert.assertTrue(vc.hasSameAlternateAllelesAs(vc2));
@@ -386,13 +381,13 @@ public void testAccessingSimpleSNPGenotypes() {
 
     @Test
     public void testAccessingCompleteGenotypes() {
-        List<Allele> alleles = Arrays.asList(Aref, T, del);
+        List<Allele> alleles = Arrays.asList(Aref, T, ATC);
 
         Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
         Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
         Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
-        Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, del));
-        Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(del, del));
+        Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, ATC));
+        Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(ATC, ATC));
         Genotype g6 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
 
         VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
@@ -408,7 +403,7 @@ public void testAccessingCompleteGenotypes() {
         Assert.assertEquals(10, vc.getCalledChrCount());
         Assert.assertEquals(3, vc.getCalledChrCount(Aref));
         Assert.assertEquals(4, vc.getCalledChrCount(T));
-        Assert.assertEquals(3, vc.getCalledChrCount(del));
+        Assert.assertEquals(3, vc.getCalledChrCount(ATC));
         Assert.assertEquals(2, vc.getCalledChrCount(Allele.NO_CALL));
     }
 
@@ -416,7 +411,7 @@ public void testAccessingCompleteGenotypes() {
     public void testAccessingRefGenotypes() {
         List<Allele> alleles1 = Arrays.asList(Aref, T);
         List<Allele> alleles2 = Arrays.asList(Aref);
-        List<Allele> alleles3 = Arrays.asList(Aref, T, del);
+        List<Allele> alleles3 = Arrays.asList(Aref, T);
         for ( List<Allele> alleles : Arrays.asList(alleles1, alleles2, alleles3)) {
             Genotype g1 = GenotypeBuilder.create("AA1", Arrays.asList(Aref, Aref));
             Genotype g2 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref));
@@ -438,7 +433,7 @@ public void testAccessingRefGenotypes() {
 
     @Test
     public void testFilters() {
-        List<Allele> alleles = Arrays.asList(Aref, T, del);
+        List<Allele> alleles = Arrays.asList(Aref, T);
         Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
         Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
 
@@ -470,15 +465,15 @@ public void testFilters() {
 
     @Test
     public void testRepeatAllele() {
-        Allele nullR = Allele.create(Allele.NULL_ALLELE_STRING, true);
-        Allele nullA = Allele.create(Allele.NULL_ALLELE_STRING, false);
-        Allele atc   = Allele.create("ATC", false);
-        Allele atcatc   = Allele.create("ATCATC", false);
-        Allele ccccR = Allele.create("CCCC", true);
-        Allele cc   = Allele.create("CC", false);
-        Allele cccccc   = Allele.create("CCCCCC", false);
-        Allele gagaR   = Allele.create("GAGA", true);
-        Allele gagagaga   = Allele.create("GAGAGAGA", false);
+        Allele nullR = Allele.create("A", true);
+        Allele nullA = Allele.create("A", false);
+        Allele atc   = Allele.create("AATC", false);
+        Allele atcatc   = Allele.create("AATCATC", false);
+        Allele ccccR = Allele.create("ACCCC", true);
+        Allele cc   = Allele.create("ACC", false);
+        Allele cccccc   = Allele.create("ACCCCCC", false);
+        Allele gagaR   = Allele.create("AGAGA", true);
+        Allele gagagaga   = Allele.create("AGAGAGAGA", false);
 
         Pair<List<Integer>,byte[]> result;
         byte[] refBytes = "TATCATCATCGGA".getBytes();
@@ -497,15 +492,15 @@ public void testRepeatAllele() {
         Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7);
 
 
-        // -*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
+        // A*,AATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
         VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make();
         result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
         Assert.assertEquals(result.getFirst().toArray()[0],3);
         Assert.assertEquals(result.getFirst().toArray()[1],4);
         Assert.assertEquals(result.getSecond().length,3);
 
-        // ATC*,-,ATCATC
-        vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ATCref,nullA,atcatc)).make();
+        // AATC*,A,AATCATC
+        vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make();
         result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
         Assert.assertEquals(result.getFirst().toArray()[0],3);
         Assert.assertEquals(result.getFirst().toArray()[1],2);
@@ -522,7 +517,7 @@ public void testRepeatAllele() {
 
         // CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9
         refBytes = "TCCCCCCCAGAGAGAG".getBytes();
-        vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ccccR,cc, nullA,cccccc)).make();
+        vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make();
         result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
         Assert.assertEquals(result.getFirst().toArray()[0],7);
         Assert.assertEquals(result.getFirst().toArray()[1],5);
@@ -532,7 +527,7 @@ public void testRepeatAllele() {
 
         // GAGA*,-,GAGAGAGA
         refBytes = "TGAGAGAGAGATTT".getBytes();
-        vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(gagaR, nullA,gagagaga)).make();
+        vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make();
         result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
         Assert.assertEquals(result.getFirst().toArray()[0],5);
         Assert.assertEquals(result.getFirst().toArray()[1],3);
@@ -564,27 +559,24 @@ public void testGetGenotypeCounts() {
 
         @Test
     public void testVCFfromGenotypes() {
-        List<Allele> alleles = Arrays.asList(Aref, T, del);
+        List<Allele> alleles = Arrays.asList(Aref, T);
         Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
         Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
         Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
         Genotype g4 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
-        Genotype g5 = GenotypeBuilder.create("--", Arrays.asList(del, del));
-        VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
+        VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4).make();
 
         VariantContext vc12 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g2.getSampleName())), true);
         VariantContext vc1 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName())), true);
         VariantContext vc23 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g2.getSampleName(), g3.getSampleName())), true);
         VariantContext vc4 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g4.getSampleName())), true);
         VariantContext vc14 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g4.getSampleName())), true);
-        VariantContext vc5 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g5.getSampleName())), true);
 
         Assert.assertTrue(vc12.isPolymorphicInSamples());
         Assert.assertTrue(vc23.isPolymorphicInSamples());
         Assert.assertTrue(vc1.isMonomorphicInSamples());
         Assert.assertTrue(vc4.isMonomorphicInSamples());
         Assert.assertTrue(vc14.isMonomorphicInSamples());
-        Assert.assertTrue(vc5.isPolymorphicInSamples());
 
         Assert.assertTrue(vc12.isSNP());
         Assert.assertTrue(vc12.isVariant());
@@ -606,17 +598,11 @@ public void testVCFfromGenotypes() {
         Assert.assertFalse(vc14.isVariant());
         Assert.assertFalse(vc14.isBiallelic());
 
-        Assert.assertTrue(vc5.isIndel());
-        Assert.assertTrue(vc5.isSimpleDeletion());
-        Assert.assertTrue(vc5.isVariant());
-        Assert.assertTrue(vc5.isBiallelic());
-
         Assert.assertEquals(3, vc12.getCalledChrCount(Aref));
         Assert.assertEquals(1, vc23.getCalledChrCount(Aref));
         Assert.assertEquals(2, vc1.getCalledChrCount(Aref));
         Assert.assertEquals(0, vc4.getCalledChrCount(Aref));
         Assert.assertEquals(2, vc14.getCalledChrCount(Aref));
-        Assert.assertEquals(0, vc5.getCalledChrCount(Aref));
     }
 
     public void testGetGenotypeMethods() {
@@ -664,13 +650,12 @@ public String toString() {
     @DataProvider(name = "getAlleles")
     public Object[][] mergeAllelesData() {
         new GetAllelesTest("A*",   Aref);
-        new GetAllelesTest("-*",   delRef);
         new GetAllelesTest("A*/C", Aref, C);
         new GetAllelesTest("A*/C/T", Aref, C, T);
         new GetAllelesTest("A*/T/C", Aref, T, C);
-        new GetAllelesTest("A*/C/T/-", Aref, C, T, del);
-        new GetAllelesTest("A*/T/C/-", Aref, T, C, del);
-        new GetAllelesTest("A*/-/T/C", Aref, del, T, C);
+        new GetAllelesTest("A*/C/T/ATC", Aref, C, T, ATC);
+        new GetAllelesTest("A*/T/C/ATC", Aref, T, C, ATC);
+        new GetAllelesTest("A*/ATC/T/C", Aref, ATC, T, C);
 
         return GetAllelesTest.getTests(GetAllelesTest.class);
     }
@@ -678,7 +663,7 @@ public Object[][] mergeAllelesData() {
     @Test(dataProvider = "getAlleles")
     public void testMergeAlleles(GetAllelesTest cfg) {
         final List<Allele> altAlleles = cfg.alleles.subList(1, cfg.alleles.size());
-        final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).referenceBaseForIndel((byte)'A').make();
+        final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).make();
 
         Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles");
         Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size");
@@ -845,7 +830,6 @@ public void runSubContextTest(SubContextTest cfg) {
         Assert.assertEquals(sub.getLog10PError(), vc.getLog10PError());
         Assert.assertEquals(sub.getFilters(), vc.getFilters());
         Assert.assertEquals(sub.getID(), vc.getID());
-        Assert.assertEquals(sub.getReferenceBaseForIndel(), vc.getReferenceBaseForIndel());
         Assert.assertEquals(sub.getAttributes(), vc.getAttributes());
 
         Set<Genotype> expectedGenotypes = new HashSet<Genotype>();
diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java
index b09a10d07a..95e8458c88 100644
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java
@@ -39,7 +39,7 @@
 import java.util.*;
 
 public class VariantContextUtilsUnitTest extends BaseTest {
-    Allele Aref, T, C, delRef, Cref, ATC, ATCATC;
+    Allele Aref, T, C, Cref, ATC, ATCATC;
     private GenomeLocParser genomeLocParser;
 
     @BeforeSuite
@@ -56,7 +56,6 @@ public void setup() {
         // alleles
         Aref = Allele.create("A", true);
         Cref = Allele.create("C", true);
-        delRef = Allele.create("-", true);
         T = Allele.create("T");
         C = Allele.create("C");
         ATC = Allele.create("ATC");
@@ -99,7 +98,7 @@ private VariantContext makeVC(String source, List<Allele> alleles, Collection<Ge
     private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes, Set<String> filters) {
         int start = 10;
         int stop = start; // alleles.contains(ATC) ? start + 3 : start;
-        return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).referenceBaseForIndel(Cref.getBases()[0]).make();
+        return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).make();
     }
 
     // --------------------------------------------------------------------------------
@@ -156,28 +155,23 @@ public Object[][] mergeAllelesData() {
                 Arrays.asList(Aref, C),
                 Arrays.asList(Aref, T, C)); // in order of appearence
 
-        // The following is actually a pathological case - there's no way on a vcf to represent a null allele that's non-variant.
-        // The code converts this (correctly) to a single-base non-variant vc with whatever base was there as a reference.
-        new MergeAllelesTest(Arrays.asList(delRef),
-                Arrays.asList(Cref));
-
-        new MergeAllelesTest(Arrays.asList(delRef),
-                Arrays.asList(delRef, ATC),
-                Arrays.asList(delRef, ATC));
+        new MergeAllelesTest(Arrays.asList(Aref),
+                Arrays.asList(Aref, ATC),
+                Arrays.asList(Aref, ATC));
 
-        new MergeAllelesTest(Arrays.asList(delRef),
-                Arrays.asList(delRef, ATC, ATCATC),
-                Arrays.asList(delRef, ATC, ATCATC));
+        new MergeAllelesTest(Arrays.asList(Aref),
+                Arrays.asList(Aref, ATC, ATCATC),
+                Arrays.asList(Aref, ATC, ATCATC));
 
         // alleles in the order we see them
-        new MergeAllelesTest(Arrays.asList(delRef, ATCATC),
-                Arrays.asList(delRef, ATC, ATCATC),
-                Arrays.asList(delRef, ATCATC, ATC));
+        new MergeAllelesTest(Arrays.asList(Aref, ATCATC),
+                Arrays.asList(Aref, ATC, ATCATC),
+                Arrays.asList(Aref, ATCATC, ATC));
 
         // same
-        new MergeAllelesTest(Arrays.asList(delRef, ATC),
-                Arrays.asList(delRef, ATCATC),
-                Arrays.asList(delRef, ATC, ATCATC));
+        new MergeAllelesTest(Arrays.asList(Aref, ATC),
+                Arrays.asList(Aref, ATCATC),
+                Arrays.asList(Aref, ATC, ATCATC));
 
         return MergeAllelesTest.getTests(MergeAllelesTest.class);
     }
@@ -661,4 +655,52 @@ public void testRepeatDetectorTest(RepeatDetectorTest cfg) {
          // test alleles are equal
         Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat);
     }
+
+    // --------------------------------------------------------------------------------
+    //
+    // basic allele clipping test
+    //
+    // --------------------------------------------------------------------------------
+
+    private class ReverseClippingPositionTestProvider extends TestDataProvider {
+        final String ref;
+        final List<Allele> alleles = new ArrayList<Allele>();
+        final int expectedClip;
+
+        private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) {
+            super(ReverseClippingPositionTestProvider.class);
+            this.ref = ref;
+            for ( final String allele : alleles )
+                this.alleles.add(Allele.create(allele));
+            this.expectedClip = expectedClip;
+        }
+
+        @Override
+        public String toString() {
+            return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
+        }
+    }
+
+    @DataProvider(name = "ReverseClippingPositionTestProvider")
+    public Object[][] makeReverseClippingPositionTestProvider() {
+        // pair clipping
+        new ReverseClippingPositionTestProvider(0, "ATT", "CCG");
+        new ReverseClippingPositionTestProvider(1, "ATT", "CCT");
+        new ReverseClippingPositionTestProvider(2, "ATT", "CTT");
+        new ReverseClippingPositionTestProvider(2, "ATT", "ATT");  // cannot completely clip allele
+
+        // triplets
+        new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG");
+        new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
+        new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
+
+        return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
+    }
+
+
+    @Test(dataProvider = "ReverseClippingPositionTestProvider")
+    public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
+        int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
+        Assert.assertEquals(result, cfg.expectedClip);
+    }
 }
diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java
index 6f5756bdc1..8f03f1d389 100644
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java
@@ -56,7 +56,7 @@ public class VariantJEXLContextUnitTest extends BaseTest {
 
     Allele A, Aref, T, Tref;
 
-    Allele del, delRef, ATC, ATCref;
+    Allele ATC, ATCref;
     // A [ref] / T at 10
 
     GenomeLoc snpLoc;
@@ -84,9 +84,6 @@ public void beforeClass() {
 
     @BeforeMethod
     public void before() {
-        del = Allele.create("-");
-        delRef = Allele.create("-", true);
-
         A = Allele.create("A");
         Aref = Allele.create("A", true);
         T = Allele.create("T");
diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java
index a7fff45596..5876efa120 100644
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java
@@ -139,8 +139,8 @@ private VariantContext createVC(VCFHeader header) {
         Map<String, Object> attributes = new HashMap<String,Object>();
         GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size());
 
-        alleles.add(Allele.create("-",true));
-        alleles.add(Allele.create("CC",false));
+        alleles.add(Allele.create("A",true));
+        alleles.add(Allele.create("ACC",false));
 
         attributes.put("DP","50");
         for (String name : header.getGenotypeSamples()) {
@@ -148,7 +148,7 @@ private VariantContext createVC(VCFHeader header) {
             genotypes.add(gt);
         }
         return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
-                .genotypes(genotypes).attributes(attributes).referenceBaseForIndel((byte)'A').make();
+                .genotypes(genotypes).attributes(attributes).make();
     }
 
 
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleRetryMemoryLimit.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleRetryMemoryLimit.scala
new file mode 100644
index 0000000000..09a24e7822
--- /dev/null
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleRetryMemoryLimit.scala
@@ -0,0 +1,22 @@
+import org.broadinstitute.sting.queue.function.RetryMemoryLimit
+import org.broadinstitute.sting.queue.QScript
+import org.broadinstitute.sting.queue.extensions.gatk._
+
+class ExampleRetryMemoryLimit extends QScript {
+  @Input(doc="The reference file for the bam files.", shortName="R")
+  var referenceFile: File = _
+
+  @Input(doc="Bam file to genotype.", shortName="I")
+  var bamFile: File = _
+
+  def script() {
+    val ug = new UnifiedGenotyper with RetryMemoryLimit
+    // First run with 1m
+    ug.memoryLimit = .001
+    // On retry run with 1g
+    ug.retryMemoryFunction = (d => d * 1000)
+    ug.reference_sequence = referenceFile
+    ug.input_file = Seq(bamFile)
+    add(ug)
+  }
+}
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala
index 8cb86db0b2..f5d750ac31 100644
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala
@@ -5,7 +5,8 @@ import org.broadinstitute.sting.queue.extensions.gatk._
 
 /**
  * An example building on the intro ExampleCountReads.scala.
- * Runs an INCOMPLETE version of the UnifiedGenotyper with VariantEval and optional VariantFiltration.
+ * Runs an INCOMPLETE variant calling pipeline with just the UnifiedGenotyper, VariantEval and optional VariantFiltration.
+ * For a complete description of the suggested variant calling pipeline, see the latest version of the Best Practice Variant Detection document.
  */
 class ExampleUnifiedGenotyper extends QScript {
   // Create an alias 'qscript' to be able to access variables
@@ -43,14 +44,12 @@ class ExampleUnifiedGenotyper extends QScript {
   }
 
   def script() {
-    // Create the four function that we can run.
+    // Create the four functions that we may run depending on options.
     val genotyper = new UnifiedGenotyper with UnifiedGenotyperArguments
     val variantFilter = new VariantFiltration with UnifiedGenotyperArguments
     val evalUnfiltered = new VariantEval with UnifiedGenotyperArguments
     val evalFiltered = new VariantEval with UnifiedGenotyperArguments
 
-    // If you are running this on a compute farm, make sure that the Sting/shell
-    // folder is in your path to use mergeText.sh and splitIntervals.sh.
     genotyper.scatterCount = 3
     genotyper.input_file :+= qscript.bamFile
     genotyper.out = swapExt(qscript.bamFile, "bam", "unfiltered.vcf")
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala
index d9fed4ce8b..1a50301f10 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala
@@ -55,12 +55,18 @@ class QSettings {
   @Argument(fullName="memory_limit", shortName="memLimit", doc="Default memory limit for jobs, in gigabytes.", required=false)
   var memoryLimit: Option[Double] = None
 
+  @Argument(fullName="memory_limit_threshold", shortName="memLimitThresh", doc="After passing this threshold stop increasing memory limit for jobs, in gigabytes.", required=false)
+  var memoryLimitThreshold: Option[Double] = None
+
   @Argument(fullName="resident_memory_limit", shortName="resMemLimit", doc="Default resident memory limit for jobs, in gigabytes.", required=false)
   var residentLimit: Option[Double] = None
 
   @Argument(fullName="resident_memory_request", shortName="resMemReq", doc="Default resident memory request for jobs, in gigabytes.", required=false)
   var residentRequest: Option[Double] = None
 
+  @Argument(fullName="resident_memory_request_parameter", shortName="resMemReqParam", doc="Parameter for resident memory requests. By default not requested.", required=false)
+  var residentRequestParameter: String = _
+
   /** The name of the parallel environment (required for SGE, for example) */
   @Argument(fullName="job_parallel_env", shortName="jobParaEnv", doc="An SGE style parallel environment to use for jobs requesting more than 1 core.  Equivalent to submitting jobs with -pe ARG nt for jobs with nt > 1", required=false)
   var parallelEnvironmentName: String = "smp_pe" // Broad default
@@ -68,6 +74,9 @@ class QSettings {
   @Argument(fullName="dontRequestMultipleCores", shortName="multiCoreJerk", doc="If provided, Queue will not request multiple processors for jobs using multiple processors.  Sometimes you eat the bear, sometimes the bear eats you.", required=false)
   var dontRequestMultipleCores: Boolean = false
 
+  @Argument(fullName="disableDefaultJavaGCOptimizations", shortName="noGCOpt", doc="If provided, Queue will not ensure that java GC threads are limited and that the a minimum amount of time is spent in GC.")
+  var disableDefaultJavaGCOptimizations = false
+
   @Argument(fullName="run_directory", shortName="runDir", doc="Root directory to run functions from.", required=false)
   var runDirectory = new File(".")
 
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala
index 8225d28ab3..2d4ff60f5c 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala
@@ -40,11 +40,6 @@ import org.apache.commons.lang.StringUtils
 class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNode) extends QEdge with Logging {
   var runner: JobRunner[_] =_
 
-  /**
-   * The number of times this edge has been run.
-   */
-  var retries = 0
-
   /**
    * The depth of this edge in the graph.
    */
@@ -87,14 +82,14 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod
       runner.init()
       runner.start()
     } catch {
-      case e =>
+      case e: Throwable =>
         currentStatus = RunnerStatus.FAILED
         try {
           runner.cleanup()
           function.failOutputs.foreach(_.createNewFile())
           writeStackTrace(e)
         } catch {
-          case _ => /* ignore errors in the exception handler */
+          case _: Throwable => /* ignore errors in the exception handler */
         }
         logger.error("Error: " + function.description, e)
     }
@@ -114,7 +109,7 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod
               runner.cleanup()
               function.failOutputs.foreach(_.createNewFile())
             } catch {
-              case _ => /* ignore errors in the error handler */
+              case _: Throwable => /* ignore errors in the error handler */
             }
             logger.error("Error: " + function.description)
             tailError()
@@ -123,19 +118,19 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod
               runner.cleanup()
               function.doneOutputs.foreach(_.createNewFile())
             } catch {
-              case _ => /* ignore errors in the done handler */
+              case _: Throwable => /* ignore errors in the done handler */
             }
             logger.info("Done: " + function.description)
           }
         } catch {
-          case e =>
+          case e: Throwable =>
             currentStatus = RunnerStatus.FAILED
             try {
               runner.cleanup()
               function.failOutputs.foreach(_.createNewFile())
               writeStackTrace(e)
             } catch {
-              case _ => /* ignore errors in the exception handler */
+              case _: Throwable => /* ignore errors in the exception handler */
             }
             logger.error("Error retrieving status: " + function.description, e)
         }
@@ -168,6 +163,7 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod
     currentStatus = RunnerStatus.PENDING
     if (cleanOutputs)
       function.deleteOutputs()
+    function.jobErrorLines = Nil
     runner = null
   }
 
@@ -189,6 +185,7 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod
       val tailLines = IOUtils.tail(errorFile, maxLines)
       val nl = "%n".format()
       val summary = if (tailLines.size > maxLines) "Last %d lines".format(maxLines) else "Contents"
+      this.function.jobErrorLines = collection.JavaConversions.asScalaIterable(tailLines).toSeq
       logger.error("%s of %s:%n%s".format(summary, errorFile, StringUtils.join(tailLines, nl)))
     } else {
       logger.error("Unable to access log file: %s".format(errorFile))
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala
index cee2c6e56a..e3a1714ffe 100755
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala
@@ -116,7 +116,7 @@ class QGraph extends Logging {
         val isReady = numMissingValues == 0
 
         if (this.jobGraph.edgeSet.isEmpty) {
-          logger.warn("Nothing to run! Were any Functions added?");
+          logger.warn("Nothing to run! Were any Functions added?")
         } else if (settings.getStatus) {
           logger.info("Checking pipeline status.")
           logStatus()
@@ -320,7 +320,7 @@ class QGraph extends Logging {
     if (settings.startFromScratch)
       logger.info("Will remove outputs from previous runs.")
 
-    updateGraphStatus(false)
+    updateGraphStatus(cleanOutputs = false)
 
     var readyJobs = getReadyJobs
     while (running && readyJobs.size > 0) {
@@ -361,7 +361,7 @@ class QGraph extends Logging {
    * Logs job statuses by traversing the graph and looking for status-related files
    */
   private def logStatus() {
-    updateGraphStatus(false)
+    updateGraphStatus(cleanOutputs = false)
     doStatus(status => logger.info(status))
   }
 
@@ -388,7 +388,7 @@ class QGraph extends Logging {
       if (settings.startFromScratch)
         logger.info("Removing outputs from previous runs.")
 
-      updateGraphStatus(true)
+      updateGraphStatus(cleanOutputs = true)
 
       var readyJobs = TreeSet.empty[FunctionEdge](functionOrdering)
       readyJobs ++= getReadyJobs
@@ -473,7 +473,7 @@ class QGraph extends Logging {
       logStatusCounts()
       deleteCleanup(-1)
     } catch {
-      case e =>
+      case e: Throwable =>
         logger.error("Uncaught error running jobs.", e)
         throw e
     } finally {
@@ -662,11 +662,12 @@ class QGraph extends Logging {
   private def checkRetryJobs(failed: Set[FunctionEdge]) {
     if (settings.retries > 0) {
       for (failedJob <- failed) {
-        if (failedJob.function.jobRestartable && failedJob.retries < settings.retries) {
-          failedJob.retries += 1
-          failedJob.resetToPending(true)
+        if (failedJob.function.jobRestartable && failedJob.function.retries < settings.retries) {
+          failedJob.function.retries += 1
+          failedJob.function.setupRetry()
+          failedJob.resetToPending(cleanOutputs = true)
           logger.info("Reset for retry attempt %d of %d: %s".format(
-            failedJob.retries, settings.retries, failedJob.function.description))
+            failedJob.function.retries, settings.retries, failedJob.function.description))
           statusCounts.failed -= 1
           statusCounts.pending += 1
         } else {
@@ -733,7 +734,7 @@ class QGraph extends Logging {
   private def emailDescription(edge: FunctionEdge) = {
     val description = new StringBuilder
     if (settings.retries > 0)
-      description.append("Attempt %d of %d.%n".format(edge.retries + 1, settings.retries + 1))
+      description.append("Attempt %d of %d.%n".format(edge.function.retries + 1, settings.retries + 1))
     description.append(edge.function.description)
     description.toString()
   }
@@ -1077,7 +1078,7 @@ class QGraph extends Logging {
               runner.checkUnknownStatus()
             }
           } catch {
-            case e => /* ignore */
+            case e: Throwable => /* ignore */
           }
       }
     }
@@ -1119,20 +1120,20 @@ class QGraph extends Logging {
               try {
                 manager.tryStop(managerRunners)
               } catch {
-                case e => /* ignore */
+                case e: Throwable => /* ignore */
               }
             for (runner <- managerRunners) {
               try {
                 runner.cleanup()
               } catch {
-                case e => /* ignore */
+                case e: Throwable => /* ignore */
               }
             }
           } finally {
             try {
               manager.exit()
             } catch {
-              case e => /* ignore */
+              case e: Throwable => /* ignore */
             }
           }
         }
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala
index 76cefe2a53..0c94e9ecf3 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala
@@ -52,13 +52,9 @@ class GridEngineJobRunner(session: Session, function: CommandLineFunction) exten
       nativeSpec += " -q " + function.jobQueue
 
     // If the resident set size is requested pass on the memory request
-    // NOTE: 12/20/11: depristo commented this out because mem_free isn't
-    // such a standard feature in SGE (gsa-engineering queue doesn't support it)
-    // requiring it can make SGE not so usable.  It's dangerous to not enforce
-    // that we have enough memory to run our jobs, but I'd rather be dangerous
-    // than not be able to run my jobs at all.
-//    if (function.residentRequest.isDefined)
-//      nativeSpec += " -l mem_free=%dM".format(function.residentRequest.map(_ * 1024).get.ceil.toInt)
+    // mem_free is the standard, but may also be virtual_free or even not available
+    if (function.qSettings.residentRequestParameter != null && function.residentRequest.isDefined)
+      nativeSpec += " -l %s=%dM".format(function.qSettings.residentRequestParameter, function.residentRequest.map(_ * 1024).get.ceil.toInt)
 
     // If the resident set size limit is defined specify the memory limit
     if (function.residentLimit.isDefined)
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala
index de996d1870..2fbea1497b 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala
@@ -35,7 +35,7 @@ import org.broadinstitute.sting.queue.engine.{RunnerStatus, CommandLineJobRunner
 import java.util.regex.Pattern
 import java.lang.StringBuffer
 import java.util.Date
-import com.sun.jna.{Structure, StringArray, NativeLong}
+import com.sun.jna.{Pointer, Structure, StringArray, NativeLong}
 import com.sun.jna.ptr.IntByReference
 
 /**
@@ -295,9 +295,17 @@ object Lsf706JobRunner extends Logging {
       // the platform LSF startTimes are in seconds, not milliseconds, so convert to the java convention
       runner.getRunInfo.startTime = new Date(jobInfo.startTime.longValue * 1000)
       runner.getRunInfo.doneTime = new Date(jobInfo.endTime.longValue * 1000)
-      val exHostsRaw = jobInfo.exHosts.getStringArray(0)
-      //logger.warn("exHostsRaw = " + exHostsRaw)
-      val exHostsList = exHostsRaw.toSeq
+
+      val exHostsList =
+        if (jobInfo.numExHosts != 1) {
+          // only read exHosts when exactly one execution host is reported; otherwise it cannot be read safely, so use a placeholder
+          val exHostsString = "multipleHosts_" + jobInfo.numExHosts
+          logger.debug("numExHosts = " + jobInfo.numExHosts + " != 1 for job " + runner.jobId + ", cannot safely get exhosts, setting to " + exHostsString)
+          List(exHostsString)
+        } else {
+          jobInfo.exHosts.getStringArray(0).toSeq
+        }
+
       //logger.warn("exHostsList = " + exHostsList)
       val exHosts = exHostsList.reduceLeft(_ + "," + _)
       //logger.warn("exHosts = " + exHosts)
diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala
index eff4a2ba91..84b625760d 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala
@@ -137,12 +137,17 @@ trait CommandLineFunction extends QFunction with Logging {
     if (residentRequest.isEmpty)
       residentRequest = memoryLimit
 
-    if (residentLimit.isEmpty)
-      residentLimit = residentRequest.map( _ * 1.2 )
+    if (residentLimit.isEmpty || residentLimit == residentRequest)
+      residentLimit = residentRequest.map(residentLimitBuffer)
 
     super.freezeFieldValues()
   }
 
+  /**
+   * @return A function that decides how much memory cushion to add to the residentRequest to create the residentLimit
+   */
+  def residentLimitBuffer: (Double => Double) = (1.2 * _)
+
   /**
    * Safely construct a full required command-line argument with consistent quoting, whitespace separation, etc.
    *
@@ -223,7 +228,7 @@ trait CommandLineFunction extends QFunction with Logging {
    */
   protected def conditional( condition: Boolean, param: Any, escape: Boolean = true, format: String = "%s" ): String = {
     if ( condition ) {
-      " %s ".format(formatArgument("", param, "", false, escape, format))
+      " %s ".format(formatArgument("", param, "", spaceSeparated = false, escape = escape, paramFormat = format))
     }
     else {
       ""
diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala
index 13448afdd5..b9cb8540f5 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala
@@ -54,6 +54,16 @@ trait JavaCommandLineFunction extends CommandLineFunction {
    */
   var javaGCThreads: Option[Int] = None
 
+  /**
+   * Max percent of time spent in garbage collection
+   */
+  var javaGCTimeLimit: Option[Int] = None
+
+  /**
+   * Min percent of max heap freed during a garbage collection
+   */
+  var javaGCHeapFreeLimit: Option[Int] = None
+
   override def freezeFieldValues() {
     super.freezeFieldValues()
 
@@ -62,6 +72,37 @@ trait JavaCommandLineFunction extends CommandLineFunction {
 
     if (javaMainClass != null && javaClasspath.isEmpty)
       javaClasspath = JavaCommandLineFunction.currentClasspath
+
+    if (!this.qSettings.disableDefaultJavaGCOptimizations) {
+      // By default set the GC threads to 4
+      if (javaGCThreads.isEmpty)
+        javaGCThreads = Some(4)
+
+      // By default exit if more than 50% of time in GC
+      if (javaGCTimeLimit.isEmpty)
+        javaGCTimeLimit = Some(50)
+
+      // By default exit if GC does not free up 10% of the heap
+      if (javaGCHeapFreeLimit.isEmpty)
+        javaGCHeapFreeLimit = Some(10)
+    }
+  }
+
+
+  override def copySettingsTo(function: QFunction) {
+    super.copySettingsTo(function)
+    function match {
+      case java: JavaCommandLineFunction =>
+        if (java.javaMemoryLimit.isEmpty)
+          java.javaMemoryLimit = this.javaMemoryLimit
+        if (java.javaGCThreads.isEmpty)
+          java.javaGCThreads = this.javaGCThreads
+        if (java.javaGCTimeLimit.isEmpty)
+          java.javaGCTimeLimit = this.javaGCTimeLimit
+        if (java.javaGCHeapFreeLimit.isEmpty)
+          java.javaGCHeapFreeLimit = this.javaGCHeapFreeLimit
+      case _ => /* ignore */
+    }
   }
 
   /**
@@ -77,10 +118,13 @@ trait JavaCommandLineFunction extends CommandLineFunction {
       null
   }
 
-  def javaOpts = optional("-Xmx", javaMemoryLimit.map(gb => (gb * 1024).ceil.toInt), "m", spaceSeparated=false) +
-                 conditional(javaGCThreads.isDefined, "-XX:+UseParallelOldGC") +
-                 optional("-XX:ParallelGCThreads=", javaGCThreads, spaceSeparated=false) +
-                 required("-Djava.io.tmpdir=", jobTempDir, spaceSeparated=false)
+  def javaOpts = Array(
+    optional("-Xmx", javaMemoryLimit.map(gb => (gb * 1024).ceil.toInt), "m", spaceSeparated=false),
+    conditional(javaGCThreads.isDefined || javaGCTimeLimit.isDefined || javaGCHeapFreeLimit.isDefined, "-XX:+UseParallelOldGC"),
+    optional("-XX:ParallelGCThreads=", javaGCThreads, spaceSeparated=false),
+    optional("-XX:GCTimeLimit=", javaGCTimeLimit, spaceSeparated=false),
+    optional("-XX:GCHeapFreeLimit=", javaGCHeapFreeLimit, spaceSeparated=false),
+    required("-Djava.io.tmpdir=", jobTempDir, spaceSeparated=false)).mkString("")
 
   def commandLine = required("java") +
                     javaOpts +
diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala
index 7d9debbdc6..9f7932d396 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala
@@ -112,6 +112,18 @@ trait QFunction extends Logging with QJobReport {
   /** File to redirect any errors.  Defaults to <jobName>.out */
   var jobErrorFile: File = _
 
+  /** Error lines (if any) from the jobErrorFile of the last failed run. */
+  var jobErrorLines: Seq[String] = Nil
+
+  /**
+   * The number of times this function has previously been run.
+   */
+  var retries = 0
+
+  /** Change settings for the next run. Retries will be set to the number of times the function was run and jobErrorLines may contain the error text. */
+  def setupRetry() {
+  }
+
   /**
    * Description of this command line function.
    */
diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/RetryMemoryLimit.scala b/public/scala/src/org/broadinstitute/sting/queue/function/RetryMemoryLimit.scala
new file mode 100644
index 0000000000..8bba5551ff
--- /dev/null
+++ b/public/scala/src/org/broadinstitute/sting/queue/function/RetryMemoryLimit.scala
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.queue.function
+
+/** A mixin that on retry increases the memory limit when certain text is found. */
+trait RetryMemoryLimit extends CommandLineFunction {
+
+  /** How to increase the memory. By default doubles the memory. */
+  var retryMemoryFunction: (Double => Double) = (2 * _)
+
+  /** Once the memory limit reaches or exceeds this threshold, retries will no longer increase it. */
+  var memoryLimitThreshold: Option[Double] = None
+
+  /** Various strings to look for to determine we ran out of memory. */
+  var memoryLimitErrorText = Seq("OutOfMemory", "you did not provide enough memory", "TERM_MEMLIMIT")
+
+  override def freezeFieldValues() {
+    super.freezeFieldValues()
+    if (this.memoryLimitThreshold.isEmpty)
+      this.memoryLimitThreshold = this.qSettings.memoryLimitThreshold
+  }
+
+  override def setupRetry() {
+    super.setupRetry()
+    if (this.memoryLimitThreshold.isDefined && this.memoryLimit.isDefined) {
+
+      // NOTE: If we're already at or above the memoryLimitThreshold, don't do anything.
+      if (this.memoryLimit.get < this.memoryLimitThreshold.get) {
+          updateMemoryLimits()
+      }
+
+    } else {
+      updateMemoryLimits()
+    }
+  }
+
+  def updateMemoryLimits() {
+    if (isMemoryError) {
+      this.memoryLimit = this.memoryLimit.map(this.retryMemoryFunction)
+      this.residentRequest = this.residentRequest.map(this.retryMemoryFunction)
+      this.residentLimit = this.residentLimit.map(this.retryMemoryFunction)
+
+      // Rebuffer the resident limit if it ended up exactly equal to the resident request
+      if (this.residentLimit == this.residentRequest)
+        this.residentLimit = this.residentRequest.map(this.residentLimitBuffer)
+
+      this match {
+        case java: JavaCommandLineFunction =>
+          java.javaMemoryLimit = java.javaMemoryLimit.map(this.retryMemoryFunction)
+        case _ => /* ignore */
+      }
+    }
+  }
+
+  def isMemoryError = this.jobErrorLines.exists(line => this.memoryLimitErrorText.exists(error => line.contains(error)))
+}
diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala
new file mode 100644
index 0000000000..a9a5928fc2
--- /dev/null
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.queue.pipeline.examples
+
+import org.testng.annotations.Test
+import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
+import org.broadinstitute.sting.BaseTest
+
+class ExampleRetryMemoryLimitPipelineTest {
+  @Test
+  def testRetryMemoryLimit() {
+    val spec = new PipelineTestSpec
+    spec.name = "RetryMemoryLimit"
+    spec.args = Array(
+      " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleRetryMemoryLimit.scala",
+      " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta",
+      " -I " + BaseTest.publicTestDir + "exampleBAM.bam",
+      " -retry 1").mkString
+    spec.jobRunners = PipelineTest.allJobRunners
+    PipelineTest.executeTest(spec)
+  }
+}
diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala
index c9d8b59c95..f6fcd7c124 100644
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala
@@ -24,7 +24,7 @@
 
 package org.broadinstitute.sting.queue.pipeline.examples
 
-import org.testng.annotations.Test
+import org.testng.annotations.{DataProvider, Test}
 import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
 import org.broadinstitute.sting.BaseTest
 
@@ -43,42 +43,53 @@ class ExampleUnifiedGenotyperPipelineTest {
     PipelineTest.executeTest(spec)
   }
 
-  @Test
-  def testUnifiedGenotyperWithGatkIntervals() {
+  @DataProvider(name = "ugIntervals")
+  def getUnifiedGenotyperIntervals =
+    Array(
+      Array("gatk_intervals", BaseTest.validationDataLocation + "intervalTest.intervals"),
+      Array("bed_intervals", BaseTest.validationDataLocation + "intervalTest.bed"),
+      Array("vcf_intervals", BaseTest.validationDataLocation + "intervalTest.1.vcf")
+    ).asInstanceOf[Array[Array[Object]]]
+
+  @Test(dataProvider = "ugIntervals")
+  def testUnifiedGenotyperWithIntervals(intervalsName: String, intervalsPath: String) {
     val spec = new PipelineTestSpec
-    spec.name = "unifiedgenotyper_with_gatk_intervals"
+    spec.name = "unifiedgenotyper_with_" + intervalsName
     spec.args = Array(
       " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala",
       " -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam",
       " -R " + BaseTest.hg18Reference,
-      " -L " + BaseTest.validationDataLocation + "intervalTest.intervals").mkString
+      " -L " + intervalsPath).mkString
     spec.jobRunners = Seq("Lsf706")
     PipelineTest.executeTest(spec)
   }
 
   @Test
-  def testUnifiedGenotyperWithBedIntervals() {
+  def testUnifiedGenotyperNoGCOpt() {
     val spec = new PipelineTestSpec
-    spec.name = "unifiedgenotyper_with_bed_intervals"
+    spec.name = "unifiedgenotyper_no_gc_opt"
     spec.args = Array(
       " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala",
-      " -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam",
-      " -R " + BaseTest.hg18Reference,
-      " -L " + BaseTest.validationDataLocation + "intervalTest.bed").mkString
-    spec.jobRunners = Seq("Lsf706")
+      " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta",
+      " -I " + BaseTest.publicTestDir + "exampleBAM.bam",
+      " -noGCOpt").mkString
+    spec.jobRunners = PipelineTest.allJobRunners
     PipelineTest.executeTest(spec)
   }
 
-  @Test
-  def testUnifiedGenotyperWithVcfIntervals() {
+  @DataProvider(name="resMemReqParams")
+  def getResMemReqParam = Array(Array("mem_free"), Array("virtual_free")).asInstanceOf[Array[Array[Object]]]
+
+  @Test(dataProvider = "resMemReqParams")
+  def testUnifiedGenotyperResMemReqParam(reqParam: String) {
     val spec = new PipelineTestSpec
-    spec.name = "unifiedgenotyper_with_vcf_intervals"
+    spec.name = "unifiedgenotyper_" + reqParam
     spec.args = Array(
       " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala",
-      " -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam",
-      " -R " + BaseTest.hg18Reference,
-      " -L " + BaseTest.validationDataLocation + "intervalTest.1.vcf").mkString
-    spec.jobRunners = Seq("Lsf706")
+      " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta",
+      " -I " + BaseTest.publicTestDir + "exampleBAM.bam",
+      " -resMemReqParam " + reqParam).mkString
+    spec.jobRunners = Seq("GridEngine")
     PipelineTest.executeTest(spec)
   }
 }
diff --git a/settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.jar b/settings/repository/com.google.code.cofoja/cofoja-1.0-r139.jar
similarity index 73%
rename from settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.jar
rename to settings/repository/com.google.code.cofoja/cofoja-1.0-r139.jar
index 2197e721fa..2cbdd380d8 100644
Binary files a/settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.jar and b/settings/repository/com.google.code.cofoja/cofoja-1.0-r139.jar differ
diff --git a/settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.xml b/settings/repository/com.google.code.cofoja/cofoja-1.0-r139.xml
similarity index 57%
rename from settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.xml
rename to settings/repository/com.google.code.cofoja/cofoja-1.0-r139.xml
index 38d4e88f1a..202d3d0a3d 100644
--- a/settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.xml
+++ b/settings/repository/com.google.code.cofoja/cofoja-1.0-r139.xml
@@ -1,3 +1,3 @@
 <ivy-module version="1.0">
-    <info organisation="com.google.code.cofoja" module="cofoja" revision="1.0-20110609" status="integration" publication="20110609114800" />
+    <info organisation="com.google.code.cofoja" module="cofoja" revision="1.0-r139" status="integration" publication="20110609114800" />
 </ivy-module>