diff --git a/lib/edu/mit/broad/arachne/Alignment.java b/lib/edu/mit/broad/arachne/Alignment.java new file mode 100755 index 0000000000..52b7b59f20 --- /dev/null +++ b/lib/edu/mit/broad/arachne/Alignment.java @@ -0,0 +1,242 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.arachne; + + +/** + * This class represents an arachne LookAlign alignment (or other related data structures). + */ +public class Alignment { + + private static final char TAB = '\t'; + + private int mASequenceId; + private int mASequenceLength; + private int mAStart; + private int mAEnd; + private int mBSequenceId; + private int mBSequenceLength; + private int mBStart; + private int mBEnd; + private char mOrientation; + private int[] mAlignmentBlocks; + + + public Alignment() { + } + + public int getASequenceId() { + return mASequenceId; + } + + public void setASequenceId(int value) { + mASequenceId = value; + } + + public int getASequenceLength() { + return mASequenceLength; + } + + public void setASequenceLength(int value) { + mASequenceLength = value; + } + + public int getAStart() { + return mAStart; + } + + public void setAStart(int value) { + mAStart = value; + } + + public int getAEnd() { + return mAEnd; + } + + public void setAEnd(int value) { + mAEnd = value; + } + + public int getBSequenceId() { + return mBSequenceId; + } + + public void setBSequenceId(int value) { + mBSequenceId = value; + } + + public int getBSequenceLength() { + return mBSequenceLength; + } + + public void setBSequenceLength(int value) { + mBSequenceLength = value; + } + + public int getBStart() { + return mBStart; + } + + public void 
setBStart(int value) { + mBStart = value; + } + + public int getBEnd() { + return mBEnd; + } + + public void setBEnd(int value) { + mBEnd = value; + } + + public char getOrientation() { + return mOrientation; + } + + public void setOrientation(char value) { + mOrientation = value; + } + + public int[] getAlignmentBlocks() { + return mAlignmentBlocks; + } + + public void setAlignmentBlocks(int[] value) { + mAlignmentBlocks = value; + } + + public static Alignment parse(String text) { + + if (text == null) { + return null; + } + + String[] fields = text.trim().split("\t"); + if (fields.length == 0) { + return null; + } + + if (!fields[0].equals("QUERY")) { + throw new IllegalArgumentException("Invalid alignment: " + text); + } + if (fields.length < 14) { + throw new IllegalArgumentException("Invalid alignment: " + text); + } + + int seqAId = parseIntField(fields[1]); + int seqAStart = parseIntField(fields[2]); + int seqAEnd = parseIntField(fields[3]); + int seqALength = parseIntField(fields[4]); + int orientation = parseIntField(fields[5]); + int seqBId = parseIntField(fields[6]); + int seqBStart = parseIntField(fields[7]); + int seqBEnd = parseIntField(fields[8]); + int seqBLength = parseIntField(fields[9]); + int blockCount = parseIntField(fields[10]); + + if (seqAStart < 0 || seqAEnd <= 0 || seqALength <= 0 || + seqAStart >= seqALength || seqAEnd > seqALength || seqAStart >= seqAEnd) { + throw new IllegalArgumentException("Invalid alignment: " + text); + } + if (seqBStart < 0 || seqBEnd <= 0 || seqBLength <= 0 || + seqBStart >= seqBLength || seqBEnd > seqBLength || seqBStart >= seqBEnd) { + throw new IllegalArgumentException("Invalid alignment: " + text); + } + if (orientation < 0 || orientation > 1) { + throw new IllegalArgumentException("Invalid alignment: " + text); + } + if (fields.length != (11 + 3*blockCount)) { + throw new IllegalArgumentException("Invalid alignment: " + text); + } + + int[] alignmentBlocks = new int[3*blockCount]; + for (int i = 0; i < 
3*blockCount; i++) { + alignmentBlocks[i] = parseIntField(fields[11 + i]); + } + + Alignment alignment = new Alignment(); + alignment.setASequenceId(seqAId); + alignment.setASequenceLength(seqALength); + alignment.setAStart(seqAStart+1); + alignment.setAEnd(seqAEnd); + alignment.setBSequenceId(seqBId); + alignment.setBSequenceLength(seqBLength); + alignment.setBStart(seqBStart+1); + alignment.setBEnd(seqBEnd); + alignment.setOrientation((orientation == 0) ? '+' : '-'); + alignment.setAlignmentBlocks(alignmentBlocks); + return alignment; + } + + private static int parseIntField(String text) { + try { + return Integer.parseInt(text); + } catch (NumberFormatException exc) { + throw new IllegalArgumentException("Illegal alignment field: " + text); + } + } + + public String arachneFormat() { + StringBuilder builder = new StringBuilder(); + builder.append("QUERY"); + builder.append(TAB); + builder.append(mASequenceId); + builder.append(TAB); + builder.append(mAStart-1); // zero based + builder.append(TAB); + builder.append(mAEnd); + builder.append(TAB); + builder.append(mASequenceLength); + builder.append(TAB); + builder.append(mOrientation == '+' ? 
0 : 1); + builder.append(TAB); + builder.append(mBSequenceId); + builder.append(TAB); + builder.append(mBStart-1); // zero based + builder.append(TAB); + builder.append(mBEnd); + builder.append(TAB); + builder.append(mBSequenceLength); + builder.append(TAB); + builder.append(mAlignmentBlocks.length / 3); + for (int i = 0; i < mAlignmentBlocks.length; i++) { + builder.append(TAB); + builder.append(mAlignmentBlocks[i]); + } + return builder.toString(); + } + + public String format() { + StringBuilder builder = new StringBuilder(); + builder.append("Alignment"); + builder.append(' '); + builder.append(mASequenceId); + builder.append(' '); + builder.append(mAStart); + builder.append(' '); + builder.append(mAEnd); + builder.append(' '); + builder.append(mOrientation); + builder.append(' '); + builder.append(mBSequenceId); + builder.append(' '); + builder.append(mBStart); + builder.append(' '); + builder.append(mBEnd); + builder.append(' '); + builder.append(mAlignmentBlocks.length / 3); + for (int i = 0; i < mAlignmentBlocks.length; i++) { + builder.append(' '); + builder.append(mAlignmentBlocks[i]); + } + return builder.toString(); + } +} diff --git a/lib/edu/mit/broad/arachne/Fastb2Fasta.java b/lib/edu/mit/broad/arachne/Fastb2Fasta.java new file mode 100644 index 0000000000..964e054ef5 --- /dev/null +++ b/lib/edu/mit/broad/arachne/Fastb2Fasta.java @@ -0,0 +1,132 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.arachne; + +import java.io.*; + +/** + * Utility to convert fastb to fasta files. + * More importantly, can be used to extract a subset of the reads. 
+ */ +public class Fastb2Fasta { + + private boolean mVerbose = false; + private boolean mDebug = false; + private String mInputPath = null; + private String mIdListFilePath = null; + + + public static void main(String[] args) + throws Exception { + new Fastb2Fasta().run(args); + } + + private void usage() { + System.out.println("Usage: Fastb2Fasta ... "); + System.out.println(" -idlist "); + System.out.println(" -verbose"); + System.out.println(" -debug"); + } + + private boolean parseArguments(String[] args) { + + int argpos = 0; + int argsleft = 0; + + while (argpos < args.length) { + argsleft = args.length - argpos; + String arg = args[argpos]; + if (arg.equals("-idlist") && argsleft > 1) { + argpos++; + mIdListFilePath = args[argpos++]; + } else if (arg.equals("-verbose")) { + argpos++; + mVerbose = true; + } else if (arg.equals("-debug")) { + argpos++; + mDebug = true; + } else if (arg.startsWith("-")) { + usage(); + return false; + } else { + break; + } + } + + argsleft = args.length - argpos; + if (argsleft != 1) { + usage(); + return false; + } + + mInputPath = args[argpos]; + return true; + } + + private void run(String[] args) + throws Exception { + + if (!parseArguments(args)) { + System.exit(1); + } + + FastbReader fastbReader = new FastbReader(new File(mInputPath)); + try { + if (mIdListFilePath != null) { + LineNumberReader reader = new LineNumberReader(new FileReader(mIdListFilePath)); + while (true) { + String line = reader.readLine(); + if (line == null) { + reader.close(); + break; + } + Integer id = parseReadId(line); + if (id == null) { + continue; + } + if (id < 0 || id >= fastbReader.getSequenceCount()) { + System.out.println("ERROR: Illegal sequence id: " + id); + System.exit(1); + } + String sequence = fastbReader.readSequence(id); + System.out.println(">" + id); + System.out.println(sequence); + } + } else { + int id = 0; + while (fastbReader.hasNext()) { + String sequence = fastbReader.next(); + System.out.println(">" + id); + 
System.out.println(sequence); + id++; + } + } + } finally { + fastbReader.close(); + } + } + + private Integer parseReadId(String line) { + String text = line.trim(); + if (text.length() == 0 || text.charAt(0) == '#') { + return null; + } + String token = text.split("\\s+")[0]; + Integer id = null; + try { + id = new Integer(token); + } catch (NumberFormatException exc) { + System.out.println("ERROR: Invalid sequence id: " + token); + System.exit(1); + } + return id; + } +} diff --git a/lib/edu/mit/broad/arachne/FastbReader.java b/lib/edu/mit/broad/arachne/FastbReader.java new file mode 100755 index 0000000000..0d6cd3dd5a --- /dev/null +++ b/lib/edu/mit/broad/arachne/FastbReader.java @@ -0,0 +1,220 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.arachne; + + +import edu.mit.broad.sam.util.CloseableIterator; + +import java.io.*; + + +/** + * Reader for arachne Fastb files. + */ +public class FastbReader + implements CloseableIterator { + + // Notes on fastb file format + // + // Fastb files contain the serialized contents of an arachne vecbasevector, + // which is a typedef for mastervec. + // The serialization of mastervec objects starts with a 24 byte mv_file_control_block, + // followed by N variable length segments (one for each element of the mastervec vector), + // followed by an offset table containing N 8-byte file offsets to the N variable length + // segments, followed by N fixed length data segments, one for each vector element. 
+ // Thus, reading a single element of the mastervec vector requires reading from three + // separate places in the file (the offset table, the variable length section and the + // fixed length section). + // + // The mastervec file header is 24 bytes arranged as follows: + // n 4-byte signed(?) integer (number of entries) + // c1 1-byte unsigned bit mask (see below) + // reserved 1-byte unused + // sizeX 1-byte unsigned, sizeof first template parameter (16 for fastb files) + // sizeA 1-byte unsigned, sizeof second template parameter (4 for fastb files) + // offsets_start 8-byte signed(?) integer, file offset of offset table + // static_start 8-byte signed(?) integer, file offset of static data (fixed size section) + // + // For fastb files, the fixed size section contains 4 bytes for each object, which is the + // unsigned(?) count of the number of bases in this entry. + // For fastb files, the variable length section contains a bit vector with two bits per base. + // The bases are encoded as follows: A = 0, C = 1, G = 2, T = 3. + // + // For fastb files, in the file header N is the number of entries in the fastb file. + // c1 is unused/unimplemented except that the two low-order bits should be 0x01, indicating + // that we are using the single-file representation. There is also apparently a three-file + // representation that looks the same except that the offset table and static (fixed length) + // table are in separate files named .offsets and .static. + // The sizeX should be 16 for fastb files and sizeA should be 4. + // + // Note that in fastb files, the sequences are not identified by name or id, only by index + // (zero based) into the mastervec object. There is no representation for bases other than + // ACGT (i.e. Ns cannot be encoded). 
+ + private static final char[] BASES = { 'A', 'C', 'G', 'T' }; + + private File mFile; + private RandomAccessFile mRandomFile; + private int mEntryCount; + private long mOffsetTableOffset; + private long mLengthTableOffset; + private int mCurrentPosition; + private byte[] mIOBuffer = new byte[8]; + + + public FastbReader(File file) + throws IOException { + mFile = file; + mRandomFile = new RandomAccessFile(mFile, "r"); + readHeader(); + } + + public int getSequenceCount() { + return mEntryCount; + } + + public boolean hasNext() { + return (mCurrentPosition < mEntryCount); + } + + public String next() { + if (!hasNext()) { + throw new IllegalStateException("Iterator exhausted"); + } + try { + return readSequence(mCurrentPosition); + } catch (IOException exc) { + throw new RuntimeException(exc.getMessage(), exc); + } + } + + public void remove() { + throw new UnsupportedOperationException("Not supported: remove"); + } + + public void close() { + if (mRandomFile != null) { + mEntryCount = 0; + mCurrentPosition = 0; + try { + mRandomFile.close(); + } catch (IOException exc) { + throw new RuntimeException(exc.getMessage(), exc); + } finally { + mRandomFile = null; + } + } + } + + public String readSequence(int n) + throws IOException { + if (mRandomFile == null) { + throw new IllegalStateException("Reader is closed"); + } + if (n < 0 || n >= mEntryCount) { + throw new IndexOutOfBoundsException("Illegal index: " + n); + } + long offset = getEntryOffset(n); + int length = getEntryBaseCount(n); + String result = readBases(offset, length); + mCurrentPosition = n+1; + return result; + } + + private void readHeader() + throws IOException { + + byte[] fileControlBlock = new byte[24]; + mRandomFile.readFully(fileControlBlock, 0, 24); + + int word2 = deserializeInt(fileControlBlock, 4); + int nFiles = word2 & 0x3; + int sizeX = (word2 >> 16) & 0xFF; + int sizeA = (word2 >> 24) & 0xFF; + if (nFiles != 1) { + throw new RuntimeException(mFile + ": Invalid file header: nFiles = " + 
nFiles); + } + if (sizeX != 16) { + throw new RuntimeException(mFile + ": Invalid file header: sizeX = " + sizeX); + } + if (sizeA != 4) { + throw new RuntimeException(mFile + ": Invalid file header: sizeX = " + sizeA); + } + mEntryCount = deserializeInt(fileControlBlock, 0); + mOffsetTableOffset = deserializeLong(fileControlBlock, 8); + mLengthTableOffset = deserializeLong(fileControlBlock, 16); + } + + private long getEntryOffset(int n) + throws IOException { + mRandomFile.seek(mOffsetTableOffset + 8 * n); + mRandomFile.readFully(mIOBuffer, 0, 8); + return deserializeLong(mIOBuffer, 0); + } + + private int getEntryBaseCount(int n) + throws IOException { + mRandomFile.seek(mLengthTableOffset + 4 * n); + mRandomFile.readFully(mIOBuffer, 0, 4); + return deserializeInt(mIOBuffer, 0); + } + + private String readBases(long fileOffset, int baseCount) + throws IOException { + + + int byteCount = (baseCount + 3) / 4; + byte[] data = new byte[byteCount]; + mRandomFile.seek(fileOffset); + mRandomFile.readFully(data, 0, byteCount); + + int baseIndex = 0; + int dataIndex = 0; + char[] baseBuffer = new char[baseCount]; + while (baseIndex < baseCount) { + int b = data[dataIndex++]; + int count = Math.min(4, baseCount - baseIndex); + for (int i = 0; i < count; i++) { + baseBuffer[baseIndex++] = BASES[b & 0x3]; + b = b >> 2; + } + } + return new String(baseBuffer); + } + + private int deserializeInt(byte[] buffer, int offset) { + int byte1 = buffer[offset] & 0xFF; + int byte2 = buffer[offset+1] & 0xFF; + int byte3 = buffer[offset+2] & 0xFF; + int byte4 = buffer[offset+3] & 0xFF; + return (byte1 | (byte2 << 8) | (byte3 << 16) | (byte4 << 24)); + } + + private long deserializeLong(byte[] buffer, int offset) { + long int1 = deserializeInt(buffer, offset) & 0xFFFFFFFFL; + long int2 = deserializeInt(buffer, offset+4) & 0xFFFFFFFFL; + return (int1 | (int2 << 32)); + } + + // Stub for interactive use (see also Fastb2Fasta) + public static void main(String[] args) + throws Exception { + 
/**
 * Utility class to read in a set of contig-based genomic intervals in zero-based,
 * end-inclusive form and store them efficiently in memory as a 1-based bit-mask.
 */
public class GenomeMask {

    // If memory usage ever becomes a problem, this could be changed to a SparseBitSet:
    // http://java.sun.com/developer/onlineTraining/collections/magercises/BitSet/index.html
    private SortedMap<Integer, BitSet> data = new TreeMap<Integer, BitSet>();

    /**
     * Loads the mask from a file of space-separated "contig start end" records,
     * where start/end are zero-based and end-inclusive.
     */
    public GenomeMask(File maskFile) throws IOException {
        BufferedReader maskReader = null;
        try {
            maskReader = new BufferedReader(new FileReader(maskFile));
            for (String line = maskReader.readLine(); line != null; line = maskReader.readLine()) {
                String[] tokens = line.split(" ");
                int contig = Integer.parseInt(tokens[0]);

                // Convert the coordinates from 0-based, end-inclusive to
                // 1-based, end-inclusive.
                int startPos = Integer.parseInt(tokens[1]) + 1;
                int endPos = Integer.parseInt(tokens[2]) + 1;

                BitSet contigBits = getOrCreate(contig, endPos);
                contigBits.set(startPos, endPos + 1); // BitSet.set is end-exclusive
            }
        } finally {
            if (maskReader != null) {
                maskReader.close();
            }
        }
    }

    /**
     * This ctor is useful if initializing a GenomeMask externally.
     */
    public GenomeMask() {
    }

    /** Returns true if the given 1-based position on the given contig is masked. */
    public boolean get(int contig, int position) {
        BitSet bits = data.get(contig);
        if (bits == null) {
            return false;
        }
        return bits.get(position);
    }

    /** Returns the BitSet for the given contig, or null if none exists. */
    public BitSet get(int contig) {
        return data.get(contig);
    }

    /**
     * Get an existing BitSet for the given contig, or create one if not already present.
     * This is useful when initializing a GenomeMask from an external source.
     * @param contig which BitSet
     * @param numBits if there was not already a BitSet for this contig, one is created
     *                and initialized to this size
     * @return the BitSet for the given contig, creating one if necessary
     */
    public BitSet getOrCreate(int contig, int numBits) {
        BitSet existing = data.get(contig);
        if (existing != null) {
            return existing;
        }
        BitSet created = new BitSet(numBits);
        data.put(contig, created);
        return created;
    }

    /** Returns the largest contig id present (throws if the mask is empty). */
    public int getMaxContig() {
        return data.lastKey();
    }
}
+ */ +public class LookAlignReader + implements CloseableIterator { + + private LineNumberReader mReader = null; + private Alignment mNextAlignment = null; + private int mBSequenceId = -1; + private int mBStart = 0; + private int mBEnd = 0; + + + public LookAlignReader(File file) + throws IOException { + this(new FileReader(file)); + } + + public LookAlignReader(Reader reader) { + if (reader instanceof LineNumberReader) { + mReader = (LineNumberReader) reader; + } else { + mReader = new LineNumberReader(reader); + } + } + + public void setBSequenceId(int value) { + mBSequenceId = value; + } + + public void setBStart(int value) { + mBStart = value; + } + + public void setBEnd(int value) { + mBEnd = value; + } + + public boolean hasNext() { + if (mNextAlignment != null) { + return true; + } + try { + mNextAlignment = nextAlignment(); + return (mNextAlignment != null); + } catch (IOException exc) { + throw new RuntimeException(exc.getMessage(), exc); + } + } + + public Alignment next() { + if (!hasNext()) { + throw new IllegalStateException("Iterator exhausted"); + } + try { + Alignment result = mNextAlignment; + mNextAlignment = nextAlignment(); + return result; + } catch (IOException exc) { + throw new RuntimeException(exc.getMessage(), exc); + } + } + + public void remove() { + throw new UnsupportedOperationException("Not supported: remove"); + } + + public void close() { + if (mReader != null) { + try { + mReader.close(); + } catch (IOException exc) { + throw new RuntimeException(exc.getMessage(), exc); + } + mReader = null; + } + } + + private Alignment nextAlignment() + throws IOException { + if (mReader == null) { + return null; + } + while (true) { + String line = mReader.readLine(); + if (line == null) { + close(); + break; + } + if (!line.startsWith("QUERY")) { + continue; + } + Alignment alignment = Alignment.parse(line); + if (matchesFilters(alignment)) { + return alignment; + } + } + return null; + } + + private boolean matchesFilters(Alignment alignment) 
{ + if (mBSequenceId < 0) { + return true; + } + if (alignment.getBSequenceId() != mBSequenceId) { + return false; + } + if (mBStart > 0 && alignment.getBEnd() < mBStart) { + return false; + } + if (mBEnd > 0 && alignment.getBStart() > mBEnd) { + return false; + } + return true; + } +} + diff --git a/lib/edu/mit/broad/cnv/AnalyzeCnvs.java b/lib/edu/mit/broad/cnv/AnalyzeCnvs.java new file mode 100755 index 0000000000..07e9b79de6 --- /dev/null +++ b/lib/edu/mit/broad/cnv/AnalyzeCnvs.java @@ -0,0 +1,437 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.cnv; + +import edu.mit.broad.arachne.Alignment; +import edu.mit.broad.arachne.LookAlignReader; + +import java.io.*; +import java.util.*; + + +/** + * Utility class to do data reduction on CNV data. 
+ */ +public class AnalyzeCnvs { + + public static void main(String[] args) + throws Exception { + new AnalyzeCnvs().run(args); + } + + private void usage() { + System.out.println("Usage: AnalyzeCnvs ..."); + System.out.println(" -action "); + System.out.println(" -alignments or -"); + System.out.println(" -alignmentList "); + System.out.println(" -chromosome "); + System.out.println(" -start "); + System.out.println(" -end "); + System.out.println(" -bestAlignments"); + System.out.println(" -mismatchThreshold "); + System.out.println(" -binsize "); + System.out.println(" -output "); + System.out.println(" -verbose"); + System.out.println(" -debug"); + } + + private boolean parseArguments(String[] args) { + + int argpos = 0; + int argsleft = 0; + + while (argpos < args.length) { + argsleft = args.length - argpos; + String arg = args[argpos]; + if (arg.equals("-action") && argsleft > 1) { + argpos++; + mAction = args[argpos++]; + } else if (arg.equals("-alignments") && argsleft > 1) { + argpos++; + mAlignmentFilePath = args[argpos++]; + } else if (arg.equals("-alignmentList") && argsleft > 1) { + argpos++; + mAlignmentListFilePath = args[argpos++]; + } else if (arg.equals("-chromosome") && argsleft > 1) { + argpos++; + mChromosome = args[argpos++]; + } else if (arg.equals("-start") && argsleft > 1) { + argpos++; + mStartPosition = new Integer(args[argpos++]); + } else if (arg.equals("-end") && argsleft > 1) { + argpos++; + mEndPosition = new Integer(args[argpos++]); + } else if (arg.equals("-verbose")) { + argpos++; + mVerbose = true; + } else if (arg.equals("-mismatchThreshold") && argsleft > 1) { + argpos++; + mMismatchThreshold = new Integer(args[argpos++]); + } else if (arg.equals("-bestAlignments")) { + argpos++; + mReturnBestHits = true; + } else if (arg.equals("-binsize") && argsleft > 1) { + argpos++; + mBinSize = Integer.parseInt(args[argpos++]); + } else if (arg.equals("-output") && argsleft > 1) { + argpos++; + mOutputColumns = args[argpos++]; + } else if 
(arg.equals("-debug")) { + argpos++; + mDebug = true; + } else if (arg.startsWith("-")) { + usage(); + return false; + } else { + break; + } + } + + argsleft = args.length - argpos; + if (argsleft != 0) { + usage(); + return false; + } + + return true; + } + + private void run(String[] args) + throws Exception { + + if (!parseArguments(args)) { + System.exit(1); + } + + if (mAction == null) { + mAction = "alignmentCoverage"; + } + + if (mAction.equals("alignmentCoverage")) { + mainAlignmentCoverage(); + } else { + System.out.println("Unknown action: " + mAction); + usage(); + System.exit(1); + } + } + + private void mainAlignmentCoverage() + throws IOException { + + if (mStartPosition == null || mEndPosition == null) { + usage(); + System.exit(1); + } else if (mStartPosition <= 0 || mEndPosition <= 0 || mStartPosition > mEndPosition) { + System.out.println("Invalid start/end positions: " + mStartPosition + " " + mEndPosition); + usage(); + System.exit(1); + } + + mSequenceId = chromosomeToSequenceId(mChromosome); + if (mSequenceId < 0) { + System.out.println("Invalid chromosome: " + mChromosome); + usage(); + System.exit(1); + } + + if (mBinSize <= 0) { + System.out.println("Invalid bin size: " + mBinSize); + usage(); + System.exit(1); + } + + runAlignmentCoverage(); + } + + private void runAlignmentCoverage() + throws IOException { + + int length = (mEndPosition - mStartPosition + 1); + if (length <= 0) { + throw new RuntimeException("Invalid start/end positions"); + } + + int binSize = mBinSize; + int binCount = (length + binSize - 1) / binSize; + int[] readStarts = new int[binCount]; + int[] readDepths = new int[binCount]; + List alignmentFiles = getAlignmentFiles(); + for (String path : alignmentFiles) { + processAlignmentFile(path, readStarts, readDepths); + } + printStats(readStarts, readDepths); + } + + private List getAlignmentFiles() + throws IOException { + List fileList = new ArrayList(); + if (mAlignmentListFilePath != null) { + LineNumberReader reader 
= new LineNumberReader(new FileReader(mAlignmentListFilePath)); + while (true) { + String line = reader.readLine(); + if (line == null) { + reader.close(); + break; + } + String path = line.trim(); + if (path.length() == 0 || path.startsWith("#")) { + continue; + } + fileList.add(path); + } + } else if (mAlignmentFilePath != null) { + fileList.add(mAlignmentFilePath); + } + return fileList; + } + + private void processAlignmentFile(String path, int[] readStarts, int[] readDepths) + throws IOException { + + LookAlignReader reader = null; + if (path == null || path.equals("-")) { + reader = new LookAlignReader(new InputStreamReader(System.in)); + } else { + reader = new LookAlignReader(new File(path)); + } + + while (true) { + Alignment alignment = getNextAlignment(reader); + if (alignment == null) { + reader.close(); + break; + } + processAlignment(alignment, readStarts, readDepths); + } + } + + private void processAlignment(Alignment alignment, + int[] readStarts, + int[] readDepths) { + + if (readStarts != null) { + int baseOffset = alignment.getBStart() - mStartPosition; + int binIndex = baseOffset / mBinSize; + if (binIndex >= 0 && binIndex < readStarts.length) { + readStarts[binIndex]++; + } + } + + if (readDepths != null) { + int baseOffset = alignment.getBStart() - mStartPosition; + int[] alignmentBlocks = alignment.getAlignmentBlocks(); + for (int i = 0; i < alignmentBlocks.length; i += 3) { + int gap = alignmentBlocks[i]; + int duration = alignmentBlocks[i+1]; + if (gap > 0) { + // Gap in B sequence (genome) + // Negative gaps are gaps in A sequence (read) + baseOffset += gap; + } + for (int j = 0; j < duration; j++) { + int binIndex = baseOffset / mBinSize; + if (binIndex >= 0 && binIndex < readDepths.length) { + readDepths[binIndex]++; + } + baseOffset++; + } + } + } + } + + private Alignment getNextAlignment(LookAlignReader reader) + throws IOException { + + if (!mReturnBestHits) { + while (reader.hasNext()) { + Alignment alignment = reader.next(); + if 
(passesAlignmentFilters(alignment)) { + return alignment; + } + } + return null; + } + + while (true) { + Alignment seed = mPendingAlignment; + mPendingAlignment = null; + if (seed == null && reader.hasNext()) { + seed = reader.next(); + } + if (seed == null) { + return null; + } + List secondaryHits = null; + while (reader.hasNext()) { + Alignment alignment = reader.next(); + if (alignment.getASequenceId() != seed.getASequenceId()) { + if (alignment.getASequenceId() < seed.getASequenceId()) { + throw new RuntimeException("Alignments not sorted by A sequence: " + alignment.format()); + } + mPendingAlignment = alignment; + break; + } + if (secondaryHits == null) { + secondaryHits = new ArrayList(); + } + secondaryHits.add(alignment); + } + if (secondaryHits == null) { + if (!passesAlignmentFilters(seed)) { + continue; + } + return seed; + } + secondaryHits.add(seed); + Alignment result = getUniqueBestAlignment(secondaryHits); + if (result != null && passesAlignmentFilters(result)) { + return result; + } + } + } + + private Alignment getUniqueBestAlignment(List alignments) { + int bestMismatches = 0; + List best = new ArrayList(); + for (Alignment a : alignments) { + int mismatches = getAlignmentMismatches(a); + if (best.isEmpty()) { + best.add(a); + bestMismatches = mismatches; + } + if (mismatches == bestMismatches) { + best.add(a); + } else if (mismatches < bestMismatches) { + best.clear(); + best.add(a); + bestMismatches = mismatches; + } + } + if (best.size() != 1) { + return null; + } + return best.get(0); + } + + private boolean passesAlignmentFilters(Alignment alignment) { + + if (mMismatchThreshold != null) { + if (getAlignmentMismatches(alignment) > mMismatchThreshold) { + return false; + } + } + + if (mSequenceId != null) { + if (alignment.getBSequenceId() != mSequenceId) { + return false; + } + } + + if (mStartPosition != null) { + if (alignment.getBEnd() < mStartPosition) { + return false; + } + } + + if (mEndPosition != null) { + if 
(alignment.getBStart() > mEndPosition) { + return false; + } + } + + return true; + } + + private int getAlignmentMismatches(Alignment alignment) { + int mismatches = 0; + int[] blocks = alignment.getAlignmentBlocks(); + for (int i = 0; i < blocks.length; i += 3) { + int gap = blocks[i]; + int duration = blocks[i+1]; + int mm = blocks[i+2]; + if (mm > duration) { + throw new RuntimeException("Invalid alignment? : " + alignment.format()); + } + mismatches += Math.abs(gap); + mismatches += mm; + } + return mismatches; + } + + private void printStats(int[] readStarts, int[] readDepths) { + if (mOutputColumns != null && mOutputColumns.equals("coverage")) { + // No headers, just coverage + for (int i = 0; i < readDepths.length; i++) { + String line = ""; + if (mBinSize == 1) { + line += readDepths[i]; + } else { + line += (readDepths[i] / (double) mBinSize); + } + System.out.println(line); + } + } else { + System.out.println("Position" + "\t" + "Starts" + "\t" + "Coverage"); + for (int i = 0; i < readDepths.length; i++) { + String line = ""; + int position = mStartPosition + i*mBinSize; + line += position + "\t" + readStarts[i] + "\t"; + if (mBinSize == 1) { + line += readDepths[i]; + } else { + line += (readDepths[i] / (double) mBinSize); + } + System.out.println(line); + } + } + } + + private int chromosomeToSequenceId(String text) { + if (text == null || text.length() == 0) { + return -1; + } + if (text.matches("\\d+")) { + return Integer.parseInt(text); + } + if (text.startsWith("chr") && text.length() > 3) { + text = text.substring(3); + } + if (text.matches("\\d+") && !text.startsWith("0")) { + return Integer.parseInt(text); + } + if (text.equals("M")) { + return 0; + } else if (text.equals("X")) { + return 23; + } else if (text.equals("Y")) { + return 24; + } else { + return -1; + } + } + + private boolean mDebug = false; + private boolean mVerbose = false; + + private String mAction = null; + private String mAlignmentFilePath = null; + private String 
mAlignmentListFilePath = null; + private String mChromosome = null; + private Integer mStartPosition = null; + private Integer mEndPosition = null; + private Integer mSequenceId = null; + private boolean mReturnBestHits = false; + private Integer mMismatchThreshold = null; + private int mBinSize = 1; + private String mOutputColumns = null; + private Alignment mPendingAlignment = null; +} diff --git a/lib/edu/mit/broad/cnv/CountAlignments.java b/lib/edu/mit/broad/cnv/CountAlignments.java new file mode 100644 index 0000000000..e0d60255d9 --- /dev/null +++ b/lib/edu/mit/broad/cnv/CountAlignments.java @@ -0,0 +1,283 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.cnv; + +import edu.mit.broad.arachne.Alignment; +import edu.mit.broad.arachne.LookAlignReader; + +import java.io.*; +import java.util.*; + +/** + * Utility to count alignments (rather than gathering). 
+ */ +public class CountAlignments { + + public static void main(String[] args) + throws Exception { + new CountAlignments().run(args); + } + + private void usage() { + System.out.println("Usage: CountAlignments ..."); + System.out.println(" -alignments (- for stdin)"); + System.out.println(" -chromosome "); + System.out.println(" -start "); + System.out.println(" -end "); + System.out.println(" -bestAlignments"); + System.out.println(" -mismatchThreshold "); + System.out.println(" -verbose"); + System.out.println(" -debug"); + } + + private boolean parseArguments(String[] args) { + + int argpos = 0; + int argsleft = 0; + + while (argpos < args.length) { + argsleft = args.length - argpos; + String arg = args[argpos]; + if (arg.equals("-alignments") && argsleft > 1) { + argpos++; + mAlignmentFilePath = args[argpos++]; + } else if (arg.equals("-mismatchThreshold") && argsleft > 1) { + argpos++; + mMismatchThreshold = new Integer(args[argpos++]); + } else if (arg.equals("-bestAlignments")) { + argpos++; + mReturnBestHits = true; + } else if (arg.equals("-chromosome") && argsleft > 1) { + argpos++; + String chromosome = args[argpos++]; + mSequenceId = chromosomeToSequenceId(chromosome); + if (mSequenceId < 0) { + System.out.println("Invalid chromosome: " + chromosome); + return false; + } + } else if (arg.equals("-start") && argsleft > 1) { + argpos++; + mStartPosition = new Integer(args[argpos++]); + } else if (arg.equals("-end") && argsleft > 1) { + argpos++; + mEndPosition = new Integer(args[argpos++]); + } else if (arg.equals("-verbose")) { + argpos++; + mVerbose = true; + } else if (arg.equals("-debug")) { + argpos++; + mDebug = true; + } else if (arg.startsWith("-")) { + usage(); + return false; + } else { + break; + } + } + + argsleft = args.length - argpos; + if (argsleft != 0) { + usage(); + return false; + } + + return true; + } + + private void run(String[] args) + throws Exception { + + if (!parseArguments(args)) { + System.exit(1); + } + + long[] counts = 
countAlignments(mAlignmentFilePath); + String line = counts[0] + " " + counts[1]; + if (mAlignmentFilePath != null) { + line = mAlignmentFilePath + " " + line; + } + System.out.println(line); + } + + private long[] countAlignments(String path) + throws IOException { + long alignmentCount = 0; + long baseCount = 0; + LookAlignReader reader = null; + if (path == null || path.equals("-")) { + reader = new LookAlignReader(new InputStreamReader(System.in)); + } else { + reader = new LookAlignReader(new File(path)); + } + while (true) { + Alignment alignment = getNextAlignment(reader); + if (alignment == null) { + reader.close(); + break; + } + if (mMismatchThreshold != null) { + if (getAlignmentMismatches(alignment) > mMismatchThreshold) { + continue; + } + } + if (mSequenceId != null) { + if (alignment.getBSequenceId() != mSequenceId) { + continue; + } + } + if (mStartPosition != null) { + if (alignment.getBEnd() < mStartPosition) { + continue; + } + } + if (mEndPosition != null) { + if (alignment.getBStart() > mEndPosition) { + continue; + } + } + alignmentCount++; + baseCount += getBaseCount(alignment); + } + long[] result = { alignmentCount, baseCount }; + return result; + } + + private Alignment getNextAlignment(LookAlignReader reader) + throws IOException { + if (!mReturnBestHits) { + if (!reader.hasNext()) { + return null; + } + return reader.next(); + } + while (true) { + Alignment seed = mPendingAlignment; + mPendingAlignment = null; + if (seed == null && reader.hasNext()) { + seed = reader.next(); + } + if (seed == null) { + return null; + } + List secondaryHits = null; + while (reader.hasNext()) { + Alignment alignment = reader.next(); + if (alignment.getASequenceId() != seed.getASequenceId()) { + if (alignment.getASequenceId() < seed.getASequenceId()) { + throw new RuntimeException("Alignments not sorted by A sequence: " + alignment.format()); + } + mPendingAlignment = alignment; + break; + } + if (secondaryHits == null) { + secondaryHits = new ArrayList(); 
+ } + secondaryHits.add(alignment); + } + if (secondaryHits == null) { + return seed; + } + secondaryHits.add(seed); + Alignment result = getUniqueBestAlignment(secondaryHits); + if (result != null) { + return result; + } + } + } + + private Alignment getUniqueBestAlignment(List alignments) { + int bestMismatches = 0; + List best = new ArrayList(); + for (Alignment a : alignments) { + int mismatches = getAlignmentMismatches(a); + if (best.isEmpty()) { + best.add(a); + bestMismatches = mismatches; + } + if (mismatches == bestMismatches) { + best.add(a); + } else if (mismatches < bestMismatches) { + best.clear(); + best.add(a); + bestMismatches = mismatches; + } + } + if (best.size() != 1) { + return null; + } + return best.get(0); + } + + private int getAlignmentMismatches(Alignment alignment) { + int mismatches = 0; + int[] blocks = alignment.getAlignmentBlocks(); + for (int i = 0; i < blocks.length; i += 3) { + int gap = blocks[i]; + int duration = blocks[i+1]; + int mm = blocks[i+2]; + if (mm > duration) { + throw new RuntimeException("Invalid alignment? : " + alignment.format()); + } + mismatches += Math.abs(gap); + mismatches += mm; + } + return mismatches; + } + + // Return the number of reference bases covered by this alignment. 
+ private int getBaseCount(Alignment alignment) { + int count = 0; + int[] blocks = alignment.getAlignmentBlocks(); + for (int i = 0; i < blocks.length; i += 3) { + // int gap = blocks[i]; + int duration = blocks[i+1]; + // int mm = blocks[i+2]; + count += duration; + } + return count; + } + + private int chromosomeToSequenceId(String text) { + if (text == null || text.length() == 0) { + return -1; + } + if (text.matches("\\d+")) { + return Integer.parseInt(text); + } + if (text.startsWith("chr") && text.length() > 3) { + text = text.substring(3); + } + if (text.matches("\\d+") && !text.startsWith("0")) { + return Integer.parseInt(text); + } + if (text.equals("M")) { + return 0; + } else if (text.equals("X")) { + return 23; + } else if (text.equals("Y")) { + return 24; + } else { + return -1; + } + } + + + private boolean mDebug = false; + private boolean mVerbose = false; + + private String mAlignmentFilePath = null; + private boolean mReturnBestHits = false; + private Integer mMismatchThreshold = null; + private Integer mSequenceId = null; + private Integer mStartPosition = null; + private Integer mEndPosition = null; + private Alignment mPendingAlignment = null; +} diff --git a/lib/edu/mit/broad/cnv/CountKMers.java b/lib/edu/mit/broad/cnv/CountKMers.java new file mode 100644 index 0000000000..0fa159615f --- /dev/null +++ b/lib/edu/mit/broad/cnv/CountKMers.java @@ -0,0 +1,1301 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.cnv; + +import java.io.*; +import java.util.*; + + +/** + * Tool for counting unique kmers. 
+ */ +public class CountKMers +{ + private static final int NONUNIQUE_MARKER = -1; + private static boolean mUseOldFormat = false; + + private String mAction = null; + private static int mK = 0; + private int mBatchSize = 0; + private List mInputFiles = null; + private File mInputDirectory = null; + private File mOutputDirectory = null; + private boolean mVerbose = false; + private boolean mDebug = false; + + private List mSequenceList = null; + private List mSequenceOffsetList = null; + private List mSpillFileList = null; + private double mSpillFactor = 0.9; + + private long mKMerCount = 0; + private long mUniquePriorCount = 0; + private long mUniqueNewCount = 0; + private long mPriorMapUniqueCount = 0; + + private InputStream mPriorMapStream = null; + private int mPriorMapPosition = -1; + private int mPriorMapValue = 0; + private int mInputFileIndex = 0; + private LineNumberReader mCurrentReader = null; + private String mNextSequence = null; + private char[] mKMerBuffer = null; + private int mKMerBufferedCount = 0; + private String mLineBuffer = null; + private int mLineBufferIndex = 0; + private int mBaseIndex = -1; + private byte[] mIOBuffer = null; + + /* Design + Inputs: + - One or more fasta files to search (currently one). + - Output directory for the result files. + - Optionally an input k-1-mer file (output from previous pass). + Outputs: + - Unique kmer file: (sorted by kmer) + This is unique globally or unique wrt unique (K-1) mers (i.e. K unique, K-1 not). + - Per chromosome bit map: pos (implicit) new-bit cum-bit + New-bit is 1 if Kmer starting at pos is unique but (K-1)-mer is not. + Cum-bit is 1 if Kmer starting at pos is unique for some L <= K. + - Statistics + Plan: + - Reducing memory footprint is crucial. + - Sequential pass over the input sequences to generate kmers. + - BatchSize kmers are cached in memory, then sorted and uniqified. + - As batch array fills, batches are spilled to disk. 
+ - Batches are reloaded from disk and merged (N-finger algorithm) + - and streamed to a merge file. + - Merge file is read from disk and processed as final results. + */ + + public static void main(String[] args) + throws Exception { + new CountKMers().run(args); + } + + private void usage() { + System.out.println("Usage: CountKMers ..."); + System.out.println(" -action "); + System.out.println(" -genome "); + System.out.println(" -k "); + System.out.println(" -batchSize "); + System.out.println(" -inputDir "); + System.out.println(" -outputDir "); + System.out.println(" -verbose"); + System.out.println(" -debug"); + } + + private boolean parseArguments(String[] args) { + + int argpos = 0; + int argsleft = 0; + + while (argpos < args.length) { + argsleft = args.length - argpos; + String arg = args[argpos]; + if (arg.equals("-action") && argsleft > 1) { + argpos++; + mAction = args[argpos++]; + } else if (arg.equals("-genome") && argsleft > 1) { + argpos++; + if (mInputFiles == null) { + mInputFiles = new ArrayList(); + } + mInputFiles.add(new File(args[argpos++])); + } else if (arg.equals("-k") && argsleft > 1) { + argpos++; + mK = Integer.parseInt(args[argpos++]); + } else if (arg.equals("-batchSize") && argsleft > 1) { + argpos++; + mBatchSize = Integer.parseInt(args[argpos++]); + } else if (arg.equals("-inputDir") && argsleft > 1) { + argpos++; + mInputDirectory = new File(args[argpos++]); + } else if (arg.equals("-outputDir") && argsleft > 1) { + argpos++; + mOutputDirectory = new File(args[argpos++]); + } else if (arg.equals("-oldFormat")) { + argpos++; + mUseOldFormat = true; + } else if (arg.equals("-verbose")) { + argpos++; + mVerbose = true; + } else if (arg.equals("-debug")) { + argpos++; + mDebug = true; + } else if (arg.startsWith("-")) { + usage(); + return false; + } else { + break; + } + } + + argsleft = args.length - argpos; + if (argsleft != 0) { + usage(); + return false; + } + + return true; + } + + private void run(String[] args) + throws 
Exception { + if (!parseArguments(args)) { + System.exit(1); + } + if (mAction == null || mAction.equals("mapKMers")) { + mapKMers(); + } else if (mAction.equals("mapGaps")) { + mapGaps(); + } + } + + // Can be used to scan genome for sequence names/lengths. + private void scanKMers() + throws IOException { + mSequenceList = new ArrayList(); + mSequenceOffsetList = new ArrayList(); + File priorMapFile = + new File(mOutputDirectory, "unique_" + (mK-1) + "_mers_map.bin"); + openPriorMap(priorMapFile); + while (true) { + String seqName = getNextSequence(); + if (seqName == null) { + break; + } + mSequenceList.add(seqName); + mSequenceOffsetList.add(mBaseIndex+1); + log("Scanning " + seqName + " ..."); + while (true) { + char[] kmerChars = getNextKMer(); + if (kmerChars == null) { + break; + } + mKMerCount++; + if (isUniqueInPriorMap(mBaseIndex)) { + continue; + } + } + } + closePriorMap(); + } + + private void mapGaps() + throws IOException { + while (true) { + String seqName = getNextSequence(); + if (seqName == null) { + break; + } + int pos = 0; + int gapStart = 0; + while (true) { + char base = getNextBase(); + if (base == 0) { + break; + } + pos++; + if (base == 'N') { + if (gapStart == 0) { + gapStart = pos; + } + } else { + if (gapStart > 0) { + System.out.println(seqName + "\t" + gapStart + "\t" + (pos-1)); + gapStart = 0; + } + } + } + if (gapStart > 0) { + System.out.println(seqName + "\t" + gapStart + "\t" + (pos-1)); + gapStart = 0; + } + } + } + + private void mapKMers() + throws IOException { + + File textKMerFile = + new File(mOutputDirectory, "unique_" + mK + "_mers.txt"); + File binaryKMerFile = + new File(mOutputDirectory, "unique_" + mK + "_mers.bin"); + File exceptionFile = + new File(mOutputDirectory, "unique_" + mK + "_mers.extra"); + File mapFile = + new File(mOutputDirectory, "unique_" + mK + "_mers_map.bin"); + File priorMapFile = + new File(mOutputDirectory, "unique_" + (mK-1) + "_mers_map.bin"); + File statsFile = + new 
File(mOutputDirectory, "unique_" + mK + "_mers_stats.txt"); + + if (mBatchSize == 0) { + throw new RuntimeException("Batch size not specified"); + } + + int kmerCount = 0; + int batchSize = mBatchSize; + KMerPosition[] kmerArray = new KMerPosition[batchSize]; + List exceptionList = new ArrayList(); + mSequenceList = new ArrayList(); + mSequenceOffsetList = new ArrayList(); + mIOBuffer = new byte[Math.max(20,4 + 2*((mK + 7)/8))]; + + openPriorMap(priorMapFile); + + while (true) { + String seqName = getNextSequence(); + if (seqName == null) { + break; + } + mSequenceList.add(seqName); + mSequenceOffsetList.add(mBaseIndex+1); + log("Processing " + seqName + " ..."); + while (true) { + char[] kmerChars = getNextKMer(); + if (kmerChars == null) { + break; + } + mKMerCount++; + int baseIndex = mBaseIndex; + if (isUniqueInPriorMap(baseIndex)) { + mUniquePriorCount++; + continue; + } + KMerPosition kmp = encodeKMer(kmerChars, baseIndex); + if (kmp == null) { + String kmer = new String(kmerChars); + exceptionList.add(new StringKMerPosition(kmer, baseIndex)); + continue; + } + kmerArray[kmerCount++] = kmp; + if (kmerCount == batchSize) { + kmerCount = compactKMers(kmerArray, kmerCount); + if (kmerCount > mSpillFactor * batchSize) { + spillKMers(kmerArray, kmerCount); + kmerCount = 0; + } + } + } + } + if (kmerCount > 0) { + kmerCount = compactKMers(kmerArray, kmerCount); + if (mSpillFileList != null) { + spillKMers(kmerArray, kmerCount); + kmerCount = 0; + } + } + + closePriorMap(); + + // Write out the exception kmers (text file). + compactKMers(exceptionList); + writeExceptionFile(exceptionList, exceptionFile); + + // Write out the binary file of unique encoded kmers. 
+ if (mSpillFileList == null) { + kmerCount = removeNonUnique(kmerArray, kmerCount); + writeKMerBinaryFile(kmerArray, kmerCount, binaryKMerFile); + mUniqueNewCount = kmerCount; + } else { + mUniqueNewCount = mergeSpillFiles(mSpillFileList, binaryKMerFile); + } + mUniqueNewCount += countUniqueKMers(exceptionList); + + // Write out the text file of (all) unique kmers. + writeKMerTextFile(binaryKMerFile, exceptionList, textKMerFile); + + // Create map file from prior map plus the new unique kmers. + int mapSize = ((mBaseIndex >> 2) & 0x3FFFFFFF) + 1; + createMapFile(mapSize, binaryKMerFile, exceptionList, priorMapFile, mapFile); + + // Write summary statistics file. + writeSummaryStatistics(statsFile); + } + + private int compactKMers(KMerPosition[] kmerArray, int kmerCount) { + if (kmerCount == 0) { + return 0; + } + log("Compacting " + kmerCount + " kmers at index " + + Integer.toHexString(mBaseIndex) + " ..."); + Arrays.sort(kmerArray, 0, kmerCount); + int newCount = 1; + KMerPosition current = kmerArray[0]; + for (int i = 1; i < kmerCount; i++) { + KMerPosition kmp = kmerArray[i]; + if (current.compareTo(kmp) == 0) { + current.setBaseIndex(NONUNIQUE_MARKER); + } else { + kmerArray[newCount++] = kmp; + current = kmp; + } + } + log("Compaction finished, new count is " + newCount); + return newCount; + } + + private int compactKMers(StringKMerPosition[] kmerArray, int kmerCount) { + if (kmerCount == 0) { + return 0; + } + log("Compacting " + kmerCount + " string kmers ..."); + Arrays.sort(kmerArray, 0, kmerCount); + int newCount = 1; + String kmerString = kmerArray[0].getKMer(); + for (int i = 1; i < kmerCount; i++) { + StringKMerPosition kmp = kmerArray[i]; + String ks = kmp.getKMer(); + if (ks.equals(kmerString)) { + kmerArray[newCount-1].setBaseIndex(NONUNIQUE_MARKER); + } else { + kmerArray[newCount++] = kmp; + kmerString = ks; + } + } + log("Compaction finished, new count is " + newCount); + return newCount; + } + + private void compactKMers(List kmerList) { + 
int kmerCount = kmerList.size(); + if (kmerCount <= 1) { + return; + } + StringKMerPosition[] kmerArray = + kmerList.toArray(new StringKMerPosition[kmerCount]); + kmerCount = compactKMers(kmerArray, kmerCount); + kmerList.clear(); + for (int i = 0; i < kmerCount; i++) { + kmerList.add(kmerArray[i]); + } + } + + private int removeNonUnique(KMerPosition[] kmerArray, int kmerCount) { + int uniqueCount = 0; + for (int i = 0; i < kmerCount; i++) { + KMerPosition kmp = kmerArray[i]; + if (kmp.getBaseIndex() != NONUNIQUE_MARKER) { + kmerArray[uniqueCount++] = kmp; + } + } + return uniqueCount; + } + + private int countUniqueKMers(List kmerList) { + int uniqueCount = 0; + for (StringKMerPosition kmp : kmerList) { + if (kmp.getBaseIndex() != NONUNIQUE_MARKER) { + uniqueCount++; + } + } + return uniqueCount; + } + + private void spillKMers(KMerPosition[] kmerArray, int kmerCount) + throws IOException { + if (mSpillFileList == null) { + mSpillFileList = new ArrayList(); + } + int fileNumber = mSpillFileList.size() + 1; + log("Spilling " + kmerCount + " kmers to file " + fileNumber + " ..."); + File spillFile = new File(mOutputDirectory, + "spill_" + mK + "_" + fileNumber + ".tmp"); + mSpillFileList.add(spillFile); + writeKMerBinaryFile(kmerArray, kmerCount, spillFile); + log("Spill file written"); + } + + private void writeKMerBinaryFile(KMerPosition[] kmerArray, + int kmerCount, + File outputFile) + throws IOException { + OutputStream outputStream = + new BufferedOutputStream(new FileOutputStream(outputFile)); + for (int i = 0; i < kmerCount; i++) { + KMerPosition kmp = kmerArray[i]; + writeKMerPosition(outputStream, kmerArray[i]); + } + outputStream.flush(); + outputStream.close(); + } + + private void writeExceptionFile(List kmerList, + File outputFile) + throws IOException { + PrintWriter writer = + new PrintWriter(new BufferedWriter(new FileWriter(outputFile))); + for (StringKMerPosition kmer : kmerList) { + writeUniqueKMer(kmer, writer); + } + writer.flush(); + 
writer.close(); + } + + private KMerPosition readKMerPosition(InputStream stream) + throws IOException { + if (mUseOldFormat) { + return readKMerPositionOldFormat(stream); + } + byte[] buffer = mIOBuffer; + int encodingLength = (mK + 7)/8; + int fileLength = 4 + 2*encodingLength; + int count = readFully(stream, buffer, 0, fileLength); + if (count <= 0) { + return null; + } else if (count != fileLength) { + throw new RuntimeException("Unexpected end of file"); + } + char[] encoding = new char[encodingLength]; + int baseIndex = ((buffer[0] & 0xFF) | + (buffer[1] & 0xFF) << 8 | + (buffer[2] & 0xFF) << 16 | + (buffer[3] & 0xFF) << 24); + for (int i = 0; i < encodingLength; i++) { + encoding[i] = (char) ((buffer[2*i+4] & 0xFF) | + ((buffer[2*i+5] & 0xFF) << 8)); + } + return new KMerPositionN(encoding, baseIndex); + } + + private KMerPosition readKMerPositionOldFormat(InputStream stream) + throws IOException { + byte[] buffer = mIOBuffer; + int length = (mK >= 32 ? 20 : 12); + int count = readFully(stream, buffer, 0, length); + if (count <= 0) { + return null; + } else if (count != length) { + throw new RuntimeException("Unexpected end of file"); + } + long encoding = (((long)(buffer[0] & 0xFF)) | + ((long)(buffer[1] & 0xFF)) << 8 | + ((long)(buffer[2] & 0xFF)) << 16 | + ((long)(buffer[3] & 0xFF)) << 24 | + ((long)(buffer[4] & 0xFF)) << 32 | + ((long)(buffer[5] & 0xFF)) << 40 | + ((long)(buffer[6] & 0xFF)) << 48 | + ((long)(buffer[7] & 0xFF)) << 56); + int baseIndex = ((buffer[length-4] & 0xFF) | + (buffer[length-3] & 0xFF) << 8 | + (buffer[length-2] & 0xFF) << 16 | + (buffer[length-1] & 0xFF) << 24); + if (length == 12) { + return new KMerPosition1(encoding, baseIndex); + } else { + long encoding2 = (((long)(buffer[8] & 0xFF)) | + ((long)(buffer[9] & 0xFF)) << 8 | + ((long)(buffer[10] & 0xFF)) << 16 | + ((long)(buffer[11] & 0xFF)) << 24 | + ((long)(buffer[12] & 0xFF)) << 32 | + ((long)(buffer[13] & 0xFF)) << 40 | + ((long)(buffer[14] & 0xFF)) << 48 | + 
((long)(buffer[15] & 0xFF)) << 56); + return new KMerPosition2(encoding, encoding2, baseIndex); + } + } + + private int readFully(InputStream stream, byte[] buffer, int offset, int count) + throws IOException { + int readCount = 0; + while (readCount < count) { + int read = stream.read(buffer, offset, count-readCount); + if (read <= 0) { + break; + } + offset += read; + readCount += read; + } + return readCount; + } + + private void writeKMerPosition(OutputStream stream, KMerPosition kmer) + throws IOException { + if (mUseOldFormat) { + writeKMerPositionOldFormat(stream, kmer); + return; + } + byte[] buffer = mIOBuffer; + int baseIndex = kmer.getBaseIndex(); + char[] encoding = kmer.getKMerEncoding(); + int offset = 0; + buffer[offset++] = (byte) ((baseIndex) & 0xFF); + buffer[offset++] = (byte) ((baseIndex >> 8) & 0xFF); + buffer[offset++] = (byte) ((baseIndex >> 16) & 0xFF); + buffer[offset++] = (byte) ((baseIndex >> 24) & 0xFF); + for (int i = 0; i < encoding.length; i++) { + buffer[offset++] = (byte) ((encoding[i]) & 0xFF); + buffer[offset++] = (byte) ((encoding[i] >> 8) & 0xFF); + } + stream.write(buffer, 0, offset); + } + + private void writeKMerPositionOldFormat(OutputStream stream, KMerPosition kmer) + throws IOException { + byte[] buffer = mIOBuffer; + long encoding1 = kmer.getKMerEncoding1(); + long encoding2 = kmer.getKMerEncoding2(); + int baseIndex = kmer.getBaseIndex(); + int offset = 0; + buffer[offset++] = (byte) ((encoding1) & 0xFF); + buffer[offset++] = (byte) ((encoding1 >> 8) & 0xFF); + buffer[offset++] = (byte) ((encoding1 >> 16) & 0xFF); + buffer[offset++] = (byte) ((encoding1 >> 24) & 0xFF); + buffer[offset++] = (byte) ((encoding1 >> 32) & 0xFF); + buffer[offset++] = (byte) ((encoding1 >> 40) & 0xFF); + buffer[offset++] = (byte) ((encoding1 >> 48) & 0xFF); + buffer[offset++] = (byte) ((encoding1 >> 56) & 0xFF); + if (mK >= 32) { + buffer[offset++] = (byte) ((encoding2) & 0xFF); + buffer[offset++] = (byte) ((encoding2 >> 8) & 0xFF); + 
buffer[offset++] = (byte) ((encoding2 >> 16) & 0xFF); + buffer[offset++] = (byte) ((encoding2 >> 24) & 0xFF); + buffer[offset++] = (byte) ((encoding2 >> 32) & 0xFF); + buffer[offset++] = (byte) ((encoding2 >> 40) & 0xFF); + buffer[offset++] = (byte) ((encoding2 >> 48) & 0xFF); + buffer[offset++] = (byte) ((encoding2 >> 56) & 0xFF); + } + buffer[offset++] = (byte) ((baseIndex) & 0xFF); + buffer[offset++] = (byte) ((baseIndex >> 8) & 0xFF); + buffer[offset++] = (byte) ((baseIndex >> 16) & 0xFF); + buffer[offset++] = (byte) ((baseIndex >> 24) & 0xFF); + stream.write(buffer, 0, offset); + } + + private long mergeSpillFiles(List spillFiles, File outputFile) + throws IOException { + + if (spillFiles == null) { + return 0; + } + + log("Merging spill files ..."); + OutputStream outputStream = + new BufferedOutputStream(new FileOutputStream(outputFile)); + long uniqueCount = 0; + int fileCount = spillFiles.size(); + InputStream[] inputStreams = new InputStream[fileCount]; + KMerPosition[] kmers = new KMerPosition[fileCount]; + for (int i = 0; i < fileCount; i++) { + inputStreams[i] = + new BufferedInputStream(new FileInputStream(spillFiles.get(i))); + } + while (true) { + for (int i = 0; i < fileCount; i++) { + if (kmers[i] == null && inputStreams[i] != null) { + kmers[i] = readKMerPosition(inputStreams[i]); + if (kmers[i] == null) { + inputStreams[i].close(); + inputStreams[i] = null; + } + } + } + int count = 0; + KMerPosition kmer = null; + for (int i = 0; i < fileCount; i++) { + KMerPosition kmp = kmers[i]; + if (kmp == null) { + continue; + } else if (kmer == null) { + kmer = kmp; + count = 1; + } else { + int cmp = kmp.compareTo(kmer); + if (cmp == 0) { + count++; + } else if (cmp < 0) { + kmer = kmp; + count = 1; + } + } + } + if (kmer == null) { + break; + } + for (int i = 0; i < fileCount; i++) { + if (kmers[i] == kmer) { + kmers[i] = null; + } + } + if (count == 1 && kmer.getBaseIndex() != NONUNIQUE_MARKER) { + uniqueCount++; + writeKMerPosition(outputStream, 
kmer); + } + } + outputStream.flush(); + outputStream.close(); + for (int i = 0; i < fileCount; i++) { + // spillFiles.get(i).delete(); + } + log("Spill files merged, unique count is " + uniqueCount); + return uniqueCount; + } + + private void writeKMerTextFile(File inputFile, + List exceptionList, + File outputFile) + throws IOException { + + log("Writing kmer file " + outputFile + " ..."); + int exceptionIndex = 0; + StringKMerPosition excKMer = null; + Iterator excIter = null; + if (!exceptionList.isEmpty()) { + excIter = exceptionList.iterator(); + excKMer = excIter.next(); + } + + InputStream inputStream = + new BufferedInputStream(new FileInputStream(inputFile)); + PrintWriter writer = + new PrintWriter(new BufferedWriter(new FileWriter(outputFile))); + KMerPosition kmer = readKMerPosition(inputStream); + while (kmer != null || excKMer != null) { + if (excKMer == null) { + writeUniqueKMer(kmer, writer); + kmer = readKMerPosition(inputStream); + } else if (kmer == null) { + writeUniqueKMer(excKMer, writer); + excKMer = excIter.hasNext() ? excIter.next() : null; + } else if (kmer.getKMer().compareTo(excKMer.getKMer()) < 0) { + writeUniqueKMer(kmer, writer); + kmer = readKMerPosition(inputStream); + } else { + writeUniqueKMer(excKMer, writer); + excKMer = excIter.hasNext() ? 
excIter.next() : null; + } + } + inputStream.close(); + writer.flush(); + writer.close(); + log("Wrote kmer file: " + outputFile); + } + + private void writeUniqueKMer(KMerPosition kmer, PrintWriter writer) { + if (kmer.getBaseIndex() != NONUNIQUE_MARKER) { + writeKMer(kmer.getKMer(), kmer.getBaseIndex(), writer); + } + } + + private void writeUniqueKMer(StringKMerPosition kmer, PrintWriter writer) { + if (kmer.getBaseIndex() != NONUNIQUE_MARKER) { + writeKMer(kmer.getKMer(), kmer.getBaseIndex(), writer); + } + } + + private void writeKMer(String kmer, int baseIndex, PrintWriter writer) { + String chr = getBaseIndexSequenceName(baseIndex); + int pos = getBaseIndexCoordinate(baseIndex); + writer.println(kmer + "\t" + chr + "\t" + pos); + } + + private void createMapFile(int mapSize, + File kmerFile, + List exceptionList, + File priorMapFile, + File mapFile) + throws IOException { + byte[] map = null; + long uniquePriorCount = 0; + if (priorMapFile.exists()) { + map = readMapFile(priorMapFile); + if (map.length != mapSize) { + throw new RuntimeException("Prior map is wrong size"); + } + // Clear the new bits from prior map. + // Also count the prior unique positions while we are at it. + // Note that this is a count of positions, not kmers. 
+ for (int i = 0; i < mapSize; i++) { + int cumBits = map[i] & 0x55; + uniquePriorCount += Integer.bitCount(cumBits); + map[i] = (byte) cumBits; + } + } else { + map = new byte[mapSize]; + } + for (StringKMerPosition kmp : exceptionList) { + addToMap(kmp, map); + } + mPriorMapUniqueCount = uniquePriorCount; + + InputStream inputStream = + new BufferedInputStream(new FileInputStream(kmerFile)); + while (true) { + KMerPosition kmp = readKMerPosition(inputStream); + if (kmp == null) { + inputStream.close(); + break; + } + addToMap(kmp, map); + } + + long testCum = 0; + for (int i = 0; i < map.length; i++) { + testCum += Integer.bitCount(map[i] & 0x55); + } + + writeMapFile(map, mapFile); + } + + private void addToMap(KMerPosition kmp, byte[] map) { + int baseIndex = kmp.getBaseIndex(); + if (baseIndex != NONUNIQUE_MARKER) { + addToMap(baseIndex, map); + } + } + + private void addToMap(StringKMerPosition kmp, byte[] map) { + int baseIndex = kmp.getBaseIndex(); + if (baseIndex != NONUNIQUE_MARKER) { + addToMap(baseIndex, map); + } + } + + private void addToMap(int baseIndex, byte[] map) { + int mod = baseIndex & 0x3; + int offset = (baseIndex >> 2) & 0x3FFFFFFF; + if (((map[offset] >> (2*mod)) & 0x3) != 0) { + throw new RuntimeException("Map entry already set: " + baseIndex); + } + map[offset] |= (0x3 << (2*mod)); + } + + private void writeSummaryStatistics(File outputFile) + throws IOException { + PrintWriter writer = + new PrintWriter(new BufferedWriter(new FileWriter(outputFile))); + long baseCount = (mBaseIndex + 1) & 0xFFFFFFFFL; + long uniqueCount = mUniquePriorCount + mUniqueNewCount; + long nonUniqueCount = mKMerCount - uniqueCount; + writer.println("K: " + mK); + writer.println("Sequences: " + mSequenceList.size()); + writer.println("Bases: " + baseCount); + writer.println("KMers: " + mKMerCount); + writer.println("Prior map count: " + mPriorMapUniqueCount); + writer.println("Unique prior: " + mUniquePriorCount + + " (" + formatPercent(mUniquePriorCount, 
mKMerCount) + ")"); + writer.println("Unique new: " + mUniqueNewCount + + " (" + formatPercent(mUniqueNewCount, mKMerCount) + ")"); + writer.println("Unique cumulative: " + uniqueCount + + " (" + formatPercent(uniqueCount, mKMerCount) + ")"); + writer.println("Nonunique: " + nonUniqueCount + + " (" + formatPercent(nonUniqueCount, mKMerCount) + ")"); + writer.flush(); + writer.close(); + } + + private String formatPercent(long numerator, long denominator) { + double fraction = 0.0; + if (denominator != 0) { + fraction = numerator / (double) denominator; + } + return String.format("%1.1f%%", fraction * 100.0); + } + + private void openPriorMap(File mapFile) + throws IOException { + if (mapFile.exists()) { + mPriorMapStream = new BufferedInputStream(new FileInputStream(mapFile)); + mPriorMapPosition = -1; + mPriorMapValue = 0; + } + } + + private void closePriorMap() + throws IOException { + if (mPriorMapStream != null) { + mPriorMapStream.close(); + } + mPriorMapStream = null; + mPriorMapPosition = -1; + mPriorMapValue = 0; + } + + private byte[] readMapFile(File file) + throws IOException { + long fileLength = file.length(); + if (fileLength > 1000000000) { + throw new RuntimeException("Prior map too large: " + file); + } + int length = (int) fileLength; + byte[] map = new byte[length]; + FileInputStream stream = new FileInputStream(file); + int count = readFully(stream, map, 0, length); + if (count != length) { + throw new RuntimeException("Failed to read map: " + file); + } + stream.close(); + return map; + } + + private void writeMapFile(byte[] map, File file) + throws IOException { + FileOutputStream stream = new FileOutputStream(file); + stream.write(map); + stream.flush(); + stream.close(); + } + + private boolean isUniqueInPriorMap(int baseIndex) + throws IOException { + if (mPriorMapStream == null) { + return false; + } + int byteOffset = (baseIndex >> 2) & 0x3FFFFFFF; + if (byteOffset != mPriorMapPosition) { + int delta = byteOffset - mPriorMapPosition; + 
if (delta < 0) { + throw new RuntimeException("Attempt to seek backwards in prior map"); + } + if (delta > 1) { + skipFully(mPriorMapStream, delta-1); + } + mPriorMapValue = mPriorMapStream.read(); + if (mPriorMapValue < 0) { + throw new RuntimeException("Unexpected end of file in prior map"); + } + mPriorMapPosition += delta; + } + int mod = baseIndex & 0x3; + return (((mPriorMapValue >> (2*mod)) & 1) != 0); + } + + private void skipFully(InputStream stream, long amount) + throws IOException { + while (amount > 0) { + long skip = stream.skip(amount); + if (skip <= 0 || skip > amount) { + throw new RuntimeException("Skip failed"); + } + amount -= skip; + } + } + + private String getBaseIndexSequenceName(int baseIndex) { + int sequenceCount = mSequenceList.size(); + for (int i = 0; i < sequenceCount-1; i++) { + int nextOffset = mSequenceOffsetList.get(i+1); + if (compareBaseIndex(nextOffset, baseIndex) > 0) { + return mSequenceList.get(i); + } + } + return mSequenceList.get(sequenceCount-1); + } + + private int getBaseIndexCoordinate(int baseIndex) { + Integer sequenceOffset = null; + for (Integer offset : mSequenceOffsetList) { + if (compareBaseIndex(offset, baseIndex) > 0) { + break; + } + sequenceOffset = offset; + } + if (sequenceOffset == null) { + return 0; + } + int coordinate = baseIndex - sequenceOffset + 1; + if (coordinate <= 0) { + dumpSequenceList(); + System.out.println("coordinate: " + coordinate); + System.out.println("sequenceOffset: " + Integer.toHexString(sequenceOffset)); + System.out.println("baseIndex: " + Integer.toHexString(baseIndex)); + throw new RuntimeException("Internal error: illegal coordinate " + + coordinate + " for base index " + baseIndex); + } + return coordinate; + } + + private void dumpSequenceList() { + System.out.println("# Sequences:"); + int count = mSequenceList.size(); + for (int i = 0; i < count; i++) { + String seqName = mSequenceList.get(i); + int offset = mSequenceOffsetList.get(i); + System.out.println("# " + seqName 
+ + "\t" + offset + + "\t" + Integer.toHexString(offset)); + } + } + + private int compareBaseIndex(int baseIndex1, int baseIndex2) { + // Implements unsigned comparison, a la compareTo + if (baseIndex1 < 0 ^ baseIndex2 < 0) { + return ((baseIndex1 < 0) ? 1 : -1); + } else { + return (baseIndex1 - baseIndex2); + } + } + + private String getNextSequence() + throws IOException { + + while (mNextSequence == null) { + if (mCurrentReader == null) { + mCurrentReader = getNextReader(); + if (mCurrentReader == null) { + return null; + } + } + String line = mCurrentReader.readLine(); + if (line == null) { + mCurrentReader.close(); + mCurrentReader = null; + continue; + } + if (line.startsWith(">")) { + String[] tokens = line.substring(1).trim().split("\\s+"); + mNextSequence = tokens[0]; + } + } + String result = mNextSequence; + mNextSequence = null; + return result; + } + + private LineNumberReader getNextReader() + throws IOException { + if (mInputFileIndex >= mInputFiles.size()) { + return null; + } + File file = mInputFiles.get(mInputFileIndex++); + return new LineNumberReader(new FileReader(file)); + } + + private char[] getNextKMer() + throws IOException { + + if (mKMerBuffer == null) { + mKMerBuffer = new char[mK]; + } + System.arraycopy(mKMerBuffer, 1, mKMerBuffer, 0, mKMerBuffer.length - 1); + if (mKMerBufferedCount > 0) { + mKMerBufferedCount--; + } + + while (mKMerBufferedCount < mK) { + char base = getNextBase(); + if (base == 0) { + incrementBaseIndex(mKMerBufferedCount); + mKMerBufferedCount = 0; + return null; + } else if (base == 'N') { + incrementBaseIndex(mKMerBufferedCount+1); + mKMerBufferedCount = 0; + } else { + mKMerBuffer[mKMerBufferedCount++] = base; + } + } + incrementBaseIndex(1); + return mKMerBuffer; + } + + private char getNextBase() + throws IOException { + + if (mLineBuffer == null || mLineBufferIndex >= mLineBuffer.length()) { + if (mCurrentReader == null) { + return 0; + } + String line = mCurrentReader.readLine(); + if (line == null) { + 
mLineBuffer = null; + mLineBufferIndex = 0; + mCurrentReader.close(); + mCurrentReader = null; + return 0; + } + if (line.startsWith(">")) { + String[] tokens = line.substring(1).trim().split("\\s+"); + mNextSequence = tokens[0]; + mLineBuffer = null; + mLineBufferIndex = 0; + return 0; + } + mLineBuffer = line.toUpperCase(); + mLineBufferIndex = 0; + } + return mLineBuffer.charAt(mLineBufferIndex++); + } + + private void incrementBaseIndex(int amount) { + if (mBaseIndex < -1 && (mBaseIndex + amount) >= -1) { + throw new RuntimeException("Base index: 32-bit overflow"); + } + mBaseIndex += amount; + } + + private void log(String text) { + if (mVerbose) { + System.out.println("# " + new Date() + " " + text); + } + } + + private static KMerPosition encodeKMer(char[] kmerChars, int baseIndex) { + if (mUseOldFormat) { + return encodeKMerOldFormat(kmerChars, baseIndex); + } + if (kmerChars == null) { + return null; + } + int kmerLength = kmerChars.length; + int encodingLength = (kmerLength + 7) / 8; + char[] encoding = new char[encodingLength]; + int offset = kmerLength % 8; + offset = (offset == 0) ? 
8 : offset; + int bits = encodeKMerBits(kmerChars, 0, offset); + if (bits < 0) { + return null; + } + encoding[0] = (char) bits; + for (int i = 1; i < encodingLength; i++) { + bits = encodeKMerBits(kmerChars, offset, 8); + if (bits < 0) { + return null; + } + encoding[i] = (char) bits; + offset += 8; + } + return new KMerPositionN(encoding, baseIndex); + } + + private static KMerPosition encodeKMerOldFormat(char[] kmerChars, int baseIndex) { + if (kmerChars == null) { + return null; + } + int length = kmerChars.length; + if (length <= 31) { + long bits = encodeKMerBitsLong(kmerChars, 0, length); + if (bits == -1) { + return null; + } + return new KMerPosition1(bits, baseIndex); + } else if (length <= 62) { + long bits1 = encodeKMerBitsLong(kmerChars, 0, 31); + long bits2 = encodeKMerBitsLong(kmerChars, 31, length - 31); + if (bits1 == -1 || bits2 == -1) { + return null; + } + return new KMerPosition2(bits1, bits2, baseIndex); + } else { + return null; + } + } + + private static int encodeKMerBits(char[] kmerChars, int offset, int length) { + int bits = 0; + for (int i = 0; i < length; i++) { + char base = kmerChars[offset + i]; + int baseBits = "ACGT".indexOf(base); + if (baseBits < 0) { + return -1; + } + bits |= baseBits << (2*(length-i-1)); + } + return bits; + } + + private static long encodeKMerBitsLong(char[] kmerChars, int offset, int length) { + long bits = 0; + for (int i = 0; i < length; i++) { + char base = kmerChars[offset + i]; + int baseBits = "ACGT".indexOf(base); + if (baseBits < 0) { + return -1; + } + bits |= ((long)baseBits) << (2*(length-i-1)); + } + return bits; + } + + private static String decodeKMer1(long bits) { + int length = mK; + char[] buffer = new char[length]; + decodeKMerBits(bits, buffer, 0, length); + return new String(buffer); + } + + private static String decodeKMer2(long bits1, long bits2) { + int length = mK; + char[] buffer = new char[length]; + decodeKMerBits(bits1, buffer, 0, 31); + decodeKMerBits(bits2, buffer, 31, 
length-31); + return new String(buffer); + } + + private static String decodeKMerN(char[] encoding) { + int length = mK; + char[] buffer = new char[length]; + int offset = length % 8; + offset = (offset == 0) ? 8 : offset; + decodeKMerBits(encoding[0], buffer, 0, offset); + for (int i = 1; i < encoding.length; i++) { + decodeKMerBits(encoding[i], buffer, offset, 8); + offset += 8; + } + return new String(buffer); + } + + private static void decodeKMerBits(char bits, char[] buffer, int offset, int length) { + for (int i = 0; i < length; i++) { + int baseBits = (int) ((bits >> (2*(length-i-1))) & 0x3); + buffer[offset + i] = "ACGT".charAt(baseBits); + } + } + + private static void decodeKMerBits(long bits, char[] buffer, int offset, int length) { + for (int i = 0; i < length; i++) { + int baseBits = (int) ((bits >> (2*(length-i-1))) & 0x3); + buffer[offset + i] = "ACGT".charAt(baseBits); + } + } + + static class KMerPosition + implements Comparable { + + private int mBaseIndex; + + KMerPosition(int baseIndex) { + mBaseIndex = baseIndex; + } + + public String getKMer() { + return null; + } + + public long getKMerEncoding1() { + return -1; + } + + public long getKMerEncoding2() { + return -1; + } + + public final int getBaseIndex() { + return mBaseIndex; + } + + public final void setBaseIndex(int baseIndex) { + mBaseIndex = baseIndex; + } + + public char[] getKMerEncoding() { + return null; + } + + public int compareTo(KMerPosition kmp) { + char[] encoding1 = getKMerEncoding(); + char[] encoding2 = kmp.getKMerEncoding(); + int length = Math.max(encoding1.length, encoding2.length); + for (int i = 0; i < length; i++) { + int result = encoding1[i] - encoding2[i]; + if (result != 0) { + return result; + } + } + return 0; + } + } + + static class KMerPosition1 + extends KMerPosition { + + private long mKMerEncoding1; + + KMerPosition1(long kmer, int baseIndex) { + super(baseIndex); + mKMerEncoding1 = kmer; + } + + public String getKMer() { + return 
decodeKMer1(getKMerEncoding1()); + } + + public final long getKMerEncoding1() { + return mKMerEncoding1; + } + + public int compareTo(KMerPosition kmp) { + int result = Long.signum(getKMerEncoding1() - kmp.getKMerEncoding1()); + if (result == 0) { + result = Long.signum(getKMerEncoding2() - kmp.getKMerEncoding2()); + } + return result; + } + } + + static class KMerPosition2 + extends KMerPosition1 { + + private long mKMerEncoding2; + + KMerPosition2(long encoding1, long encoding2, int baseIndex) { + super(encoding1, baseIndex); + mKMerEncoding2 = encoding2; + } + + public String getKMer() { + return decodeKMer2(getKMerEncoding1(), getKMerEncoding2()); + } + + public final long getKMerEncoding2() { + return mKMerEncoding2; + } + } + + static class KMerPositionN + extends KMerPosition { + + private char[] mKMerEncoding; + + KMerPositionN(char[] encoding, int baseIndex) { + super(baseIndex); + mKMerEncoding = encoding; + } + + public String getKMer() { + return decodeKMerN(mKMerEncoding); + } + + public final char[] getKMerEncoding() { + return mKMerEncoding; + } + } + + static class StringKMerPosition + implements Comparable { + + private String mKMerString = null; + private int mBaseIndex; + + StringKMerPosition(String kmer, int baseIndex) { + mKMerString = kmer; + mBaseIndex = baseIndex; + } + + public final String getKMer() { + return mKMerString; + } + + public final int getBaseIndex() { + return mBaseIndex; + } + + public final void setBaseIndex(int baseIndex) { + mBaseIndex = baseIndex; + } + + public int compareTo(StringKMerPosition kmp) { + return mKMerString.compareTo(kmp.mKMerString); + } + } +} diff --git a/lib/edu/mit/broad/cnv/CountKMers3.java b/lib/edu/mit/broad/cnv/CountKMers3.java new file mode 100644 index 0000000000..81ddb17452 --- /dev/null +++ b/lib/edu/mit/broad/cnv/CountKMers3.java @@ -0,0 +1,1426 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad 
Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.cnv; + +import java.io.*; +import java.util.*; + + +/** + * Tool for counting unique kmers. + */ +public class CountKMers3 +{ + private static final int NONUNIQUE_MARKER = -1; + private static boolean mUseOldFormat = false; + + private String mAction = null; + private static int mK = 0; + private int mBatchSize = 0; + private List mInputFiles = null; + private File mInputDirectory = null; + private File mOutputDirectory = null; + private boolean mVerbose = false; + private boolean mDebug = false; + + private List mSequenceList = null; + private List mSequenceOffsetList = null; + private List mSpillFileList = null; + private double mSpillFactor = 0.9; + + private long mKMerCount = 0; + private long mUniquePriorCount = 0; + private long mUniqueNewCount = 0; + private long mPriorMapUniqueCount = 0; + + private InputStream mPriorMapStream = null; + private int mPriorMapPosition = -1; + private int mPriorMapValue = 0; + private int mInputFileIndex = 0; + private LineNumberReader mCurrentReader = null; + private String mNextSequence = null; + private char[] mKMerBuffer = null; + private int mKMerBufferedCount = 0; + private String mLineBuffer = null; + private int mLineBufferIndex = 0; + private int mBaseIndex = -1; + private byte[] mIOBuffer = null; + + /* Design + Inputs: + - One or more fasta files to search (currently one). + - Output directory for the result files. + - Optionally an input k-1-mer file (output from previous pass). + Outputs: + - Unique kmer file: (sorted by kmer) + This is unique globally or unique wrt unique (K-1) mers (i.e. K unique, K-1 not). 
+ - Per chromosome bit map: pos (implicit) new-bit cum-bit + New-bit is 1 if Kmer starting at pos is unique but (K-1)-mer is not. + Cum-bit is 1 if Kmer starting at pos is unique for some L <= K. + - Statistics + Plan: + - Reducing memory footprint is crucial. + - Sequential pass over the input sequences to generate kmers. + - BatchSize kmers are cached in memory, then sorted and uniqified. + - As batch array fills, batches are spilled to disk. + - Batches are reloaded from disk and merged (N-finger algorithm) + - and streamed to a merge file. + - Merge file is read from disk and processed as final results. + */ + + public static void main(String[] args) + throws Exception { + new CountKMers3().run(args); + } + + private void usage() { + System.out.println("Usage: CountKMers ..."); + System.out.println(" -action "); + System.out.println(" -genome "); + System.out.println(" -k "); + System.out.println(" -batchSize "); + System.out.println(" -inputDir "); + System.out.println(" -outputDir "); + System.out.println(" -verbose"); + System.out.println(" -debug"); + } + + private boolean parseArguments(String[] args) { + + int argpos = 0; + int argsleft = 0; + + while (argpos < args.length) { + argsleft = args.length - argpos; + String arg = args[argpos]; + if (arg.equals("-action") && argsleft > 1) { + argpos++; + mAction = args[argpos++]; + } else if (arg.equals("-genome") && argsleft > 1) { + argpos++; + if (mInputFiles == null) { + mInputFiles = new ArrayList(); + } + mInputFiles.add(new File(args[argpos++])); + } else if (arg.equals("-k") && argsleft > 1) { + argpos++; + mK = Integer.parseInt(args[argpos++]); + } else if (arg.equals("-batchSize") && argsleft > 1) { + argpos++; + mBatchSize = Integer.parseInt(args[argpos++]); + } else if (arg.equals("-inputDir") && argsleft > 1) { + argpos++; + mInputDirectory = new File(args[argpos++]); + } else if (arg.equals("-outputDir") && argsleft > 1) { + argpos++; + mOutputDirectory = new File(args[argpos++]); + } else if 
(arg.equals("-oldFormat")) { + argpos++; + mUseOldFormat = true; + } else if (arg.equals("-verbose")) { + argpos++; + mVerbose = true; + } else if (arg.equals("-debug")) { + argpos++; + mDebug = true; + } else if (arg.startsWith("-")) { + usage(); + return false; + } else { + break; + } + } + + argsleft = args.length - argpos; + if (argsleft != 0) { + usage(); + return false; + } + + return true; + } + + private void run(String[] args) + throws Exception { + if (!parseArguments(args)) { + System.exit(1); + } + if (mAction == null || mAction.equals("mapKMers")) { + mapKMers(); + } else if (mAction.equals("mapGaps")) { + mapGaps(); + } + } + + // Can be used to scan genome for sequence names/lengths. + private void scanKMers() + throws IOException { + mSequenceList = new ArrayList(); + mSequenceOffsetList = new ArrayList(); + File priorMapFile = + new File(mOutputDirectory, "unique_" + (mK-1) + "_mers_map.bin"); + openPriorMap(priorMapFile); + while (true) { + String seqName = getNextSequence(); + if (seqName == null) { + break; + } + mSequenceList.add(seqName); + mSequenceOffsetList.add(mBaseIndex+1); + log("Scanning " + seqName + " ..."); + while (true) { + char[] kmerChars = getNextKMer(); + if (kmerChars == null) { + break; + } + mKMerCount++; + if (isUniqueInPriorMap(mBaseIndex)) { + continue; + } + } + } + closePriorMap(); + } + + private void mapGaps() + throws IOException { + while (true) { + String seqName = getNextSequence(); + if (seqName == null) { + break; + } + int pos = 0; + int gapStart = 0; + while (true) { + char base = getNextBase(); + if (base == 0) { + break; + } + pos++; + if (base == 'N') { + if (gapStart == 0) { + gapStart = pos; + } + } else { + if (gapStart > 0) { + System.out.println(seqName + "\t" + gapStart + "\t" + (pos-1)); + gapStart = 0; + } + } + } + if (gapStart > 0) { + System.out.println(seqName + "\t" + gapStart + "\t" + (pos-1)); + gapStart = 0; + } + } + } + + private void mapKMers() + throws IOException { + + File textKMerFile 
= + new File(mOutputDirectory, "unique_" + mK + "_mers.txt"); + File binaryKMerFile = + new File(mOutputDirectory, "unique_" + mK + "_mers.bin"); + File exceptionFile = + new File(mOutputDirectory, "unique_" + mK + "_mers.extra"); + File mapFile = + new File(mOutputDirectory, "unique_" + mK + "_mers_map.bin"); + File priorMapFile = + new File(mOutputDirectory, "unique_" + (mK-1) + "_mers_map.bin"); + File statsFile = + new File(mOutputDirectory, "unique_" + mK + "_mers_stats.txt"); + + if (mBatchSize == 0) { + throw new RuntimeException("Batch size not specified"); + } + + int kmerCount = 0; + int batchSize = mBatchSize; + KMerPosition[] kmerArray = new KMerPosition[batchSize]; + List exceptionList = new ArrayList(); + mSequenceList = new ArrayList(); + mSequenceOffsetList = new ArrayList(); + mIOBuffer = new byte[Math.max(20,4 + 2*((mK + 7)/8))]; + + openPriorMap(priorMapFile); + + while (true) { + String seqName = getNextSequence(); + if (seqName == null) { + break; + } + mSequenceList.add(seqName); + mSequenceOffsetList.add(mBaseIndex+1); + log("Processing " + seqName + " ..."); + while (true) { + char[] kmerChars = getNextKMer(); + if (kmerChars == null) { + break; + } + mKMerCount++; + int baseIndex = mBaseIndex; + if (isUniqueInPriorMap(baseIndex)) { + mUniquePriorCount++; + continue; + } + + KMerPosition kmp = encodeKMer(kmerChars, baseIndex); + if (kmp == null) { + // Note: We currently do not handle the reverse + // complement of exception characters correctly. + // For hg18, however, this doesn't matter as + // none of the kmers containing non-ACGT characters + // are present on the reverse strand. 
+ String kmer = new String(kmerChars); + exceptionList.add(new StringKMerPosition(kmer, baseIndex)); + continue; + } + kmerArray[kmerCount++] = kmp; + if (kmerCount == batchSize) { + kmerCount = compactKMers(kmerArray, kmerCount); + if (kmerCount > mSpillFactor * batchSize) { + spillKMers(kmerArray, kmerCount); + kmerCount = 0; + } + } + } + } + if (kmerCount > 0) { + kmerCount = compactKMers(kmerArray, kmerCount); + if (mSpillFileList != null) { + spillKMers(kmerArray, kmerCount); + kmerCount = 0; + } + } + + closePriorMap(); + + // Write out the exception kmers (text file). + compactKMers(exceptionList); + writeExceptionFile(exceptionList, exceptionFile); + + // Write out the binary file of unique encoded kmers. + if (mSpillFileList == null) { + kmerCount = removeNonUnique(kmerArray, kmerCount); + writeKMerBinaryFile(kmerArray, kmerCount, binaryKMerFile); + mUniqueNewCount = kmerCount; + } else { + mUniqueNewCount = mergeSpillFiles(mSpillFileList, binaryKMerFile); + } + mUniqueNewCount += countUniqueKMers(exceptionList); + + // Write out the text file of (all) unique kmers. + writeKMerTextFile(binaryKMerFile, exceptionList, textKMerFile); + + // Create map file from prior map plus the new unique kmers. + int mapSize = ((mBaseIndex >> 2) & 0x3FFFFFFF) + 1; + createMapFile(mapSize, binaryKMerFile, exceptionList, priorMapFile, mapFile); + + // Write summary statistics file. 
+ writeSummaryStatistics(statsFile); + } + + private int compactKMers(KMerPosition[] kmerArray, int kmerCount) { + if (kmerCount == 0) { + return 0; + } + log("Compacting " + kmerCount + " kmers at index " + + Integer.toHexString(mBaseIndex) + " ..."); + Arrays.sort(kmerArray, 0, kmerCount); + int newCount = 1; + KMerPosition current = kmerArray[0]; + for (int i = 1; i < kmerCount; i++) { + KMerPosition kmp = kmerArray[i]; + if (current.compareTo(kmp) == 0) { + current.setBaseIndex(NONUNIQUE_MARKER); + } else { + kmerArray[newCount++] = kmp; + current = kmp; + } + } + log("Compaction finished, new count is " + newCount); + return newCount; + } + + private int compactKMers(StringKMerPosition[] kmerArray, int kmerCount) { + if (kmerCount == 0) { + return 0; + } + log("Compacting " + kmerCount + " string kmers ..."); + Arrays.sort(kmerArray, 0, kmerCount); + int newCount = 1; + String kmerString = kmerArray[0].getKMer(); + for (int i = 1; i < kmerCount; i++) { + StringKMerPosition kmp = kmerArray[i]; + String ks = kmp.getKMer(); + if (ks.equals(kmerString)) { + kmerArray[newCount-1].setBaseIndex(NONUNIQUE_MARKER); + } else { + kmerArray[newCount++] = kmp; + kmerString = ks; + } + } + log("Compaction finished, new count is " + newCount); + return newCount; + } + + private void compactKMers(List kmerList) { + int kmerCount = kmerList.size(); + if (kmerCount <= 1) { + return; + } + StringKMerPosition[] kmerArray = + kmerList.toArray(new StringKMerPosition[kmerCount]); + kmerCount = compactKMers(kmerArray, kmerCount); + kmerList.clear(); + for (int i = 0; i < kmerCount; i++) { + kmerList.add(kmerArray[i]); + } + } + + private int removeNonUnique(KMerPosition[] kmerArray, int kmerCount) { + int uniqueCount = 0; + for (int i = 0; i < kmerCount; i++) { + KMerPosition kmp = kmerArray[i]; + if (kmp.getBaseIndex() != NONUNIQUE_MARKER) { + kmerArray[uniqueCount++] = kmp; + } + } + return uniqueCount; + } + + private int countUniqueKMers(List kmerList) { + int uniqueCount = 0; + 
for (StringKMerPosition kmp : kmerList) { + if (kmp.getBaseIndex() != NONUNIQUE_MARKER) { + uniqueCount++; + } + } + return uniqueCount; + } + + private void spillKMers(KMerPosition[] kmerArray, int kmerCount) + throws IOException { + if (mSpillFileList == null) { + mSpillFileList = new ArrayList(); + } + int fileNumber = mSpillFileList.size() + 1; + log("Spilling " + kmerCount + " kmers to file " + fileNumber + " ..."); + File spillFile = new File(mOutputDirectory, + "spill_" + mK + "_" + fileNumber + ".tmp"); + mSpillFileList.add(spillFile); + writeKMerBinaryFile(kmerArray, kmerCount, spillFile); + log("Spill file written"); + } + + private void writeKMerBinaryFile(KMerPosition[] kmerArray, + int kmerCount, + File outputFile) + throws IOException { + OutputStream outputStream = + new BufferedOutputStream(new FileOutputStream(outputFile)); + for (int i = 0; i < kmerCount; i++) { + KMerPosition kmp = kmerArray[i]; + writeKMerPosition(outputStream, kmerArray[i]); + } + outputStream.flush(); + outputStream.close(); + } + + private void writeExceptionFile(List kmerList, + File outputFile) + throws IOException { + PrintWriter writer = + new PrintWriter(new BufferedWriter(new FileWriter(outputFile))); + for (StringKMerPosition kmer : kmerList) { + writeUniqueKMer(kmer, writer); + } + writer.flush(); + writer.close(); + } + + private KMerPosition readKMerPosition(InputStream stream) + throws IOException { + if (mUseOldFormat) { + return readKMerPositionOldFormat(stream); + } + byte[] buffer = mIOBuffer; + int encodingLength = (mK + 7)/8; + int fileLength = 4 + 2*encodingLength; + int count = readFully(stream, buffer, 0, fileLength); + if (count <= 0) { + return null; + } else if (count != fileLength) { + throw new RuntimeException("Unexpected end of file"); + } + char[] encoding = new char[encodingLength]; + int baseIndex = ((buffer[0] & 0xFF) | + (buffer[1] & 0xFF) << 8 | + (buffer[2] & 0xFF) << 16 | + (buffer[3] & 0xFF) << 24); + for (int i = 0; i < encodingLength; 
i++) { + encoding[i] = (char) ((buffer[2*i+4] & 0xFF) | + ((buffer[2*i+5] & 0xFF) << 8)); + } + return new KMerPositionN(encoding, baseIndex); + } + + private KMerPosition readKMerPositionOldFormat(InputStream stream) + throws IOException { + byte[] buffer = mIOBuffer; + int length = (mK >= 32 ? 20 : 12); + int count = readFully(stream, buffer, 0, length); + if (count <= 0) { + return null; + } else if (count != length) { + throw new RuntimeException("Unexpected end of file"); + } + long encoding = (((long)(buffer[0] & 0xFF)) | + ((long)(buffer[1] & 0xFF)) << 8 | + ((long)(buffer[2] & 0xFF)) << 16 | + ((long)(buffer[3] & 0xFF)) << 24 | + ((long)(buffer[4] & 0xFF)) << 32 | + ((long)(buffer[5] & 0xFF)) << 40 | + ((long)(buffer[6] & 0xFF)) << 48 | + ((long)(buffer[7] & 0xFF)) << 56); + int baseIndex = ((buffer[length-4] & 0xFF) | + (buffer[length-3] & 0xFF) << 8 | + (buffer[length-2] & 0xFF) << 16 | + (buffer[length-1] & 0xFF) << 24); + if (length == 12) { + return new KMerPosition1(encoding, baseIndex); + } else { + long encoding2 = (((long)(buffer[8] & 0xFF)) | + ((long)(buffer[9] & 0xFF)) << 8 | + ((long)(buffer[10] & 0xFF)) << 16 | + ((long)(buffer[11] & 0xFF)) << 24 | + ((long)(buffer[12] & 0xFF)) << 32 | + ((long)(buffer[13] & 0xFF)) << 40 | + ((long)(buffer[14] & 0xFF)) << 48 | + ((long)(buffer[15] & 0xFF)) << 56); + return new KMerPosition2(encoding, encoding2, baseIndex); + } + } + + private int readFully(InputStream stream, byte[] buffer, int offset, int count) + throws IOException { + int readCount = 0; + while (readCount < count) { + int read = stream.read(buffer, offset, count-readCount); + if (read <= 0) { + break; + } + offset += read; + readCount += read; + } + return readCount; + } + + private void writeKMerPosition(OutputStream stream, KMerPosition kmer) + throws IOException { + if (mUseOldFormat) { + writeKMerPositionOldFormat(stream, kmer); + return; + } + byte[] buffer = mIOBuffer; + int baseIndex = kmer.getBaseIndex(); + char[] encoding = 
kmer.getKMerEncoding(); + int offset = 0; + buffer[offset++] = (byte) ((baseIndex) & 0xFF); + buffer[offset++] = (byte) ((baseIndex >> 8) & 0xFF); + buffer[offset++] = (byte) ((baseIndex >> 16) & 0xFF); + buffer[offset++] = (byte) ((baseIndex >> 24) & 0xFF); + for (int i = 0; i < encoding.length; i++) { + buffer[offset++] = (byte) ((encoding[i]) & 0xFF); + buffer[offset++] = (byte) ((encoding[i] >> 8) & 0xFF); + } + stream.write(buffer, 0, offset); + } + + private void writeKMerPositionOldFormat(OutputStream stream, KMerPosition kmer) + throws IOException { + byte[] buffer = mIOBuffer; + long encoding1 = kmer.getKMerEncoding1(); + long encoding2 = kmer.getKMerEncoding2(); + int baseIndex = kmer.getBaseIndex(); + int offset = 0; + buffer[offset++] = (byte) ((encoding1) & 0xFF); + buffer[offset++] = (byte) ((encoding1 >> 8) & 0xFF); + buffer[offset++] = (byte) ((encoding1 >> 16) & 0xFF); + buffer[offset++] = (byte) ((encoding1 >> 24) & 0xFF); + buffer[offset++] = (byte) ((encoding1 >> 32) & 0xFF); + buffer[offset++] = (byte) ((encoding1 >> 40) & 0xFF); + buffer[offset++] = (byte) ((encoding1 >> 48) & 0xFF); + buffer[offset++] = (byte) ((encoding1 >> 56) & 0xFF); + if (mK >= 32) { + buffer[offset++] = (byte) ((encoding2) & 0xFF); + buffer[offset++] = (byte) ((encoding2 >> 8) & 0xFF); + buffer[offset++] = (byte) ((encoding2 >> 16) & 0xFF); + buffer[offset++] = (byte) ((encoding2 >> 24) & 0xFF); + buffer[offset++] = (byte) ((encoding2 >> 32) & 0xFF); + buffer[offset++] = (byte) ((encoding2 >> 40) & 0xFF); + buffer[offset++] = (byte) ((encoding2 >> 48) & 0xFF); + buffer[offset++] = (byte) ((encoding2 >> 56) & 0xFF); + } + buffer[offset++] = (byte) ((baseIndex) & 0xFF); + buffer[offset++] = (byte) ((baseIndex >> 8) & 0xFF); + buffer[offset++] = (byte) ((baseIndex >> 16) & 0xFF); + buffer[offset++] = (byte) ((baseIndex >> 24) & 0xFF); + stream.write(buffer, 0, offset); + } + + private long mergeSpillFiles(List spillFiles, File outputFile) + throws IOException { + + if 
(spillFiles == null) { + return 0; + } + + log("Merging spill files ..."); + OutputStream outputStream = + new BufferedOutputStream(new FileOutputStream(outputFile)); + long uniqueCount = 0; + int fileCount = spillFiles.size(); + InputStream[] inputStreams = new InputStream[fileCount]; + KMerPosition[] kmers = new KMerPosition[fileCount]; + for (int i = 0; i < fileCount; i++) { + inputStreams[i] = + new BufferedInputStream(new FileInputStream(spillFiles.get(i))); + } + while (true) { + for (int i = 0; i < fileCount; i++) { + if (kmers[i] == null && inputStreams[i] != null) { + kmers[i] = readKMerPosition(inputStreams[i]); + if (kmers[i] == null) { + inputStreams[i].close(); + inputStreams[i] = null; + } + } + } + int count = 0; + KMerPosition kmer = null; + for (int i = 0; i < fileCount; i++) { + KMerPosition kmp = kmers[i]; + if (kmp == null) { + continue; + } else if (kmer == null) { + kmer = kmp; + count = 1; + } else { + int cmp = kmp.compareTo(kmer); + if (cmp == 0) { + count++; + } else if (cmp < 0) { + kmer = kmp; + count = 1; + } + } + } + if (kmer == null) { + break; + } + for (int i = 0; i < fileCount; i++) { + if (kmers[i] != null && kmer.compareTo(kmers[i]) == 0) { + kmers[i] = null; + } + } + if (count == 1 && kmer.getBaseIndex() != NONUNIQUE_MARKER) { + uniqueCount++; + writeKMerPosition(outputStream, kmer); + } + + } + outputStream.flush(); + outputStream.close(); + for (int i = 0; i < fileCount; i++) { + // spillFiles.get(i).delete(); + } + log("Spill files merged, unique count is " + uniqueCount); + return uniqueCount; + } + + private void writeKMerTextFile(File inputFile, + List exceptionList, + File outputFile) + throws IOException { + + log("Writing kmer file " + outputFile + " ..."); + int exceptionIndex = 0; + StringKMerPosition excKMer = null; + Iterator excIter = null; + if (!exceptionList.isEmpty()) { + excIter = exceptionList.iterator(); + excKMer = excIter.next(); + } + + InputStream inputStream = + new BufferedInputStream(new 
FileInputStream(inputFile)); + PrintWriter writer = + new PrintWriter(new BufferedWriter(new FileWriter(outputFile))); + KMerPosition kmer = readKMerPosition(inputStream); + while (kmer != null || excKMer != null) { + if (excKMer == null) { + writeUniqueKMer(kmer, writer); + kmer = readKMerPosition(inputStream); + } else if (kmer == null) { + writeUniqueKMer(excKMer, writer); + excKMer = excIter.hasNext() ? excIter.next() : null; + } else if (kmer.getKMer().compareTo(excKMer.getKMer()) < 0) { + writeUniqueKMer(kmer, writer); + kmer = readKMerPosition(inputStream); + } else { + writeUniqueKMer(excKMer, writer); + excKMer = excIter.hasNext() ? excIter.next() : null; + } + } + inputStream.close(); + writer.flush(); + writer.close(); + log("Wrote kmer file: " + outputFile); + } + + private void writeUniqueKMer(KMerPosition kmer, PrintWriter writer) { + if (kmer.getBaseIndex() != NONUNIQUE_MARKER) { + writeKMer(kmer.getKMer(), kmer.getBaseIndex(), writer); + } + } + + private void writeUniqueKMer(StringKMerPosition kmer, PrintWriter writer) { + if (kmer.getBaseIndex() != NONUNIQUE_MARKER) { + writeKMer(kmer.getKMer(), kmer.getBaseIndex(), writer); + } + } + + private void writeKMer(String kmer, int baseIndex, PrintWriter writer) { + String chr = getBaseIndexSequenceName(baseIndex); + int pos = getBaseIndexCoordinate(baseIndex); + writer.println(kmer + "\t" + chr + "\t" + pos); + } + + private void createMapFile(int mapSize, + File kmerFile, + List exceptionList, + File priorMapFile, + File mapFile) + throws IOException { + byte[] map = null; + long uniquePriorCount = 0; + if (priorMapFile.exists()) { + map = readMapFile(priorMapFile); + if (map.length != mapSize) { + throw new RuntimeException("Prior map is wrong size"); + } + // Clear the new bits from prior map. + // Also count the prior unique positions while we are at it. + // Note that this is a count of positions, not kmers. 
+ for (int i = 0; i < mapSize; i++) { + int cumBits = map[i] & 0x55; + uniquePriorCount += Integer.bitCount(cumBits); + map[i] = (byte) cumBits; + } + } else { + map = new byte[mapSize]; + } + for (StringKMerPosition kmp : exceptionList) { + addToMap(kmp, map); + } + mPriorMapUniqueCount = uniquePriorCount; + + InputStream inputStream = + new BufferedInputStream(new FileInputStream(kmerFile)); + while (true) { + KMerPosition kmp = readKMerPosition(inputStream); + if (kmp == null) { + inputStream.close(); + break; + } + addToMap(kmp, map); + } + + long testCum = 0; + for (int i = 0; i < map.length; i++) { + testCum += Integer.bitCount(map[i] & 0x55); + } + + writeMapFile(map, mapFile); + } + + private void addToMap(KMerPosition kmp, byte[] map) { + int baseIndex = kmp.getBaseIndex(); + if (baseIndex != NONUNIQUE_MARKER) { + addToMap(baseIndex, map); + } + } + + private void addToMap(StringKMerPosition kmp, byte[] map) { + int baseIndex = kmp.getBaseIndex(); + if (baseIndex != NONUNIQUE_MARKER) { + addToMap(baseIndex, map); + } + } + + private void addToMap(int baseIndex, byte[] map) { + int mod = baseIndex & 0x3; + int offset = (baseIndex >> 2) & 0x3FFFFFFF; + if (((map[offset] >> (2*mod)) & 0x3) != 0) { + throw new RuntimeException("Map entry already set: " + baseIndex); + } + map[offset] |= (0x3 << (2*mod)); + } + + private void writeSummaryStatistics(File outputFile) + throws IOException { + PrintWriter writer = + new PrintWriter(new BufferedWriter(new FileWriter(outputFile))); + long baseCount = (mBaseIndex + 1) & 0xFFFFFFFFL; + long uniqueCount = mUniquePriorCount + mUniqueNewCount; + long nonUniqueCount = mKMerCount - uniqueCount; + writer.println("K: " + mK); + writer.println("Sequences: " + mSequenceList.size()); + writer.println("Bases: " + baseCount); + writer.println("KMers: " + mKMerCount); + writer.println("Prior map count: " + mPriorMapUniqueCount); + writer.println("Unique prior: " + mUniquePriorCount + + " (" + formatPercent(mUniquePriorCount, 
mKMerCount) + ")"); + writer.println("Unique new: " + mUniqueNewCount + + " (" + formatPercent(mUniqueNewCount, mKMerCount) + ")"); + writer.println("Unique cumulative: " + uniqueCount + + " (" + formatPercent(uniqueCount, mKMerCount) + ")"); + writer.println("Nonunique: " + nonUniqueCount + + " (" + formatPercent(nonUniqueCount, mKMerCount) + ")"); + writer.flush(); + writer.close(); + } + + private String formatPercent(long numerator, long denominator) { + double fraction = 0.0; + if (denominator != 0) { + fraction = numerator / (double) denominator; + } + return String.format("%1.1f%%", fraction * 100.0); + } + + private void openPriorMap(File mapFile) + throws IOException { + if (mapFile.exists()) { + mPriorMapStream = new BufferedInputStream(new FileInputStream(mapFile)); + mPriorMapPosition = -1; + mPriorMapValue = 0; + } + } + + private void closePriorMap() + throws IOException { + if (mPriorMapStream != null) { + mPriorMapStream.close(); + } + mPriorMapStream = null; + mPriorMapPosition = -1; + mPriorMapValue = 0; + } + + private byte[] readMapFile(File file) + throws IOException { + long fileLength = file.length(); + if (fileLength > 1000000000) { + throw new RuntimeException("Prior map too large: " + file); + } + int length = (int) fileLength; + byte[] map = new byte[length]; + FileInputStream stream = new FileInputStream(file); + int count = readFully(stream, map, 0, length); + if (count != length) { + throw new RuntimeException("Failed to read map: " + file); + } + stream.close(); + return map; + } + + private void writeMapFile(byte[] map, File file) + throws IOException { + FileOutputStream stream = new FileOutputStream(file); + stream.write(map); + stream.flush(); + stream.close(); + } + + private boolean isUniqueInPriorMap(int baseIndex) + throws IOException { + if (mPriorMapStream == null) { + return false; + } + int byteOffset = (baseIndex >> 2) & 0x3FFFFFFF; + if (byteOffset != mPriorMapPosition) { + int delta = byteOffset - mPriorMapPosition; + 
if (delta < 0) { + throw new RuntimeException("Attempt to seek backwards in prior map"); + } + if (delta > 1) { + skipFully(mPriorMapStream, delta-1); + } + mPriorMapValue = mPriorMapStream.read(); + if (mPriorMapValue < 0) { + throw new RuntimeException("Unexpected end of file in prior map"); + } + mPriorMapPosition += delta; + } + int mod = baseIndex & 0x3; + return (((mPriorMapValue >> (2*mod)) & 1) != 0); + } + + private void skipFully(InputStream stream, long amount) + throws IOException { + while (amount > 0) { + long skip = stream.skip(amount); + if (skip <= 0 || skip > amount) { + throw new RuntimeException("Skip failed"); + } + amount -= skip; + } + } + + private String getBaseIndexSequenceName(int baseIndex) { + int sequenceCount = mSequenceList.size(); + for (int i = 0; i < sequenceCount-1; i++) { + int nextOffset = mSequenceOffsetList.get(i+1); + if (compareBaseIndex(nextOffset, baseIndex) > 0) { + return mSequenceList.get(i); + } + } + return mSequenceList.get(sequenceCount-1); + } + + private int getBaseIndexCoordinate(int baseIndex) { + Integer sequenceOffset = null; + for (Integer offset : mSequenceOffsetList) { + if (compareBaseIndex(offset, baseIndex) > 0) { + break; + } + sequenceOffset = offset; + } + if (sequenceOffset == null) { + return 0; + } + int coordinate = baseIndex - sequenceOffset + 1; + if (coordinate <= 0) { + dumpSequenceList(); + System.out.println("coordinate: " + coordinate); + System.out.println("sequenceOffset: " + Integer.toHexString(sequenceOffset)); + System.out.println("baseIndex: " + Integer.toHexString(baseIndex)); + throw new RuntimeException("Internal error: illegal coordinate " + + coordinate + " for base index " + baseIndex); + } + return coordinate; + } + + private void dumpSequenceList() { + System.out.println("# Sequences:"); + int count = mSequenceList.size(); + for (int i = 0; i < count; i++) { + String seqName = mSequenceList.get(i); + int offset = mSequenceOffsetList.get(i); + System.out.println("# " + seqName 
+ + "\t" + offset + + "\t" + Integer.toHexString(offset)); + } + } + + private int compareBaseIndex(int baseIndex1, int baseIndex2) { + // Implements unsigned comparison, a la compareTo + if (baseIndex1 < 0 ^ baseIndex2 < 0) { + return ((baseIndex1 < 0) ? 1 : -1); + } else { + return (baseIndex1 - baseIndex2); + } + } + + private String getNextSequence() + throws IOException { + + while (mNextSequence == null) { + if (mCurrentReader == null) { + mCurrentReader = getNextReader(); + if (mCurrentReader == null) { + return null; + } + } + String line = mCurrentReader.readLine(); + if (line == null) { + mCurrentReader.close(); + mCurrentReader = null; + continue; + } + if (line.startsWith(">")) { + String[] tokens = line.substring(1).trim().split("\\s+"); + mNextSequence = tokens[0]; + } + } + String result = mNextSequence; + mNextSequence = null; + return result; + } + + private LineNumberReader getNextReader() + throws IOException { + if (mInputFileIndex >= mInputFiles.size()) { + return null; + } + File file = mInputFiles.get(mInputFileIndex++); + return new LineNumberReader(new FileReader(file)); + } + + private char[] getNextKMer() + throws IOException { + + if (mKMerBuffer == null) { + mKMerBuffer = new char[mK]; + } + System.arraycopy(mKMerBuffer, 1, mKMerBuffer, 0, mKMerBuffer.length - 1); + if (mKMerBufferedCount > 0) { + mKMerBufferedCount--; + } + + while (mKMerBufferedCount < mK) { + char base = getNextBase(); + if (base == 0) { + incrementBaseIndex(mKMerBufferedCount); + mKMerBufferedCount = 0; + return null; + } else if (base == 'N') { + incrementBaseIndex(mKMerBufferedCount+1); + mKMerBufferedCount = 0; + } else { + mKMerBuffer[mKMerBufferedCount++] = base; + } + } + incrementBaseIndex(1); + return mKMerBuffer; + } + + private char getNextBase() + throws IOException { + + if (mLineBuffer == null || mLineBufferIndex >= mLineBuffer.length()) { + if (mCurrentReader == null) { + return 0; + } + String line = mCurrentReader.readLine(); + if (line == null) { + 
mLineBuffer = null;
                mLineBufferIndex = 0;
                mCurrentReader.close();
                mCurrentReader = null;
                return 0;
            }
            if (line.startsWith(">")) {
                // Start of a new fasta record: remember its name for
                // getNextSequence() and signal end-of-sequence (0) to the caller.
                String[] tokens = line.substring(1).trim().split("\\s+");
                mNextSequence = tokens[0];
                mLineBuffer = null;
                mLineBufferIndex = 0;
                return 0;
            }
            // Sequence data line: buffer it upper-cased for char-at-a-time reads.
            mLineBuffer = line.toUpperCase();
            mLineBufferIndex = 0;
        }
        return mLineBuffer.charAt(mLineBufferIndex++);
    }

    // Advances the running genome-wide base index by the given amount.
    // mBaseIndex starts at -1 and appears to be used as an unsigned 32-bit
    // counter that wraps through negative values; the check below rejects a
    // wrap past 2^32-1.  NOTE(review): this relies on the unsigned
    // interpretation used by compareBaseIndex() -- confirm before changing.
    private void incrementBaseIndex(int amount) {
        if (mBaseIndex < -1 && (mBaseIndex + amount) >= -1) {
            throw new RuntimeException("Base index: 32-bit overflow");
        }
        mBaseIndex += amount;
    }

    // Timestamped progress logging, enabled by -verbose.
    private void log(String text) {
        if (mVerbose) {
            System.out.println("# " + new Date() + " " + text);
        }
    }

    // Unconditional debug print helper.
    private static void dbg(String text) {
        System.out.println("#DBG: " + text);
    }

    // Packs a kmer into a KMerPosition, canonicalizing strand: whichever of
    // the kmer and its reverse complement packs smaller is stored, with a
    // reversed flag when the reverse strand was chosen.
    // Returns null if the kmer contains a non-ACGT base.
    private static KMerPosition encodeKMer(char[] kmerChars, int baseIndex) {
        if (mUseOldFormat) {
            return encodeKMerOldFormat(kmerChars, baseIndex);
        }
        char[] encoding = encodeKMerChars(kmerChars);
        if (encoding == null) {
            return null;
        }
        char[] reverseEncoding = encodeKMerChars(reverseComplement(kmerChars));
        if (compareEncodings(encoding, reverseEncoding) <= 0) {
            return new KMerPositionN(encoding, baseIndex);
        } else {
            KMerPositionN kmp = new KMerPositionN(reverseEncoding, baseIndex);
            kmp.setIsReversed(true);
            return kmp;
        }
    }

    // Packs a kmer 2 bits per base into a char[] (8 bases per 16-bit char).
    // The first char holds the "odd" leading bases (kmerLength % 8) so that
    // every subsequent char is full.  Returns null on any non-ACGT base.
    private static char[] encodeKMerChars(char[] kmerChars) {
        if (kmerChars == null) {
            return null;
        }

        int kmerLength = kmerChars.length;
        int encodingLength = (kmerLength + 7) / 8;
        char[] encoding = new char[encodingLength];
        int offset = kmerLength % 8;
        offset = (offset == 0) ?
8 : offset; + int bits = encodeKMerBits(kmerChars, 0, offset); + if (bits < 0) { + return null; + } + encoding[0] = (char) bits; + for (int i = 1; i < encodingLength; i++) { + bits = encodeKMerBits(kmerChars, offset, 8); + if (bits < 0) { + return null; + } + encoding[i] = (char) bits; + offset += 8; + } + return encoding; + } + + private static int compareEncodings(char[] encoding1, char[] encoding2) { + int length = Math.max(encoding1.length, encoding2.length); + for (int i = 0; i < length; i++) { + int result = encoding1[i] - encoding2[i]; + if (result != 0) { + return result; + } + } + return 0; + } + + private static KMerPosition encodeKMerOldFormat(char[] kmerChars, int baseIndex) { + if (kmerChars == null) { + return null; + } + int length = kmerChars.length; + if (length <= 31) { + long bits = encodeKMerBitsLong(kmerChars, 0, length); + if (bits == -1) { + return null; + } + return new KMerPosition1(bits, baseIndex); + } else if (length <= 62) { + long bits1 = encodeKMerBitsLong(kmerChars, 0, 31); + long bits2 = encodeKMerBitsLong(kmerChars, 31, length - 31); + if (bits1 == -1 || bits2 == -1) { + return null; + } + return new KMerPosition2(bits1, bits2, baseIndex); + } else { + return null; + } + } + + private static int encodeKMerBits(char[] kmerChars, int offset, int length) { + int bits = 0; + for (int i = 0; i < length; i++) { + char base = kmerChars[offset + i]; + int baseBits = "ACGT".indexOf(base); + if (baseBits < 0) { + return -1; + } + bits |= baseBits << (2*(length-i-1)); + } + return bits; + } + + private static long encodeKMerBitsLong(char[] kmerChars, int offset, int length) { + long bits = 0; + for (int i = 0; i < length; i++) { + char base = kmerChars[offset + i]; + int baseBits = "ACGT".indexOf(base); + if (baseBits < 0) { + return -1; + } + bits |= ((long)baseBits) << (2*(length-i-1)); + } + return bits; + } + + private static String decodeKMer1(long bits) { + int length = mK; + char[] buffer = new char[length]; + decodeKMerBits(bits, 
buffer, 0, length); + return new String(buffer); + } + + private static String decodeKMer2(long bits1, long bits2) { + int length = mK; + char[] buffer = new char[length]; + decodeKMerBits(bits1, buffer, 0, 31); + decodeKMerBits(bits2, buffer, 31, length-31); + return new String(buffer); + } + + private static String decodeKMerN(char[] encoding, boolean reverse) { + int length = mK; + char[] buffer = new char[length]; + int offset = length % 8; + offset = (offset == 0) ? 8 : offset; + decodeKMerBits(encoding[0], buffer, 0, offset); + for (int i = 1; i < encoding.length; i++) { + decodeKMerBits(encoding[i], buffer, offset, 8); + offset += 8; + } + if (reverse) { + reverseComplementInPlace(buffer); + } + return new String(buffer); + } + + private static void decodeKMerBits(char bits, char[] buffer, int offset, int length) { + for (int i = 0; i < length; i++) { + int baseBits = (int) ((bits >> (2*(length-i-1))) & 0x3); + buffer[offset + i] = "ACGT".charAt(baseBits); + } + } + + private static void decodeKMerBits(long bits, char[] buffer, int offset, int length) { + for (int i = 0; i < length; i++) { + int baseBits = (int) ((bits >> (2*(length-i-1))) & 0x3); + buffer[offset + i] = "ACGT".charAt(baseBits); + } + } + + private static char[] reverseComplement(char[] buffer) { + int length = buffer.length; + char[] result = new char[length]; + System.arraycopy(buffer, 0, result, 0, length); + reverseComplementInPlace(result); + return result; + } + + private static void reverseComplementInPlace(char[] buffer) { + int length = buffer.length; + int limit = (length + 1)/2; + for (int i = 0; i < limit; i++) { + char ch1 = reverseComplement(buffer[i]); + char ch2 = reverseComplement(buffer[length-i-1]); + buffer[i] = ch2; + buffer[length-i-1] = ch1; + } + } + + private static char reverseComplement(char base) { + switch (base) { + case 'A': + return 'T'; + case 'C': + return 'G'; + case 'G': + return 'C'; + case 'T': + return 'A'; + } + return base; + } + + private static 
String formatEncoding(char[] encoding) { + if (encoding == null) { + return null; + } + StringBuilder builder = new StringBuilder(); + builder.append('['); + for (int i = 0; i < encoding.length; i++) { + String hex = Integer.toHexString(encoding[i]); + int length = hex.length(); + while (length < 4) { + builder.append('0'); + length++; + } + builder.append(hex); + } + builder.append(']'); + return builder.toString(); + } + + static class KMerPosition + implements Comparable { + + private int mBaseIndex; + + KMerPosition(int baseIndex) { + mBaseIndex = baseIndex; + } + + public String getKMer() { + return null; + } + + public long getKMerEncoding1() { + return -1; + } + + public long getKMerEncoding2() { + return -1; + } + + public final int getBaseIndex() { + return mBaseIndex; + } + + public final void setBaseIndex(int baseIndex) { + mBaseIndex = baseIndex; + } + + public char[] getKMerEncoding() { + return null; + } + + public int compareTo(KMerPosition kmp) { + return compareEncodings(getKMerEncoding(), kmp.getKMerEncoding()); + } + + public boolean equals(Object object) { + if (!(object instanceof KMerPosition)) { + return false; + } + KMerPosition kmp = (KMerPosition) object; + return (getBaseIndex() == kmp.getBaseIndex() && + this.compareTo(kmp) == 0); + } + + public String format() { + return(getKMer() + + " " + formatEncoding(getKMerEncoding()) + + " " + Integer.toHexString(mBaseIndex)); + } + } + + static class KMerPosition1 + extends KMerPosition { + + private long mKMerEncoding1; + + KMerPosition1(long kmer, int baseIndex) { + super(baseIndex); + mKMerEncoding1 = kmer; + } + + public String getKMer() { + return decodeKMer1(getKMerEncoding1()); + } + + public final long getKMerEncoding1() { + return mKMerEncoding1; + } + + public int compareTo(KMerPosition kmp) { + int result = Long.signum(getKMerEncoding1() - kmp.getKMerEncoding1()); + if (result == 0) { + result = Long.signum(getKMerEncoding2() - kmp.getKMerEncoding2()); + } + return result; + } + } + + 
static class KMerPosition2 + extends KMerPosition1 { + + private long mKMerEncoding2; + + KMerPosition2(long encoding1, long encoding2, int baseIndex) { + super(encoding1, baseIndex); + mKMerEncoding2 = encoding2; + } + + public String getKMer() { + return decodeKMer2(getKMerEncoding1(), getKMerEncoding2()); + } + + public final long getKMerEncoding2() { + return mKMerEncoding2; + } + } + + static class KMerPositionN + extends KMerPosition { + + private boolean mReversed; + private char[] mKMerEncoding; + + KMerPositionN(char[] encoding, int baseIndex) { + super(baseIndex); + mReversed = false; + mKMerEncoding = encoding; + } + + public boolean getIsReversed() { + return mReversed; + } + + public void setIsReversed(boolean value) { + mReversed = value; + } + + public String getKMer() { + return decodeKMerN(mKMerEncoding, mReversed); + } + + public final char[] getKMerEncoding() { + return mKMerEncoding; + } + + public String format() { + return(getKMer() + + " " + formatEncoding(getKMerEncoding()) + + " " + (mReversed ? 
'R' : 'F') + + " " + Integer.toHexString(getBaseIndex())); + } + } + + static class StringKMerPosition + implements Comparable { + + private String mKMerString = null; + private int mBaseIndex; + + StringKMerPosition(String kmer, int baseIndex) { + mKMerString = kmer; + mBaseIndex = baseIndex; + } + + public final String getKMer() { + return mKMerString; + } + + public final int getBaseIndex() { + return mBaseIndex; + } + + public final void setBaseIndex(int baseIndex) { + mBaseIndex = baseIndex; + } + + public int compareTo(StringKMerPosition kmp) { + return mKMerString.compareTo(kmp.mKMerString); + } + + public boolean equals(Object object) { + if (!(object instanceof StringKMerPosition)) { + return false; + } + StringKMerPosition kmp = (StringKMerPosition) object; + return (mBaseIndex == kmp.mBaseIndex && + mKMerString.equals(kmp.mKMerString)); + } + } +} diff --git a/lib/edu/mit/broad/cnv/GatherAlignments.java b/lib/edu/mit/broad/cnv/GatherAlignments.java new file mode 100644 index 0000000000..b0dc2d5afd --- /dev/null +++ b/lib/edu/mit/broad/cnv/GatherAlignments.java @@ -0,0 +1,399 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.cnv; + +import edu.mit.broad.arachne.Alignment; +import edu.mit.broad.arachne.LookAlignReader; + +import java.io.*; +import java.util.*; + +/** + * Utility program to gather CNV alignments from LookAlign files in an I/O efficient manner. 
+ */ +public class GatherAlignments { + + public static void main(String[] args) + throws Exception { + new GatherAlignments().run(args); + } + + private void usage() { + System.out.println("Usage: GatherAlignments ..."); + System.out.println(" -cnpList "); + System.out.println(" -sampleId "); + System.out.println(" -inputFileList "); + System.out.println(" -outputDirectory "); + System.out.println(" -padding "); + System.out.println(" -bestAlignments"); + System.out.println(" -verbose"); + System.out.println(" -debug"); + } + + private boolean parseArguments(String[] args) { + + int argpos = 0; + int argsleft = 0; + + while (argpos < args.length) { + argsleft = args.length - argpos; + String arg = args[argpos]; + if (arg.equals("-cnpList") && argsleft > 1) { + argpos++; + mCnpListPath = args[argpos++]; + } else if (arg.equals("-sampleId") && argsleft > 1) { + argpos++; + mSampleId = args[argpos++]; + } else if (arg.equals("-inputFileList") && argsleft > 1) { + argpos++; + mInputFileListPath = args[argpos++]; + } else if (arg.equals("-outputDirectory") && argsleft > 1) { + argpos++; + mOutputDirectory = args[argpos++]; + } else if (arg.equals("-padding") && argsleft > 1) { + argpos++; + mCnpRegionPadding = Integer.parseInt(args[argpos++]); + } else if (arg.equals("-bestAlignments")) { + argpos++; + mReturnBestHits = true; + } else if (arg.equals("-verbose")) { + argpos++; + mVerbose = true; + } else if (arg.equals("-debug")) { + argpos++; + mDebug = true; + } else if (arg.startsWith("-")) { + usage(); + return false; + } else { + break; + } + } + + argsleft = args.length - argpos; + if (argsleft != 0) { + usage(); + return false; + } + + return true; + } + + private void run(String[] args) + throws Exception { + + if (!parseArguments(args)) { + System.exit(1); + } + + List mInputFileList = parseInputFiles(mInputFileListPath); + Map> mCnpMap = parseCnpFile(mCnpListPath); + for (File inputFile : mInputFileList) { + scanInputFile(inputFile, mCnpMap); + } + } + + 
private List parseInputFiles(String path) + throws IOException { + List fileList = new ArrayList(); + LineNumberReader reader = new LineNumberReader(new FileReader(path)); + while (true) { + String line = reader.readLine(); + if (line == null) { + reader.close(); + break; + } + line = line.trim(); + if (line.length() == 0 || line.startsWith("#")) { + continue; + } + String[] fields = line.split("\\s+"); + fileList.add(new File(fields[0])); + } + return fileList; + } + + private Map> parseCnpFile(String path) + throws IOException { + Map> cnpMap = new HashMap>(); + LineNumberReader reader = new LineNumberReader(new FileReader(path)); + while (true) { + String line = reader.readLine(); + if (line == null) { + reader.close(); + break; + } + line = line.trim(); + if (line.length() == 0 || line.startsWith("#")) { + continue; + } + String[] fields = line.split("\\s+"); + if (fields.length != 4) { + throw new RuntimeException("Invalid CNP line: " + line); + } + if (fields[0].equalsIgnoreCase("CNPID")) { + continue; + } + String cnpId = fields[0]; + String chromosome = fields[1]; + int start = Integer.parseInt(fields[2].replaceAll(",", "")); + int end = Integer.parseInt(fields[3].replaceAll(",", "")); + int sequenceId = chromosomeToSequenceId(chromosome); + if (sequenceId < 0) { + throw new RuntimeException("Unrecognized chromosome: " + chromosome); + } + if (mCnpRegionPadding > 0) { + start = Math.max(1, start - mCnpRegionPadding); + end = end + mCnpRegionPadding; + } + CnpRegion cnp = new CnpRegion(cnpId, sequenceId, start, end); + List cnpList = cnpMap.get(sequenceId); + if (cnpList == null) { + cnpList = new ArrayList(); + cnpMap.put(sequenceId, cnpList); + } + cnpList.add(cnp); + } + return cnpMap; + } + + private int chromosomeToSequenceId(String text) { + if (text == null || text.length() == 0) { + return -1; + } + if (text.matches("\\d+")) { + return Integer.parseInt(text); + } + if (text.startsWith("chr") && text.length() > 3) { + text = text.substring(3); + } + 
if (text.matches("\\d+") && !text.startsWith("0")) { + return Integer.parseInt(text); + } + if (text.equals("M")) { + return 0; + } else if (text.equals("X")) { + return 23; + } else if (text.equals("Y")) { + return 24; + } else { + return -1; + } + } + + private void scanInputFile(File inputFile, + Map> cnpMap) + throws IOException { + LookAlignReader reader = new LookAlignReader(inputFile); + while (true) { + Alignment alignment = getNextAlignment(reader); + if (alignment == null) { + reader.close(); + break; + } + List cnpList = cnpMap.get(alignment.getBSequenceId()); + if (cnpList == null) { + continue; + } + for (CnpRegion cnp : cnpList) { + if (overlaps(cnp, alignment)) { + saveCnpAlignment(cnp, alignment, inputFile); + } + } + } + flushCnpAlignments(inputFile); + } + + private Alignment getNextAlignment(LookAlignReader reader) + throws IOException { + if (!mReturnBestHits) { + if (reader.hasNext()) { + return reader.next(); + } else { + return null; + } + } + while (true) { + Alignment seed = mPendingAlignment; + mPendingAlignment = null; + if (seed == null && reader.hasNext()) { + seed = reader.next(); + } + if (seed == null) { + return null; + } + List secondaryHits = null; + while (reader.hasNext()) { + Alignment alignment = reader.next(); + if (alignment.getASequenceId() != seed.getASequenceId()) { + if (alignment.getASequenceId() < seed.getASequenceId()) { + throw new RuntimeException("Alignments not sorted by A sequence: " + alignment.format()); + } + mPendingAlignment = alignment; + break; + } + if (secondaryHits == null) { + secondaryHits = new ArrayList(); + } + secondaryHits.add(alignment); + } + if (secondaryHits == null) { + return seed; + } + secondaryHits.add(seed); + Alignment result = getUniqueBestAlignment(secondaryHits); + if (result != null) { + return result; + } + } + } + + private Alignment getUniqueBestAlignment(List alignments) { + int bestMismatches = 0; + List best = new ArrayList(); + for (Alignment a : alignments) { + int 
mismatches = getAlignmentMismatches(a); + if (best.isEmpty()) { + best.add(a); + bestMismatches = mismatches; + } + if (mismatches == bestMismatches) { + best.add(a); + } else if (mismatches < bestMismatches) { + best.clear(); + best.add(a); + bestMismatches = mismatches; + } + } + if (best.size() != 1) { + return null; + } + return best.get(0); + } + + private int getAlignmentMismatches(Alignment alignment) { + int mismatches = 0; + int[] blocks = alignment.getAlignmentBlocks(); + for (int i = 0; i < blocks.length; i += 3) { + int gap = blocks[i]; + int duration = blocks[i+1]; + int mm = blocks[i+2]; + if (mm > duration) { + throw new RuntimeException("Invalid alignment? : " + alignment.format()); + } + mismatches += Math.abs(gap); + mismatches += mm; + } + return mismatches; + } + + private boolean overlaps(CnpRegion cnp, Alignment alignment) { + return (cnp.getSequenceId() == alignment.getBSequenceId() && + cnp.getStart() <= alignment.getBEnd() && + cnp.getEnd() >= alignment.getBStart()); + } + + private void saveCnpAlignment(CnpRegion cnp, Alignment alignment, File inputFile) + throws IOException { + if (mCnpAlignmentCount > mCnpAlignmentLimit) { + flushCnpAlignments(inputFile); + } + String cnpId = cnp.getCnpId(); + List alignmentList = mCnpAlignmentMap.get(cnpId); + if (alignmentList == null) { + alignmentList = new ArrayList(); + mCnpAlignmentMap.put(cnpId, alignmentList); + } + alignmentList.add(alignment); + mCnpAlignmentCount++; + } + + private void flushCnpAlignments(File inputFile) + throws IOException { + while (!mCnpAlignmentMap.isEmpty()) { + String cnpId = mCnpAlignmentMap.keySet().iterator().next(); + List alignmentList = mCnpAlignmentMap.get(cnpId); + writeAlignments(cnpId, mSampleId, alignmentList, inputFile); + mCnpAlignmentMap.remove(cnpId); + mCnpAlignmentCount -= alignmentList.size(); + } + if (mCnpAlignmentCount != 0) { + throw new RuntimeException("Unsynchronized alignment count"); + } + } + + private void writeAlignments(String cnpId, 
String sampleId, List alignmentList, File inputFile) + throws IOException { + File outputDir = new File("."); + if (mOutputDirectory != null) { + outputDir = new File(mOutputDirectory); + } + String cnpSample = cnpId; + if (sampleId != null) { + cnpSample = cnpSample + "_" + sampleId; + } + File cnpSampleDir = new File(outputDir, cnpSample); + if (!cnpSampleDir.exists()) { + if (!cnpSampleDir.mkdir()) { + throw new RuntimeException("Failed to create directory " + cnpSampleDir); + } + } + String fileName = inputFile.getName(); + File alignmentFile = new File(cnpSampleDir, fileName); + PrintWriter writer = new PrintWriter(new FileWriter(alignmentFile, true)); + for (Alignment alignment : alignmentList) { + writer.println(alignment.arachneFormat()); + } + writer.flush(); + writer.close(); + } + + private GatherAlignments() { + } + + private static class CnpRegion { + + private CnpRegion(String cnpId, int sequenceId, int start, int end) { + mCnpId = cnpId; + mSequenceId = sequenceId; + mStart = start; + mEnd = end; + } + + public String getCnpId() { return mCnpId; }; + public int getSequenceId() { return mSequenceId; }; + public int getStart() { return mStart; }; + public int getEnd() { return mEnd; }; + + private String mCnpId; + private int mSequenceId; + private int mStart; + private int mEnd; + } + + private boolean mDebug = false; + private boolean mVerbose = false; + + private boolean mReturnBestHits = false; + private String mCnpListPath = null; + private String mSampleId = null; + private String mInputFileListPath = null; + private String mOutputDirectory = null; + private int mCnpRegionPadding = 0; + + private Alignment mPendingAlignment = null; + private int mCnpAlignmentCount = 0; + private int mCnpAlignmentLimit = 1000000; + private Map> mCnpAlignmentMap = new LinkedHashMap>(); +} + + + diff --git a/lib/edu/mit/broad/cnv/kmer/CountKMers.java b/lib/edu/mit/broad/cnv/kmer/CountKMers.java new file mode 100644 index 0000000000..23b9d6af4b --- /dev/null +++ 
b/lib/edu/mit/broad/cnv/kmer/CountKMers.java @@ -0,0 +1,1494 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.cnv.kmer; + + +import edu.mit.broad.cnv.util.SequenceIterator; + +import java.io.*; +import java.util.*; + + +/** + * Tool for counting unique kmers. + */ +public class CountKMers +{ + private static final int NONUNIQUE_MARKER = -1; + + private String mAction = null; + private static int mK = 0; + private int mMinimumK = 0; + private int mMaximumK = 0; + private int mBatchSize = 0; + private List mInputFiles = null; + private File mSearchFile = null; + private String mSequenceName = null; + private File mInputDirectory = null; + private File mOutputDirectory = null; + private boolean mRunDistributed = false; + private int mDistributedWorkerCount = 0; + private boolean mVerbose = false; + private boolean mDebug = false; + + private List mSequenceList = null; + private List mSequenceOffsetList = null; + private List mSpillFileList = null; + private double mSpillFactor = 0.9; + + private long mKMerCount = 0; + private long mUniquePriorCount = 0; + private long mUniqueNewCount = 0; + private long mPriorMapUniqueCount = 0; + + private InputStream mPriorMapStream = null; + private int mPriorMapPosition = -1; + private int mPriorMapValue = 0; + private int mInputFileIndex = 0; + private LineNumberReader mCurrentReader = null; + private String mNextSequence = null; + private char[] mKMerBuffer = null; + private int mKMerBufferedCount = 0; + private String mLineBuffer = null; + private int mLineBufferIndex = 0; + private int mBaseIndex = -1; + private byte[] mIOBuffer = null; + + /* 
Design + Inputs: + - One or more fasta files to search (currently one). + - Output directory for the result files. + - Optionally an input k-1-mer file (output from previous pass). + Outputs: + - Unique kmer file: (sorted by kmer) + This is unique globally or unique wrt unique (K-1) mers (i.e. K unique, K-1 not). + - Per chromosome bit map: pos (implicit) new-bit cum-bit + New-bit is 1 if Kmer starting at pos is unique but (K-1)-mer is not. + Cum-bit is 1 if Kmer starting at pos is unique for some L <= K. + - Statistics + Plan: + - Reducing memory footprint is crucial. + - Sequential pass over the input sequences to generate kmers. + - BatchSize kmers are cached in memory, then sorted and uniqified. + - As batch array fills, batches are spilled to disk. + - Batches are reloaded from disk and merged (N-finger algorithm) + - and streamed to a merge file. + - Merge file is read from disk and processed as final results. + */ + + public static void main(String[] args) + throws Exception { + new CountKMers().run(args); + } + + private void usage() { + System.out.println("Usage: CountKMers ..."); + System.out.println(" -action "); + System.out.println(" -genome "); + System.out.println(" -chromosome "); + System.out.println(" -k "); + System.out.println(" -minK "); + System.out.println(" -maxK "); + System.out.println(" -batchSize "); + System.out.println(" -inputDir "); + System.out.println(" -outputDir "); + System.out.println(" -distributed"); + System.out.println(" -workers "); + System.out.println(" -verbose"); + System.out.println(" -debug"); + } + + private boolean parseArguments(String[] args) { + + int argpos = 0; + int argsleft = 0; + + while (argpos < args.length) { + argsleft = args.length - argpos; + String arg = args[argpos]; + if (arg.equals("-action") && argsleft > 1) { + argpos++; + mAction = args[argpos++]; + } else if (arg.equals("-genome") && argsleft > 1) { + argpos++; + if (mInputFiles == null) { + mInputFiles = new ArrayList(); + } + 
mInputFiles.add(new File(args[argpos++])); + } else if (arg.equals("-chromosome") && argsleft > 1) { + argpos++; + mSequenceName = args[argpos++]; + } else if (arg.equals("-k") && argsleft > 1) { + argpos++; + mK = Integer.parseInt(args[argpos++]); + } else if (arg.equals("-minK") && argsleft > 1) { + argpos++; + mMinimumK = Integer.parseInt(args[argpos++]); + } else if (arg.equals("-maxK") && argsleft > 1) { + argpos++; + mMaximumK = Integer.parseInt(args[argpos++]); + } else if (arg.equals("-batchSize") && argsleft > 1) { + argpos++; + mBatchSize = Integer.parseInt(args[argpos++]); + } else if (arg.equals("-inputDir") && argsleft > 1) { + argpos++; + mInputDirectory = new File(args[argpos++]); + } else if (arg.equals("-outputDir") && argsleft > 1) { + argpos++; + mOutputDirectory = new File(args[argpos++]); + } else if (arg.equals("-searchFile") && argsleft > 1) { + argpos++; + mSearchFile = new File(args[argpos++]); + } else if (arg.equals("-distributed")) { + argpos++; + mRunDistributed = true; + } else if (arg.equals("-workers") && argsleft > 1) { + argpos++; + mDistributedWorkerCount = Integer.parseInt(args[argpos++]); + } else if (arg.equals("-verbose")) { + argpos++; + mVerbose = true; + } else if (arg.equals("-debug")) { + argpos++; + mDebug = true; + } else if (arg.startsWith("-")) { + usage(); + return false; + } else { + break; + } + } + + argsleft = args.length - argpos; + if (argsleft != 0) { + usage(); + return false; + } + + return true; + } + + private void run(String[] args) + throws Exception { + if (!parseArguments(args)) { + System.exit(1); + } + if (mAction == null || mAction.equals("mapKMers")) { + if (mRunDistributed) { + mapKMersDistributed(); + } else { + mapKMers(); + } + } else if (mAction.equals("mapGaps")) { + mapGaps(); + } else if (mAction.equals("rollUp")) { + rollUp(); + } else if (mAction.equals("search")) { + search(); + } + } + + private void search() + throws IOException { + char[][] searchStrings = loadSearchFile(mSearchFile); 
+ while (true) { + String seqName = getNextSequence(); + if (seqName == null) { + break; + } + int position = 0; + log("Scanning " + seqName + " ..."); + while (true) { + char[] kmerChars = getNextKMer(); + if (kmerChars == null) { + break; + } + position++; + for (int i = 0; i < searchStrings.length; i++) { + if (Arrays.equals(searchStrings[i], kmerChars)) { + String kmer = new String(searchStrings[i]); + String strand = ((i % 2) == 0) ? "F" : "R"; + System.out.println(kmer + "\t" + seqName + "\t" + position + "\t" + strand); + } + } + } + } + } + + private char[][] loadSearchFile(File file) + throws IOException { + List list = new ArrayList(); + LineNumberReader reader = new LineNumberReader(new FileReader(file)); + while (true) { + String line = reader.readLine(); + if (line == null) { + reader.close(); + break; + } + String text = line.trim(); + if (text.length() == 0 || text.startsWith("#")) { + continue; + } + String[] fields = text.split("\\s+"); + char[] kmer = fields[0].toUpperCase().toCharArray(); + list.add(kmer); + list.add(reverseComplement(kmer)); + } + return list.toArray(new char[0][0]); + } + + // Can be used to scan genome for sequence names/lengths. 
+ private void scanKMers() + throws IOException { + mSequenceList = new ArrayList(); + mSequenceOffsetList = new ArrayList(); + File priorMapFile = + new File(mOutputDirectory, "unique_" + (mK-1) + "_mers_map.bin"); + openPriorMap(priorMapFile); + while (true) { + String seqName = getNextSequence(); + if (seqName == null) { + break; + } + mSequenceList.add(seqName); + mSequenceOffsetList.add(mBaseIndex+1); + log("Scanning " + seqName + " ..."); + while (true) { + char[] kmerChars = getNextKMer(); + if (kmerChars == null) { + break; + } + mKMerCount++; + if (isUniqueInPriorMap(mBaseIndex)) { + continue; + } + } + } + closePriorMap(); + } + + private void mapGaps() + throws IOException { + while (true) { + String seqName = getNextSequence(); + if (seqName == null) { + break; + } + int pos = 0; + int gapStart = 0; + while (true) { + char base = getNextBase(); + if (base == 0) { + break; + } + pos++; + if (base == 'N') { + if (gapStart == 0) { + gapStart = pos; + } + } else { + if (gapStart > 0) { + System.out.println(seqName + "\t" + gapStart + "\t" + (pos-1)); + gapStart = 0; + } + } + } + if (gapStart > 0) { + System.out.println(seqName + "\t" + gapStart + "\t" + (pos-1)); + gapStart = 0; + } + } + } + + private void rollUp() + throws IOException { + // Roll up based on the middle of the reads. 
+ File[] mapFiles = getAllMapFiles(); + if (mapFiles.length > 127) { + throw new RuntimeException("K to large for byte sized counts"); + } + SequenceIterator seqIterator = new SequenceIterator(mInputFiles); + while (true) { + String seqName = seqIterator.getNextSequence(); + if (seqName == null) { + break; + } + if (mSequenceName != null && !mSequenceName.equals(seqName)) { + continue; + } + log("Rolling up sequence " + seqName + " ..."); + int seqBaseIndex = seqIterator.getBaseIndex() + 1; + char[] seqChars = loadSequence(seqIterator); + int seqLength = seqChars.length; + int seqMapOffset = (seqBaseIndex >> 3) & 0x1FFFFFFF; + int seqMapModulus = (seqBaseIndex & 0x7); + int seqMapLength = (seqMapModulus + seqLength + 7)/8; + // log(" seqLength = " + seqLength); + // log(" baseIndex = " + Integer.toHexString(seqBaseIndex) + // + " (" + (((long)seqBaseIndex) & 0xFFFFFFFFL) + ")"); + // log(" seqMapOffset = " + seqMapOffset); + // log(" seqMapLength = " + seqMapLength); + byte[] counts = new byte[seqLength]; + for (int pos = 1; pos <= seqLength; pos++) { + if (seqChars[pos-1] == 'N') { + counts[pos-1] = -1; + } + } + for (int k = 1; k <= mapFiles.length; k++) { + if (mapFiles[k-1] == null) { + continue; + } + log("Processing map file " + mapFiles[k-1] + " ..."); + byte[] kmerMap = readMapFileRegion(mapFiles[k-1], seqMapOffset, seqMapLength); + for (int pos = 1; pos <= seqLength; pos++) { + if (counts[pos-1] != 0) { + continue; + } else if (isNearContigBoundary(pos, seqChars, k)) { + counts[pos-1] = -1; + } else { + int baseOffset = pos - (k+1)/2; + int mapIndex = seqMapModulus + baseOffset; + if (isUniqueInMap(kmerMap, mapIndex)) { + counts[pos-1] = (byte) k; + } + } + } + } + File outputFile = + new File(mOutputDirectory, "rollup_" + seqName + ".bin"); + writeRollUpFile(outputFile, counts); + } + } + + private boolean isNearContigBoundary(int pos, char[] seqChars, int k) { + int windowStart = pos - (k-1)/2; + int windowEnd = pos + k/2; + if (windowStart < 1 || 
windowEnd > seqChars.length) { + return true; + } + for (int i = windowStart-1; i < windowEnd; i++) { + if (seqChars[i] == 'N') { + return true; + } + } + return false; + } + + private void writeRollUpFile(File file, byte[] counts) + throws IOException { + FileOutputStream stream = new FileOutputStream(file); + stream.write(counts); + stream.flush(); + stream.close(); + if (mDebug) { + PrintWriter writer = new PrintWriter(file + ".dbg"); + for (int i = 0; i < counts.length; i++) { + writer.println(counts[i]); + } + writer.flush(); + writer.close(); + } + } + + /** + * Returns an array of files, indexed by K, + * where the array index = K-1 (i.e. K=1 is the first file). + * If there is no file for index K, then the array element is null. + */ + private File[] getAllMapFiles() { + int maxK = mMaximumK; + if (maxK == 0) { + // Safe upper bound + maxK = 1000; + } + List fileList = new ArrayList(); + for (int k = 1; k <= maxK; k++) { + if (mMinimumK > 0 && k < mMinimumK) { + continue; + } + File mapFile = + new File(mInputDirectory, "unique_" + k + "_mers_map.bin"); + if (mapFile.exists()) { + while (fileList.size() < k-1) { + fileList.add(null); + } + fileList.add(mapFile); + } else { + if (mMaximumK == 0 && !fileList.isEmpty()) { + break; + } + } + } + File[] result = new File[fileList.size()]; + result = fileList.toArray(result); + if (mDebug) { + for (int i = 0; i < result.length; i++) { + debug("mapFiles[k=" + (i+1) + "] = " + result[i]); + } + } + return result; + } + + private char[] loadSequence(SequenceIterator seqIterator) + throws IOException { + StringBuilder builder = new StringBuilder(); + while (true) { + char ch = seqIterator.getNextBase(); + if (ch == 0) { + break; + } + builder.append(ch); + } + char[] result = new char[builder.length()]; + builder.getChars(0, builder.length(), result, 0); + return result; + } + + private void mapKMersDistributed() + throws Exception { + DistributedKMerCounter algorithm = new DistributedKMerCounter(); + 
algorithm.setDebug(mDebug); + algorithm.setVerbose(mVerbose); + algorithm.setInputFiles(mInputFiles); + algorithm.setK(mK); + algorithm.setMaximumWorkerCount(mDistributedWorkerCount); + // algorithm.setLsfQueue(mLsfQueue); + // algorithm.setLsfLogDirectory(mLsfLogDirectory); + // algorithm.setEnableGcLogging(mEnableGcLogging); + algorithm.run(); + } + + private void mapKMers() + throws IOException { + + File textKMerFile = + new File(mOutputDirectory, "unique_" + mK + "_mers.txt"); + File binaryKMerFile = + new File(mOutputDirectory, "unique_" + mK + "_mers.bin"); + File exceptionFile = + new File(mOutputDirectory, "unique_" + mK + "_mers.extra"); + File mapFile = + new File(mOutputDirectory, "unique_" + mK + "_mers_map.bin"); + File priorMapFile = + new File(mOutputDirectory, "unique_" + (mK-1) + "_mers_map.bin"); + File statsFile = + new File(mOutputDirectory, "unique_" + mK + "_mers_stats.txt"); + + if (mBatchSize == 0) { + throw new RuntimeException("Batch size not specified"); + } + + int kmerCount = 0; + int batchSize = mBatchSize; + KMerPosition[] kmerArray = new KMerPosition[batchSize]; + List exceptionList = new ArrayList(); + mSequenceList = new ArrayList(); + mSequenceOffsetList = new ArrayList(); + mIOBuffer = new byte[Math.max(20,4 + 2*((mK + 7)/8))]; + + openPriorMap(priorMapFile); + + while (true) { + String seqName = getNextSequence(); + if (seqName == null) { + break; + } + mSequenceList.add(seqName); + mSequenceOffsetList.add(mBaseIndex+1); + log("Processing " + seqName + " ..."); + while (true) { + char[] kmerChars = getNextKMer(); + if (kmerChars == null) { + break; + } + mKMerCount++; + int baseIndex = mBaseIndex; + if (isUniqueInPriorMap(baseIndex)) { + mUniquePriorCount++; + continue; + } + KMerPosition kmp = encodeKMer(kmerChars, baseIndex); + if (kmp == null) { + // Note: We currently do not handle the reverse + // complement of exception characters correctly. 
+ // For hg18, however, this doesn't matter as + // none of the kmers containing non-ACGT characters + // are present on the reverse strand. + String kmer = new String(kmerChars); + exceptionList.add(new StringKMerPosition(kmer, baseIndex)); + continue; + } + kmerArray[kmerCount++] = kmp; + if (kmerCount == batchSize) { + kmerCount = compactKMers(kmerArray, kmerCount); + if (kmerCount > mSpillFactor * batchSize) { + spillKMers(kmerArray, kmerCount); + kmerCount = 0; + } + } + } + } + if (kmerCount > 0) { + kmerCount = compactKMers(kmerArray, kmerCount); + if (mSpillFileList != null) { + spillKMers(kmerArray, kmerCount); + kmerCount = 0; + } + } + + closePriorMap(); + + // Write out the exception kmers (text file). + compactKMers(exceptionList); + writeExceptionFile(exceptionList, exceptionFile); + + // Write out the binary file of unique encoded kmers. + if (mSpillFileList == null) { + kmerCount = removeNonUnique(kmerArray, kmerCount); + writeKMerBinaryFile(kmerArray, kmerCount, binaryKMerFile); + mUniqueNewCount = kmerCount; + } else { + mUniqueNewCount = mergeSpillFiles(mSpillFileList, binaryKMerFile); + } + mUniqueNewCount += countUniqueKMers(exceptionList); + + // Write out the text file of (all) unique kmers. + writeKMerTextFile(binaryKMerFile, exceptionList, textKMerFile); + + // Create map file from prior map plus the new unique kmers. + long mapSize = (mBaseIndex + 1) & 0xFFFFFFFFL; + createMapFile(mapSize, binaryKMerFile, exceptionList, priorMapFile, mapFile); + + // Write summary statistics file. 
+ writeSummaryStatistics(statsFile); + } + + private int compactKMers(KMerPosition[] kmerArray, int kmerCount) { + if (kmerCount == 0) { + return 0; + } + log("Compacting " + kmerCount + " kmers at index " + + Integer.toHexString(mBaseIndex) + " ..."); + Arrays.sort(kmerArray, 0, kmerCount); + int newCount = 1; + KMerPosition current = kmerArray[0]; + for (int i = 1; i < kmerCount; i++) { + KMerPosition kmp = kmerArray[i]; + if (current.compareTo(kmp) == 0) { + current.setBaseIndex(NONUNIQUE_MARKER); + } else { + kmerArray[newCount++] = kmp; + current = kmp; + } + } + log("Compaction finished, new count is " + newCount); + return newCount; + } + + private int compactKMers(StringKMerPosition[] kmerArray, int kmerCount) { + if (kmerCount == 0) { + return 0; + } + log("Compacting " + kmerCount + " string kmers ..."); + Arrays.sort(kmerArray, 0, kmerCount); + int newCount = 1; + String kmerString = kmerArray[0].getKMer(); + for (int i = 1; i < kmerCount; i++) { + StringKMerPosition kmp = kmerArray[i]; + String ks = kmp.getKMer(); + if (ks.equals(kmerString)) { + kmerArray[newCount-1].setBaseIndex(NONUNIQUE_MARKER); + } else { + kmerArray[newCount++] = kmp; + kmerString = ks; + } + } + log("Compaction finished, new count is " + newCount); + return newCount; + } + + private void compactKMers(List kmerList) { + int kmerCount = kmerList.size(); + if (kmerCount <= 1) { + return; + } + StringKMerPosition[] kmerArray = + kmerList.toArray(new StringKMerPosition[kmerCount]); + kmerCount = compactKMers(kmerArray, kmerCount); + kmerList.clear(); + for (int i = 0; i < kmerCount; i++) { + kmerList.add(kmerArray[i]); + } + } + + private int removeNonUnique(KMerPosition[] kmerArray, int kmerCount) { + int uniqueCount = 0; + for (int i = 0; i < kmerCount; i++) { + KMerPosition kmp = kmerArray[i]; + if (kmp.getBaseIndex() != NONUNIQUE_MARKER) { + kmerArray[uniqueCount++] = kmp; + } + } + return uniqueCount; + } + + private int countUniqueKMers(List kmerList) { + int uniqueCount = 0; + 
for (StringKMerPosition kmp : kmerList) { + if (kmp.getBaseIndex() != NONUNIQUE_MARKER) { + uniqueCount++; + } + } + return uniqueCount; + } + + private void spillKMers(KMerPosition[] kmerArray, int kmerCount) + throws IOException { + if (mSpillFileList == null) { + mSpillFileList = new ArrayList(); + } + int fileNumber = mSpillFileList.size() + 1; + log("Spilling " + kmerCount + " kmers to file " + fileNumber + " ..."); + File spillFile = new File(mOutputDirectory, + "spill_" + mK + "_" + fileNumber + ".tmp"); + mSpillFileList.add(spillFile); + writeKMerBinaryFile(kmerArray, kmerCount, spillFile); + log("Spill file written"); + } + + private void writeKMerBinaryFile(KMerPosition[] kmerArray, + int kmerCount, + File outputFile) + throws IOException { + OutputStream outputStream = + new BufferedOutputStream(new FileOutputStream(outputFile)); + for (int i = 0; i < kmerCount; i++) { + KMerPosition kmp = kmerArray[i]; + writeKMerPosition(outputStream, kmerArray[i]); + } + outputStream.flush(); + outputStream.close(); + } + + private void writeExceptionFile(List kmerList, + File outputFile) + throws IOException { + PrintWriter writer = + new PrintWriter(new BufferedWriter(new FileWriter(outputFile))); + for (StringKMerPosition kmer : kmerList) { + writeUniqueKMer(kmer, writer); + } + writer.flush(); + writer.close(); + } + + private KMerPosition readKMerPosition(InputStream stream) + throws IOException { + byte[] buffer = mIOBuffer; + int encodingLength = (mK + 7)/8; + int fileLength = 4 + 2*encodingLength; + int count = readFully(stream, buffer, 0, fileLength); + if (count <= 0) { + return null; + } else if (count != fileLength) { + throw new RuntimeException("Unexpected end of file"); + } + char[] encoding = new char[encodingLength]; + int baseIndex = ((buffer[0] & 0xFF) | + (buffer[1] & 0xFF) << 8 | + (buffer[2] & 0xFF) << 16 | + (buffer[3] & 0xFF) << 24); + for (int i = 0; i < encodingLength; i++) { + encoding[i] = (char) ((buffer[2*i+4] & 0xFF) | + ((buffer[2*i+5] 
& 0xFF) << 8)); + } + return new KMerPosition(encoding, baseIndex); + } + + private int readFully(InputStream stream, byte[] buffer, int offset, int count) + throws IOException { + int readCount = 0; + while (readCount < count) { + int read = stream.read(buffer, offset, count-readCount); + if (read <= 0) { + break; + } + offset += read; + readCount += read; + } + return readCount; + } + + private void skipBytes(InputStream stream, int count) + throws IOException { + + long longCount = count; + long skipCount = 0; + while (skipCount < longCount) { + long skipped = stream.skip(longCount - skipCount); + if (skipped <= 0) { + throw new RuntimeException("Skip failed"); + } + skipCount += skipped; + } + } + + private void writeKMerPosition(OutputStream stream, KMerPosition kmer) + throws IOException { + byte[] buffer = mIOBuffer; + int baseIndex = kmer.getBaseIndex(); + char[] encoding = kmer.getKMerEncoding(); + int offset = 0; + buffer[offset++] = (byte) ((baseIndex) & 0xFF); + buffer[offset++] = (byte) ((baseIndex >> 8) & 0xFF); + buffer[offset++] = (byte) ((baseIndex >> 16) & 0xFF); + buffer[offset++] = (byte) ((baseIndex >> 24) & 0xFF); + for (int i = 0; i < encoding.length; i++) { + buffer[offset++] = (byte) ((encoding[i]) & 0xFF); + buffer[offset++] = (byte) ((encoding[i] >> 8) & 0xFF); + } + stream.write(buffer, 0, offset); + } + + private long mergeSpillFiles(List spillFiles, File outputFile) + throws IOException { + + if (spillFiles == null) { + return 0; + } + + log("Merging spill files ..."); + OutputStream outputStream = + new BufferedOutputStream(new FileOutputStream(outputFile)); + long uniqueCount = 0; + int fileCount = spillFiles.size(); + InputStream[] inputStreams = new InputStream[fileCount]; + KMerPosition[] kmers = new KMerPosition[fileCount]; + for (int i = 0; i < fileCount; i++) { + inputStreams[i] = + new BufferedInputStream(new FileInputStream(spillFiles.get(i))); + } + while (true) { + for (int i = 0; i < fileCount; i++) { + if (kmers[i] == 
null && inputStreams[i] != null) { + kmers[i] = readKMerPosition(inputStreams[i]); + if (kmers[i] == null) { + inputStreams[i].close(); + inputStreams[i] = null; + } + } + } + int count = 0; + KMerPosition kmer = null; + for (int i = 0; i < fileCount; i++) { + KMerPosition kmp = kmers[i]; + if (kmp == null) { + continue; + } else if (kmer == null) { + kmer = kmp; + count = 1; + } else { + int cmp = kmp.compareTo(kmer); + if (cmp == 0) { + count++; + } else if (cmp < 0) { + kmer = kmp; + count = 1; + } + } + } + if (kmer == null) { + break; + } + for (int i = 0; i < fileCount; i++) { + if (kmers[i] != null && kmer.compareTo(kmers[i]) == 0) { + kmers[i] = null; + } + } + if (count == 1 && kmer.getBaseIndex() != NONUNIQUE_MARKER) { + uniqueCount++; + writeKMerPosition(outputStream, kmer); + } + } + outputStream.flush(); + outputStream.close(); + for (int i = 0; i < fileCount; i++) { + // spillFiles.get(i).delete(); + } + log("Spill files merged, unique count is " + uniqueCount); + return uniqueCount; + } + + private void writeKMerTextFile(File inputFile, + List exceptionList, + File outputFile) + throws IOException { + + log("Writing kmer file " + outputFile + " ..."); + int exceptionIndex = 0; + StringKMerPosition excKMer = null; + Iterator excIter = null; + if (!exceptionList.isEmpty()) { + excIter = exceptionList.iterator(); + excKMer = excIter.next(); + } + + InputStream inputStream = + new BufferedInputStream(new FileInputStream(inputFile)); + PrintWriter writer = + new PrintWriter(new BufferedWriter(new FileWriter(outputFile))); + KMerPosition kmer = readKMerPosition(inputStream); + while (kmer != null || excKMer != null) { + if (excKMer == null) { + writeUniqueKMer(kmer, writer); + kmer = readKMerPosition(inputStream); + } else if (kmer == null) { + writeUniqueKMer(excKMer, writer); + excKMer = excIter.hasNext() ? 
excIter.next() : null; + } else if (kmer.getKMer().compareTo(excKMer.getKMer()) < 0) { + writeUniqueKMer(kmer, writer); + kmer = readKMerPosition(inputStream); + } else { + writeUniqueKMer(excKMer, writer); + excKMer = excIter.hasNext() ? excIter.next() : null; + } + } + inputStream.close(); + writer.flush(); + writer.close(); + log("Wrote kmer file: " + outputFile); + } + + private void writeUniqueKMer(KMerPosition kmer, PrintWriter writer) { + if (kmer.getBaseIndex() != NONUNIQUE_MARKER) { + writeKMer(kmer.getKMer(), kmer.getBaseIndex(), writer); + } + } + + private void writeUniqueKMer(StringKMerPosition kmer, PrintWriter writer) { + if (kmer.getBaseIndex() != NONUNIQUE_MARKER) { + writeKMer(kmer.getKMer(), kmer.getBaseIndex(), writer); + } + } + + private void writeKMer(String kmer, int baseIndex, PrintWriter writer) { + String chr = getBaseIndexSequenceName(baseIndex); + int pos = getBaseIndexCoordinate(baseIndex); + writer.println(kmer + "\t" + chr + "\t" + pos); + } + + private void createMapFile(long mapSize, + File kmerFile, + List exceptionList, + File priorMapFile, + File mapFile) + throws IOException { + byte[] map = null; + long uniquePriorCount = 0; + long byteSize = (mapSize + 7)/8; + int mapByteSize = (int) byteSize; + if (mapByteSize != byteSize) { + throw new RuntimeException("Map too large: " + mapSize); + } + if (priorMapFile.exists()) { + map = readMapFile(priorMapFile); + if (map.length != mapByteSize) { + throw new RuntimeException("Prior map is wrong size"); + } + // Count the prior unique positions + for (int i = 0; i < mapByteSize; i++) { + uniquePriorCount += Integer.bitCount(map[i] & 0xFF); + } + } else { + map = new byte[mapByteSize]; + } + for (StringKMerPosition kmp : exceptionList) { + addToMap(kmp, map); + } + mPriorMapUniqueCount = uniquePriorCount; + + InputStream inputStream = + new BufferedInputStream(new FileInputStream(kmerFile)); + while (true) { + KMerPosition kmp = readKMerPosition(inputStream); + if (kmp == null) { + 
inputStream.close(); + break; + } + addToMap(kmp, map); + } + + writeMapFile(map, mapFile); + } + + private void addToMap(KMerPosition kmp, byte[] map) { + int baseIndex = kmp.getBaseIndex(); + if (baseIndex != NONUNIQUE_MARKER) { + addToMap(baseIndex, map); + } + } + + private void addToMap(StringKMerPosition kmp, byte[] map) { + int baseIndex = kmp.getBaseIndex(); + if (baseIndex != NONUNIQUE_MARKER) { + addToMap(baseIndex, map); + } + } + + private void addToMap(int baseIndex, byte[] map) { + int mod = baseIndex & 0x7; + int offset = (baseIndex >> 3) & 0x1FFFFFFF; + if ((map[offset] & (1 << mod)) != 0) { + throw new RuntimeException("Map entry already set: " + baseIndex); + } + map[offset] |= (1 << mod); + } + + private boolean isUniqueInMap(byte[] map, int baseIndex) { + int mod = baseIndex & 0x7; + int offset = (baseIndex >> 3) & 0x1FFFFFFF; + return ((map[offset] & (1 << mod)) != 0); + } + + private void writeSummaryStatistics(File outputFile) + throws IOException { + PrintWriter writer = + new PrintWriter(new BufferedWriter(new FileWriter(outputFile))); + long baseCount = (mBaseIndex + 1) & 0xFFFFFFFFL; + long uniqueCount = mUniquePriorCount + mUniqueNewCount; + long nonUniqueCount = mKMerCount - uniqueCount; + writer.println("K: " + mK); + writer.println("Sequences: " + mSequenceList.size()); + writer.println("Bases: " + baseCount); + writer.println("KMers: " + mKMerCount); + writer.println("Prior map count: " + mPriorMapUniqueCount); + writer.println("Unique prior: " + mUniquePriorCount + + " (" + formatPercent(mUniquePriorCount, mKMerCount) + ")"); + writer.println("Unique new: " + mUniqueNewCount + + " (" + formatPercent(mUniqueNewCount, mKMerCount) + ")"); + writer.println("Unique cumulative: " + uniqueCount + + " (" + formatPercent(uniqueCount, mKMerCount) + ")"); + writer.println("Nonunique: " + nonUniqueCount + + " (" + formatPercent(nonUniqueCount, mKMerCount) + ")"); + writer.flush(); + writer.close(); + } + + private String formatPercent(long 
numerator, long denominator) { + double fraction = 0.0; + if (denominator != 0) { + fraction = numerator / (double) denominator; + } + return String.format("%1.1f%%", fraction * 100.0); + } + + private void openPriorMap(File mapFile) + throws IOException { + if (mapFile.exists()) { + mPriorMapStream = new BufferedInputStream(new FileInputStream(mapFile)); + mPriorMapPosition = -1; + mPriorMapValue = 0; + } + } + + private void closePriorMap() + throws IOException { + if (mPriorMapStream != null) { + mPriorMapStream.close(); + } + mPriorMapStream = null; + mPriorMapPosition = -1; + mPriorMapValue = 0; + } + + private byte[] readMapFile(File file) + throws IOException { + long fileLength = file.length(); + if (fileLength > 1000000000) { + throw new RuntimeException("Prior map too large: " + file); + } + int length = (int) fileLength; + byte[] map = new byte[length]; + FileInputStream stream = new FileInputStream(file); + int count = readFully(stream, map, 0, length); + if (count != length) { + throw new RuntimeException("Failed to read map: " + file); + } + stream.close(); + return map; + } + + /** + * Read just a subset of a map file. 
+ */ + private byte[] readMapFileRegion(File file, int offset, int length) + throws IOException { + byte[] map = new byte[length]; + FileInputStream stream = new FileInputStream(file); + skipBytes(stream, offset); + int count = readFully(stream, map, 0, length); + if (count != length) { + throw new RuntimeException("Failed to read map: " + file); + } + stream.close(); + return map; + } + + private void writeMapFile(byte[] map, File file) + throws IOException { + FileOutputStream stream = new FileOutputStream(file); + stream.write(map); + stream.flush(); + stream.close(); + } + + private boolean isUniqueInPriorMap(int baseIndex) + throws IOException { + if (mPriorMapStream == null) { + return false; + } + int byteOffset = (baseIndex >> 3) & 0x1FFFFFFF; + if (byteOffset != mPriorMapPosition) { + int delta = byteOffset - mPriorMapPosition; + if (delta < 0) { + throw new RuntimeException("Attempt to seek backwards in prior map"); + } + if (delta > 1) { + skipFully(mPriorMapStream, delta-1); + } + mPriorMapValue = mPriorMapStream.read(); + if (mPriorMapValue < 0) { + throw new RuntimeException("Unexpected end of file in prior map"); + } + mPriorMapPosition += delta; + } + int mod = baseIndex & 0x7; + return (((1 << mod) & mPriorMapValue) != 0); + } + + private void skipFully(InputStream stream, long amount) + throws IOException { + while (amount > 0) { + long skip = stream.skip(amount); + if (skip <= 0 || skip > amount) { + throw new RuntimeException("Skip failed"); + } + amount -= skip; + } + } + + private String getBaseIndexSequenceName(int baseIndex) { + int sequenceCount = mSequenceList.size(); + for (int i = 0; i < sequenceCount-1; i++) { + int nextOffset = mSequenceOffsetList.get(i+1); + if (compareBaseIndex(nextOffset, baseIndex) > 0) { + return mSequenceList.get(i); + } + } + return mSequenceList.get(sequenceCount-1); + } + + private int getBaseIndexCoordinate(int baseIndex) { + Integer sequenceOffset = null; + for (Integer offset : mSequenceOffsetList) { + if 
(compareBaseIndex(offset, baseIndex) > 0) { + break; + } + sequenceOffset = offset; + } + if (sequenceOffset == null) { + return 0; + } + int coordinate = baseIndex - sequenceOffset + 1; + if (coordinate <= 0) { + dumpSequenceList(); + System.out.println("coordinate: " + coordinate); + System.out.println("sequenceOffset: " + Integer.toHexString(sequenceOffset)); + System.out.println("baseIndex: " + Integer.toHexString(baseIndex)); + throw new RuntimeException("Internal error: illegal coordinate " + + coordinate + " for base index " + baseIndex); + } + return coordinate; + } + + private void dumpSequenceList() { + System.out.println("# Sequences:"); + int count = mSequenceList.size(); + for (int i = 0; i < count; i++) { + String seqName = mSequenceList.get(i); + int offset = mSequenceOffsetList.get(i); + System.out.println("# " + seqName + + "\t" + offset + + "\t" + Integer.toHexString(offset)); + } + } + + private int compareBaseIndex(int baseIndex1, int baseIndex2) { + // Implements unsigned comparison, a la compareTo + if (baseIndex1 < 0 ^ baseIndex2 < 0) { + return ((baseIndex1 < 0) ? 
1 : -1); + } else { + return (baseIndex1 - baseIndex2); + } + } + + private String getNextSequence() + throws IOException { + + while (mNextSequence == null) { + if (mCurrentReader == null) { + mCurrentReader = getNextReader(); + if (mCurrentReader == null) { + return null; + } + } + String line = mCurrentReader.readLine(); + if (line == null) { + mCurrentReader.close(); + mCurrentReader = null; + continue; + } + if (line.startsWith(">")) { + String[] tokens = line.substring(1).trim().split("\\s+"); + mNextSequence = tokens[0]; + } + } + String result = mNextSequence; + mNextSequence = null; + return result; + } + + private LineNumberReader getNextReader() + throws IOException { + if (mInputFileIndex >= mInputFiles.size()) { + return null; + } + File file = mInputFiles.get(mInputFileIndex++); + return new LineNumberReader(new FileReader(file)); + } + + private char[] getNextKMer() + throws IOException { + + if (mKMerBuffer == null) { + mKMerBuffer = new char[mK]; + } + System.arraycopy(mKMerBuffer, 1, mKMerBuffer, 0, mKMerBuffer.length - 1); + if (mKMerBufferedCount > 0) { + mKMerBufferedCount--; + } + + while (mKMerBufferedCount < mK) { + char base = getNextBase(); + if (base == 0) { + incrementBaseIndex(mKMerBufferedCount); + mKMerBufferedCount = 0; + return null; + } else if (base == 'N') { + incrementBaseIndex(mKMerBufferedCount+1); + mKMerBufferedCount = 0; + } else { + mKMerBuffer[mKMerBufferedCount++] = base; + } + } + incrementBaseIndex(1); + return mKMerBuffer; + } + + private char getNextBase() + throws IOException { + + if (mLineBuffer == null || mLineBufferIndex >= mLineBuffer.length()) { + if (mCurrentReader == null) { + return 0; + } + String line = mCurrentReader.readLine(); + if (line == null) { + mLineBuffer = null; + mLineBufferIndex = 0; + mCurrentReader.close(); + mCurrentReader = null; + return 0; + } + if (line.startsWith(">")) { + String[] tokens = line.substring(1).trim().split("\\s+"); + mNextSequence = tokens[0]; + mLineBuffer = null; + 
mLineBufferIndex = 0; + return 0; + } + mLineBuffer = line.toUpperCase(); + mLineBufferIndex = 0; + } + return mLineBuffer.charAt(mLineBufferIndex++); + } + + private void incrementBaseIndex(int amount) { + if (mBaseIndex < -1 && (mBaseIndex + amount) >= -1) { + throw new RuntimeException("Base index: 32-bit overflow"); + } + mBaseIndex += amount; + } + + private void log(String text) { + if (mVerbose) { + System.out.println("# " + new Date() + " " + text); + } + } + + private void debug(String text) { + if (mDebug) { + System.out.println("# " + new Date() + " " + text); + } + } + + private static KMerPosition encodeKMer(char[] kmerChars, int baseIndex) { + char[] encoding = encodeKMerChars(kmerChars); + if (encoding == null) { + return null; + } + char[] reverseEncoding = encodeKMerChars(reverseComplement(kmerChars)); + if (compareEncodings(encoding, reverseEncoding) <= 0) { + return new KMerPosition(encoding, baseIndex); + } else { + KMerPosition kmp = new KMerPosition(reverseEncoding, baseIndex); + kmp.setIsReversed(true); + return kmp; + } + } + + private static char[] encodeKMerChars(char[] kmerChars) { + if (kmerChars == null) { + return null; + } + + int kmerLength = kmerChars.length; + int encodingLength = (kmerLength + 7) / 8; + char[] encoding = new char[encodingLength]; + int offset = kmerLength % 8; + offset = (offset == 0) ? 
8 : offset; + int bits = encodeKMerBits(kmerChars, 0, offset); + if (bits < 0) { + return null; + } + encoding[0] = (char) bits; + for (int i = 1; i < encodingLength; i++) { + bits = encodeKMerBits(kmerChars, offset, 8); + if (bits < 0) { + return null; + } + encoding[i] = (char) bits; + offset += 8; + } + return encoding; + } + + private static int compareEncodings(char[] encoding1, char[] encoding2) { + int length = Math.max(encoding1.length, encoding2.length); + for (int i = 0; i < length; i++) { + int result = encoding1[i] - encoding2[i]; + if (result != 0) { + return result; + } + } + return 0; + } + + private static int encodeKMerBits(char[] kmerChars, int offset, int length) { + int bits = 0; + for (int i = 0; i < length; i++) { + char base = kmerChars[offset + i]; + int baseBits = "ACGT".indexOf(base); + if (baseBits < 0) { + return -1; + } + bits |= baseBits << (2*(length-i-1)); + } + return bits; + } + + private static String decodeKMer(char[] encoding, boolean reverse) { + int length = mK; + char[] buffer = new char[length]; + int offset = length % 8; + offset = (offset == 0) ? 
8 : offset; + decodeKMerBits(encoding[0], buffer, 0, offset); + for (int i = 1; i < encoding.length; i++) { + decodeKMerBits(encoding[i], buffer, offset, 8); + offset += 8; + } + if (reverse) { + reverseComplementInPlace(buffer); + } + return new String(buffer); + } + + private static void decodeKMerBits(char bits, char[] buffer, int offset, int length) { + for (int i = 0; i < length; i++) { + int baseBits = (int) ((bits >> (2*(length-i-1))) & 0x3); + buffer[offset + i] = "ACGT".charAt(baseBits); + } + } + + private static void decodeKMerBits(long bits, char[] buffer, int offset, int length) { + for (int i = 0; i < length; i++) { + int baseBits = (int) ((bits >> (2*(length-i-1))) & 0x3); + buffer[offset + i] = "ACGT".charAt(baseBits); + } + } + + private static char[] reverseComplement(char[] buffer) { + int length = buffer.length; + char[] result = new char[length]; + System.arraycopy(buffer, 0, result, 0, length); + reverseComplementInPlace(result); + return result; + } + + private static void reverseComplementInPlace(char[] buffer) { + int length = buffer.length; + int limit = (length + 1)/2; + for (int i = 0; i < limit; i++) { + char ch1 = reverseComplement(buffer[i]); + char ch2 = reverseComplement(buffer[length-i-1]); + buffer[i] = ch2; + buffer[length-i-1] = ch1; + } + } + + private static char reverseComplement(char base) { + switch (base) { + case 'A': + return 'T'; + case 'C': + return 'G'; + case 'G': + return 'C'; + case 'T': + return 'A'; + } + return base; + } + + private static String formatEncoding(char[] encoding) { + if (encoding == null) { + return null; + } + StringBuilder builder = new StringBuilder(); + builder.append('['); + for (int i = 0; i < encoding.length; i++) { + String hex = Integer.toHexString(encoding[i]); + int length = hex.length(); + while (length < 4) { + builder.append('0'); + length++; + } + builder.append(hex); + } + builder.append(']'); + return builder.toString(); + } + + static class KMerPosition + implements Comparable { 
+ + private int mBaseIndex; + private boolean mReversed; + private char[] mKMerEncoding; + + KMerPosition(char[] encoding, int baseIndex) { + mBaseIndex = baseIndex; + mReversed = false; + mKMerEncoding = encoding; + } + + public final String getKMer() { + return decodeKMer(mKMerEncoding, mReversed); + } + + public final boolean getIsReversed() { + return mReversed; + } + + public final void setIsReversed(boolean value) { + mReversed = value; + } + + public final int getBaseIndex() { + return mBaseIndex; + } + + public final void setBaseIndex(int baseIndex) { + mBaseIndex = baseIndex; + } + + public final char[] getKMerEncoding() { + return mKMerEncoding; + } + + public int compareTo(KMerPosition kmp) { + return compareEncodings(getKMerEncoding(), kmp.getKMerEncoding()); + } + + public boolean equals(Object object) { + if (!(object instanceof KMerPosition)) { + return false; + } + KMerPosition kmp = (KMerPosition) object; + return (getBaseIndex() == kmp.getBaseIndex() && + this.compareTo(kmp) == 0); + } + + public String format() { + return(getKMer() + + " " + formatEncoding(getKMerEncoding()) + + " " + (mReversed ? 
'R' : 'F') + + " " + Integer.toHexString(mBaseIndex)); + } + } + + static class StringKMerPosition + implements Comparable { + + private String mKMerString = null; + private int mBaseIndex; + + StringKMerPosition(String kmer, int baseIndex) { + mKMerString = kmer; + mBaseIndex = baseIndex; + } + + public final String getKMer() { + return mKMerString; + } + + public final int getBaseIndex() { + return mBaseIndex; + } + + public final void setBaseIndex(int baseIndex) { + mBaseIndex = baseIndex; + } + + public int compareTo(StringKMerPosition kmp) { + return mKMerString.compareTo(kmp.mKMerString); + } + + public boolean equals(Object object) { + if (!(object instanceof StringKMerPosition)) { + return false; + } + StringKMerPosition kmp = (StringKMerPosition) object; + return (mBaseIndex == kmp.mBaseIndex && + mKMerString.equals(kmp.mKMerString)); + } + } +} diff --git a/lib/edu/mit/broad/cnv/kmer/DistributedKMerCounter.java b/lib/edu/mit/broad/cnv/kmer/DistributedKMerCounter.java new file mode 100644 index 0000000000..90b26d0b1f --- /dev/null +++ b/lib/edu/mit/broad/cnv/kmer/DistributedKMerCounter.java @@ -0,0 +1,151 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.cnv.kmer; + + +import edu.mit.broad.dcp.DistributedAlgorithm; +import edu.mit.broad.cnv.util.SequenceIterator; + +import java.io.*; +import java.util.*; + + +/** + * Distributed algorithm for counting unique kmers. 
+ */ +public class DistributedKMerCounter + extends DistributedAlgorithm +{ + private boolean mDebug = false; + private boolean mVerbose = false; + private int mK = 0; + private List mInputFiles = null; + private List mSequenceList = null; + private List mSequenceOffsetList = null; + + + public DistributedKMerCounter() { + } + + public boolean getDebug() { + return mDebug; + } + + public void setDebug(boolean value) { + mDebug = value; + } + + public boolean getVerbose() { + return mVerbose; + } + + public void setVerbose(boolean value) { + mVerbose = value; + } + + public int getK() { + return mK; + } + + public void setK(int value) { + mK = value; + } + + public List getInputFiles() { + return mInputFiles; + } + + public void setInputFiles(List value) { + mInputFiles = value; + } + + public void run() + throws Exception { + super.run(); + finish(); + } + + protected void init() + throws Exception { + if (getWorkerId() == MASTER) { + initMaster(); + } else { + initWorker(); + } + } + + private void initMaster() + throws IOException { + // Tasks to be amortized + report("Scanning sequences ..."); + scanSequences(); + report("Scan complete."); + } + + private void initWorker() { + // Tasks to be amortized + } + + protected void start() { + // scan genome, divide into chromosomes and optionally segments, distribute calls + } + + private void finish() { + // merge individual files, write out final results + } + + private void scanSequences() + throws IOException { + List sequenceList = new ArrayList(); + List sequenceOffsetList = new ArrayList(); + SequenceIterator seqIterator = new SequenceIterator(getInputFiles()); + while (true) { + String seqName = seqIterator.getNextSequence(); + if (seqName == null) { + break; + } + int baseIndex = seqIterator.getBaseIndex() + 1; + sequenceList.add(seqName); + sequenceOffsetList.add(baseIndex); + } + mSequenceList = sequenceList; + mSequenceOffsetList = sequenceOffsetList; + } + + // Currently not used + private void 
loadGenomeOffsets(File file) + throws IOException { + List sequenceList = new ArrayList(); + List sequenceOffsetList = new ArrayList(); + int baseIndex = 0; + LineNumberReader reader = new LineNumberReader(new FileReader(file)); + while (true) { + String line = reader.readLine(); + if (line == null) { + break; + } + String text = line.trim(); + if (text.length() == 0 || text.startsWith("#")) { + continue; + } + String[] fields = text.split("\\s+"); + if (fields.length != 2) { + throw new RuntimeException("Invalid input line: " + line); + } + int length = Integer.parseInt(fields[1]); + sequenceList.add(fields[0]); + sequenceOffsetList.add(baseIndex); + baseIndex += length; + } + mSequenceList = sequenceList; + mSequenceOffsetList = sequenceOffsetList; + } +} diff --git a/lib/edu/mit/broad/cnv/util/GenomeBaseIndex.java b/lib/edu/mit/broad/cnv/util/GenomeBaseIndex.java new file mode 100644 index 0000000000..7ed22faf3d --- /dev/null +++ b/lib/edu/mit/broad/cnv/util/GenomeBaseIndex.java @@ -0,0 +1,184 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.cnv.util; + + +import java.io.*; +import java.util.*; + + +/** + * Utility class for transforming between a linear base index + * and a chromsome + position coordinate system. 
+ */ +public class GenomeBaseIndex { + + private List mSequenceNames = null; + private int[] mLengths = null; + private long[] mOffsets = null; + + private GenomeBaseIndex() { + } + + public static GenomeBaseIndex read(File file) + throws IOException { + Reader reader = new BufferedReader(new FileReader(file)); + try { + return read(reader); + } finally { + reader.close(); + } + } + + // The input is just a list of space-delimited sequence name and length. + public static GenomeBaseIndex read(Reader reader) + throws IOException { + List sequenceNames = new ArrayList(); + List sequenceLengths = new ArrayList(); + BufferedReader bufferedReader = new BufferedReader(reader); + while (true) { + String line = bufferedReader.readLine(); + if (line == null) { + break; + } + String text = line.trim(); + if (text.length() == 0 || text.startsWith("#")) { + continue; + } + String[] fields = text.split("\\s+"); + if (fields.length < 2) { + throw new RuntimeException("Invalid input line: " + line); + } + int length = Integer.parseInt(fields[1]); + if (length <= 0) { + throw new RuntimeException("Invalid sequence length: " + length); + } + sequenceNames.add(fields[0]); + sequenceLengths.add(length); + } + int count = sequenceLengths.size(); + int[] lengths = new int[count]; + long[] offsets = new long[count]; + long offset = 0; + for (int i = 0; i < count; i++) { + lengths[i] = sequenceLengths.get(i); + offsets[i] = offset; + offset += lengths[i]; + } + GenomeBaseIndex result = new GenomeBaseIndex(); + result.mSequenceNames = sequenceNames; + result.mLengths = lengths; + result.mOffsets = offsets; + return result; + } + + public List getSequenceNames() { + return mSequenceNames; + } + + public boolean contains(String seqName) { + return (getSequenceIndex(seqName) >= 0); + } + + public long getFirstIndex(String seqName) { + int index = getSequenceIndex(seqName); + if (index < 0) { + return -1; + } + return mOffsets[index]; + } + + public long getLastIndex(String seqName) { + int 
index = getSequenceIndex(seqName); + if (index < 0) { + return -1; + } + return (mOffsets[index] + mLengths[index] - 1); + } + + public int getSequenceLength(String seqName) { + int index = getSequenceIndex(seqName); + if (index < 0) { + return 0; + } + return mLengths[index]; + } + + public long getBaseIndex(String seqName, int position) { + int index = getSequenceIndex(seqName); + if (index < 0) { + return -1; + } + if (position > mLengths[index]) { + return -1; + } + if (position < 1) { + // Zero or negative position means last base index + position = mLengths[index]; + } + return (mOffsets[index] + position - 1); + } + + public String getSequenceName(long baseIndex) { + int index = getSequenceIndex(baseIndex); + if (index < 0) { + return null; + } + return mSequenceNames.get(index); + } + + public int getPosition(long baseIndex) { + if (baseIndex < 0) { + // Catch common sign-extension error when packing indexes as ints. + throw new IllegalArgumentException("Invalid base index: " + baseIndex); + } + int index = getSequenceIndex(baseIndex); + if (index < 0) { + return 0; + } + long offset = mOffsets[index]; + long result = baseIndex - offset + 1; + return (int) result; + } + + // Same as getSequenceName, but treat the argument as an unsigned int. + // This is useful for manipulating/storing indexes for the human + // genome as 4-byte unsigned ints. + public String getSequenceNameUnsigned(int baseIndex) { + return getSequenceName(baseIndex & 0xFFFFFFFFL); + } + + // Same as getPosition, but treat the argument as an unsigned int. + // This is useful for manipulating/storing indexes for the human + // genome as 4-byte unsigned ints. 
+ public int getPositionUnsigned(int baseIndex) { + return getPosition(baseIndex & 0xFFFFFFFFL); + } + + private int getSequenceIndex(String seqName) { + return mSequenceNames.indexOf(seqName); + } + + private int getSequenceIndex(long baseIndex) { + long offset = 0; + if (baseIndex < 0) { + return -1; + } + for (int i = 0; i < mLengths.length; i++) { + int length = mLengths[i]; + if (offset + length > baseIndex) { + return i; + } + offset += length; + } + return -1; + } +} diff --git a/lib/edu/mit/broad/cnv/util/GenomeBinIndex.java b/lib/edu/mit/broad/cnv/util/GenomeBinIndex.java new file mode 100644 index 0000000000..2d1a96f616 --- /dev/null +++ b/lib/edu/mit/broad/cnv/util/GenomeBinIndex.java @@ -0,0 +1,167 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.cnv.util; + + +import java.io.*; +import java.util.*; + + +/** + * Utility class for transforming between a chromsome + position + * coordinate system and a binned coordinate system where each + * chromosome (separately) is divided into fixed sized bins, + * ragged on the right/upper end. 
+ */ +public class GenomeBinIndex { + + private int mBinSize; + private List mSequenceNames; + private int[] mSequenceLengths; + private int[] mBinOffsets; + + public GenomeBinIndex(GenomeBaseIndex gbi, int binSize) { + if (binSize <= 0) { + throw new IllegalArgumentException("Illegal bin size: " + binSize); + } + mBinSize = binSize; + mSequenceNames = new ArrayList(gbi.getSequenceNames()); + int count = mSequenceNames.size(); + mSequenceLengths = new int[count]; + mBinOffsets = new int[count]; + long binOffset = 0; // long to detect overflow + for (int i = 0; i < count; i++) { + int length = gbi.getSequenceLength(mSequenceNames.get(i)); + int binCount = (length + binSize - 1) / binSize; + mSequenceLengths[i] = length; + mBinOffsets[i] = (int) binOffset; + binOffset += binCount; + } + if (binOffset > Integer.MAX_VALUE) { + // Check for integer overflow. + // This will happen, e.g., with the human genome and a bin size of 1. + throw new RuntimeException("Binsize too small: " + binSize); + } + } + + public int getBinSize() { + return mBinSize; + } + + public int getBinIndex(String seqName, int position) { + int index = getSequenceIndex(seqName); + if (index < 0) { + return -1; + } + if (position > mSequenceLengths[index]) { + return -1; + } + if (position < 1) { + position = mSequenceLengths[index]; + } + int bin = (position - 1) / mBinSize; + return (mBinOffsets[index] + bin); + } + + public String getSequenceName(int binIndex) { + int index = getSequenceIndex(binIndex); + if (index < 0) { + return null; + } + return mSequenceNames.get(index); + } + + public int getStartPosition(int binIndex) { + int index = getSequenceIndex(binIndex); + if (index < 0) { + return -1; + } + int bin = binIndex - mBinOffsets[index]; + return (bin * mBinSize + 1); + } + + public int getEndPosition(int binIndex) { + int index = getSequenceIndex(binIndex); + if (index < 0) { + return -1; + } + int bin = binIndex - mBinOffsets[index]; + int position = (bin+1) * mBinSize; + position = 
Math.min(position, mSequenceLengths[index]); + return position; + } + + public List getSequenceNames() { + return mSequenceNames; + } + + public int getFirstBin(String seqName) { + return getBinIndex(seqName, 1); + } + + public int getLastBin(String seqName) { + return getBinIndex(seqName, 0); + } + + public int getBinCount() { + if (mBinOffsets.length == 0) { + return 0; + } + int lastIndex = mBinOffsets.length - 1; + int count = mBinOffsets[lastIndex]; + count += (mSequenceLengths[lastIndex] + mBinSize - 1) / mBinSize; + return count; + } + + public int getBinCount(String seqName) { + int index = getSequenceIndex(seqName); + if (index < 0) { + return -1; + } + return ((mSequenceLengths[index] + mBinSize - 1) / mBinSize); + } + + public int getSequenceLength(String seqName) { + int index = getSequenceIndex(seqName); + if (index < 0) { + return 0; + } + return mSequenceLengths[index]; + } + + private int getSequenceIndex(String seqName) { + for (int i = 0; i < mSequenceNames.size(); i++) { + if (mSequenceNames.get(i).equals(seqName)) { + return i; + } + } + return -1; + } + + private int getSequenceIndex(int binIndex) { + if (binIndex < 0) { + return -1; + } + for (int i = 1; i < mBinOffsets.length; i++) { + if (mBinOffsets[i] > binIndex) { + return i-1; + } + } + int lastIndex = mBinOffsets.length-1; + int lastBinIndex = mBinOffsets[lastIndex]; + lastBinIndex += (mSequenceLengths[lastIndex] + mBinSize - 1) / mBinSize; + if (binIndex <= lastBinIndex) { + return lastIndex; + } + return -1; + } +} + diff --git a/lib/edu/mit/broad/cnv/util/SequenceIterator.java b/lib/edu/mit/broad/cnv/util/SequenceIterator.java new file mode 100644 index 0000000000..57bbae7a54 --- /dev/null +++ b/lib/edu/mit/broad/cnv/util/SequenceIterator.java @@ -0,0 +1,145 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. 
+ * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.cnv.util; + + +import java.io.*; +import java.util.*; + + +/** + * Utility class for iterating over fasta files. + * Also maintains an unsigned base index over the file set. + */ +public class SequenceIterator +{ + private List mInputFiles = null; + private int mInputFileIndex = 0; + private int mBaseIndex = -1; + private LineNumberReader mCurrentReader = null; + private String mNextSequence = null; + private String mLineBuffer = null; + private int mLineBufferIndex = 0; + + public SequenceIterator(File inputFile) { + mInputFiles = new ArrayList(); + mInputFiles.add(inputFile); + } + + public SequenceIterator(List inputFiles) { + mInputFiles = inputFiles; + } + + public void close() { + if (mCurrentReader != null) { + try { + mCurrentReader.close(); + } catch (IOException exc) { + throw new RuntimeException("Error closing reader: " + exc.getMessage(), + exc); + } + } + mCurrentReader = null; + mInputFiles = null; + mInputFileIndex = 0; + mBaseIndex = -1; + mNextSequence = null; + mLineBuffer = null; + mLineBufferIndex = 0; + } + + public String getNextSequence() + throws IOException { + + while (mNextSequence == null) { + if (mLineBuffer != null) { + incrementBaseIndex(mLineBuffer.length() - mLineBufferIndex); + mLineBuffer = null; + mLineBufferIndex = 0; + } + if (mCurrentReader == null) { + mCurrentReader = getNextReader(); + if (mCurrentReader == null) { + return null; + } + } + String line = mCurrentReader.readLine(); + if (line == null) { + mCurrentReader.close(); + mCurrentReader = null; + continue; + } + if (line.startsWith(">")) { + String[] tokens = line.substring(1).trim().split("\\s+"); + mNextSequence = tokens[0]; + } else { + incrementBaseIndex(line.length()); + } + } + String result = mNextSequence; + mNextSequence = null; + return 
result; + } + + public char getNextBase() + throws IOException { + + if (mLineBuffer == null || mLineBufferIndex >= mLineBuffer.length()) { + if (mCurrentReader == null) { + return 0; + } + if (mNextSequence != null) { + return 0; + } + String line = mCurrentReader.readLine(); + if (line == null) { + mLineBuffer = null; + mLineBufferIndex = 0; + mCurrentReader.close(); + mCurrentReader = null; + return 0; + } + if (line.startsWith(">")) { + String[] tokens = line.substring(1).trim().split("\\s+"); + mNextSequence = tokens[0]; + mLineBuffer = null; + mLineBufferIndex = 0; + return 0; + } + mLineBuffer = line.toUpperCase(); + mLineBufferIndex = 0; + } + char result = mLineBuffer.charAt(mLineBufferIndex++); + incrementBaseIndex(1); + return result; + } + + public int getBaseIndex() { + return mBaseIndex; + } + + private LineNumberReader getNextReader() + throws IOException { + if (mInputFileIndex >= mInputFiles.size()) { + return null; + } + File file = mInputFiles.get(mInputFileIndex++); + return new LineNumberReader(new FileReader(file)); + } + + private void incrementBaseIndex(int amount) { + if (mBaseIndex < -1 && (mBaseIndex + amount) >= -1) { + throw new RuntimeException("Base index: 32-bit overflow"); + } + mBaseIndex += amount; + } +} + diff --git a/lib/edu/mit/broad/dcp/CallStatus.java b/lib/edu/mit/broad/dcp/CallStatus.java new file mode 100644 index 0000000000..e431b27dfe --- /dev/null +++ b/lib/edu/mit/broad/dcp/CallStatus.java @@ -0,0 +1,18 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2007 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+ */ +package edu.mit.broad.dcp; + +public enum CallStatus +{ + PENDING, + PROCESSING +} + + diff --git a/lib/edu/mit/broad/dcp/CommandRunner.java b/lib/edu/mit/broad/dcp/CommandRunner.java new file mode 100644 index 0000000000..b93b310dd6 --- /dev/null +++ b/lib/edu/mit/broad/dcp/CommandRunner.java @@ -0,0 +1,309 @@ +/** + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2006 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + */ +package edu.mit.broad.dcp; + +import java.io.*; + + +/** + * Utility class to run system commands synchronously and return the output. + * + * The interface supports the typical case where you want to return a modest + * amount of information from the command's standard output or standard error + * as a string. The caller can override this behavior, however, and provide + * alternative output destinations if necessary. + * + * If setMergeOutput() is true, then this class will attempt to interleave + * the standard output and standard error streams of the command into one + * stream (standard output). This may not produce exactly the same results + * as having the operating system interleave the output, but works well for + * simple executables that do not heavily intermix stdout and stderr. + * + * A typical invocation is: + *
+ *  CommandRunner runner = new CommandRunner();
+ *  int status = runner.runCommand("ls");
+ *  if (status == 0) {
+ *      System.out.print(runner.getStandardOutputString());
+ *  }
+ * 
+ * + * @author Bob Handsaker + */ +public class CommandRunner { + + private boolean mMergeOutput = false; + private Writer mStandardOutputDestination = null; + private Writer mStandardErrorDestination = null; + private String mStandardOutputString = null; + private String mStandardErrorString = null; + + + /** + * Default constructor. + */ + public CommandRunner() { + } + + /** + * Get the standard output from the last command as a string. + * + * If no command has been run or an explicit output destination + * was set, then this method returns null. + */ + public String getStandardOutputString() { + return mStandardOutputString; + } + + /** + * Get the standard error from the last command as a string. + * + * If no command has been run or an explicit output destination + * was set, then this method returns null. + */ + public String getStandardErrorString() { + return mStandardErrorString; + } + + /** + * If true, the command's standard error stream will be interleaved + * with the command's standard output stream. The standard error + * stream destination will not be used. + */ + public boolean getMergeOutput() { + return mMergeOutput; + } + + /** + * If true, the command's standard error stream will be interleaved + * with the command's standard output stream. + */ + public void setMergeOutput(boolean value) { + mMergeOutput = value; + } + + /** + * The destination for the command's standard output stream. + * If null, the standard output will be captured in a string. + */ + public Writer getStandardOutputDestination() { + return mStandardOutputDestination; + } + + /** + * The destination for the command's standard output stream. + * If set to null, the standard output will be captured in a string. + */ + public void setStandardOutputDestination(Writer writer) { + mStandardOutputDestination = writer; + } + + /** + * The destination for the command's standard error stream. + * If null, the standard error will be captured in a string. 
+ */ + public Writer getStandardErrorDestination() { + return mStandardErrorDestination; + } + + /** + * The destination for the command's standard error stream. + * If set to null, the standard error will be captured in a string. + */ + public void setStandardErrorDestination(Writer writer) { + mStandardErrorDestination = writer; + } + + /** + * Run a command string as a system command. + * + * Returns the exit status of the command. + * + * When this method is called, the standard output string + * and standard error string are updated if no alternative output + * destinations have been set. + * + * This method throws a RuntimeException if running the command fails + * (for example, if there are not enough system resources to spawn + * the process). + * + * @param commmand The command string to run. + * @return Command exit status. + * @throws RuntimeException If command execution fails. + */ + public int runCommand(String command) + throws RuntimeException { + return runCommand(command.split(" "), null, null); + } + + /** + * Run a command string as a system command. + * + * Returns the exit status of the command. + * + * When this method is called, the standard output string + * and standard error string are updated if no alternative output + * destinations have been set. + * + * This method throws a RuntimeException if running the command fails + * (for example, if there are not enough system resources to spawn + * the process). + * + * @param commmand The command string to run. + * @param environment The command environment (or null to inherit). + * @param workingDirectory The working directory (or null to inherit). + * @return Command exit status. + * @throws RuntimeException If command execution fails. + */ + public int runCommand(String command, String[] environment, File workingDirectory) + throws RuntimeException { + return runCommand(command.split(" "), environment, workingDirectory); + } + + /** + * Run a command string as a system command. 
+ * + * Returns the exit status of the command. + * + * When this method is called, the standard output string + * and standard error string are updated if no alternative output + * destinations have been set. + * + * This method throws a RuntimeException if running the command fails + * (for example, if there are not enough system resources to spawn + * the process). + * + * @param commmand The command to run (as a array of arguments). + * @param environment The command environment (or null to inherit). + * @param workingDirectory The working directory (or null to inherit). + * @return Command exit status. + * @throws RuntimeException If command execution fails. + */ + public int runCommand(String[] command, String[] environment, File workingDirectory) + throws RuntimeException { + + Writer stdout = mStandardOutputDestination; + Writer stderr = mStandardErrorDestination; + if (stdout == null) { + stdout = new StringWriter(); + } + if (mMergeOutput) { + stderr = stdout; + } else if (stderr == null) { + stderr = new StringWriter(); + } + + mStandardOutputString = null; + mStandardErrorString = null; + + int commandStatus = 0; + try { + Process process = + Runtime.getRuntime().exec(command, environment, workingDirectory); + StreamHandler stdoutHandler = + new StreamHandler(process.getInputStream(), stdout); + StreamHandler stderrHandler = + new StreamHandler(process.getErrorStream(), stderr); + + commandStatus = process.waitFor(); + + // Wait for the streams to drain. 
+ stdoutHandler.join(); + stderrHandler.join(); + } catch (Exception exc) { + throw new RuntimeException("Command execution failed: " + + exc.getMessage(), + exc); + } + + if (mStandardOutputDestination == null) { + mStandardOutputString = stdout.toString(); + } + if (mStandardErrorDestination == null && !mMergeOutput) { + mStandardErrorString = stderr.toString(); + } + + return commandStatus; + } + + + /** + * Internal class to asynchronously read from the standard output + * and standard error streams of the command being executed. + * + * If you do not handle command output asynchronously, then execution + * of a command may block in some environments if the program produces + * too much output. In this case, the call to run the process will + * never complete. + */ + private static class StreamHandler extends Thread { + + /** + * Constructor. + * Create an instance of this class, which is an asynchronous + * thread that will consume input from the given input stream + * and send the output to the given output destination. + * + * @param input The input stream to read. + * @param output The output destination. + */ + StreamHandler(InputStream input, Writer output) { + m_input = input; + m_output = output; + start(); + } + + + /** + * Standard thread run method. + * Pipe input from the input source to the output destination + * until there is no more input left. + * + * If an IOException occurs, the thread will make sure all + * available output has been flushed to the destination and + * then terminate. The IOException is not propagated. 
+ */ + public void run() { + + char[] buffer = new char[4096]; + Reader reader = + new InputStreamReader(new BufferedInputStream(m_input)); + Writer writer = m_output; + + try { + while (true) { + int count = reader.read(buffer); + if (count <= 0) { + break; + } + if (writer != null) { + synchronized (writer) { + writer.write(buffer, 0, count); + } + } + } + } catch (IOException ignore) { + // Ignore IO exceptions + } finally { + try { + reader.close(); + } catch (Exception ignore) { + } + try { + m_output.flush(); + } catch (Exception ignore) { + } + } + } + + private InputStream m_input; + private Writer m_output; + } +} diff --git a/lib/edu/mit/broad/dcp/DistributedAlgorithm.java b/lib/edu/mit/broad/dcp/DistributedAlgorithm.java new file mode 100644 index 0000000000..a223c03264 --- /dev/null +++ b/lib/edu/mit/broad/dcp/DistributedAlgorithm.java @@ -0,0 +1,618 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2007 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + */ +package edu.mit.broad.dcp; + +import edu.mit.broad.dcp.message.*; + +import java.io.*; +import java.util.*; +import java.lang.reflect.Method; +import java.net.InetAddress; +import java.net.ServerSocket; +import java.rmi.registry.*; + +/** + * Experimental. 
+ */ +public abstract class DistributedAlgorithm + implements Serializable +{ + public static final Integer ANY = 0; + public static final Integer MASTER = 1; + + public DistributedAlgorithm() { + } + + public String getServerHost() { + return mServerHost; + } + + public void setServerHost(String value) { + mServerHost = value; + } + + public int getServerPort() { + return mServerPort; + } + + public void setServerPort(int value) { + mServerPort = value; + } + + public String getAlgorithmName() { + if (mAlgorithmName != null) { + return mAlgorithmName; + } else { + return getClassName(); + } + } + + public void setAlgorithmName(String value) { + mAlgorithmName = value; + } + + public int getMaximumWorkerCount() { + return mMaximumWorkerCount; + } + + public void setMaximumWorkerCount(int value) { + mMaximumWorkerCount = value; + } + + /** + * Name of LSF queue to use for workers. + */ + public String getLsfQueue() { + return mLsfQueue; + } + + public void setLsfQueue(String value) { + mLsfQueue = value; + } + + /** + * Directory to hold lsf log files. + */ + public String getLsfLogDirectory() { + return mLsfLogDirectory; + } + + public void setLsfLogDirectory(String value) { + mLsfLogDirectory = value; + } + + public boolean getEnableGcLogging() { + return mEnableGcLogging; + } + + public void setEnableGcLogging(boolean value) { + mEnableGcLogging = value; + } + + public Integer getWorkerId() { + return mWorkerId; + } + + public Integer getProcessId() { + return mProcessId; + } + + protected void init() + throws Exception { + } + + protected abstract void start() + throws Exception; + + public void run() + throws Exception { + + if (mIsRunning) { + throw new IllegalStateException("Algorithm is already running"); + } + + mIsRunning = true; + mWorkerId = MASTER; + mProcessId = MASTER; + + try { + startDistributedServer(); + init(); + startWorkerThread(); + startWorkers(); + start(); + waitForCompletion(); + } finally { + // TBD: More cleanup (shutdown threads, etc.) 
+ stopDistributedServer(); + mIsRunning = false; + } + } + + void runWorker(int workerId, int processId) + throws Exception { + + if (mIsRunning) { + throw new IllegalStateException("Algorithm is already running"); + } + + mIsRunning = true; + mWorkerId = workerId; + mProcessId = processId; + + try { + if (openDistributedServer() == null) { + report("Server " + mServerHost + ":" + mServerPort + " not responding"); + return; + } + init(); + startWorkerThread(); + mWorkerThread.join(); + } finally { + closeDistributedServer(); + mIsRunning = false; + } + } + + private void startWorkers() { + int workerCount = getMaximumWorkerCount(); + if (workerCount <= 0) { + // Use single process execution for testing/debugging. + new InProcessWorker().start(); + return; + } + if (workerCount > 1000) { + throw new RuntimeException("Excessive worker count: " + workerCount); + } + for (int i = 0; i < workerCount; i++) { + Integer workerId = (MASTER + i + 1); + Integer processId = workerId; // for now + startWorker(workerId, processId); + } + } + + private void startDistributedServer() { + try { + // Create a server socket to allocate a unique port. + // There is a window of vulnerability where the port + // can get reused, but in practice this works ok. 
+ String serverHost = getCurrentHost(); + ServerSocket socket = new ServerSocket(0); + int serverPort = socket.getLocalPort(); + socket.close(); + Registry registry = LocateRegistry.createRegistry(serverPort); + DistributedCallServer server = new DistributedCallServer(); + server.setAlgorithm(this); + registry.bind("DistributedCallService", server); + mServerHost = serverHost; + mServerPort = serverPort; + mDistributedCallServer = server; + mDistributedCallService = server; + } catch (Exception exc) { + throw wrapException(exc); + } + } + + private void stopDistributedServer() { + if (mDistributedCallServer != null) { + try { + Registry registry = LocateRegistry.getRegistry(mServerPort); + registry.unbind("DistributedCallService"); + mDistributedCallServer.stop(); + } catch (Exception exc) { + throw wrapException(exc); + } + } + mDistributedCallService = null; + mDistributedCallServer = null; + } + + private DistributedCallService openDistributedServer() { + mDistributedCallService = null; + try { + String url = "rmi://" + getServerHost() + ":" + getServerPort() + "/DistributedCallService"; + DistributedCallService server = + (DistributedCallService) java.rmi.Naming.lookup(url); + mDistributedCallService = server; + } catch (java.rmi.NotBoundException exc) { + // Server has exited + } catch (Exception exc) { + throw wrapException(exc); + } + return mDistributedCallService; + } + + private void closeDistributedServer() { + mDistributedCallService = null; + } + + private void startWorker(Integer workerId, Integer processId) { + + String logFile = "worker_" + processId + "_%J.bsub"; + if (mLsfLogDirectory != null) { + logFile = mLsfLogDirectory + "/" + logFile; + } + + List command = new ArrayList(); + command.add("bsub"); + command.add("-o"); + command.add(logFile); + if (mLsfQueue != null) { + command.add("-q"); + command.add(mLsfQueue); + } + command.add("runDistributedWorker"); + command.add("-serverHost"); + command.add(getServerHost()); + 
command.add("-serverPort"); + command.add(Integer.toString(getServerPort())); + command.add("-workerId"); + command.add(Integer.toString(workerId)); + command.add("-processId"); + command.add(Integer.toString(processId)); + + // Pass our -Xmx setting along to all workers. + Map environment = + new LinkedHashMap(System.getenv()); + long maxMemory = Runtime.getRuntime().maxMemory(); + long maxKbytes = maxMemory / 1024; + String memJavaOpt = "-Xmx" + maxKbytes + "K"; + + // Enable GC logging if requested + String gcJavaOpt = null; + if (mEnableGcLogging) { + String gcLogFile = "worker_" + processId + ".gc.log"; + if (mLsfLogDirectory != null) { + gcLogFile = mLsfLogDirectory + "/" + gcLogFile; + } + gcJavaOpt = "-Xloggc:" + gcLogFile; + } + + String javaOpts = environment.get("JAVAOPTS"); + if (javaOpts == null) { + javaOpts = memJavaOpt; + if (gcJavaOpt != null) { + javaOpts = javaOpts + " " + gcJavaOpt; + } + environment.put("JAVAOPTS", javaOpts); + } + + // Log output ourselves (rather than waiting for bsub). 
+ String workerLogFile = "worker_" + processId + ".log"; + if (mLsfLogDirectory != null) { + workerLogFile = mLsfLogDirectory + "/" + workerLogFile; + } + environment.put("DA_LOG_FILE", workerLogFile); + + CommandRunner runner = new CommandRunner(); + Writer output = new LsfOutputFilter(); + runner.setStandardOutputDestination(output); + runner.setStandardErrorDestination(output); + String[] commandArray = command.toArray(new String[command.size()]); + String[] environmentArray = createEnvironmentArray(environment); + int status = runner.runCommand(commandArray, environmentArray, null); + if (status != 0) { + throw new RuntimeException("Error starting worker: " + status); + } + } + + private String[] createEnvironmentArray(Map map) { + if (map == null) { + return null; + } + int index = 0; + String[] array = new String[map.size()]; + for (Map.Entry entry : map.entrySet()) { + array[index++] = entry.getKey() + "=" + entry.getValue(); + } + return array; + } + + private String getCurrentHost() { + try { + return InetAddress.getLocalHost().getCanonicalHostName(); + } catch (Exception exc) { + throw wrapException(exc); + } + } + + private void waitForCompletion() { + DistributedCallServer server = mDistributedCallServer; + while (true) { + if (server.isQueueEmpty()) { + break; + } + try { + Thread.sleep(1000); + } catch (InterruptedException exc) { + // ignore + } + } + } + + protected void callDistributed(String methodName, Object... methodArgs) { + callDistributed(null, methodName, methodArgs); + } + + protected void callDistributed(Integer workerId, String methodName, Object... 
methodArgs) { + if (workerId == null) { + workerId = ANY; + } + try { + DistributedCallMessage message = new DistributedCallMessage(); + message.setSenderWorkerId(getWorkerId()); + message.setSenderProcessId(getProcessId()); + message.setReceiverWorkerId(workerId); + message.setMethodName(methodName); + message.setMethodArgs(methodArgs); + mDistributedCallService.writeMessage(message); + } catch (Throwable exc) { + throw wrapException(exc); + } + } + + private void callMethod(String methodName, Object[] methodArgs) { + try { + Object target = this; + Class targetClass = target.getClass(); + Method targetMethod = findMethod(targetClass, methodName); + if (targetMethod == null) { + throw new RuntimeException("Cannot find target method: " + methodName); + } + targetMethod.invoke(target, methodArgs); + } catch (Throwable exc) { + throw wrapException(exc); + } + } + + private Method findMethod(Class clazz, String methodName) throws Exception { + Method result = null; + Method[] methods = clazz.getDeclaredMethods(); + for (int i = 0; i < methods.length; i++) { + if (methods[i].getName().equals(methodName)) { + if (result != null) { + throw new RuntimeException("Duplicate method name: " + methodName); + } + result = methods[i]; + } + } + return result; + } + + private RuntimeException wrapException(Throwable exception) { + if (exception instanceof RuntimeException) { + return (RuntimeException) exception; + } else { + return new RuntimeException(exception.getMessage(), exception); + } + } + + private void startWorkerThread() { + if (mWorkerThread != null) { + throw new IllegalStateException("WorkerThread is running"); + } + mWorkerThread = new WorkerThread(); + mWorkerThread.start(); + } + + private void stopWorkerThread() { + if (mWorkerThread == null) { + throw new IllegalStateException("WorkerThread is running"); + } + mWorkerThread.stopThread(); + } + + private class WorkerThread extends Thread { + + WorkerThread() { + setDaemon(true); + } + + public void run() { + 
try { + DistributedCallService service = mDistributedCallService; + while (true) { + if (isInterrupted()) { + System.out.println("#DBG: Worker isInterrupted"); + throw new InterruptedException(); + } + DistributedCallMessage message = + service.acceptMessage(getWorkerId(), getProcessId()); + if (message == null) { + Thread.sleep(1000); + } else { + processMessage(message); + } + } + } catch (InterruptedException exc) { + // Interruption terminates this thread. + // System.out.println("#DBG: Worker caught InterruptedException"); + } catch (Throwable exc) { + if (isDisconnectException(exc)) { + report("Server disconnected"); + } else { + reportError("Exception in WorkerThread: " + exc.getMessage(), exc); + System.exit(1); + } + } + report("WorkerThread terminated"); + } + + void stopThread() { + // System.out.println("#DBG: About to interrupt worker..."); + interrupt(); + // System.out.println("#DBG: Joining worker..."); + try { + join(); + } catch (InterruptedException exc) { + // ignore + } + } + + private boolean isDisconnectException(Throwable exc) { + if (exc instanceof java.rmi.ConnectException) { + return true; + } else if (exc instanceof java.rmi.NoSuchObjectException) { + return true; + } else if (exc instanceof java.rmi.UnmarshalException && + exc.getCause() != null && + exc.getCause() instanceof EOFException) { + return true; + } else { + return false; + } + } + } + + private void processMessage(DistributedCallMessage message) { + try { + Integer workerId = message.getReceiverWorkerId(); + if (workerId == null || !workerId.equals(getWorkerId())) { + reportError("Invalid worker ID in message: " + message); + return; + } + callMethod(message.getMethodName(), message.getMethodArgs()); + } catch (Throwable exc) { + reportError("Exception running message: " + message, exc); + } finally { + completeMessage(message); + } + } + + private void completeMessage(DistributedCallMessage message) { + try { + DistributedCallService service = mDistributedCallService; + 
service.completeMessage(getWorkerId(), getProcessId(), message.getCallId()); + } catch (Throwable exc) { + reportError("Exception completing message: " + message, exc); + } + } + + protected void report(String message) { + String identity = + getAlgorithmName() + " " + + getWorkerId() + "/" + getProcessId(); + System.out.println("# " + identity + " : " + message); + } + + protected void reportError(String message) { + reportError(message, null); + } + + protected void reportError(String message, Throwable exception) { + String identity = + getAlgorithmName() + " " + + getWorkerId() + "/" + getProcessId(); + System.out.println("Error" + + " [" + identity + "]" + + ": " + message); + if (exception != null) { + System.out.println(" with exception: " + exception.getMessage()); + exception.printStackTrace(System.out); + } + } + + private String getClassName() { + String name = getClass().getName(); + return name.substring(name.lastIndexOf('.')+1); + } + + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("DistributedAlgorithm"); + builder.append("("); + builder.append("" + getAlgorithmName()); + builder.append(","); + builder.append("" + getWorkerId()); + builder.append(","); + builder.append("" + getProcessId()); + builder.append(","); + builder.append("" + getMaximumWorkerCount()); + builder.append(","); + builder.append("" + getLsfQueue()); + builder.append(","); + builder.append("" + mIsRunning); + builder.append(")"); + return builder.toString(); + } + + // This class is used only during in-process execution/testing/debugging. 
+ private class InProcessWorker extends Thread { + + InProcessWorker() { + setDaemon(true); + } + + public void run() { + report("InProcessWorker starting"); + try { + String serverAddress = getServerHost() + ":" + getServerPort(); + String url = "rmi://" + serverAddress + "/DistributedCallService"; + DistributedCallService server = + (DistributedCallService) java.rmi.Naming.lookup(url); + DistributedAlgorithm algorithm = server.getAlgorithm(); + algorithm.setServerHost(getServerHost()); + algorithm.setServerPort(getServerPort()); + algorithm.runWorker(2, 1); + } catch (Throwable exc) { + reportError("Exception in InProcessWorker: " + exc.getMessage(), exc); + System.exit(1); + } + report("InProcessWorker terminated"); + } + } + + private static class LsfOutputFilter + extends FilterWriter { + + LsfOutputFilter() { + super(new PrintWriter(System.out, true)); + } + + public void write(int ch) + throws IOException { + if (mAtLineStart) { + out.write("# "); + mAtLineStart = false; + } + out.write(ch); + mAtLineStart = (ch == '\n'); + } + + public void write(String s, int off, int len) + throws IOException { + write(s.toCharArray(), off, len); + } + + public void write(char[] a, int off, int len) + throws IOException { + for (int i = 0; i < len; i++) { + write(a[off+i]); + } + } + + private boolean mAtLineStart = true; + } + + + private transient int mMaximumWorkerCount = 0; + private transient String mLsfQueue = null; + private transient String mLsfLogDirectory = null; + private transient boolean mEnableGcLogging = false; + private transient boolean mIsRunning = false; + private transient int mWorkerId = 0; + private transient int mProcessId = 0; + private transient WorkerThread mWorkerThread = null; + private transient String mAlgorithmName = null; + private transient String mServerHost = null; + private transient int mServerPort = 0; + private transient DistributedCallService mDistributedCallService = null; + private transient DistributedCallServer 
mDistributedCallServer = null; +} diff --git a/lib/edu/mit/broad/dcp/DistributedAlgorithmWorker.java b/lib/edu/mit/broad/dcp/DistributedAlgorithmWorker.java new file mode 100644 index 0000000000..dcee13eb80 --- /dev/null +++ b/lib/edu/mit/broad/dcp/DistributedAlgorithmWorker.java @@ -0,0 +1,134 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2007 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + */ +package edu.mit.broad.dcp; + +import java.util.*; + +/** + * Command line driver for distributed worker invocation. + */ +public class DistributedAlgorithmWorker +{ + public static void main(String[] args) + throws Exception { + new DistributedAlgorithmWorker().run(args); + } + + private void run(String[] args) + throws Exception { + + if (!parseArguments(args)) { + System.exit(1); + } + System.out.println("# DistributedAlgorithmWorker"); + System.out.println("# Started at " + new Date()); + runDistributedWorker(); + System.out.println("# Ended at " + new Date()); + } + + private boolean parseArguments(String[] args) { + + int argpos = 0; + int argsleft = 0; + + while (argpos < args.length) { + argsleft = args.length - argpos; + String arg = args[argpos]; + if (arg.equals("-serverHost") && argsleft > 1) { + argpos++; + mServerHost = args[argpos++]; + } else if (arg.equals("-serverPort") && argsleft > 1) { + argpos++; + mServerPort = Integer.parseInt(args[argpos++]); + } else if (arg.equals("-workerId") && argsleft > 1) { + argpos++; + mWorkerId = new Integer(args[argpos++]); + } else if (arg.equals("-processId") && argsleft > 1) { + argpos++; + mProcessId = new Integer(args[argpos++]); + } else if (arg.equals("-debug")) { + argpos++; + mDebug = true; + continue; + } 
else if (arg.equals("-verbose")) { + argpos++; + mVerbose = true; + continue; + } else if (arg.startsWith("-")) { + usage(); + return false; + } else { + break; + } + } + + argsleft = args.length - argpos; + if (argsleft != 0) { + usage(); + return false; + } + + return true; + } + + private void usage() { + System.out.println("Usage: DistributedWorkerMain ..."); + System.out.println(" -serverHost "); + System.out.println(" -serverPort "); + System.out.println(" -workerId "); + System.out.println(" -processId "); + System.out.println(" -verbose"); + System.out.println(" -debug"); + } + + private void runDistributedWorker() + throws Exception { + + DistributedAlgorithm algorithm = null; + String serverAddress = getServerHost() + ":" + getServerPort(); + try { + String url = "rmi://" + serverAddress + "/DistributedCallService"; + DistributedCallService server = + (DistributedCallService) java.rmi.Naming.lookup(url); + algorithm = server.getAlgorithm(); + } catch (java.rmi.ConnectException exc) { + System.out.println("# Server " + serverAddress + " not responding."); + return; + } + + algorithm.setServerHost(getServerHost()); + algorithm.setServerPort(getServerPort()); + algorithm.runWorker(getWorkerId(), getProcessId()); + } + + private Integer getWorkerId() { + return mWorkerId; + } + + private Integer getProcessId() { + return mProcessId; + } + + private String getServerHost() { + return mServerHost; + } + + private int getServerPort() { + return mServerPort; + } + + + private boolean mDebug = false; + private boolean mVerbose = false; + private String mServerHost = null; + private int mServerPort = 0; + private Integer mWorkerId = null; + private Integer mProcessId = null; +} diff --git a/lib/edu/mit/broad/dcp/DistributedCallServer.java b/lib/edu/mit/broad/dcp/DistributedCallServer.java new file mode 100644 index 0000000000..995eff5717 --- /dev/null +++ b/lib/edu/mit/broad/dcp/DistributedCallServer.java @@ -0,0 +1,133 @@ +/* + * The Broad Institute + * SOFTWARE 
COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + */ +package edu.mit.broad.dcp; + + +import edu.mit.broad.dcp.message.*; + +import java.rmi.server.UnicastRemoteObject; +import java.util.*; + +public class DistributedCallServer + extends UnicastRemoteObject + implements DistributedCallService +{ + public DistributedCallServer() + throws java.rmi.RemoteException { + } + + public void setAlgorithm(DistributedAlgorithm algorithm) { + mAlgorithm = algorithm; + } + + public DistributedAlgorithm getAlgorithm() { + return mAlgorithm; + } + + public long writeMessage(DistributedCallMessage message) { + message.setCallStatus(CallStatus.PENDING); + message.setCallId(generateCallId()); + if (message.getReceiverWorkerId().equals(0)) { + synchronized (mMessageQueue) { + mMessageQueue.addLast(message); + } + } else { + synchronized (mMessageQueue) { + mMessageQueue.addFirst(message); + } + } + return message.getCallId(); + } + + public DistributedCallMessage acceptMessage(int workerId, int processId) { + if (workerId <= 0) { + throw new IllegalArgumentException("Invalid worker ID: " + workerId); + } + if (processId <= 0) { + throw new IllegalArgumentException("Invalid process ID: " + processId); + } + synchronized (mMessageQueue) { + Iterator iterator = mMessageQueue.iterator(); + while (iterator.hasNext()) { + DistributedCallMessage message = iterator.next(); + if (message.getCallStatus() != CallStatus.PENDING) { + continue; + } + int receiverId = message.getReceiverWorkerId(); + if (receiverId == workerId || + (receiverId == 0 && workerId > 1)) { + message.setCallStatus(CallStatus.PROCESSING); + message.setReceiverWorkerId(workerId); + 
message.setReceiverProcessId(processId); + return message; + } + } + } + + return null; + } + + public void completeMessage(int workerId, int processId, long callId) { + if (workerId <= 0) { + throw new IllegalArgumentException("Invalid worker ID: " + workerId); + } + if (processId <= 0) { + throw new IllegalArgumentException("Invalid process ID: " + processId); + } + if (callId <= 0) { + throw new IllegalArgumentException("Invalid call ID: " + callId); + } + synchronized (mMessageQueue) { + Iterator iterator = mMessageQueue.iterator(); + while (iterator.hasNext()) { + DistributedCallMessage message = iterator.next(); + if (message.getCallId().longValue() == callId) { + if (message.getCallStatus() != CallStatus.PROCESSING) { + throw new IllegalStateException("Call #" + callId + " not in state PROCESSING"); + } + if (!message.getReceiverWorkerId().equals(workerId)) { + throw new IllegalStateException("Call #" + callId + " assigned to worker " + message.getReceiverWorkerId() + " not worker " + workerId); + } + if (!message.getReceiverProcessId().equals(processId)) { + throw new IllegalStateException("Call #" + callId + " assigned to process " + message.getReceiverProcessId() + " not process " + processId); + } + iterator.remove(); + return; + } + } + } + + throw new IllegalArgumentException("Unrecognized call ID " + callId); + } + + public boolean isQueueEmpty() { + synchronized (mMessageQueue) { + return mMessageQueue.isEmpty(); + } + } + + public void stop() { + try { + UnicastRemoteObject.unexportObject(this, false); + } catch (java.rmi.NoSuchObjectException exc) { + throw new RuntimeException("Exception unexporting object: " + exc.getMessage(), + exc); + } + } + + private synchronized long generateCallId() { + return ++mCallIdGenerator; + } + + private long mCallIdGenerator = 0; + private DistributedAlgorithm mAlgorithm = null; + private LinkedList mMessageQueue = + new LinkedList(); +} diff --git a/lib/edu/mit/broad/dcp/DistributedCallService.java 
b/lib/edu/mit/broad/dcp/DistributedCallService.java new file mode 100644 index 0000000000..202b25f42c --- /dev/null +++ b/lib/edu/mit/broad/dcp/DistributedCallService.java @@ -0,0 +1,25 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + */ +package edu.mit.broad.dcp; + +import edu.mit.broad.dcp.message.*; + +public interface DistributedCallService + extends java.rmi.Remote +{ + public DistributedAlgorithm getAlgorithm() + throws java.rmi.RemoteException; + public long writeMessage(DistributedCallMessage message) + throws java.rmi.RemoteException; + public DistributedCallMessage acceptMessage(int workerId, int processId) + throws java.rmi.RemoteException; + public void completeMessage(int workerId, int processId, long callId) + throws java.rmi.RemoteException; +} diff --git a/lib/edu/mit/broad/dcp/message/DistributedCallMessage.java b/lib/edu/mit/broad/dcp/message/DistributedCallMessage.java new file mode 100644 index 0000000000..1b0fa0a4d3 --- /dev/null +++ b/lib/edu/mit/broad/dcp/message/DistributedCallMessage.java @@ -0,0 +1,90 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2007 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+ */ +package edu.mit.broad.dcp.message; + +import edu.mit.broad.dcp.CallStatus; + +public class DistributedCallMessage + extends DistributedMessage +{ + public DistributedCallMessage() { + } + + public Long getCallId() { + return mCallId; + } + + public void setCallId(Long value) { + mCallId = value; + } + + public CallStatus getCallStatus() { + return mCallStatus; + } + + public void setCallStatus(CallStatus value) { + mCallStatus = value; + } + + public String getMethodName() { + return mMethodName; + } + + public void setMethodName(String value) { + mMethodName = value; + } + + public Object[] getMethodArgs() { + return mMethodArgs; + } + + public void setMethodArgs(Object[] value) { + mMethodArgs = value; + } + + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("DistributedCallMessage"); + builder.append("("); + builder.append("" + getSenderWorkerId()); + builder.append(","); + builder.append("" + getSenderProcessId()); + builder.append(","); + builder.append("" + getReceiverWorkerId()); + builder.append(","); + builder.append("" + getReceiverProcessId()); + builder.append(","); + builder.append("" + mCallId); + builder.append(","); + builder.append("" + mCallStatus); + builder.append(","); + builder.append("" + mMethodName); + builder.append(","); + if (mMethodArgs == null) { + builder.append("" + mMethodArgs); + } else { + builder.append("["); + for (int i = 0; i < mMethodArgs.length; i++) { + if (i > 0) { + builder.append(","); + } + builder.append("" + mMethodArgs[i]); + } + builder.append("]"); + } + builder.append(")"); + return builder.toString(); + } + + public Long mCallId; + public CallStatus mCallStatus; + public String mMethodName; + public Object[] mMethodArgs; +} diff --git a/lib/edu/mit/broad/dcp/message/DistributedMessage.java b/lib/edu/mit/broad/dcp/message/DistributedMessage.java new file mode 100644 index 0000000000..a5e837a69d --- /dev/null +++ b/lib/edu/mit/broad/dcp/message/DistributedMessage.java 
@@ -0,0 +1,54 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2007 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + */ +package edu.mit.broad.dcp.message; + + +public class DistributedMessage +{ + public DistributedMessage() { + } + + public Integer getSenderWorkerId() { + return mSenderWorkerId; + } + + public void setSenderWorkerId(Integer value) { + mSenderWorkerId = value; + } + + public Integer getSenderProcessId() { + return mSenderProcessId; + } + + public void setSenderProcessId(Integer value) { + mSenderProcessId = value; + } + + public Integer getReceiverWorkerId() { + return mReceiverWorkerId; + } + + public void setReceiverWorkerId(Integer value) { + mReceiverWorkerId = value; + } + + public Integer getReceiverProcessId() { + return mReceiverProcessId; + } + + public void setReceiverProcessId(Integer value) { + mReceiverProcessId = value; + } + + public Integer mSenderWorkerId; + public Integer mSenderProcessId; + public Integer mReceiverWorkerId; + public Integer mReceiverProcessId; +} diff --git a/lib/edu/mit/broad/picard/PicardException.java b/lib/edu/mit/broad/picard/PicardException.java new file mode 100644 index 0000000000..4e36ba6484 --- /dev/null +++ b/lib/edu/mit/broad/picard/PicardException.java @@ -0,0 +1,27 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard; + +/** + * Basic Picard runtime exception that, for now, does nothing much + * + * @author Kathleen Tibbetts + */ +public class PicardException extends RuntimeException +{ + public PicardException(String message) { + super(message); + } + + public PicardException(String message, Throwable throwable) { + super(message, throwable); + } + +} diff --git a/lib/edu/mit/broad/picard/aligner/AbstractBaseAligner.java b/lib/edu/mit/broad/picard/aligner/AbstractBaseAligner.java new file mode 100644 index 0000000000..54f0ab9aa4 --- /dev/null +++ b/lib/edu/mit/broad/picard/aligner/AbstractBaseAligner.java @@ -0,0 +1,97 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.aligner; + +import edu.mit.broad.picard.io.IoUtil; + +import java.io.File; +import java.io.IOException; +import java.util.Map; +import java.util.List; + +/** + * Abstract base class for use by Aligner implementations. Provides a constructor and + * accessors for common inputs and outputs. 
+ * + * @author Kathleen Tibbetts + */ +public abstract class AbstractBaseAligner implements Aligner { + + private final Stringency stringency; // The stringency of the alignment + private final File readsBamFile; // The BAM file containing the read data + private final String outputPrefix; // The directory and file name prefix for outputs + private final String referenceFileDir; // The directory where the reference file can be found + private final int clipPoints[]; // The clip points to use + private final Integer expectedInsertSize; // Expected insert size; null for non-paired-end lanes + private final Integer readsToAlign; // The number of reads to align (all if null) + private final boolean pairedReads; // Whether this is a paired-end run + private final int readLength; + // Parameters specific to the Aligner implementation being used + private final Map customParametersMap; + + /** + * Constructor that sets every parameter. + * + * @param stringency the stringency of the alignment + * @param readsBamFile the BAM file containing the reads + * @param outputPrefix the directory and filename prefix for output + * @param referenceFileDir the directory where the reference file is located + * @param clipPoints the clip points + * @param expectedInsertSize the expected insert size (null for non-PE lanes) + * @param readsToAlign the number of reads to align + * @param customParametersMap parameters specific to the Aligner implementation + */ + public AbstractBaseAligner(Stringency stringency, File readsBamFile, String outputPrefix, + String referenceFileDir, int clipPoints[], Integer expectedInsertSize, + Integer readsToAlign, Map customParametersMap, + boolean pairedReads, int readLength) { + + // First, a little validation + if (clipPoints != null && clipPoints.length != 4) { + throw new IllegalArgumentException("Length of clipPoints array argument must be 4."); + } + IoUtil.assertFileIsReadable(readsBamFile); + + this.stringency = stringency; + this.readsBamFile = 
readsBamFile; + this.outputPrefix = outputPrefix; + this.referenceFileDir = referenceFileDir; + this.clipPoints = clipPoints != null ? clipPoints : new int[4]; + this.expectedInsertSize = expectedInsertSize; + this.readsToAlign = readsToAlign; + this.customParametersMap = customParametersMap; + this.pairedReads = pairedReads; + this.readLength = readLength; + } + + /** + * Utility method for deleting a list of files, to be used by the + * cleanup method of sub-classes + * + * @param files the list of files to delete + */ + protected final void deleteFiles(List files) { + for (File f : files) { + f.delete(); + } + } + + // Accessors + protected final Stringency getStringency() { return stringency; } + protected final File getReadsBamFile() { return readsBamFile; } + protected final String getOutputPrefix() { return outputPrefix; } + protected final String getReferenceFileDir() { return referenceFileDir; } + protected final int[] getClipPoints() { return clipPoints; } + protected final Integer getExpectedInsertSize() { return expectedInsertSize; } + protected final Integer getReadsToAlign() { return readsToAlign; } + protected final Map getCustomParametersMap() { return customParametersMap; } + protected final boolean isPairedReads() { return pairedReads; } + protected final int getReadLength() { return readLength; } +} diff --git a/lib/edu/mit/broad/picard/aligner/Aligner.java b/lib/edu/mit/broad/picard/aligner/Aligner.java new file mode 100644 index 0000000000..d0fdf47deb --- /dev/null +++ b/lib/edu/mit/broad/picard/aligner/Aligner.java @@ -0,0 +1,45 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.aligner; + +/** + * API for aligners. Clients must call these methods in order, as each depends on + * the previous one, but they may call them multiple times and need not call them all. + * This allows steps to be rerun and also lets the caller review intermediate files + * when troubleshooting. + * + * @author Kathleen Tibbetts + */ +public interface Aligner { + + public static enum Stringency{ low, high }; + + /** + * Prepares all the necessary inputs for the alignment process from a BAM file of read data. + */ + public void prepareInputs(); + + /** + * Does the alignment and produces output in the underlying form of the aligner. + */ + public void align(); + + /** + * Converts the output of the aligner to BAM format + */ + public void prepareOutput(); + + /** + * Cleans up intermediate files (the files created in by and for the underlying aligner by the + * prepareInputs() and align() methods. Does not clean up the original source files or the final BAM file. + */ + public void cleanup(); + +} diff --git a/lib/edu/mit/broad/picard/aligner/maq/BamToBfqWriter.java b/lib/edu/mit/broad/picard/aligner/maq/BamToBfqWriter.java new file mode 100644 index 0000000000..1f3cd55ac8 --- /dev/null +++ b/lib/edu/mit/broad/picard/aligner/maq/BamToBfqWriter.java @@ -0,0 +1,319 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.aligner.maq; + +import edu.mit.broad.sam.SAMFileReader; +import edu.mit.broad.sam.SAMRecord; +import edu.mit.broad.sam.util.BinaryCodec; +import edu.mit.broad.picard.io.IoUtil; +import edu.mit.broad.picard.PicardException; +import edu.mit.broad.picard.filter.*; +import edu.mit.broad.picard.util.PeekableIterator; +import edu.mit.broad.picard.util.Log; +import edu.mit.broad.picard.sam.ReservedTagConstants; + +import java.io.File; +import java.util.List; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.Arrays; + +/** + * Class to take unmapped reads in BAM file format and create Maq binary fastq format file(s) -- + * one or two of them, depending on whether it's a paired-end read. This relies on the unmapped + * BAM file having all paired reads together in order. + */ +public class BamToBfqWriter { + + private final File bamFile; + private final String outputPrefix; + private boolean pairedReads = false; + private int wrote = 0; + private int increment = 1; + private int chunk = 0; + private BinaryCodec codec1; + private BinaryCodec codec2; + private final Log log = Log.getInstance(BamToBfqWriter.class); + + /** + * Constructor + * + * @param bamFile the BAM file to read from + * @param outputPrefix the directory and file prefix for the binary fastq files + * @param total the total number of records that should be written, drawn evenly + * from throughout the file (null for all). 
+ * @param chunk the maximum number of records that should be written to any one file + * @param pairedReads whether these reads are from a paired-end run + */ + public BamToBfqWriter(File bamFile, String outputPrefix, Integer total, Integer chunk, boolean pairedReads) { + this.bamFile = bamFile; + this.outputPrefix = outputPrefix; + this.pairedReads = pairedReads; + if (total != null) { + double writeable = (double)countWritableRecords(); + this.increment = (int)Math.floor(writeable/total.doubleValue()); + } + if (chunk != null) { + this.chunk = chunk; + } + } + + /** + * Constructor + * + * @param bamFile the BAM file to read from + * @param outputPrefix the directory and file prefix for the binary fastq files + * @param pairedReads whether these reads are from a paired-end run + */ + public BamToBfqWriter(File bamFile, String outputPrefix, boolean pairedReads) { + this(bamFile, outputPrefix, null, null, pairedReads); + } + + /** + * Writes the binary fastq file(s) to the output directory + */ + public void writeBfqFiles() { + + Iterator iterator = (new SAMFileReader(IoUtil.openFileForReading(this.bamFile))).iterator(); + + // Filter out noise reads and reads that fail the quality filter + TagFilter tagFilter = new TagFilter(ReservedTagConstants.XN, 1); + FailsVendorReadQualityFilter qualityFilter = new FailsVendorReadQualityFilter(); + + if (!pairedReads) { + writeSingleEndBfqs(iterator, Arrays.asList(tagFilter, qualityFilter)); + codec1.close(); + } + else { + writePairedEndBfqs(iterator, tagFilter, qualityFilter); + codec1.close(); + codec2.close(); + } + log.info("Wrote " + wrote + " bfq records."); + + } + + /** + * Path for writing bfqs for paired-end reads + * + * @param iterator the iterator with the SAM Records to write + * @param tagFilter the filter for noise reads + * @param qualityFilter the filter for PF reads + */ + private void writePairedEndBfqs(Iterator iterator, TagFilter tagFilter, + FailsVendorReadQualityFilter qualityFilter) { + // Open the
codecs for writing + int fileIndex = 0; + initializeNextBfqFiles(fileIndex++); + + int records = 0; + + while (iterator.hasNext()) { + SAMRecord first = iterator.next(); + if (!iterator.hasNext()) { + throw new PicardException("Mismatched number of records in " + this.bamFile.getAbsolutePath()); + } + SAMRecord second = iterator.next(); + if (!second.getReadName().equals(first.getReadName()) || + first.getFirstOfPairFlag() == second.getFirstOfPairFlag()) { + throw new PicardException("Unmatched read pairs in " + this.bamFile.getAbsolutePath() + + ": " + first.getReadName() + ", " + second.getReadName() + "."); + } + + // If both are noise reads, filter them out + if (tagFilter.filterOut(first) && tagFilter.filterOut(second)) { + // skip it + } + // If either fails to pass filter, then exclude them as well + else if (qualityFilter.filterOut(first) || qualityFilter.filterOut(second)) { + // skip it + } + // Otherwise, write them out + else { + records++; + if (records % increment == 0) { + first.setReadName(first.getReadName() + "#0/1"); + writeFastqRecord(first.getFirstOfPairFlag() ? codec1 : codec2, first); + second.setReadName(second.getReadName() + "#0/2"); + writeFastqRecord(second.getFirstOfPairFlag() ? 
codec1 : codec2, second); + wrote++; + if (wrote % 1000000 == 0) { + log.info(wrote + " records written."); + } + if (chunk > 0 && wrote % chunk == 0) { + initializeNextBfqFiles(fileIndex++); + } + } + } + } + } + + /** + * Path for writing bfqs for single-end reads + * + * @param iterator the iterator with the SAM Records to write + * @param filters the list of filters to be applied + */ + private void writeSingleEndBfqs(Iterator iterator, List filters) { + + // Open the codecs for writing + int fileIndex = 0; + initializeNextBfqFiles(fileIndex++); + + int records = 0; + + FilteringIterator it = new FilteringIterator(iterator, new AggregateFilter(filters)); + while (it.hasNext()) { + SAMRecord record = it.next(); + records++; + if (records % increment == 0) { + + writeFastqRecord(codec1, record); + wrote++; + if (wrote % 1000000 == 0) { + log.info(wrote + " records processed."); + } + if (chunk > 0 && wrote % chunk == 0) { + initializeNextBfqFiles(fileIndex++); + } + } + } + } + + /** + * Closes the open bfq file(s), if any, and opens the new one(s) + * + * @param fileIndex the index (counter) of the files to write + */ + private void initializeNextBfqFiles(int fileIndex) { + // Close the codecs if they were writing before + if (codec1 != null) { + codec1.close(); + if (pairedReads) { + codec2.close(); + } + } + + // Open new file, using the fileIndex.
+ File bfq1 = getOutputFile(this.outputPrefix , 1, fileIndex); + codec1 = new BinaryCodec(IoUtil.openFileForWriting(bfq1)); + log.info("Now writing to file " + bfq1.getAbsolutePath()); + if (pairedReads) { + File bfq2 = getOutputFile(this.outputPrefix , 2, fileIndex); + codec2 = new BinaryCodec(IoUtil.openFileForWriting(bfq2)); + log.info("Now writing to file " + bfq2.getAbsolutePath()); + } + } + + /** + * Writes out a SAMRecord in Maq fastq format + * + * @param codec the code to write to + * @param rec the SAMRecord to write + */ + private void writeFastqRecord(BinaryCodec codec, SAMRecord rec) { + + // Writes the length of the read name and then the name (null-terminated) + codec.writeString(rec.getReadName(), true, true); + + char seqs[] = rec.getReadString().toCharArray(); + char quals[] = rec.getBaseQualityString().toCharArray(); + + // Write the length of the sequence + codec.writeInt(seqs.length); + + // Calculate and write the sequence and qualities + byte seqsAndQuals[] = new byte[seqs.length]; + + for (int i = 0; i < seqs.length; i++) { + int quality = Math.min(quals[i]-33, 63); + int base; + switch(seqs[i]) { + case 'A': + case 'a': + base = 0; + break; + case 'C': + case 'c': + base = 1; + break; + case 'G': + case 'g': + base = 2; + break; + case 'T': + case 't': + base = 3; + break; + case 'N': + case 'n': + case '.': + base = 0; + quality = 0; + break; + default: + throw new PicardException("Unknown base when writing bfq file: " + seqs[i]); + } + seqsAndQuals[i] = (byte) (base << 6 | quality); + } + codec.writeBytes(seqsAndQuals); + } + + private int countWritableRecords() { + int count = 0; + PeekableIterator it = new PeekableIterator((new SAMFileReader(IoUtil.openFileForReading(this.bamFile))).iterator()); + if (!this.pairedReads) { + // Filter out noise reads and reads that fail the quality filter + List filters = new ArrayList(); + filters.add(new TagFilter(ReservedTagConstants.XN, 1)); + filters.add(new FailsVendorReadQualityFilter()); + 
FilteringIterator itr = new FilteringIterator(it, new AggregateFilter(filters)); + while (itr.hasNext()) { + itr.next(); + count++; + } + } + else { + while (it.hasNext()) { + SAMRecord first = it.next(); + SAMRecord second = it.next(); + // If both are noise reads, filter them out + if (first.getAttribute(ReservedTagConstants.XN) != null && + second.getAttribute(ReservedTagConstants.XN) != null) { + // skip it + } + // If either fails to pass filter, then exclude them as well + else if (first.getReadFailsVendorQualityCheckFlag() || second.getReadFailsVendorQualityCheckFlag() ) { + // skip it + } + // Otherwise, write them out + else { + count++; + } + } + } + it.close(); + return count; + } + + /** + * Constructs the name for the output file and returns the file + * + * @param outputPrefix the directory and file prefix for the output bfq file + * @param read whether this is the file for the first or second read + * @return a new File object for the bfq file. + */ + private File getOutputFile(String outputPrefix, int read, int index) { + File result = new File(outputPrefix + "." + index + "." + read + ".bfq"); + IoUtil.assertFileIsWritable(result); + return result; + } + +} diff --git a/lib/edu/mit/broad/picard/aligner/maq/MapFileIterator.java b/lib/edu/mit/broad/picard/aligner/maq/MapFileIterator.java new file mode 100644 index 0000000000..af55741853 --- /dev/null +++ b/lib/edu/mit/broad/picard/aligner/maq/MapFileIterator.java @@ -0,0 +1,357 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.aligner.maq; + +import edu.mit.broad.sam.*; +import edu.mit.broad.sam.util.CloseableIterator; +import edu.mit.broad.sam.util.BinaryCodec; +import edu.mit.broad.sam.util.StringUtil; +import edu.mit.broad.picard.io.IoUtil; +import edu.mit.broad.picard.PicardException; +import edu.mit.broad.picard.util.SamPairUtil; + +import java.io.File; +import java.io.BufferedInputStream; +import java.util.*; + +/** + * Reads a Maq map file and returns an an iterator of SAMRecords and a populated header + * + * IMPORTANT! Even though the reads in the map file are in coordinate order, this iterator + * will not necessarily return them in that order. For paired reads, both will be + * returned only after *both* records have been seen. + * + * @author Kathleen Tibbetts + */ +public class MapFileIterator implements CloseableIterator { + + public static final int MATE_UNMAPPED_FLAG = 64; + public static final int READ_UNMAPPED_FLAG = 192; + + private static final int READ_NAME_LENGTH = 36; + private static final int MAP_FORMAT = -1; + private static final int MAX_READ_LENGTH = 128; + + private static final byte ACGT[] = {'A', 'C', 'G', 'T'}; + + public static final String PROGRAM_RECORD = "0"; + + private long recordCount = 0L; + private int recordsRead = 0; + private BinaryCodec mapCodec; + private final SAMFileHeader header; + private final boolean pairedReads; + private final boolean jumpingLibrary; + private final List next = new ArrayList(); + private final Map pending = new HashMap(); + private final List mapFiles = new LinkedList(); + + /** + * Constructor. 
Opens the map file, reads the record count and header from it, + * creates the SAMFileHeader, and queues up the first read + * + * @param mapFile The Maq map file to read + * @param commandLine The command line used to invoke Maq (for the header) + * @param pairedReads Whether this is a paired-end run + */ + public MapFileIterator(String commandLine, boolean pairedReads, boolean jumpingLibrary, File... mapFile) { + if (mapFile.length == 0) { + throw new IllegalArgumentException("At least one map file must be provided."); + } + mapFiles.addAll(Arrays.asList(mapFile)); + + this.pairedReads = pairedReads; + this.jumpingLibrary = jumpingLibrary; + + header = new SAMFileHeader(); + header.setSortOrder(SAMFileHeader.SortOrder.coordinate); + SAMProgramRecord program = new SAMProgramRecord(PROGRAM_RECORD); + program.setProgramVersion(MaqConstants.getProgramVersion()); + program.setCommandLine(commandLine); + header.addProgramRecord(program); + + queueNextMapFile(); + } + + /** + * Queues up the next map file + * + * @return true if there's another map file to iterate over + */ + private boolean queueNextMapFile() { + + // Close the old file + if (mapCodec != null) { + mapCodec.close(); + } + + // If there are no more map files, return false + if (mapFiles.size() == 0) { + return false; + } + + // Otherwise, open the next file and reset the recordsRead count + mapCodec = new BinaryCodec(new BufferedInputStream(IoUtil.openFileForReading(mapFiles.remove(0)))); + int format = mapCodec.readInt(); + if (format != MAP_FORMAT) { + mapCodec.close(); + throw new PicardException("Unrecognized Maq map file format: " + format); + } + recordsRead = 0; + + + // Read the sequences out of the map file and set them on the header + int sequenceCount = mapCodec.readInt(); + List sequences = new ArrayList(); + for (int i = 0; i < sequenceCount; i++) { + int length = mapCodec.readInt(); + // Write the sequence name, trimming off the null terminator + sequences.add(new
SAMSequenceRecord(mapCodec.readString(length).substring(0, length-1))); + } + if (header.getSequences() == null || header.getSequences().size() == 0) { + header.setSequences(sequences); + } + else { + // TODO: Check that the sequences match and throw an exception if they don't + } + recordCount = mapCodec.readLong(); + + readNext(); + return true; + } + + /** + * Closes the BinaryCodec reading the map file + */ + public void close() { + mapCodec.close(); + } + + /** + * @return true if the iteration has more elements + */ + public boolean hasNext() { + return next.size() > 0; + } + + /** + * @return the next SAMRecord in the iteration + * @throws NoSuchElementException if this is called when hasNext() returns false + */ + public SAMRecord next() { + if (!hasNext()) { + throw new NoSuchElementException("No more elements in this iteration"); + } + SAMRecord result = next.remove(0); + readNext(); + return result; + } + + /** + * Reads the next element from the map file. If we are done with it, we put it in the next + * list; if we are waiting to see its mate, we put it in the pending map. Calls itself + * repeatedly until there is at least one element in next. + */ + private void readNext() { + + // If there's already a record queued up, just return + if (next.size() > 0) { + return; + } + + // If we've read all there is, then any remaining records in the pending map should be returned. + // If this is not a PE run, then the pending map will be empty and we're done.
+ if (recordsRead == recordCount) { + if (pending.size() > 0) { + StringBuffer sb = new StringBuffer(); + for (String item : pending.keySet()) { + sb.append(item).append("\n"); + } + throw new PicardException("MapFileIterator pending map should have been empty but contained " + + "the following records: " + sb.toString()); + } + queueNextMapFile(); + return; + } + + // Otherwise, we read until there is at least one record in the next list + readMapRecord(); + if (next.size() == 0) { + readNext(); + } + } + + /** + * Reads one record from the map file and throws it onto the pending map or the next list, + * depending on whether we have already seen its mate + */ + private void readMapRecord() { + + // Now that we've got all the data from the binary file, write a SAMRecord and add it to + // the new BAM file + SAMRecord record = new SAMRecord(); + record.setAttribute(SAMTag.PG.toString(), PROGRAM_RECORD); + record.setReadPairedFlag(this.pairedReads); + + // the last base is the single-end mapping quality. + byte seqsAndQuals[] = new byte[MAX_READ_LENGTH-1]; + mapCodec.readBytes(seqsAndQuals); + + byte singleEndMappingQualityOrIndelLength = mapCodec.readByte(); + + // the length of the read + int readLength = mapCodec.readUByte(); + setSeqsAndQuals(seqsAndQuals, readLength, record); + + // the final mapping quality (unless flag below is 130, then it is the + // position of the indel (or 0 if no indel) + int mappingQuality = mapCodec.readUByte(); + + // mismatches in the 28bp (higher 4 bits) and mismatches (lower 4 bits) + mapCodec.readUByte(); + // sum of errors of the best hit + mapCodec.readUByte(); + // counts of all 0- and 1-mismatch hits on the reference + mapCodec.readUByte(); + mapCodec.readUByte(); + + // A bitwise flag. 
See the Maq docs for its full meaning + int flag = mapCodec.readUByte(); + + // the lower mapQ of the two ends (equals map_qual if unpaired); if flag is 130: mapQ of its mate + int altQual = mapCodec.readUByte(); + + // Index of the sequence for this read + record.setReferenceIndex((int)mapCodec.readUInt(), getHeader()); + + // Start position and strand + long pos = mapCodec.readUInt(); + int startPos = ((int)((pos>>1)& 0x7FFFFFFF)) + 1; + record.setAlignmentStart(startPos); + record.setReadNegativeStrandFlag((pos&1) == 1); + + // offset of the mate (zero if unpaired, or two ends mapped to different chr) + mapCodec.readInt(); + + // The read name + byte nameBytes[] = new byte[READ_NAME_LENGTH]; + mapCodec.readBytes(nameBytes); + String name = StringUtil.bytesToString(nameBytes).trim(); + if (this.pairedReads) { + if (name.endsWith("/1")) { + record.setFirstOfPairFlag(true); + record.setSecondOfPairFlag(false); + } + else if (name.endsWith("/2")) { + record.setFirstOfPairFlag(false); + record.setSecondOfPairFlag(true); + } + else { + throw new PicardException("Unrecognized ending for paired read name: " + name); + } + name = name.substring(0, name.length()-2); + } + record.setReadName(name); + + + if (flag != 130 || singleEndMappingQualityOrIndelLength == 0) { // No indel + record.setCigarString(readLength + "M"); + record.setMappingQuality(mappingQuality); + } + else { // Indel + int indelPos = mappingQuality; + String cigar = indelPos + "M" + Math.abs(singleEndMappingQualityOrIndelLength); + int remaining = readLength - indelPos; + if (singleEndMappingQualityOrIndelLength > 0) { + cigar += "I" + (remaining - singleEndMappingQualityOrIndelLength) + "M"; + } + else { + cigar += "D" + remaining + "M"; + } + record.setCigarString(cigar); + // In the docs, it look like there is a mapping quality for the mate, do we use that? 
record.setMappingQuality(altQual); + } + + if (!pairedReads) { + record.setProperPairFlag(false); + next.add(record); + } + else { + record.setMateUnmappedFlag(flag == MATE_UNMAPPED_FLAG); + SAMRecord mate = pending.remove(record.getReadName()); + + if (mate != null) { + boolean proper = SamPairUtil.isProperPair(record, mate, jumpingLibrary); + record.setProperPairFlag(proper); + mate.setProperPairFlag(proper); + + SamPairUtil.setMateInfo(record, mate); + + int insertSize = SamPairUtil.computeInsertSize(record, mate); + record.setInferredInsertSize(insertSize); + mate.setInferredInsertSize(insertSize); + + if (!mate.getMateUnmappedFlag()) { + next.add(record); + } + if (!record.getMateUnmappedFlag()) { + next.add(mate); + } + } + else { + pending.put(record.getReadName(), record); + } + } + + // TODO: Figure out what to do about noise reads long-term + // Note that it is possible that we have lost a "Noise read" annotation at this point. Since + // we try to map a pair if only one of the reads is classified as "noise", then for any paired + // reads where one was a noise read and one was not, we will lose the noise annotation on the + // one noisy read. We have discussed either re-doing the noise evaluation here, modifying the + // read name to carry the noise flag through Maq, or changing what reads we give to Maq.
+ + recordsRead++; + + } + + /** + * Decodes the sequence and the qualities and sets them on the SAMrecords + * + * @param seqsAndQuals the list of seqs and quals + * @param readLength the length of the read + * @param sam the SAMRecord to populate + */ + private void setSeqsAndQuals(byte seqsAndQuals[], int readLength, SAMRecord sam) { + byte sequence[] = new byte[readLength]; + byte qualities[] = new byte[readLength]; + for (int i = 0; i < readLength; i++) { + byte b = seqsAndQuals[i]; + qualities[i] = (byte)(b & 0x3F); + if (b == 0) { + sequence[i] = 'N'; + } + else { + sequence[i] = ACGT[(seqsAndQuals[i] >> 6) & 3]; + } + } + sam.setReadBases(sequence); + sam.setBaseQualities(qualities); + } + + /** + * @throws UnsupportedOperationException -- not implemented + */ + public void remove() { + throw new UnsupportedOperationException("remove() not supported in MapFileIterator"); + } + + public SAMFileHeader getHeader() { return header; } +} diff --git a/lib/edu/mit/broad/picard/aligner/maq/MaqAligner.java b/lib/edu/mit/broad/picard/aligner/maq/MaqAligner.java new file mode 100644 index 0000000000..6c1890818b --- /dev/null +++ b/lib/edu/mit/broad/picard/aligner/maq/MaqAligner.java @@ -0,0 +1,211 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.aligner.maq; + +import edu.mit.broad.picard.aligner.Aligner; +import edu.mit.broad.picard.aligner.AbstractBaseAligner; +import edu.mit.broad.picard.PicardException; +import edu.mit.broad.picard.util.Log; + +import java.io.File; +import java.io.FilenameFilter; +import java.util.*; + +/** + * Maq implementation of the Aligner interface + */ +public class MaqAligner extends AbstractBaseAligner implements Aligner { + + // Constants related to Maq output files + public static final String MAQ_MAP_SUFFIX = ".out.aln.map"; + public static final String MAQ_LOG_SUFFIX = ".out.map.log"; + + // Internal constant for multi-plexing lane data + private static final int READ_CHUNK_SIZE = 2000000; + + public static final String REFERENCE_FILE_SUFFIX = ".bfa"; + + private final Log log = Log.getInstance(MaqAligner.class); + + private String commandLine = null; + + + /** + * Constructor that sets every parameter. All other constructors delegate to this one. + * + * @param stringency the stringency of the alignment + * @param readsBamFile the BAM file containing the reads + * @param outputPrefix the directory and filename prefix for output + * @param referenceFileDir the directory where the reference file is located + * @param clipPoints the clip points + * @param expectedInsertSize the expected insert size (null for non-PE lanes) + * @param readsToAlign the number of reads to align + * @param customParametersMap parameters specific to the Aligner implementation + */ + public MaqAligner(Stringency stringency, File readsBamFile, String outputPrefix, + String referenceFileDir, int clipPoints[], Integer expectedInsertSize, + Integer readsToAlign, Map customParametersMap, + boolean pairedReads, int readLength) { + + super(stringency, readsBamFile, outputPrefix, referenceFileDir, clipPoints, + expectedInsertSize, readsToAlign, customParametersMap, pairedReads, readLength); + } + + /** + * Prepares all the necessary inputs for the alignment process from a 
BAM file of read data. + */ + public void prepareInputs() { + log.info("Preparing Maq inputs."); + BamToBfqWriter writer = new BamToBfqWriter(this.getReadsBamFile(), this.getOutputPrefix(), + this.getReadsToAlign(), READ_CHUNK_SIZE, isPairedReads()); + writer.writeBfqFiles(); + } + + /** + * Does the alignment and produces output in the underlying form of the aligner. + */ + public void align() { + log.info("Running Maq alignment."); + + // Temporary hack until we get the multi-tasking code from Seva + List mapFileNames = new ArrayList(); // All map files that we will merge together at the end + + String maqParams = MaqConstants.SWITCH_RANDOM_SEED + " " + MaqConstants.DEFAULT_RANDOM_SEED; + + if (this.getStringency() == Stringency.high) { + maqParams += " " + MaqConstants.SWITCH_MAX_OUTER_DISTANCE + " " + Math.round( + this.getExpectedInsertSize() * MaqConstants.HIGH_STRINGENCY_MAX_OUTER_DISTANCE_MULTIPLIER); + maqParams += " " + MaqConstants.SWITCH_SUM_MISMATCHES + " " + + MaqConstants.HIGH_STRINGENCY_SUM_MISMATCHES; + } + else { + maqParams += " " + MaqConstants.SWITCH_MAX_OUTER_DISTANCE + " " + + MaqConstants.LOW_STRINGENCY_MAX_OUTER_DISTANCE; + // For low stringency, get at least 30 bases and then let half of what's remaining mismatch + int maxMisMatches = (this.getReadLength() - 30)/2; + maqParams += " " + MaqConstants.SWITCH_SUM_MISMATCHES + " " + + (maxMisMatches * MaqConstants.LOW_STRINGENCY_QUALITY_FOR_MISMATCHES); + } + + String referenceFile = new File(this.getReferenceFileDir()).listFiles(new FilenameFilter() { + public boolean accept(File dir, String name) { + return name.endsWith(REFERENCE_FILE_SUFFIX); + } + })[0].getAbsolutePath(); + + ProcessBuilder builder; + + // Map the bfq files, individually or in pairs + SortedSet bfqs = new TreeSet(this.getBfqFiles()); + for (Iterator it = bfqs.iterator(); it.hasNext();) { + + String read1bfq = it.next().getAbsolutePath(); + String read2bfq = (this.isPairedReads()) ? 
it.next().getAbsolutePath() : ""; + + String outputFileBase = read1bfq.substring(0, read1bfq.lastIndexOf('.')-2); + String mapFile = outputFileBase + MAQ_MAP_SUFFIX; + String logFile = outputFileBase + MAQ_LOG_SUFFIX; + + String command = MaqConstants.MAQ_HOME + MaqConstants.MAQ_COMMAND + " " + MaqConstants.MAP_COMMAND + + " " + maqParams + " " + mapFile + " " + referenceFile + " " + read1bfq + " " + read2bfq + + " 2> " + logFile; + setCommandLine(getCommandLine() == null ? command : getCommandLine() + ";" + command); + log.info("Executing command: " + command); + try { + builder = new ProcessBuilder(command.split(" ")); + Process p = builder.start(); + p.waitFor(); + } + catch (Exception e) { + throw new PicardException("Error starting Maq process", e); + } + + mapFileNames.add(mapFile); + } + + // If there's more than one map file, then merge them. + String finalFileName = this.getOutputPrefix() + "." + this.getStringency() + MAQ_MAP_SUFFIX; + if (mapFileNames.size() > 1) { + String command = MaqConstants.MAQ_HOME + MaqConstants.MAQ_COMMAND + " " + + MaqConstants.MERGE_COMMAND + " " + finalFileName; + for (String name : mapFileNames) { + command += " " + name; + } + setCommandLine(getCommandLine() == null ? 
command : getCommandLine() + ";" + command); + log.info("Executing command: " + command); + + try { + builder = new ProcessBuilder(command.split(" ")); + Process p = builder.start(); + p.waitFor(); + } + catch (Exception e) { + throw new PicardException("Error starting Maq process", e); + } + } + else { // Otherwise rename the single map file so we can find it later + File f = new File(mapFileNames.get(0)); + if (!f.renameTo(new File(finalFileName))) { + throw new PicardException("Error renaming " + f.getAbsolutePath() + " to " + finalFileName); + } + } + } + + /** + * Converts the output of the aligner to BAM format + */ + public void prepareOutput() { + log.info("Preparing output from Maq alignment."); + // TODO: MaqToBam + } + + /** + * Cleans up intermediate files (the files created in by and for the underlying aligner by the + * prepareInputs() and align() methods. Does not clean up the original source files or the final BAM file. + */ + public void cleanup() { + log.info("Cleaning up Maq intermediate files."); + this.deleteFiles(getBfqFiles()); +// this.deleteFiles(getMaqAlignmentFiles()); + } + + /** + * Returns a list of zero to two BFQ files, depending on whether they are there + * and whether it was a paired-end run or not + * + * @return a list of BFQ files + */ + private List getBfqFiles() { + File dir = new File(this.getOutputPrefix().substring(0, this.getOutputPrefix().lastIndexOf("/"))); + return Arrays.asList(dir.listFiles(new FilenameFilter() { + public boolean accept(File dir, String name) { + return name.endsWith(".bfq"); + } + })); + } + + /** + * Returns the Maq map files + * + * @return a list of Maq .map files + */ + private List getMaqAlignmentFiles() { + File dir = new File(this.getOutputPrefix().substring(0, this.getOutputPrefix().lastIndexOf("/"))); + return Arrays.asList(dir.listFiles(new FilenameFilter() { + public boolean accept(File dir, String name) { + // TODO: Add the text files if we do not read the binary map files + return 
name.endsWith(MAQ_MAP_SUFFIX) || name.endsWith(MAQ_LOG_SUFFIX); + } + })); + } + + public String getCommandLine() { return commandLine; } + public void setCommandLine(String commandLine) { this.commandLine = commandLine; } +} diff --git a/lib/edu/mit/broad/picard/aligner/maq/MaqConstants.java b/lib/edu/mit/broad/picard/aligner/maq/MaqConstants.java new file mode 100644 index 0000000000..b5e4b9b59b --- /dev/null +++ b/lib/edu/mit/broad/picard/aligner/maq/MaqConstants.java @@ -0,0 +1,39 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.aligner.maq; + +/** + * Utility class to hold Maq-related constants (program name, location, switches, etc) + */ +public class MaqConstants { + // General Maq constants + public static final String PROGRAM_NAME = "Maq"; + public static final String PROGRAM_VERSION = "0.7.1"; + public static final String MAQ_HOME = "/seq/dirseq/maq-0.7.1/"; + + // Command-related constants + public static final String MAQ_COMMAND = "maq"; + public static final String MAP_COMMAND = "map"; + public static final String MERGE_COMMAND = "mapmerge"; + + // Constants related to Maq map switches + public static final String SWITCH_SUM_MISMATCHES = "-e"; + public static final int HIGH_STRINGENCY_SUM_MISMATCHES = 100; + public static final int LOW_STRINGENCY_QUALITY_FOR_MISMATCHES = 30; + + public static final String SWITCH_MAX_OUTER_DISTANCE = "-a"; + public static final int LOW_STRINGENCY_MAX_OUTER_DISTANCE = 1500; + public static final double HIGH_STRINGENCY_MAX_OUTER_DISTANCE_MULTIPLIER = 1.5d; + + public static final String SWITCH_RANDOM_SEED = "-s"; + public static final int 
DEFAULT_RANDOM_SEED = 0; + + public static String getProgramVersion() { return PROGRAM_VERSION; } +} diff --git a/lib/edu/mit/broad/picard/aligner/maq/MaqMapMerger.java b/lib/edu/mit/broad/picard/aligner/maq/MaqMapMerger.java new file mode 100644 index 0000000000..3b82cc1063 --- /dev/null +++ b/lib/edu/mit/broad/picard/aligner/maq/MaqMapMerger.java @@ -0,0 +1,125 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.aligner.maq; + +import edu.mit.broad.picard.io.IoUtil; +import edu.mit.broad.picard.util.StringSortingCollectionFactory; +import edu.mit.broad.picard.util.Log; +import edu.mit.broad.picard.PicardException; +import edu.mit.broad.sam.util.SortingCollection; +import edu.mit.broad.sam.util.BinaryCodec; +import edu.mit.broad.sam.util.CloseableIterator; +import edu.mit.broad.sam.*; + +import java.io.File; +import java.io.BufferedInputStream; +import java.util.List; +import java.util.ArrayList; +import java.util.Iterator; +import java.nio.ByteBuffer; + +/** + * Class to write a BAM file that includes the results from a Maq .map file along with the unaligned + * reads from the original BAM file. 
+ * + * Information on the meaning of the elements of the map file is drawn from the Maq documentation + * on this page: http://maq.sourceforge.net/maqmap_format.shtml + */ +public class MaqMapMerger { + + private final File mapFile; + private final File sourceBamFile; + private final File targetBamFile; + private final boolean pairedReads; + private final Log log = Log.getInstance(MaqMapMerger.class); + private String commandLine = null; + private List sequences = new ArrayList(); + + + /** + * Constructor + * + * @param mapFile The Maq map file to parse + * @param sourceBamFile The BAM file that was used as the input to the Maq aligner, which will + * include info on all the reads that did not map + * @param targetBamFile The file to which to write the merged output + */ + public MaqMapMerger(File mapFile, File sourceBamFile, File targetBamFile, boolean pairedReads) { + IoUtil.assertFileIsReadable(mapFile); + IoUtil.assertFileIsReadable(sourceBamFile); + IoUtil.assertFileIsWritable(targetBamFile); + this.mapFile = mapFile; + this.sourceBamFile = sourceBamFile; + this.targetBamFile = targetBamFile; + this.pairedReads = pairedReads; + } + + /** + * Merges the alignment from the map file with the remaining records from the source BAM file. 
+ */ + public void mergeAlignment() { + log.info("Processing map file: " + mapFile.getAbsolutePath()); + // Write the header + MapFileIterator it = new MapFileIterator(getCommandLine(), this.pairedReads, false, this.mapFile); + SAMFileHeader header = it.getHeader(); + SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(header, false, targetBamFile); + + // Write the alignments + SortingCollection readNames = writeAlignments(it, writer); + + // We're done with the map file, so close it + it.close(); + writeUnalignedReads(writer, readNames.iterator()); + + // Now close the writer + writer.close(); + } + + + private void writeUnalignedReads(SAMFileWriter writer, CloseableIterator nameIterator) { + + int skipCount = 0; + SAMFileReader reader = new SAMFileReader(IoUtil.openFileForReading(this.sourceBamFile)); + CloseableIterator bamRecords = reader.iterator(); + + String readName = nameIterator.hasNext() ? nameIterator.next() : null; + while(bamRecords.hasNext()) { + SAMRecord rec = bamRecords.next(); + if (rec.getReadName().equals(readName)) { + // skip it and pull the next name off the name iterator + readName = nameIterator.hasNext() ? 
nameIterator.next() : null; + skipCount++; + } + else { + writer.addAlignment(rec); + } + } +System.out.println("Skipped " + skipCount + " already-aligned records."); + bamRecords.close(); + nameIterator.close(); + } + + private SortingCollection writeAlignments(MapFileIterator iterator, SAMFileWriter writer) { + +int wrote = 0; + SortingCollection readNames = StringSortingCollectionFactory.newCollection(); + while (iterator.hasNext()) { + SAMRecord record = iterator.next(); + readNames.add(record.getReadName()); + writer.addAlignment(record); +wrote++; + } +System.out.println("Wrote " + wrote + " alignment records."); + return readNames; + } + + public void setCommandLine(String commandLine) { this.commandLine = commandLine; } + public String getCommandLine() { return this.commandLine; } +} diff --git a/lib/edu/mit/broad/picard/aligner/maq/RunMaq.java b/lib/edu/mit/broad/picard/aligner/maq/RunMaq.java new file mode 100644 index 0000000000..bc3741b029 --- /dev/null +++ b/lib/edu/mit/broad/picard/aligner/maq/RunMaq.java @@ -0,0 +1,133 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.aligner.maq; + +import edu.mit.broad.picard.cmdline.CommandLineProgram; +import edu.mit.broad.picard.cmdline.Usage; +import edu.mit.broad.picard.cmdline.Option; +import edu.mit.broad.picard.aligner.Aligner; + +import java.io.File; +import java.util.Map; +import java.util.List; +import java.util.HashMap; +import java.util.ArrayList; + +/** + * CommandLineProgram to invoke the Maq aligner + * + * @author Kathleen Tibbetts + */ +public class RunMaq extends CommandLineProgram { + private static final String PROGRAM_VERSION = "1.0"; + + // The following attributes define the command-line arguments + @Usage + public String USAGE = + "Usage: " + getClass().getName() + " [options]\n\n" + + "Invoke the Maq aligner.\n" + + "Version: " + PROGRAM_VERSION +"\n"; + + @Option(shortName="I", doc="The BAM file to parse.", optional=true) + public File INPUT; + @Option(shortName="O", doc="The directory and file prefix for all output.", optional=false) + public String OUTPUT; + @Option(shortName="L", doc="The read length.", optional=false) + public Integer READ_LENGTH; + @Option(shortName="S", doc="Stringency of the alignment.", optional=true) + public Aligner.Stringency STRINGENCY; + @Option(shortName="R", doc="Directory where the reference file is located.", optional=true) + public String REFERENCE; + @Option(shortName="C", doc="Clip points for the alignment.", optional=true, minElements=0, maxElements=4) + public List CLIP_POINT = new ArrayList(); + @Option(shortName="E", doc="Expected insert size.", optional=true) + public Integer EXPECTED_INSERT_SIZE; + @Option(doc="Whether this is a paired-end run.", optional=false) + public Boolean PE; + @Option(shortName="NUM", doc="Number of reads to align (null = all).", optional=true) + public Integer READS_TO_ALIGN; + @Option(shortName="CUSTOM", doc="Custom parameter in the form name=value.", optional=true) + public List CUSTOM_PARAMETER = new ArrayList(); + @Option(shortName="PREP", 
doc="Whether to prepare inputs for the alignement.", optional=true) + public Boolean PREPARE = true; + @Option(doc="Whether to do the alignement.", optional=true) + public Boolean ALIGN = true; + @Option(shortName="BAM", doc="Whether to generate a BAM file from the alignment output.", optional=true) + public Boolean BAM_OUTPUT = true; + @Option(doc="Whether to clean up intermediate input and output.", optional=true) + public Boolean CLEANUP = true; + + protected int doWork() { + int clipPoints[] = null; + if (CLIP_POINT != null) { + clipPoints = new int[4]; + int index=0; + for (Integer i : CLIP_POINT) { + clipPoints[index++] = i; + } + } + Map params = null; + if (CUSTOM_PARAMETER != null) { + params = new HashMap(); + for (String param : CUSTOM_PARAMETER) { + String nameAndVal[] = param.split("="); + params.put(nameAndVal[0], nameAndVal[1]); + } + } + Aligner aligner = new MaqAligner(STRINGENCY, INPUT, OUTPUT, REFERENCE, clipPoints, + EXPECTED_INSERT_SIZE, READS_TO_ALIGN, params, PE, READ_LENGTH); + if (PREPARE) { + aligner.prepareInputs(); + } + if (ALIGN) { + aligner.align(); + } + if (BAM_OUTPUT) { + aligner.prepareOutput(); + } + if (CLEANUP) { + aligner.cleanup(); + } + return 0; + } + + /** + * This is kind of a mess. Almost everything is optional, since you don't have to do all of the steps in the + * alignment. 
+ * @return + */ + protected boolean customCommandLineValidation() { + if (PREPARE) { + if( INPUT == null) { + System.err.println("ERROR: INPUT must be specified when preparing inputs for the alignment."); + return false; + } + if (CLIP_POINT.size() != 0 && CLIP_POINT.size() != 4) { + System.err.println("ERROR: You must supply either 0 or 4 values for CLIP_POINT: " + CLIP_POINT.size()); + return false; + } + } + if (ALIGN) { + if (STRINGENCY == null) { + System.err.println("ERROR: STRINGENCY must be specified when doing an alignment."); + return false; + } + if (REFERENCE == null) { + System.err.println("ERROR: REFERENCE must be specified when doing an alignment."); + return false; + } + } + return true; + } + + public static void main(String[] argv) { + System.exit(new RunMaq().instanceMain(argv)); + } +} diff --git a/lib/edu/mit/broad/picard/cmdline/CommandLineParseException.java b/lib/edu/mit/broad/picard/cmdline/CommandLineParseException.java new file mode 100644 index 0000000000..cfe74bbccf --- /dev/null +++ b/lib/edu/mit/broad/picard/cmdline/CommandLineParseException.java @@ -0,0 +1,27 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.cmdline; + +public class CommandLineParseException extends RuntimeException{ + public CommandLineParseException() { + } + + public CommandLineParseException(String s) { + super(s); + } + + public CommandLineParseException(String s, Throwable throwable) { + super(s, throwable); + } + + public CommandLineParseException(Throwable throwable) { + super(throwable); + } +} diff --git a/lib/edu/mit/broad/picard/cmdline/CommandLineParser.java b/lib/edu/mit/broad/picard/cmdline/CommandLineParser.java new file mode 100644 index 0000000000..69b681abb4 --- /dev/null +++ b/lib/edu/mit/broad/picard/cmdline/CommandLineParser.java @@ -0,0 +1,638 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.cmdline; + +import java.io.*; +import java.lang.reflect.Constructor; +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.ParameterizedType; +import java.lang.reflect.Type; +import java.util.*; + +import edu.mit.broad.picard.util.StringUtil; +import edu.mit.broad.picard.PicardException; + +/** + * Annotation-driven utility for parsing command-line arguments, checking for errors, and producing usage message. + * + * This class supports options of the form KEY=VALUE, plus positional arguments. Positional arguments must not contain + * an equal sign lest they be mistaken for a KEY=VALUE pair. + * + * The caller must supply an object that both defines the command line and has the parsed options set into it. + * For each possible KEY=VALUE option, there must be a public data member annotated with @Option. 
The KEY name is + * the name of the data member. An abbreviated name may also be specified with the shortName attribute of @Option. + * If the data member is a List, then the option may be specified multiple times. The type of the data member, + * or the type of the List element must either have a ctor T(String), or must be an Enum. List options must + * be initialized by the caller with some kind of list. Any other option that is non-null is assumed to have the given + * value as a default. If an option has no default value, and does not have the optional attribute of @Option set, + * is required. For List options, minimum and maximum number of elements may be specified in the @Option annotation. + * + * A single List data member may be annotated with the @PositionalArguments. This behaves similarly to a Option + * with List data member: the caller must initialize the data member, the type must be constructable from String, and + * min and max number of elements may be specified. If no @PositionalArguments annotation appears in the object, + * then it is an error for the command line to contain positional arguments. + * + * A single String public data member may be annotated with @Usage. This string, if present, is used to + * construct the usage message. Details about the possible options are automatically appended to this string. + * If @Usage does not appear, a boilerplate usage message is used. + */ +public class CommandLineParser { + // For formatting option section of usage message. + private static final int OPTION_COLUMN_WIDTH = 30; + private static final int DESCRIPTION_COLUMN_WIDTH = 50; + + private static final Boolean[] TRUE_FALSE_VALUES = {Boolean.TRUE, Boolean.FALSE}; + + // Use these if no @Usage annotation + private static final String defaultUsagePreamble = "Usage: program [options...]\n"; + private static final String defaultUsagePreambleWithPositionalArguments = + "Usage: program [options...] 
[positional-arguments...]\n"; + private static final String OPTIONS_FILE = "OPTIONS_FILE"; + + /** + * A typical command line program will call this to get the beginning of the usage message, + * and then append a description of the program, like this: + * + * \@Usage(programVersion=PROGRAM_VERSION) + * public String USAGE = CommandLineParser.getStandardUsagePreamble(getClass()) + "Frobnicates the freebozzle." + */ + public static String getStandardUsagePreamble(Class mainClass) { + return "USAGE: " + mainClass.getName() + " [options]\n\n"; + } + + // This is the object that the caller has provided that contains annotations, + // and into which the values will be assigned. + private final Object callerOptions; + + private String usagePreamble; + // null if no @PositionalArguments annotation + private Field positionalArguments; + private int minPositionalArguments; + private int maxPositionalArguments; + + // List of all the data members with @Option annotation + private final List optionDefinitions = new ArrayList(); + + // Maps long name, and short name, if present, to an option definition that is + // also in the optionDefinitions list. + private final Map optionMap = new HashMap(); + + // For printing error messages when parsing command line. + private PrintStream messageStream; + + // In case implementation wants to get at arg for some reason. + private String[] argv; + + + /** + * This attribute is here just to facilitate printing usage for OPTIONS_FILE + */ + public File IGNORE_THIS_PROPERTY; + + /** + * Prepare for parsing command line arguments, by validating annotations. + * @param callerOptions This object contains annotations that define the acceptable command-line options, + * and ultimately will receive the settings when a command line is parsed. 
+ */ + public CommandLineParser(final Object callerOptions) { + this.callerOptions = callerOptions; + + for (final Field field : this.callerOptions.getClass().getFields()) { + if (field.getAnnotation(PositionalArguments.class) != null) { + handlePositionalArgumentAnnotation(field); + } + if (field.getAnnotation(Usage.class) != null) { + handleUsageAnnotation(field); + } + if (field.getAnnotation(Option.class) != null) { + handleOptionAnnotation(field); + } + } + + if (usagePreamble == null) { + if (positionalArguments == null) { + usagePreamble = defaultUsagePreamble; + } else { + usagePreamble = defaultUsagePreambleWithPositionalArguments; + } + } + } + + /** + * Print a usage message based on the options object passed to the ctor. + * @param stream Where to write the usage message. + */ + public void usage(final PrintStream stream) { + stream.print(usagePreamble); + if (!optionDefinitions.isEmpty()) { + stream.println("\nOptions:\n"); + for (final OptionDefinition optionDefinition : optionDefinitions) { + printOptionUsage(stream, optionDefinition); + } + } + final Field fileField; + try { + fileField = getClass().getField("IGNORE_THIS_PROPERTY"); + } catch (NoSuchFieldException e) { + throw new PicardException("Should never happen", e); + } + final OptionDefinition optionsFileOptionDefinition = + new OptionDefinition(fileField, OPTIONS_FILE, "", + "File of OPTION_NAME=value pairs. No positional parameters allowed. Unlike command-line options, " + + "unrecognized options are ignored. " + "A single-valued option set in an options file may be overridden " + + "by a subsequent command-line option. " + + "A line starting with '#' is considered a comment.", false, true, 0, Integer.MAX_VALUE, null, new String[0]); + printOptionUsage(stream, optionsFileOptionDefinition); + } + + /** + * Parse command-line options, and store values in callerOptions object passed to ctor. + * @param messageStream Where to write error messages. + * @param args Command line tokens. 
+ * @return true if command line is valid. + */ + public boolean parseOptions(final PrintStream messageStream, final String[] args) { + this.argv = args; + this.messageStream = messageStream; + for (final String arg: args) { + if (arg.equals("-h") || arg.equals("--help")) { + usage(messageStream); + return false; + } + final String[] pair = arg.split("=", 2); + if (pair.length == 2) { + if (pair[0].equals(OPTIONS_FILE)) { + if (!parseOptionsFile(pair[1])) { + messageStream.println(); + usage(messageStream); + return false; + } + } else { + if (!parseOption(pair[0], pair[1], false)) { + messageStream.println(); + usage(messageStream); + return false; + } + } + } else if (!parsePositionalArgument(arg)) { + messageStream.println(); + usage(messageStream); + return false; + } + } + if (!checkNumArguments()) { + messageStream.println(); + usage(messageStream); + return false; + } + return true; + } + + /** + * After command line has been parsed, make sure that all required options have values, and that + * lists with minimum # of elements have sufficient. 
+ * @return true if valid + */ + private boolean checkNumArguments() { + try { + for (final OptionDefinition optionDefinition : optionDefinitions) { + StringBuilder mutextOptionNames = new StringBuilder(); + for (String mutexOption : optionDefinition.mutuallyExclusive) { + OptionDefinition mutextOptionDef = optionMap.get(mutexOption); + if (mutextOptionDef != null && mutextOptionDef.hasBeenSet) { + mutextOptionNames.append(" ").append(mutextOptionDef.name); + } + } + if (optionDefinition.hasBeenSet && mutextOptionNames.length() > 0) { + messageStream.println("ERROR: Option '" + optionDefinition.name + + "' cannot be used in conjunction with option(s)" + + mutextOptionNames.toString()); + return false; + } + if (optionDefinition.isCollection) { + final Collection c = (Collection)optionDefinition.field.get(callerOptions); + if (c.size() < optionDefinition.minElements) { + messageStream.println("ERROR: Option '" + optionDefinition.name + "' must be specified at least " + + optionDefinition.minElements + " times."); + return false; + } + } else if (!optionDefinition.optional && !optionDefinition.hasBeenSet && mutextOptionNames.length() == 0) { + messageStream.print("ERROR: Option '" + optionDefinition.name + "' is required"); + if (optionDefinition.mutuallyExclusive.isEmpty()) { + messageStream.println("."); + } else { + messageStream.println(" unless any of " + optionDefinition.mutuallyExclusive + " are specified."); + } + return false; + } + } + if (positionalArguments != null) { + final Collection c = (Collection)positionalArguments.get(callerOptions); + if (c.size() < minPositionalArguments) { + messageStream.println("ERROR: At least " + minPositionalArguments + + " positional arguments must be specified."); + return false; + } + } + return true; + } catch (IllegalAccessException e) { + // Should never happen because lack of publicness has already been checked. 
+ throw new RuntimeException(e); + } + } + + private boolean parsePositionalArgument(final String stringValue) { + if (positionalArguments == null) { + messageStream.println("ERROR: Invalid argument '" + stringValue + "'."); + return false; + } + final Object value; + try { + value = constructFromString(getUnderlyingType(positionalArguments), stringValue); + } catch (CommandLineParseException e) { + messageStream.println("ERROR: " + e.getMessage()); + return false; + } + final Collection c; + try { + c = (Collection)positionalArguments.get(callerOptions); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + if (c.size() >= maxPositionalArguments) { + messageStream.println("ERROR: No more than " + maxPositionalArguments + + " positional arguments may be specified on the command line."); + return false; + } + c.add(value); + return true; + } + + private boolean parseOption(String key, final String stringValue, final boolean optionsFile) { + key = key.toUpperCase(); + final OptionDefinition optionDefinition = optionMap.get(key); + if (optionDefinition == null) { + if (optionsFile) { + // Silently ignore unrecognized option from options file + return true; + } + messageStream.println("ERROR: Unrecognized option: " + key); + return false; + } + if (!optionDefinition.isCollection) { + if (optionDefinition.hasBeenSet && !optionDefinition.hasBeenSetFromOptionsFile) { + messageStream.println("ERROR: Option '" + key + "' cannot be specified more than once."); + return false; + } + } + final Object value; + try { + value = constructFromString(getUnderlyingType(optionDefinition.field), stringValue); + } catch (CommandLineParseException e) { + messageStream.println("ERROR: " + e.getMessage()); + return false; + } + try { + if (optionDefinition.isCollection) { + final Collection c = (Collection)optionDefinition.field.get(callerOptions); + if (c.size() >= optionDefinition.maxElements) { + messageStream.println("ERROR: Option '" + key + "' cannot be used 
more than " + + optionDefinition.maxElements + " times."); + return false; + } + c.add(value); + } else { + optionDefinition.field.set(callerOptions, value); + optionDefinition.hasBeenSet = true; + optionDefinition.hasBeenSetFromOptionsFile = optionsFile; + } + } catch (IllegalAccessException e) { + // Should never happen because we only iterate through public fields. + throw new RuntimeException(e); + } + return true; + } + + /** + * Parsing of options from file is looser than normal. Any unrecognized options are + * ignored, and a single-valued option that is set in a file may be overridden by a + * subsequent appearance of that option. + * A line that starts with '#' is ignored. + * @param optionsFile + * @return false if a fatal error occurred + */ + private boolean parseOptionsFile(final String optionsFile) { + try { + final BufferedReader reader = new BufferedReader(new FileReader(optionsFile)); + String line; + while ((line = reader.readLine()) != null) { + if (line.startsWith("#")) { + continue; + } + final String[] pair = line.split("=", 2); + if (pair.length == 2) { + if (!parseOption(pair[0], pair[1], true)) { + messageStream.println(); + usage(messageStream); + return false; + } + } else { + messageStream.println("Strange line in OPTIONS_FILE " + optionsFile + ": " + line); + usage(messageStream); + return false; + } + } + reader.close(); + return true; + + } catch (IOException e) { + throw new PicardException("I/O error loading OPTIONS_FILE=" + optionsFile, e); + } + } + + private void printOptionUsage(final PrintStream stream, final OptionDefinition optionDefinition) { + final String type = getUnderlyingType(optionDefinition.field).getSimpleName(); + String optionLabel = optionDefinition.name + "=" + type; + stream.print(optionLabel); + if (optionDefinition.shortName.length() > 0) { + stream.println(); + } + if (optionDefinition.shortName.length() > 0) { + optionLabel = optionDefinition.shortName + "=" + type; + stream.print(optionLabel); + } + int 
numSpaces = OPTION_COLUMN_WIDTH - optionLabel.length(); + if (optionLabel.length() > OPTION_COLUMN_WIDTH) { + stream.println(); + numSpaces = OPTION_COLUMN_WIDTH; + } + printSpaces(stream, numSpaces); + final StringBuilder sb = new StringBuilder(); + if (optionDefinition.doc.length() > 0) { + sb.append(optionDefinition.doc); + sb.append(" "); + } + if (optionDefinition.optional && !optionDefinition.isCollection) { + sb.append("Default value: "); + sb.append(optionDefinition.defaultValue); + sb.append(". "); + } else if (!optionDefinition.isCollection){ + sb.append("Required. "); + } + Object[] enumConstants = getUnderlyingType(optionDefinition.field).getEnumConstants(); + if (enumConstants == null && getUnderlyingType(optionDefinition.field) == Boolean.class) { + enumConstants = TRUE_FALSE_VALUES; + } + if (enumConstants != null) { + sb.append("Possible values: {"); + for (int i = 0; i < enumConstants.length; ++i) { + if (i > 0) { + sb.append(", "); + } + sb.append(enumConstants[i].toString()); + } + sb.append("} "); + } + if (optionDefinition.isCollection) { + if (optionDefinition.minElements == 0) { + if (optionDefinition.maxElements == Integer.MAX_VALUE) { + sb.append("This option may be specified 0 or more times."); + } else { + sb.append("This option must be specified no more than " + optionDefinition.maxElements + "times."); + } + } else if (optionDefinition.maxElements == Integer.MAX_VALUE) { + sb.append("This option must be specified at least " + optionDefinition.minElements + " times."); + } else { + sb.append("This option may be specified between " + optionDefinition.minElements + + " and " + optionDefinition.maxElements + " times."); + } + } + if (!optionDefinition.mutuallyExclusive.isEmpty()) { + sb.append(" Cannot be used in conjuction with option(s)"); + for (String option : optionDefinition.mutuallyExclusive) { + OptionDefinition mutextOptionDefinition = optionMap.get(option); + sb.append(" ").append(mutextOptionDefinition.name); + if 
(mutextOptionDefinition.shortName.length() > 0) { + sb.append(" (").append(mutextOptionDefinition.shortName).append(")"); + } + } + } + final String wrappedDescription = StringUtil.wordWrap(sb.toString(), DESCRIPTION_COLUMN_WIDTH); + final String[] descriptionLines = wrappedDescription.split("\n"); + for (int i = 0; i < descriptionLines.length; ++i) { + if (i > 0) { + printSpaces(stream, OPTION_COLUMN_WIDTH); + } + stream.println(descriptionLines[i]); + } + stream.println(); + } + + private void printSpaces(final PrintStream stream, final int numSpaces) { + final StringBuilder sb = new StringBuilder(); + for (int i = 0; i < numSpaces; ++i) { + sb.append(" "); + } + stream.print(sb); + } + + private void handleOptionAnnotation(final Field field) { + try { + final Option optionAnnotation = field.getAnnotation(Option.class); + final boolean isCollection = isCollectionField(field); + if (isCollection) { + if (optionAnnotation.maxElements() == 0) { + throw new CommandLineParserDefinitionException("@Option member " + field.getName() + + "has maxElements = 0"); + } + if (optionAnnotation.minElements() > optionAnnotation.maxElements()) { + throw new CommandLineParserDefinitionException("In @Option member " + field.getName() + + ", minElements cannot be > maxElements"); + } + } + if (!canBeMadeFromString(getUnderlyingType(field))) { + throw new CommandLineParserDefinitionException("@Option member " + field.getName() + + " must have a String ctor or be an enum"); + } + + final OptionDefinition optionDefinition = new OptionDefinition(field, + field.getName(), + optionAnnotation.shortName(), + optionAnnotation.doc(), optionAnnotation.optional() || (field.get(callerOptions) != null), + isCollection, optionAnnotation.minElements(), + optionAnnotation.maxElements(), field.get(callerOptions), + optionAnnotation.mutex()); + + for (String option : optionAnnotation.mutex()) { + OptionDefinition mutextOptionDef = optionMap.get(option); + if (mutextOptionDef != null) { + 
mutextOptionDef.mutuallyExclusive.add(field.getName()); + } + } + if (optionMap.containsKey(optionDefinition.name)) { + throw new CommandLineParserDefinitionException(optionDefinition.name + " has already been used"); + } + optionMap.put(optionDefinition.name, optionDefinition); + if (optionDefinition.shortName.length() > 0) { + if (optionMap.containsKey(optionDefinition.shortName)) { + throw new CommandLineParserDefinitionException(optionDefinition.shortName + " has already been used"); + } + optionMap.put(optionDefinition.shortName, optionDefinition); + } + optionDefinitions.add(optionDefinition); + } catch (IllegalAccessException e) { + throw new CommandLineParserDefinitionException(field.getName() + + " must have public visibility to have @Option annotation"); + } + } + + private void handleUsageAnnotation(final Field field) { + if (usagePreamble != null) { + throw new CommandLineParserDefinitionException + ("@Usage cannot be used more than once in an option class."); + } + try { + usagePreamble = (String)field.get(callerOptions); + final Usage usageAnnotation = field.getAnnotation(Usage.class); + if (usageAnnotation.programVersion().length() > 0) { + usagePreamble += "Version: " + usageAnnotation.programVersion() + "\n"; + } + } catch (IllegalAccessException e) { + throw new CommandLineParserDefinitionException("@Usage data member must be public"); + } catch (ClassCastException e) { + throw new CommandLineParserDefinitionException + ("@Usage can only be applied to a String data member."); + } + } + + private void handlePositionalArgumentAnnotation(final Field field) { + if (positionalArguments != null) { + throw new CommandLineParserDefinitionException + ("@PositionalArguments cannot be used more than once in an option class."); + } + positionalArguments = field; + if (!isCollectionField(field)) { + throw new CommandLineParserDefinitionException("@PositionalArguments must be applied to a Collection"); + } + + if (!canBeMadeFromString(getUnderlyingType(field))) 
{ + throw new CommandLineParserDefinitionException("@PositionalParameters member " + field.getName() + + "does not have a String ctor"); + } + + final PositionalArguments positionalArgumentsAnnotation = field.getAnnotation(PositionalArguments.class); + minPositionalArguments = positionalArgumentsAnnotation.minElements(); + maxPositionalArguments = positionalArgumentsAnnotation.maxElements(); + if (minPositionalArguments > maxPositionalArguments) { + throw new CommandLineParserDefinitionException("In @PositionalArguments, minElements cannot be > maxElements"); + } + } + + private boolean isCollectionField(final Field field) { + try { + field.getType().asSubclass(Collection.class); + return true; + } catch (ClassCastException e) { + return false; + } + } + + private Class getUnderlyingType(final Field field) { + if (isCollectionField(field)) { + final ParameterizedType clazz = (ParameterizedType)(field.getGenericType()); + final Type[] genericTypes = clazz.getActualTypeArguments(); + if (genericTypes.length != 1) { + throw new CommandLineParserDefinitionException("Strange collection type for field " + field.getName()); + } + return (Class)genericTypes[0]; + + } else { + return field.getType(); + } + } + + // True if clazz is an enum, or if it has a ctor that takes a single String argument. 
+ private boolean canBeMadeFromString(final Class clazz) { + if (clazz.isEnum()) { + return true; + } + try { + clazz.getConstructor(String.class); + return true; + } catch (NoSuchMethodException e) { + return false; + } + } + + private Object constructFromString(final Class clazz, final String s) { + try { + if (clazz.isEnum()) { + try { + return Enum.valueOf(clazz, s); + } catch (IllegalArgumentException e) { + throw new CommandLineParseException("'" + s + "' is not a valid value for " + + clazz.getSimpleName() + ".", e); + } + } + final Constructor ctor = clazz.getConstructor(String.class); + return ctor.newInstance(s); + } catch (NoSuchMethodException e) { + // Shouldn't happen because we've checked for presence of ctor + throw new CommandLineParseException(e); + } catch (InstantiationException e) { + throw new CommandLineParseException("Abstract class '" + clazz.getSimpleName() + + "'cannot be used for an option value type.", e); + } catch (IllegalAccessException e) { + throw new CommandLineParseException("String constructor for option value type '" + clazz.getSimpleName() + + "' must be public.", e); + } catch (InvocationTargetException e) { + throw new CommandLineParseException("Problem constructing " + clazz.getSimpleName() + " from the string '" + s + "'.", + e.getCause()); + } + } + + public String[] getArgv() { + return argv; + } + + private class OptionDefinition { + final Field field; + final String name; + final String shortName; + final String doc; + final boolean optional; + final boolean isCollection; + final int minElements; + final int maxElements; + final String defaultValue; + boolean hasBeenSet = false; + boolean hasBeenSetFromOptionsFile = false; + Set mutuallyExclusive; + + private OptionDefinition(final Field field, final String name, final String shortName, final String doc, final boolean optional, final boolean collection, + final int minElements, final int maxElements, final Object defaultValue, String[] mutuallyExclusive) { + this.field 
= field; + this.name = name.toUpperCase(); + this.shortName = shortName.toUpperCase(); + this.doc = doc; + this.optional = optional; + isCollection = collection; + this.minElements = minElements; + this.maxElements = maxElements; + if (defaultValue != null) { + this.defaultValue = defaultValue.toString(); + } else { + this.defaultValue = "null"; + } + this.mutuallyExclusive = new HashSet(Arrays.asList(mutuallyExclusive)); + } + } +} diff --git a/lib/edu/mit/broad/picard/cmdline/CommandLineParserDefinitionException.java b/lib/edu/mit/broad/picard/cmdline/CommandLineParserDefinitionException.java new file mode 100644 index 0000000000..088755e2a1 --- /dev/null +++ b/lib/edu/mit/broad/picard/cmdline/CommandLineParserDefinitionException.java @@ -0,0 +1,27 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/
+package edu.mit.broad.picard.cmdline;
+
+/**
+ * Signals a problem with the <em>definition</em> of a command line -- e.g. malformed or
+ * inconsistent annotations on a CommandLineProgram -- as opposed to a bad value supplied
+ * by the user at runtime.
+ *
+ * NOTE(review): inferred from the class name and the sibling CommandLineParseException;
+ * confirm against the throw sites in CommandLineParser. Unchecked (extends RuntimeException)
+ * because a bad definition is a programming error, not a recoverable condition.
+ */
+public class CommandLineParserDefinitionException extends RuntimeException {
+    public CommandLineParserDefinitionException() {
+    }
+
+    public CommandLineParserDefinitionException(String s) {
+        super(s);
+    }
+
+    public CommandLineParserDefinitionException(String s, Throwable throwable) {
+        super(s, throwable);
+    }
+
+    public CommandLineParserDefinitionException(Throwable throwable) {
+        super(throwable);
+    }
+}
diff --git a/lib/edu/mit/broad/picard/cmdline/CommandLineProgram.java b/lib/edu/mit/broad/picard/cmdline/CommandLineProgram.java
new file mode 100644
index 0000000000..10ee7635f4
--- /dev/null
+++ b/lib/edu/mit/broad/picard/cmdline/CommandLineProgram.java
@@ -0,0 +1,141 @@
+/*
+* The Broad Institute
+* SOFTWARE COPYRIGHT NOTICE AGREEMENT
+* This software and its documentation are copyright 2009 by the
+* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+*
+* This software is supplied without any warranty or guaranteed support whatsoever. Neither
+* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+*/
+package edu.mit.broad.picard.cmdline;
+
+import edu.mit.broad.picard.util.Log;
+import edu.mit.broad.picard.util.StringUtil;
+import edu.mit.broad.picard.metrics.Header;
+import edu.mit.broad.picard.metrics.StringHeader;
+import edu.mit.broad.picard.metrics.MetricsFile;
+import edu.mit.broad.picard.metrics.MetricBase;
+
+import java.io.File;
+import java.util.Date;
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * Abstract class to facilitate writing command-line programs.
+ *
+ * To use:
+ *
+ * 1. Extend this class with a concrete class that has data members annotated with @Option, @PositionalArguments
+ * and/or @Usage annotations.
+ *
+ * 2. If there is any custom command-line validation, override customCommandLineValidation().
When this method is + * called, the command line has been parsed and set into the data members of the concrete class. + * + * 3. Implement a method doWork(). This is called after successful comand-line processing. The value it returns is + * the exit status of the program. It is assumed that the concrete class emits any appropriate error message before + * returning non-zero. doWork() may throw unchecked exceptions, which are caught and reported appropriately. + * + * 4. Implement the following static method in the concrete class: + * + * public static void main(String[] argv) { + System.exit(new MyConcreteClass().instanceMain(argv)); + } + + + */ +public abstract class CommandLineProgram { + + @Option + public File TMP_DIR = new File(System.getProperty("java.io.tmpdir"), System.getProperty("user.name")); + + @Option(doc = "Control verbosity of logging") + public Log.LogLevel VERBOSITY = Log.LogLevel.INFO; + + @Option(doc = "Whether to suppress job-summary info on System.out") + public Boolean QUIET = false; + + private final String standardUsagePreamble = CommandLineParser.getStandardUsagePreamble(getClass()); + + /** + * Initialized in parseArgs. Subclasses may want to access this to do + * their own validation, and then print usage using clp. + */ + protected CommandLineParser clp; + + private final List
defaultHeaders = new ArrayList
(); + + /** + * Do the work after command line has been parsed. + * RuntimeException may be thrown by this method, and are reported appropriately. + * @return program exit status. + */ + protected abstract int doWork(); + + public int instanceMain(final String[] argv) { + // Build the default headers + final Date startDate = new Date(); + final String cmdline = getClass().getName() + " " + StringUtil.join(" ", argv); + this.defaultHeaders.add(new StringHeader(cmdline)); + this.defaultHeaders.add(new StringHeader("Started on: " + startDate)); + + if (!parseArgs(argv)) { + return 1; + } + + Log.setGlobalLogLevel(VERBOSITY); + + if (!TMP_DIR.exists()) { + // Intentially not checking the return value, because it may be that the program does not + // need a tmp_dir. If this fails, the problem will be discovered downstream. + TMP_DIR.mkdir(); + } + System.setProperty("java.io.tmpdir", TMP_DIR.getAbsolutePath()); + if (!QUIET) { + System.out.println("[" + new Date() + "] " + cmdline); + } + final int ret = doWork(); + if (!QUIET) { + System.out.println("[" + new Date() + "] " + getClass().getName() + " done."); + System.out.println("Runtime.totalMemory()=" + Runtime.getRuntime().totalMemory()); + } + return ret; + } + + /** + * Put any custom command-line validation in an override of this method. + * clp is initialized at this point and can be used to print usage and access argv. + * Any options set by command-line parser can be validated. + * @return true if command line is valid. + */ + protected boolean customCommandLineValidation() { + return true; + } + + /** + * + * @return true if command line is valid + */ + protected boolean parseArgs(final String[] argv) { + clp = new CommandLineParser(this); + final boolean ret = clp.parseOptions(System.err, argv); + if (!ret) { + return false; + } + return customCommandLineValidation(); + } + + /** Gets a MetricsFile with default headers already written into it. 
*/ + protected MetricsFile getMetricsFile() { + final MetricsFile file = new MetricsFile(); + for (final Header h : this.defaultHeaders) { + file.addHeader(h); + } + + return file; + } + + public String getStandardUsagePreamble() { + return standardUsagePreamble; + } +} diff --git a/lib/edu/mit/broad/picard/cmdline/CommandLineUtils.java b/lib/edu/mit/broad/picard/cmdline/CommandLineUtils.java new file mode 100644 index 0000000000..0702f3bc70 --- /dev/null +++ b/lib/edu/mit/broad/picard/cmdline/CommandLineUtils.java @@ -0,0 +1,39 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.cmdline; + +import java.io.*; +import java.util.regex.Pattern; + +public class CommandLineUtils { + /** Regex for splitting on spaces. */ + public static final Pattern SPACE_SPLITTER = Pattern.compile(" "); + + // Regexes to split things apart on white space + public static final Pattern TAB_SPLITTER = Pattern.compile("\\t"); + + /** Checks that a file exists and is readable, and then returns a buffered reader for it. */ + public static BufferedReader getReader(File file) throws IOException { + return new BufferedReader(new InputStreamReader(getInputStream(file))); + } + + /** Checks that a file exists and is readable, and then returns a input stream for it. 
*/ + public static InputStream getInputStream(File file) throws IOException { + if (!file.exists()) { + throw new RuntimeException("Specified file does not exist: " + file); + } + + if (!file.canRead()) { + throw new RuntimeException("Specified file is not readable: " + file); + } + + return new FileInputStream(file); + } +} diff --git a/lib/edu/mit/broad/picard/cmdline/Option.java b/lib/edu/mit/broad/picard/cmdline/Option.java new file mode 100644 index 0000000000..b7ffebdd9a --- /dev/null +++ b/lib/edu/mit/broad/picard/cmdline/Option.java @@ -0,0 +1,60 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.cmdline; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Used to annotate which fields of a CommandLineProgram are options given at the command line. + * If a command line call looks like "cmd option=foo x=y bar baz" the CommandLineProgram + * would have annotations on fields to handle the values of option and x. All options + * must be in the form name=value on the command line. The java type of the option + * will be inferred from the type of the field or from the generic type of the collection + * if this option is allowed more than once. The type must be an enum or + * have a constructor with a single String parameter. 
+ * + * @author Alec Wysoker + */ +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.FIELD) +@Documented +public @interface Option { + /** The name of the option as it would appear on the command line. */ + String shortName() default ""; + + /** Text that appears for this option in text describing usage of the command line program. */ + String doc() default ""; + + /** + * If set to false, an exception will be thrown if the option is not specified. + * If 2 options are mutually exclusive and both have optional=false it will be + * interpreted as one or the other is required and an exception will only be thrown if + * neither are specified. + */ + boolean optional() default false; + + /** + * Array of option names that cannot be used in conjunction with this one. + * If 2 options are mutually exclusive and both have optional=false it will be + * interpreted as one OR the other is required and an exception will only be thrown if + * neither are specified. + */ + String[] mutex() default {}; + + /** The minimum number of times that this option is required. */ + int minElements() default 0; + + /** The maximum number of times this option is allowed. */ + int maxElements() default Integer.MAX_VALUE; +} diff --git a/lib/edu/mit/broad/picard/cmdline/PositionalArguments.java b/lib/edu/mit/broad/picard/cmdline/PositionalArguments.java new file mode 100644 index 0000000000..f45301439a --- /dev/null +++ b/lib/edu/mit/broad/picard/cmdline/PositionalArguments.java @@ -0,0 +1,38 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.cmdline; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Used to annotate which field of a CommandLineProgram should store parameters given at the + * command line which are not options. Fields with this annotation must be a Collection + * (and probably should be a List if order is important). + * If a command line call looks like "cmd option=foo x=y bar baz" the values "bar" and "baz" + * would be added to the collection with this annotation. The java type of the arguments + * will be inferred from the generic type of the collection. The type must be an enum or + * have a constructor with a single String parameter. + * + * @author Alec Wysoker + */ +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.FIELD) +@Documented +public @interface PositionalArguments { + /** The minimum number of arguments required. */ + int minElements() default 0; + + /** The maximum number of arguments allowed. */ + int maxElements() default Integer.MAX_VALUE; +} diff --git a/lib/edu/mit/broad/picard/cmdline/Usage.java b/lib/edu/mit/broad/picard/cmdline/Usage.java new file mode 100644 index 0000000000..13aef94671 --- /dev/null +++ b/lib/edu/mit/broad/picard/cmdline/Usage.java @@ -0,0 +1,26 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.cmdline; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotates the field that contains text to be displayed in a usage message. + */ +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.FIELD) +@Documented +public @interface Usage { + String programVersion() default ""; +} diff --git a/lib/edu/mit/broad/picard/directed/ArachneMapToIntervalList.java b/lib/edu/mit/broad/picard/directed/ArachneMapToIntervalList.java new file mode 100644 index 0000000000..75fb98b16b --- /dev/null +++ b/lib/edu/mit/broad/picard/directed/ArachneMapToIntervalList.java @@ -0,0 +1,62 @@ +package edu.mit.broad.picard.directed; + +import edu.mit.broad.picard.cmdline.CommandLineProgram; +import edu.mit.broad.picard.cmdline.Option; +import edu.mit.broad.picard.io.IoUtil; +import edu.mit.broad.picard.util.BasicTextFileParser; +import edu.mit.broad.picard.util.Interval; +import edu.mit.broad.picard.util.FormatUtil; +import edu.mit.broad.sam.SAMFileReader; +import edu.mit.broad.sam.SAMFileHeader; +import edu.mit.broad.sam.SAMSequenceRecord; + +import java.io.File; +import java.util.List; + +/** + * Converts an arachne style map file to the new interval list format. + * + * @author Tim Fennell + */ +public class ArachneMapToIntervalList extends CommandLineProgram { + @Option(shortName="M", doc="The path to an archne style map file") public File MAP; + @Option(shortName="SD", doc="A sequence dictionary in SAM or BAM format") public File SEQUENCE_DICTIONARY; + @Option(shortName="O", doc="The output file to write the interval list to") public File OUTPUT; + @Option(shortName="P", doc="Prefix to use when generating names") public String PREFIX; + + /** Stock main method. 
*/ + public static void main(String[] argv) { + System.exit(new ArachneMapToIntervalList().instanceMain(argv)); + } + + protected int doWork() { + IoUtil.assertFileIsReadable(MAP); + IoUtil.assertFileIsReadable(SEQUENCE_DICTIONARY); + IoUtil.assertFileIsWritable(OUTPUT); + + SAMFileReader sam = new SAMFileReader(SEQUENCE_DICTIONARY); + SAMFileHeader header = sam.getFileHeader(); + List seqs = header.getSequences(); + IntervalList list = new IntervalList(header); + + BasicTextFileParser parser = new BasicTextFileParser(true, 3, MAP); + FormatUtil format = new FormatUtil(); + int i=1; + + while (parser.hasNext()) { + String[] fields = parser.next(); + int seqIndex = format.parseInt(fields[0]); + int start = format.parseInt(fields[1]) + 1; + int end = format.parseInt(fields[2]) + 1; + String seq = seqs.get(seqIndex).getSequenceName(); + + Interval interval = new Interval(seq, start, end, false, PREFIX + "_" + i++); + list.add(interval); + } + + list.sort(); + list.write(OUTPUT); + + return 0; + } +} diff --git a/lib/edu/mit/broad/picard/directed/CalculateHsMetrics.java b/lib/edu/mit/broad/picard/directed/CalculateHsMetrics.java new file mode 100644 index 0000000000..d3be86825f --- /dev/null +++ b/lib/edu/mit/broad/picard/directed/CalculateHsMetrics.java @@ -0,0 +1,51 @@ +package edu.mit.broad.picard.directed; + +import edu.mit.broad.picard.cmdline.CommandLineProgram; +import edu.mit.broad.picard.cmdline.Option; +import edu.mit.broad.picard.cmdline.Usage; +import edu.mit.broad.picard.io.IoUtil; +import edu.mit.broad.picard.metrics.MetricsFile; +import edu.mit.broad.sam.SAMFileReader; + +import java.io.File; + +/** + * Calculates a set of HS metrics from a sam or bam file. 
+ * + * @author Tim Fennell + */ +public class CalculateHsMetrics extends CommandLineProgram { + @Usage public final String USAGE = + "Calculates a set of Hybrid Selection specific metrics from an aligned SAM" + + "or BAM file."; + @Option(shortName="BI") public File BAIT_INTERVALS; + @Option(shortName="TI") public File TARGET_INTERVALS; + @Option(shortName="I") public File INPUT; + @Option(shortName="M") public File METRICS_FILE; + + /** Stock main method. */ + public static void main(String[] argv) { + System.exit(new CalculateHsMetrics().instanceMain(argv)); + } + + /** + * Asserts that files are readable and writable and then fires off an + * HsMetricsCalculator instance to do the real work. + */ + protected int doWork() { + IoUtil.assertFileIsReadable(BAIT_INTERVALS); + IoUtil.assertFileIsReadable(TARGET_INTERVALS); + IoUtil.assertFileIsReadable(INPUT); + IoUtil.assertFileIsWritable(METRICS_FILE); + + HsMetricsCalculator calculator = new HsMetricsCalculator(BAIT_INTERVALS, TARGET_INTERVALS); + SAMFileReader sam = new SAMFileReader(INPUT); + calculator.analyze(sam.iterator()); + + MetricsFile metrics = getMetricsFile(); + metrics.addMetric(calculator.getMetrics()); + + metrics.write(METRICS_FILE); + return 0; + } +} diff --git a/lib/edu/mit/broad/picard/directed/GenomeMask.java b/lib/edu/mit/broad/picard/directed/GenomeMask.java new file mode 100644 index 0000000000..27be5df717 --- /dev/null +++ b/lib/edu/mit/broad/picard/directed/GenomeMask.java @@ -0,0 +1,52 @@ +package edu.mit.broad.picard.directed; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.BitSet; +import java.util.SortedMap; +import java.util.TreeMap; + +/** + * Utility class to store coordinates of interest in per-sequence bitmasks. + */ +public class GenomeMask { + + // if memory usage becomes a problem... 
this could be changed to a SparseBitSet
+    // http://java.sun.com/developer/onlineTraining/collections/magercises/BitSet/index.html
+    // Per-contig bitmasks, keyed by contig index (TreeMap keeps the keys sorted ascending).
+    private SortedMap data = new TreeMap();
+
+
+    /** Creates an empty mask containing no contigs. */
+    public GenomeMask() {
+    }
+
+    /**
+     * Returns true if the bit for the given position on the given contig is set;
+     * false if the bit is clear or no mask exists for that contig.
+     */
+    public boolean get(int contig, int position) {
+        BitSet bits = data.get(contig);
+        return (bits != null) && bits.get(position);
+    }
+
+    /** Returns the underlying BitSet for the given contig, or null if the contig has no mask. */
+    public BitSet get(int contig) {
+        return data.get(contig);
+    }
+
+    /**
+     * Get an existing BitSet for the given contig, or create one if not already present. This is
+     * useful when initializing a GenomeMask from an external source.
+     * @param contig which BitSet
+     * @param numBits if there was not already a BitSet for this contig, one is created and initialized to this size.
+     * @return the BitSet for the given contig, creating one if necessary
+     */
+    public BitSet getOrCreate(int contig, int numBits) {
+        BitSet ret = data.get(contig);
+        if (ret == null) {
+            ret = new BitSet(numBits);
+            data.put(contig, ret);
+        }
+        return ret;
+    }
+
+    /**
+     * Returns the largest contig index that has a mask.
+     * Note: TreeMap.lastKey() throws NoSuchElementException if no contigs have been
+     * added, so callers must not invoke this on an empty mask.
+     */
+    public int getMaxContig() {
+        return data.lastKey();
+    }
+}
diff --git a/lib/edu/mit/broad/picard/directed/GenomeMaskFactory.java b/lib/edu/mit/broad/picard/directed/GenomeMaskFactory.java
new file mode 100644
index 0000000000..ba81a7eb6e
--- /dev/null
+++ b/lib/edu/mit/broad/picard/directed/GenomeMaskFactory.java
@@ -0,0 +1,47 @@
+/*
+* The Broad Institute
+* SOFTWARE COPYRIGHT NOTICE AGREEMENT
+* This software and its documentation are copyright 2009 by the
+* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+*
+* This software is supplied without any warranty or guaranteed support whatsoever. Neither
+* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+*/ +package edu.mit.broad.picard.directed; + +import edu.mit.broad.sam.SAMFileHeader; +import edu.mit.broad.picard.util.Interval; +import edu.mit.broad.picard.io.IoUtil; + +import java.util.List; +import java.util.BitSet; +import java.io.File; + +/** + * Create a GenomeMask from an IntervalList or a file containing an IntervalList + */ +public class GenomeMaskFactory { + + public GenomeMask makeGenomeMaskFromIntervalList(IntervalList intervalList) { + if (intervalList.getHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) { + intervalList.sort(); + } + List uniqueIntervals = intervalList.getUniqueIntervals(); + GenomeMask ret = new GenomeMask(); + + SAMFileHeader samHeader = intervalList.getHeader(); + + for (Interval interval : uniqueIntervals) { + // TODO: Maybe figure out more intelligently how big the bitset might be? + BitSet bitSet = ret.getOrCreate(samHeader.getSequenceIndex(interval.getSequence()), interval.getEnd() + 1); + bitSet.set(interval.getStart(), interval.getEnd() + 1); + } + return ret; + } + + public GenomeMask makeGenomeMaskFromIntervalList(File intervalListFile) { + IoUtil.assertFileIsReadable(intervalListFile); + IntervalList intervalList = IntervalList.fromFile(intervalListFile); + return makeGenomeMaskFromIntervalList(intervalList); + } +} diff --git a/lib/edu/mit/broad/picard/directed/HsMetrics.java b/lib/edu/mit/broad/picard/directed/HsMetrics.java new file mode 100644 index 0000000000..74817f9198 --- /dev/null +++ b/lib/edu/mit/broad/picard/directed/HsMetrics.java @@ -0,0 +1,108 @@ +package edu.mit.broad.picard.directed; + +import edu.mit.broad.picard.metrics.MetricBase; + +/** + * The set of metrics captured that are specific to a hybrid selection analysis. + * + * @author Tim Fennell + */ +public class HsMetrics extends MetricBase { + /** The name of the bait set used in the hybrid selection. */ + public String BAIT_SET; + + /** The number of bases in the reference genome used for alignment. 
*/
+    public long GENOME_SIZE;
+
+    /** The number of bases which have one or more baits on top of them. */
+    public long BAIT_TERRITORY;
+
+    /** The unique number of target bases in the experiment where target is usually exons etc. */
+    public long TARGET_TERRITORY;
+
+    /** Target territory / bait territory. 1 == perfectly efficient, 0.5 = half of baited bases are not target. */
+    public double BAIT_DESIGN_EFFICIENCY;
+
+    /** The total number of reads in the SAM or BAM file examined. */
+    public int TOTAL_READS;
+
+    /** The number of reads that pass the vendor's filter. */
+    public int PF_READS;
+
+    /** The number of PF reads that are not marked as duplicates. */
+    public int PF_UNIQUE_READS;
+
+    /** PF reads / total reads. The percent of reads passing filter. */
+    public double PCT_PF_READS;
+
+    /** PF Unique Reads / Total Reads. */
+    public double PCT_PF_UQ_READS;
+
+    /** The number of PF reads that are aligned with mapping quality > 0 to the reference genome. */
+    public int PF_READS_ALIGNED;
+
+    /** PF Reads Aligned / PF Reads. */
+    public double PCT_PF_READS_ALIGNED;
+
+    /** The number of bases in the PF aligned reads that are mapped to a reference base. Accounts for clipping and gaps. */
+    public int PF_BASES_ALIGNED;
+
+    /** The number of PF aligned bases that mapped to a baited region of the genome. */
+    public long ON_BAIT_BASES;
+
+    /** The number of PF aligned bases that mapped to within a fixed interval of a baited region, but not on a baited region. */
+    public long NEAR_BAIT_BASES;
+
+    /** The number of PF aligned bases that mapped to neither on nor near a bait. */
+    public long OFF_BAIT_BASES;
+
+    /** The number of PF aligned bases that mapped to a targeted region of the genome. */
+    public long ON_TARGET_BASES;
+
+    /** On+Near Bait Bases / PF Bases Aligned. */
+    public double PCT_SELECTED_BASES;
+
+    /** The percentage of aligned PF bases that mapped neither on nor near a bait.
*/ + public double PCT_OFF_BAIT; + + /** The percentage of on+near bait bases that are on as opposed to near. */ + public double ON_BAIT_VS_SELECTED; + + /** The mean coverage of all baits in the experiment. */ + public double MEAN_BAIT_COVERAGE; + + /** The mean coverage of targets that recieved at least coverage depth = 2 at one base. */ + public double MEAN_TARGET_COVERAGE; + + /** The fold by which the baited region has been amplified above genomic background. */ + public double FOLD_ENRICHMENT; + + /** The number of targets that did not reach coverage=2 over any base. */ + public double ZERO_CVG_TARGETS_PCT; + + /** + * The fold over-coverage necessary to raise 80% of bases in "non-zero-cvg" targets to + * the mean coverage level in those targets. + */ + public double FOLD_80_BASE_PENALTY; + + + /** + * Calculates the metrics in this class that can be derived from other metrics in the class. + */ + public void calculateDerivedMetrics() { + BAIT_DESIGN_EFFICIENCY = (double) TARGET_TERRITORY / (double) BAIT_TERRITORY; + + PCT_PF_READS = PF_READS / (double) TOTAL_READS; + PCT_PF_UQ_READS = PF_UNIQUE_READS / (double) TOTAL_READS; + PCT_PF_READS_ALIGNED = PF_READS_ALIGNED / (double) PF_UNIQUE_READS; + + double denominator = (ON_BAIT_BASES + NEAR_BAIT_BASES + OFF_BAIT_BASES); + + PCT_SELECTED_BASES = (ON_BAIT_BASES + NEAR_BAIT_BASES) / denominator; + PCT_OFF_BAIT = OFF_BAIT_BASES / denominator; + ON_BAIT_VS_SELECTED = ON_BAIT_BASES / (double) (ON_BAIT_BASES + NEAR_BAIT_BASES); + MEAN_BAIT_COVERAGE = ON_BAIT_BASES / (double) BAIT_TERRITORY; + FOLD_ENRICHMENT = (ON_BAIT_BASES/ denominator) / ((double) BAIT_TERRITORY / GENOME_SIZE); + } +} diff --git a/lib/edu/mit/broad/picard/directed/HsMetricsCalculator.java b/lib/edu/mit/broad/picard/directed/HsMetricsCalculator.java new file mode 100644 index 0000000000..a454642a79 --- /dev/null +++ b/lib/edu/mit/broad/picard/directed/HsMetricsCalculator.java @@ -0,0 +1,207 @@ +package edu.mit.broad.picard.directed; + +import 
edu.mit.broad.picard.util.*; +import edu.mit.broad.sam.SAMFileReader; +import edu.mit.broad.sam.SAMRecord; +import edu.mit.broad.sam.AlignmentBlock; +import edu.mit.broad.sam.SAMSequenceRecord; + +import java.util.*; +import java.io.*; + +/** + * Calculates HS metrics for a given SAM or BAM file. Requires the input of a list of + * target intervals and a list of bait intervals. Can be invoked either on an entire + * iterator of SAMRecords or be passed SAMRecords one at a time. + * + * @author Tim Fennell + */ +public class HsMetricsCalculator { + // What is considered "near" to the bait + private static final int NEAR_BAIT_DISTANCE = 250; + private static final Log log = Log.getInstance(HsMetricsCalculator.class); + + // Holds file names and other parameter related junk + private SAMFileReader sam; + private File baitFile; + private File targetFile; + private IntervalList baits; + private IntervalList targets; + + // Overlap detector for finding overlaps between reads and the experimental targets + private OverlapDetector targetDetector = new OverlapDetector(0,0); + + // Overlap detector for finding overlaps between the reads and the baits (and the near bait space) + private OverlapDetector baitDetector = new OverlapDetector(-NEAR_BAIT_DISTANCE,0); + + // A Map to accumulate per-bait-region (i.e. merge of overlapping baits) coverage. */ + private Map coverageByTarget = new HashMap(); + + private HsMetrics metrics = new HsMetrics(); + + /** + * Constructor that parses the squashed reference to genome reference file and stores the + * information in a map for later use. 
+ */ + public HsMetricsCalculator(File baits, File targets) { + this.baitFile = baits; + this.targetFile = targets; + this.baits = IntervalList.fromFile(baits); + this.targets = IntervalList.fromFile(targets); + + this.metrics.BAIT_SET = baits.getName(); + int tmp = this.metrics.BAIT_SET.indexOf("."); + if (tmp > 0) { + this.metrics.BAIT_SET = this.metrics.BAIT_SET.substring(0, tmp); + } + + List uniqueBaits = this.baits.getUniqueIntervals(); + this.baitDetector.addAll(uniqueBaits, uniqueBaits); + this.metrics.BAIT_TERRITORY = Interval.countBases(uniqueBaits); + + List uniqueTargets = this.targets.getUniqueIntervals(); + this.targetDetector.addAll(uniqueTargets, uniqueTargets); + this.metrics.TARGET_TERRITORY = Interval.countBases(uniqueTargets); + + for (SAMSequenceRecord seq : this.baits.getHeader().getSequences()) { + this.metrics.GENOME_SIZE += seq.getSequenceLength(); + } + + // Populate the coverage by target map + for (Interval target : this.targets.getIntervals()) { + this.coverageByTarget.put(target, new Coverage(target, 0)); + } + } + + /** Iterates over all records in the file and collects metrics. */ + public void analyze(Iterator records) { + int i = 0; + while (records.hasNext()) { + analyze(records.next()); + + if (++i % 1000000 == 0) { + log.info("Processed " + i + " records so far."); + } + } + } + + /** Adds information about an individual SAMRecord to the statistics. 
*/ + public void analyze(SAMRecord rec) { + // Just plain avoid records that are marked as not-primary + if (rec.getNotPrimaryAlignmentFlag()) return; + + this.metrics.TOTAL_READS += 1; + + // Check for PF reads + if (rec.getReadFailsVendorQualityCheckFlag()) { + return; + } + else { + ++this.metrics.PF_READS; + } + + // Check for reads that are marked as duplicates + if (rec.getDuplicateReadFlag()) { + return; + } + else { + ++this.metrics.PF_UNIQUE_READS; + } + + // Don't bother with reads that didn't align uniquely + if (rec.getReadUnmappedFlag() || rec.getMappingQuality() == 0) { + return; + } + + this.metrics.PF_READS_ALIGNED += 1; + for (AlignmentBlock block : rec.getAlignmentBlocks()) { + this.metrics.PF_BASES_ALIGNED += block.getLength(); + } + + Interval read = new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd()); + + // Find the target overlaps + Collection targets = this.targetDetector.getOverlaps(read); + if (targets != null && !targets.isEmpty()) { + for (Interval target : targets) { + Coverage coverage = this.coverageByTarget.get(target); + + for (AlignmentBlock block : rec.getAlignmentBlocks()) { + int end = CoordMath.getEnd(block.getReferenceStart(), block.getLength()); + for (int pos=block.getReferenceStart(); pos<=end; ++ pos) { + if (pos >= target.getStart() && pos <= target.getEnd()) { + ++this.metrics.ON_TARGET_BASES; + coverage.addBase(pos - target.getStart()); + } + } + } + } + } + + // Now do the bait overlaps + int mappedBases = 0; + for (AlignmentBlock block : rec.getAlignmentBlocks()) mappedBases += block.getLength(); + Collection baits = this.baitDetector.getOverlaps(read); + int onBaitBases = 0; + + if (baits != null && !baits.isEmpty()) { + for (Interval bait : baits) { + for (AlignmentBlock block : rec.getAlignmentBlocks()) { + int end = CoordMath.getEnd(block.getReferenceStart(), block.getLength()); + + for (int pos=block.getReferenceStart(); pos<=end; ++pos) { + if (pos >= bait.getStart() && pos <= 
bait.getEnd()) ++onBaitBases; + } + } + } + + this.metrics.ON_BAIT_BASES += onBaitBases; + this.metrics.NEAR_BAIT_BASES += (mappedBases - onBaitBases); + } + else { + this.metrics.OFF_BAIT_BASES += mappedBases; + } + + } + + /** Calculates a few last summary metrics and then returns the metrics calculated. */ + public HsMetrics getMetrics() { + this.metrics.calculateDerivedMetrics(); + calculateTargetCoverageMetrics(); + return this.metrics; + } + + /** Calculates how much additional sequencing is needed to raise 80% of bases to the mean for the lane. */ + private void calculateTargetCoverageMetrics() { + short[] depths = new short[(int) this.metrics.TARGET_TERRITORY]; // may not use entire array + int zeroCoverageTargets = 0; + int depthIndex = 0; + double totalCoverage = 0; + int basesConsidered = 0; + + for (Coverage c : this.coverageByTarget.values()) { + if (!c.hasCoverage()) { + ++zeroCoverageTargets; + continue; + } + + final short[] targetDepths = c.getDepths(); + basesConsidered += targetDepths.length; + + for (short depth : targetDepths) { + depths[depthIndex++] = depth; + totalCoverage += depth; + } + } + + this.metrics.MEAN_TARGET_COVERAGE = totalCoverage / basesConsidered; + + // Sort the array (ASCENDING) and then find the base the coverage value that lies at the 80% + // line, which is actually at 20% into the array now + Arrays.sort(depths); + int indexOf80thPercentile = (depths.length - basesConsidered) + (int) (basesConsidered * 0.2); + int coverageAt80thPercentile = depths[indexOf80thPercentile]; + this.metrics.FOLD_80_BASE_PENALTY = this.metrics.MEAN_TARGET_COVERAGE / coverageAt80thPercentile; + this.metrics.ZERO_CVG_TARGETS_PCT = zeroCoverageTargets / (double) this.targets.getIntervals().size(); + } +} diff --git a/lib/edu/mit/broad/picard/directed/IntervalList.java b/lib/edu/mit/broad/picard/directed/IntervalList.java new file mode 100644 index 0000000000..087537c0a2 --- /dev/null +++ b/lib/edu/mit/broad/picard/directed/IntervalList.java @@ 
package edu.mit.broad.picard.directed;

import edu.mit.broad.picard.util.Interval;
import edu.mit.broad.picard.util.FormatUtil;
import edu.mit.broad.picard.io.IoUtil;
import edu.mit.broad.picard.PicardException;
import edu.mit.broad.sam.SAMFileHeader;
import edu.mit.broad.sam.SAMTextHeaderCodec;
import edu.mit.broad.sam.util.StringLineReader;

import java.util.*;
import java.io.*;

/**
 * Represents a list of intervals against a reference sequence that can be written to
 * and read from a file. The file format is relatively simple and reflects the SAM
 * alignment format to a degree.
 *
 * A SAM style header must be present in the file which lists the sequence records
 * against which the intervals are described. After the header the file then contains
 * records one per line in text format with the following values tab-separated:
 * - Sequence name
 * - Start position (1-based)
 * - End position (1-based, end inclusive)
 * - Strand (either + or -)
 * - Interval name (an, ideally unique, name for the interval)
 *
 * @author Tim Fennell
 */
public class IntervalList implements Iterable<Interval> {
    private SAMFileHeader header;
    private List<Interval> intervals = new ArrayList<Interval>();

    /**
     * Constructs a new interval list using the supplied header information.
     * @throws IllegalArgumentException if header is null
     */
    public IntervalList(SAMFileHeader header) {
        if (header == null) {
            throw new IllegalArgumentException("SAMFileHeader must be supplied.");
        }
        this.header = header;
    }

    /** Gets the header (if there is one) for the interval list. */
    public SAMFileHeader getHeader() { return header; }

    /** Returns an iterator over the intervals. */
    public Iterator<Interval> iterator() { return this.intervals.iterator(); }

    /** Adds an interval to the list of intervals. */
    public void add(Interval interval) { this.intervals.add(interval); }

    /** Sorts the internal collection of intervals by coordinate and records that order in the header. */
    public void sort() {
        Collections.sort(this.intervals, new IntervalCoordinateComparator(this.header));
        this.header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
    }

    /** Gets the set of intervals as held internally (unmodifiable view). */
    public List<Interval> getIntervals() {
        return Collections.unmodifiableList(this.intervals);
    }

    /**
     * Merges the list of intervals and then reduces them down where regions overlap
     * or are directly adjacent to one another. During this process the "merged" interval
     * will retain the strand and name of the 5' most interval merged.
     *
     * NOTE(review): assumes the intervals are coordinate-sorted (see sort()); merging
     * only considers neighbouring entries.
     *
     * @return the set of unique intervals condensed from the contained intervals
     */
    public List<Interval> getUniqueIntervals() {
        List<Interval> unique = new ArrayList<Interval>();

        // Fix: the original called iterator.next() unconditionally and threw
        // NoSuchElementException when the list was empty.
        if (this.intervals.isEmpty()) {
            return unique;
        }

        ListIterator<Interval> iterator = this.intervals.listIterator();
        Interval previous = iterator.next();

        while (iterator.hasNext()) {
            Interval next = iterator.next();
            if (previous.intersects(next) || previous.abuts(next)) {
                // Extend the running interval; keep the 5'-most strand and name
                previous = new Interval(previous.getSequence(),
                                        previous.getStart(),
                                        Math.max(previous.getEnd(), next.getEnd()),
                                        previous.isNegativeStrand(),
                                        previous.getName());
            }
            else {
                unique.add(previous);
                previous = next;
            }
        }

        unique.add(previous);

        return unique;
    }

    /** Gets the (potentially redundant) sum of the length of the intervals in the list. */
    public long getBaseCount() {
        return Interval.countBases(this.intervals);
    }

    /** Gets the count of unique bases represented by the intervals in the list. */
    public long getUniqueBaseCount() {
        return Interval.countBases(getUniqueIntervals());
    }

    /**
     * Parses an interval list from a file.
     * @param file the file containing the intervals
     * @return an IntervalList object that contains the headers and intervals from the file
     * @throws IllegalStateException if the file has no @-prefixed header
     * @throws PicardException on malformed records or I/O errors
     */
    public static IntervalList fromFile(File file) {
        BufferedReader in = new BufferedReader(new InputStreamReader(IoUtil.openFileForReading(file)));

        try {
            // Setup a reader and parse the header
            StringBuilder builder = new StringBuilder(4096);
            String line = null;

            while ((line = in.readLine()) != null) {
                if (line.startsWith("@")) {
                    builder.append(line).append('\n');
                }
                else {
                    break;
                }
            }

            if (builder.length() == 0) {
                throw new IllegalStateException("Interval list file must contain header: " + file.getAbsolutePath());
            }

            StringLineReader headerReader = new StringLineReader(builder.toString());
            SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
            IntervalList list = new IntervalList(codec.decode(headerReader, file));

            // 'line' now holds the first non-header line, or null if the file contained
            // only a header. Fix: the original do/while dereferenced line without a null
            // check and NPE'd on header-only files.
            FormatUtil format = new FormatUtil();
            while (line != null) {
                if (line.trim().length() > 0) { // skip over blank lines
                    // Make sure we have the right number of fields
                    String[] fields = line.split("\t");
                    if (fields.length != 5) {
                        throw new PicardException("Invalid interval record contains " +
                                                  fields.length + " fields: " + line);
                    }

                    // Then parse them out
                    String seq = fields[0];
                    int start = format.parseInt(fields[1]);
                    int end = format.parseInt(fields[2]);

                    boolean negative;
                    if (fields[3].equals("-")) negative = true;
                    else if (fields[3].equals("+")) negative = false;
                    else throw new IllegalArgumentException("Invalid strand field: " + fields[3]);

                    String name = fields[4];

                    list.intervals.add(new Interval(seq, start, end, negative, name));
                }

                line = in.readLine();
            }

            return list;
        }
        catch (IOException ioe) {
            throw new PicardException("Error parsing interval list file: " + file.getAbsolutePath(), ioe);
        }
        finally {
            try { in.close(); } catch (Exception e) { /* do nothing */ }
        }
    }

    /**
     * Writes out the list of intervals to the supplied file.
     * @param file a file to write to. If exists it will be overwritten.
     * @throws PicardException on I/O errors
     */
    public void write(File file) {
        try {
            BufferedWriter out = new BufferedWriter(new OutputStreamWriter(IoUtil.openFileForWriting(file)));
            // Fix: close the writer in finally so it is not leaked when an
            // exception is thrown mid-write.
            try {
                FormatUtil format = new FormatUtil();

                // Write out the header
                if (this.header != null) {
                    SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
                    codec.encode(out, this.header);
                }

                // Write out the intervals
                for (Interval interval : this) {
                    out.write(interval.getSequence());
                    out.write('\t');
                    out.write(format.format(interval.getStart()));
                    out.write('\t');
                    out.write(format.format(interval.getEnd()));
                    out.write('\t');
                    out.write(interval.isPositiveStrand() ? '+' : '-');
                    out.write('\t');
                    out.write(interval.getName());
                    out.newLine();
                }

                out.flush();
            }
            finally {
                out.close();
            }
        }
        catch (IOException ioe) {
            throw new PicardException("Error writing out interval list to file: " + file.getAbsolutePath(), ioe);
        }
    }
}

/**
 * Comparator that orders intervals based on their sequence index, by coordinate
 * then by strand and finally by name.
 */
class IntervalCoordinateComparator implements Comparator<Interval> {
    private SAMFileHeader header;

    /** Constructs a comparator using the supplied sequence header. */
    IntervalCoordinateComparator(SAMFileHeader header) {
        this.header = header;
    }

    public int compare(Interval lhs, Interval rhs) {
        int lhsIndex = this.header.getSequenceIndex(lhs.getSequence());
        int rhsIndex = this.header.getSequenceIndex(rhs.getSequence());
        // Subtraction is safe here: sequence indices and genomic coordinates are
        // non-negative ints, so the difference cannot overflow.
        int retval = lhsIndex - rhsIndex;

        if (retval == 0) retval = lhs.getStart() - rhs.getStart();
        if (retval == 0) retval = lhs.getEnd() - rhs.getEnd();
        if (retval == 0) {
            if (lhs.isPositiveStrand() && rhs.isNegativeStrand()) retval = -1;
            else if (lhs.isNegativeStrand() && rhs.isPositiveStrand()) retval = 1;
        }
        if (retval == 0) {
            retval = lhs.getName().compareTo(rhs.getName());
        }

        return retval;
    }
}
+ */ +public class AggregateFilter implements SamRecordFilter { + + private final List filters; + + /** + * Constructor + * @param filters the list of filters that this Aggregator applies + */ + public AggregateFilter(List filters) { + this.filters = filters; + } + + /** + * Determines whether a SAMRecord matches this filter + * + * @param record the SAMRecord to evaluate + * @return true if the SAMRecord matches at least one filter, otherwise false + */ + public boolean filterOut(SAMRecord record) { + for (SamRecordFilter filter : filters) { + if (filter.filterOut(record)) { + return true; + } + } + return false; + } +} diff --git a/lib/edu/mit/broad/picard/filter/FailsVendorReadQualityFilter.java b/lib/edu/mit/broad/picard/filter/FailsVendorReadQualityFilter.java new file mode 100644 index 0000000000..3e0c9bb3ff --- /dev/null +++ b/lib/edu/mit/broad/picard/filter/FailsVendorReadQualityFilter.java @@ -0,0 +1,28 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.filter; + +import edu.mit.broad.sam.SAMRecord; + +/** + * Filter for filtering out reads that do not pass the quality filter + */ +public class FailsVendorReadQualityFilter implements SamRecordFilter { + + /** + * Determines whether a SAMRecord matches this filter + * + * @param record the SAMRecord to evaluate + * @return true if the SAMRecord matches the filter, otherwise false + */ + public boolean filterOut(SAMRecord record) { + return record.getReadFailsVendorQualityCheckFlag(); + } +} diff --git a/lib/edu/mit/broad/picard/filter/FilteringIterator.java b/lib/edu/mit/broad/picard/filter/FilteringIterator.java new file mode 100644 index 0000000000..375036394c --- /dev/null +++ b/lib/edu/mit/broad/picard/filter/FilteringIterator.java @@ -0,0 +1,94 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.filter; + +import edu.mit.broad.sam.SAMRecord; +import edu.mit.broad.sam.util.CloseableIterator; +import edu.mit.broad.picard.util.CloserUtil; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +/** + * Filtering Iterator which takes a filter and an iterator and iterates + * through only those records which are not rejected by the filter. 
+ * + * @author Kathleen Tibbetts + */ +public class FilteringIterator implements CloseableIterator { + + private final Iterator iterator; + private final SamRecordFilter filter; + private SAMRecord next = null; + + /** + * Constructor + * + * @param iterator the backing iterator + * @param filter the filter (which may be a FilterAggregator) + */ + public FilteringIterator(Iterator iterator, SamRecordFilter filter) { + this.iterator = iterator; + this.filter = filter; + next = getNextRecord(); + } + + /** + * Returns true if the iteration has more elements. + * + * @return true if the iteration has more elements. Otherwise returns false. + */ + public boolean hasNext() { + return next != null; + } + + /** + * Returns the next element in the iteration. + * + * @return the next element in the iteration + * @throws java.util.NoSuchElementException + */ + public SAMRecord next() { + if (next == null) { + throw new NoSuchElementException("Iterator has no more elements."); + } + SAMRecord result = next; + next = getNextRecord(); + return result; + } + + /** + * Required method for Iterator API. 
+ * + * @throws UnsupportedOperationException + */ + public void remove() { + throw new UnsupportedOperationException("Remove() not supported by FilteringIterator"); + } + + public void close() { + CloserUtil.close(iterator); + } + + /** + * Gets the next record from the underlying iterator that passes the filter + * + * @return SAMRecord the next filter-passing record + */ + private SAMRecord getNextRecord() { + while (iterator.hasNext()) { + SAMRecord record = iterator.next(); + if (!filter.filterOut(record)) { + return next; + } + } + return null; + } +} diff --git a/lib/edu/mit/broad/picard/filter/SamRecordFilter.java b/lib/edu/mit/broad/picard/filter/SamRecordFilter.java new file mode 100644 index 0000000000..d8936ca8aa --- /dev/null +++ b/lib/edu/mit/broad/picard/filter/SamRecordFilter.java @@ -0,0 +1,26 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.filter; + +import edu.mit.broad.sam.SAMRecord; + +/** + * API for filtering SAMRecords + */ +public interface SamRecordFilter { + + /** + * Determines whether a SAMRecord matches this filter + * + * @param record the SAMRecord to evaluate + * @return true if the SAMRecord matches the filter, otherwise false + */ + public boolean filterOut(SAMRecord record); +} diff --git a/lib/edu/mit/broad/picard/filter/SolexaNoiseFilter.java b/lib/edu/mit/broad/picard/filter/SolexaNoiseFilter.java new file mode 100644 index 0000000000..9969ae2e3a --- /dev/null +++ b/lib/edu/mit/broad/picard/filter/SolexaNoiseFilter.java @@ -0,0 +1,37 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.filter; + +import edu.mit.broad.picard.util.SequenceUtil; +import edu.mit.broad.sam.SAMRecord; + +/** + * Filter to determine whether a read is "noisy" due to a poly-A run that is a sequencing artifact. + * Currently we filter out only reads that are composed entirely of As. 
+ */ +public class SolexaNoiseFilter implements SamRecordFilter { + + /** + * Determines whether a SAMRecord matches this filter + * + * @param record the SAMRecord to evaluate + * @return true if the SAMRecord matches the filter, otherwise false + */ + public boolean filterOut(SAMRecord record) { + byte sequence[] = record.getReadBases(); + for (byte base : sequence) { + if (base != 'A' && base != 'a' && + !SequenceUtil.isNoCall(base)) { + return false; + } + } + return true; + } +} diff --git a/lib/edu/mit/broad/picard/filter/TagFilter.java b/lib/edu/mit/broad/picard/filter/TagFilter.java new file mode 100644 index 0000000000..f35957ba09 --- /dev/null +++ b/lib/edu/mit/broad/picard/filter/TagFilter.java @@ -0,0 +1,56 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.filter; + +import edu.mit.broad.sam.SAMRecord; + +import java.util.List; +import java.util.Arrays; + +/** + * Filter class for matching tag attributes in SAMRecords + */ +public class TagFilter implements SamRecordFilter { + + private final String tag; // The key of the tag to match + private final List values; // The list of matching values + + /** + * Constructor for a single value + * + * @param tag the key of the tag to match + * @param value the value to match + */ + public TagFilter(String tag, Object value) { + this.tag = tag; + this.values = Arrays.asList(value); + } + + /** + * Constructor for multiple values + * + * @param tag the key of the tag to match + * @param values the matching values + */ + public TagFilter(String tag, List values) { + this.tag = tag; + this.values = values; + } + + /** + * Determines whether a SAMRecord matches this filter + * + * @param record the SAMRecord to evaluate + * @return true if the SAMRecord matches the filter, otherwise false + */ + public boolean filterOut(SAMRecord record) { + return values.contains(record.getAttribute(tag)); + } + } diff --git a/lib/edu/mit/broad/picard/genotype/GeliException.java b/lib/edu/mit/broad/picard/genotype/GeliException.java new file mode 100644 index 0000000000..5d6fed76c1 --- /dev/null +++ b/lib/edu/mit/broad/picard/genotype/GeliException.java @@ -0,0 +1,30 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ + +package edu.mit.broad.picard.genotype; + +import edu.mit.broad.picard.PicardException; + +/** + * Generic exception thrown by GELI format machinery. 
+ * + * @author Doug Voet + */ +public class GeliException extends PicardException { + + public GeliException(String message, Throwable throwable) { + super(message, throwable); + } + + public GeliException(String message) { + super(message); + } + +} diff --git a/lib/edu/mit/broad/picard/genotype/GeliFileConstants.java b/lib/edu/mit/broad/picard/genotype/GeliFileConstants.java new file mode 100644 index 0000000000..6f14962511 --- /dev/null +++ b/lib/edu/mit/broad/picard/genotype/GeliFileConstants.java @@ -0,0 +1,20 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ + +package edu.mit.broad.picard.genotype; + +/** + * Misc constants for GELI format + * + * @author Doug Voet + */ +public interface GeliFileConstants { + public static final byte[] GELI_MAGIC = "GELI".getBytes(); +} diff --git a/lib/edu/mit/broad/picard/genotype/GeliFileReader.java b/lib/edu/mit/broad/picard/genotype/GeliFileReader.java new file mode 100644 index 0000000000..de72b1639d --- /dev/null +++ b/lib/edu/mit/broad/picard/genotype/GeliFileReader.java @@ -0,0 +1,103 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. 
package edu.mit.broad.picard.genotype;


import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import edu.mit.broad.sam.SAMFileHeader;
import edu.mit.broad.sam.util.BlockCompressedInputStream;
import edu.mit.broad.sam.util.CloseableIterator;
import edu.mit.broad.sam.util.RuntimeIOException;


/**
 * Class for reading GELI (GEnotype LIkelihood) files.
 *
 * @author Doug Voet
 */
public class GeliFileReader implements Iterable<GenotypeLikelihoods>
{
    private ReaderImplementation mReader = null;

    /**
     * Internal interface for SAM/BAM file reader implementations.
     * Implemented as an abstract class to enforce better access control.
     */
    static abstract class ReaderImplementation {
        abstract SAMFileHeader getFileHeader();
        abstract CloseableIterator<GenotypeLikelihoods> getIterator();
        abstract void close();
    }


    /**
     * Opens a reader over an arbitrary stream. The stream remains owned by the
     * caller and is not closed on validation failure.
     * @throws GeliException if the stream is not block-compressed GELI data
     */
    public GeliFileReader(final InputStream stream) {
        try {
            final BufferedInputStream bufferedStream = toBufferedStream(stream);
            if (isValidGELIFile(bufferedStream)) {
                mReader = new GeliFileReaderImplementation(bufferedStream);
            } else {
                throw new GeliException("Unrecognized file format");
            }
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Opens a reader over a file. A temporary probe stream is used to validate the
     * format and is always closed — the original leaked it when isValidGELIFile
     * threw an IOException.
     * @throws GeliException if the file is not block-compressed GELI data
     */
    public GeliFileReader(final File file) {
        try {
            final boolean valid;
            final BufferedInputStream bufferedStream =
                new BufferedInputStream(new FileInputStream(file));
            try {
                valid = isValidGELIFile(bufferedStream);
            } finally {
                bufferedStream.close();
            }

            if (valid) {
                mReader = new GeliFileReaderImplementation(file);
            } else {
                throw new GeliException("Unrecognized file format");
            }
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /** Closes the underlying reader; safe to call more than once. */
    public void close() {
        if (mReader != null) {
            mReader.close();
        }
        mReader = null;
    }

    public SAMFileHeader getFileHeader() {
        return mReader.getFileHeader();
    }

    public CloseableIterator<GenotypeLikelihoods> iterator() {
        return mReader.getIterator();
    }

    // A valid GELI file is simply a valid block-compressed (BGZF) stream here;
    // the GELI magic is checked later when the header is read.
    private boolean isValidGELIFile(final InputStream stream)
        throws IOException {
        return BlockCompressedInputStream.isValidFile(stream);
    }

    private BufferedInputStream toBufferedStream(final InputStream stream) {
        if (stream instanceof BufferedInputStream) {
            return (BufferedInputStream) stream;
        } else {
            return new BufferedInputStream(stream);
        }
    }
}
+ */ +class GeliFileReaderImplementation extends GeliFileReader.ReaderImplementation { + + private boolean mIsSeekable = false; + private BinaryCodec mStream = null; + private final BlockCompressedInputStream mCompressedInputStream; + private SAMFileHeader mFileHeader = null; + private long mFirstRecordPointer = 0; + private CloseableIterator mCurrentIterator = null; + + + GeliFileReaderImplementation(final InputStream stream) + throws IOException { + mIsSeekable = false; + mCompressedInputStream = new BlockCompressedInputStream(stream); + mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); + readHeader(null); + } + + GeliFileReaderImplementation(final File file) + throws IOException { + mIsSeekable = true; + mCompressedInputStream = new BlockCompressedInputStream(file); + mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); + readHeader(file); + mFirstRecordPointer = mCompressedInputStream.getFilePointer(); + } + + void close() { + if (mStream != null) { + mStream.close(); + } + mStream = null; + mFileHeader = null; + } + + SAMFileHeader getFileHeader() { + return mFileHeader; + } + + CloseableIterator getIterator() { + if (mStream == null) { + throw new IllegalStateException("File reader is closed"); + } + if (mCurrentIterator != null) { + throw new IllegalStateException("Iteration in progress"); + } + if (mIsSeekable) { + try { + mCompressedInputStream.seek(mFirstRecordPointer); + } catch (IOException exc) { + throw new RuntimeException(exc.getMessage(), exc); + } + } + mCurrentIterator = new GELIFileIterator(); + return mCurrentIterator; + } + + private void readHeader(final File file) + throws IOException { + + final byte[] buffer = new byte[4]; + mStream.readBytes(buffer); + if (!Arrays.equals(buffer, GeliFileConstants.GELI_MAGIC)) { + throw new IOException("Invalid GELI file header"); + } + + final int headerTextLength = mStream.readInt(); + final String textHeader = mStream.readString(headerTextLength); + mFileHeader = 
new SAMTextHeaderCodec().decode(new StringLineReader(textHeader), + file); + + final int sequenceCount = mStream.readInt(); + if (sequenceCount != mFileHeader.getSequences().size()) { + throw new GeliException("Number of sequences in text header (" + mFileHeader.getSequences().size() + + ") != number of sequences in binary header (" + sequenceCount + ") for file " + file); + } + for (int i = 0; i < sequenceCount; i++) { + readSequenceRecord(file); +// final SAMSequenceRecord sequenceRecord = mFileHeader.getSequence(i); +// if (!sequenceRecord.getSequenceName().equals(binarySequenceRecord.getSequenceName())) { +// throw new GELIException("For sequence " + i + ", text and binary have different names in file " + +// file); +// } +// if (sequenceRecord.getSequenceLength() != binarySequenceRecord.getSequenceLength()) { +// throw new GELIException("For sequence " + i + ", text and binary have different lengths in file " + +// file); +// } + } + } + + private SAMSequenceRecord readSequenceRecord(final File file) { + final int nameLength = mStream.readInt(); + if (nameLength <= 1) { + throw new GeliException("Invalid BAM file header: missing sequence name in file " + file); + } + final String sequenceName = mStream.readString(nameLength - 1); + // Skip the null terminator + mStream.readByte(); + final int sequenceLength = mStream.readInt(); + final SAMSequenceRecord record = new SAMSequenceRecord(sequenceName); + record.setSequenceLength(sequenceLength); + return record; + } + + private class GELIFileIterator + implements CloseableIterator { + + private GenotypeLikelihoods mNextRecord = null; + private final GenotypeLikelihoodsCodec likelihoodsCodec = new GenotypeLikelihoodsCodec(); + + + GELIFileIterator() { + this(true); + } + + GELIFileIterator(final boolean advance) { + likelihoodsCodec.setInputStream(mStream.getInputStream()); + if (advance) { + advance(); + } + } + + public void close() { + if (this != mCurrentIterator) { + throw new IllegalStateException("Attempt to 
close non-current iterator"); + } + mCurrentIterator = null; + } + + public boolean hasNext() { + return (mNextRecord != null); + } + + public GenotypeLikelihoods next() { + final GenotypeLikelihoods result = mNextRecord; + advance(); + return result; + } + + public void remove() { + throw new UnsupportedOperationException("Not supported: remove"); + } + + void advance() { + try { + mNextRecord = getNextRecord(); + } catch (IOException exc) { + throw new RuntimeException(exc.getMessage(), exc); + } + } + + GenotypeLikelihoods getNextRecord() + throws IOException { + return likelihoodsCodec.decode(); + } + } +} diff --git a/lib/edu/mit/broad/picard/genotype/GeliFileWriter.java b/lib/edu/mit/broad/picard/genotype/GeliFileWriter.java new file mode 100644 index 0000000000..84196b2392 --- /dev/null +++ b/lib/edu/mit/broad/picard/genotype/GeliFileWriter.java @@ -0,0 +1,168 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.genotype; + +import java.io.DataOutputStream; +import java.io.File; +import java.io.StringWriter; + +import edu.mit.broad.picard.genotype.GenotypeLikelihoods.GenotypeLikelihoodsComparator; +import edu.mit.broad.sam.SAMFileHeader; +import edu.mit.broad.sam.SAMSequenceRecord; +import edu.mit.broad.sam.SAMTextHeaderCodec; +import edu.mit.broad.sam.SAMFileHeader.SortOrder; +import edu.mit.broad.sam.util.BinaryCodec; +import edu.mit.broad.sam.util.BlockCompressedOutputStream; +import edu.mit.broad.sam.util.SortingCollection; + +/** + * Class for writing GELI (GEnotype LIkelihood) files. 
+ */ +public class GeliFileWriter { + private static final int MAX_RECORDS_IN_RAM = 1000000; + private SAMFileHeader.SortOrder sortOrder = SortOrder.coordinate; + private SAMFileHeader header; + private SortingCollection likelihoodsSorter; + + // These two fields are for validating presorted records. + private GenotypeLikelihoods prevLikelihoods; + private GenotypeLikelihoodsComparator presortedComparator; + + // If true, records passed to addAlignment are already in the order specified by sortOrder + private boolean presorted; + protected final BinaryCodec outputBinaryCodec; + private GenotypeLikelihoodsCodec genotypeLikelihoodsCodec = null; + + public GeliFileWriter(final File path) { + this(path, false); + } + + public GeliFileWriter(final File path, boolean presorted) { + outputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(path))); + outputBinaryCodec.setOutputFileName(path.toString()); + this.presorted = presorted; + } + + /** + * Must be called before addAlignment. 
+ * @param header + */ + public void setHeader(final SAMFileHeader header) + { + this.header = header; + header.setSortOrder(sortOrder); + final StringWriter headerTextBuffer = new StringWriter(); + new SAMTextHeaderCodec().encode(headerTextBuffer, header); + final String headerText = headerTextBuffer.toString(); + + writeHeader(headerText); + + if (presorted) { + presortedComparator = makeComparator(); + } else if (!sortOrder.equals(SAMFileHeader.SortOrder.unsorted)) { + likelihoodsSorter = SortingCollection.newInstance(GenotypeLikelihoods.class, + new GenotypeLikelihoodsCodec(), makeComparator(), MAX_RECORDS_IN_RAM); + } + } + + protected SAMFileHeader getHeader() { + return header; + } + + private GenotypeLikelihoodsComparator makeComparator() { + return new GenotypeLikelihoodsComparator(); + } + + public void addGenotypeLikelihoods(GenotypeLikelihoods genotypeLikelihoods) + { + if (presorted) { + assertPresorted(genotypeLikelihoods); + writeGenotypeLikelihoods(genotypeLikelihoods); + } else { + likelihoodsSorter.add(genotypeLikelihoods); + } + } + + private void assertPresorted(final GenotypeLikelihoods genotypeLikelihoods) { + if (prevLikelihoods != null) { + if (presortedComparator.compare(prevLikelihoods, genotypeLikelihoods) > 0) { + throw new IllegalArgumentException("GenotypeLikelihoods added out of order in GELIFileWriterImpl.addGenotypeLikelihoods for " + + getFilename() + ". Sort order is " + this.sortOrder + ". 
Offending records are at [" + + prevLikelihoods.getReferenceIndex() + ":" + prevLikelihoods.getPosition() + "] and [" + + genotypeLikelihoods.getReferenceIndex() + ":" + genotypeLikelihoods.getPosition() + "]"); + } + } + prevLikelihoods = genotypeLikelihoods; + } + + public final void close() + { + if (likelihoodsSorter != null) { + for (final GenotypeLikelihoods genotypeLikelihoods : likelihoodsSorter) { + writeGenotypeLikelihoods(genotypeLikelihoods); + } + likelihoodsSorter.cleanup(); + } + finish(); + } + + private void prepareToWriteAlignments() { + if (genotypeLikelihoodsCodec == null) { + genotypeLikelihoodsCodec = new GenotypeLikelihoodsCodec(); + genotypeLikelihoodsCodec.setOutputStream(outputBinaryCodec.getOutputStream()); + } + } + + /** + * Writes the record to disk. Sort order has been taken care of by the time + * this method is called. + * @param alignment + */ + protected void writeGenotypeLikelihoods(GenotypeLikelihoods genotypeLikelihoods) { + prepareToWriteAlignments(); + genotypeLikelihoodsCodec.encode(genotypeLikelihoods); + } + + /** + * Write the header to disk. Header object is available via getHeader(). + * @param textHeader for convenience if the implementation needs it. + */ + protected void writeHeader(final String textHeader) { + outputBinaryCodec.writeBytes(GeliFileConstants.GELI_MAGIC); + + // calculate and write the length of the SAM file header text and the header text + outputBinaryCodec.writeInt(textHeader.length()); + outputBinaryCodec.writeBytes(textHeader.getBytes()); + + // write the sequences binarily. 
This is redundant with the text header + outputBinaryCodec.writeInt(getHeader().getSequences().size()); + for (final SAMSequenceRecord sequenceRecord: getHeader().getSequences()) { + outputBinaryCodec.writeInt(sequenceRecord.getSequenceName().length() + 1); + outputBinaryCodec.writeBytes(sequenceRecord.getSequenceName().getBytes()); + outputBinaryCodec.writeByte(0); + outputBinaryCodec.writeInt(sequenceRecord.getSequenceLength()); + } + } + + /** + * Do any required flushing here. + */ + protected void finish() { + outputBinaryCodec.close(); + } + + /** + * For producing error messages. + * @return Output filename, or null if there isn't one. + */ + protected String getFilename() { + return outputBinaryCodec.getOutputFileName(); + } +} diff --git a/lib/edu/mit/broad/picard/genotype/GenotypeLikelihoods.java b/lib/edu/mit/broad/picard/genotype/GenotypeLikelihoods.java new file mode 100644 index 0000000000..d19a637c44 --- /dev/null +++ b/lib/edu/mit/broad/picard/genotype/GenotypeLikelihoods.java @@ -0,0 +1,164 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ + +package edu.mit.broad.picard.genotype; + +import java.util.Arrays; +import java.util.Comparator; + +/** + * Data object for Genotype Likelihoods. One object represents one row in a GELI file. 
+ * + * @author Doug Voet + */ +public class GenotypeLikelihoods { + /** this is a guess at how much memory an instance of this object occupies */ + public static final int OBJECT_SIZE_BYTES = 150; + + public static final int AA_GENOTYPE = 0; + public static final int AC_GENOTYPE = 1; + public static final int AG_GENOTYPE = 2; + public static final int AT_GENOTYPE = 3; + public static final int CC_GENOTYPE = 4; + public static final int CG_GENOTYPE = 5; + public static final int CT_GENOTYPE = 6; + public static final int GG_GENOTYPE = 7; + public static final int GT_GENOTYPE = 8; + public static final int TT_GENOTYPE = 9; + + private static final char[][] GENOTYPES = { + "AA".toCharArray(), + "AC".toCharArray(), + "AG".toCharArray(), + "AT".toCharArray(), + "CC".toCharArray(), + "CG".toCharArray(), + "CT".toCharArray(), + "GG".toCharArray(), + "GT".toCharArray(), + "TT".toCharArray() + }; + + /** compares first by reference index then by position */ + public static class GenotypeLikelihoodsComparator implements Comparator { + @Override + public int compare(GenotypeLikelihoods thing1, GenotypeLikelihoods thing2) { + long refCompare = thing1.referenceIndex - thing2.referenceIndex; + if (refCompare == 0) { + long posCompare = thing1.position - thing2.position; + return (int) posCompare; + } else { + return (int) refCompare; + } + } + } + + + private long referenceIndex; + private long position; + private byte referenceBase; + private int numReads; + private short maxMappingQuality; + private float[] likelihoods = new float[10]; + private byte bestLikelihoodIndex = -1; // stored as byte to reduce memory footprint + private byte secondBestLikelihoodIndex = -1; // stored as byte to reduce memory footprint + + public static int getLikelihoodIndex(char[] genotype) { + char first = Character.isLowerCase(genotype[0]) ? Character.toUpperCase(genotype[0]) : genotype[0]; + char second = Character.isLowerCase(genotype[1]) ? 
Character.toUpperCase(genotype[1]) : genotype[1]; + if (first > second) { + char temp = first; + first = second; + second = temp; + } + for (int i=0; i>> 32)); + result = prime * result + referenceBase; + result = prime * result + (int) (referenceIndex ^ (referenceIndex >>> 32)); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + GenotypeLikelihoods other = (GenotypeLikelihoods) obj; + if (!Arrays.equals(likelihoods, other.likelihoods)) + return false; + if (maxMappingQuality != other.maxMappingQuality) + return false; + if (numReads != other.numReads) + return false; + if (position != other.position) + return false; + if (referenceBase != other.referenceBase) + return false; + if (referenceIndex != other.referenceIndex) + return false; + return true; + } + + public long getReferenceIndex() { return referenceIndex; } + public void setReferenceIndex(long sequenceIndex) { this.referenceIndex = sequenceIndex; } + public long getPosition() { return position; } + public void setPosition(long position) { this.position = position; } + public byte getReferenceBase() { return referenceBase; } + public void setReferenceBase(byte referenceBase) { this.referenceBase = referenceBase; } + public int getNumReads() { return numReads; } + public void setNumReads(int numReads) { this.numReads = numReads; } + public short getMaxMappingQuality() { return maxMappingQuality; } + public void setMaxMappingQuality(short maxMappingQuality) { this.maxMappingQuality = maxMappingQuality; } + float[] getLikelihoods() { return likelihoods; } + public int getBestLikelihoodIndex() { return bestLikelihoodIndex; } + public void setBestLikelihoodIndex(int bestLikelihoodIndex) { this.bestLikelihoodIndex = (byte) bestLikelihoodIndex; } + public int getSecondBestLikelihoodIndex() { return secondBestLikelihoodIndex; } + public void 
setSecondBestLikelihoodIndex(int secondBestLikelihoodIndex) { this.secondBestLikelihoodIndex = (byte) secondBestLikelihoodIndex; } +} diff --git a/lib/edu/mit/broad/picard/genotype/GenotypeLikelihoodsCodec.java b/lib/edu/mit/broad/picard/genotype/GenotypeLikelihoodsCodec.java new file mode 100644 index 0000000000..aa06799415 --- /dev/null +++ b/lib/edu/mit/broad/picard/genotype/GenotypeLikelihoodsCodec.java @@ -0,0 +1,126 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.genotype; + +import java.io.InputStream; +import java.io.OutputStream; + +import edu.mit.broad.sam.util.BinaryCodec; +import edu.mit.broad.sam.util.RuntimeEOFException; +import edu.mit.broad.sam.util.SortingCollection; + +public class GenotypeLikelihoodsCodec implements SortingCollection.Codec { + private static final int SIG_FIG_MULTIPLIER = 100; + private static final short BLOCK_SIZE = 12 + 10 * 4; + + private OutputStream os; + private InputStream is; + private BinaryCodec binaryCodec; + + /** Returns a new genotype likelihood codec. */ + public SortingCollection.Codec clone() { + return new GenotypeLikelihoodsCodec(); + } + + /** + * Write object to OutputStream. 
+ * + * @param genotypeLikelihoods what to write + */ + public void encode(final GenotypeLikelihoods genotypeLikelihoods) { + this.binaryCodec.writeShort(BLOCK_SIZE); + this.binaryCodec.writeUInt(genotypeLikelihoods.getReferenceIndex()); + this.binaryCodec.writeUInt(genotypeLikelihoods.getPosition()); + this.binaryCodec.writeByte(genotypeLikelihoods.getReferenceBase()); + this.binaryCodec.writeUShort(genotypeLikelihoods.getNumReads()); + this.binaryCodec.writeByte(genotypeLikelihoods.getMaxMappingQuality()); + + for (int i = 0; i < genotypeLikelihoods.getLikelihoods().length; i++) { + writeLikelihood(genotypeLikelihoods.getLikelihoods()[i]); + } + } + + /** + * Read the next record from the input stream and convert into a java object. + * + * @return null if no more records. Should throw exception if EOF is encountered in the middle of + * a record. + */ + public GenotypeLikelihoods decode() { + int recordLength = 0; + try { + recordLength = this.binaryCodec.readShort(); + } catch (RuntimeEOFException e) { + return null; + } + if (recordLength != BLOCK_SIZE) { + throw new GeliException("Invalid record length: " + recordLength); + } + + final GenotypeLikelihoods genotypeLikelihoods = new GenotypeLikelihoods(); + genotypeLikelihoods.setReferenceIndex(this.binaryCodec.readUInt()); + genotypeLikelihoods.setPosition(this.binaryCodec.readUInt()); + genotypeLikelihoods.setReferenceBase(this.binaryCodec.readByte()); + genotypeLikelihoods.setNumReads(this.binaryCodec.readUShort()); + genotypeLikelihoods.setMaxMappingQuality(this.binaryCodec.readByte()); + + int bestIndex = -1; + int secondBestIndex = -1; + for (int i = 0; i < genotypeLikelihoods.getLikelihoods().length; i++) { + float likelihood = readLikelihood(); + genotypeLikelihoods.getLikelihoods()[i] = likelihood; + + if (bestIndex == -1 || genotypeLikelihoods.getLikelihood(bestIndex) < likelihood) { + secondBestIndex = bestIndex; + bestIndex = i; + } else if (secondBestIndex == -1 || 
genotypeLikelihoods.getLikelihood(secondBestIndex) < likelihood) { + secondBestIndex = i; + } + } + genotypeLikelihoods.setBestLikelihoodIndex(bestIndex); + genotypeLikelihoods.setSecondBestLikelihoodIndex(secondBestIndex); + + return genotypeLikelihoods; + } + + /** + * Where to write encoded output + * + * @param os + */ + public void setOutputStream(final OutputStream os) { + this.os = os; + this.binaryCodec = new BinaryCodec(os); + } + + /** + * Where to read encoded input from + * + * @param is + */ + public void setInputStream(final InputStream is) { + this.is = is; + this.binaryCodec = new BinaryCodec(is); + } + + private void writeLikelihood(float likelihood) { + float shiftedLikelihood = likelihood * SIG_FIG_MULTIPLIER; + this.binaryCodec.writeInt((int) Math.round(shiftedLikelihood)); + } + + /** + * @return + */ + private float readLikelihood() { + float likelihood = (float) this.binaryCodec.readInt() / SIG_FIG_MULTIPLIER; + return likelihood; + } + +} diff --git a/lib/edu/mit/broad/picard/genotype/caller/AbstractAlleleCaller.java b/lib/edu/mit/broad/picard/genotype/caller/AbstractAlleleCaller.java new file mode 100644 index 0000000000..3893e7bd1f --- /dev/null +++ b/lib/edu/mit/broad/picard/genotype/caller/AbstractAlleleCaller.java @@ -0,0 +1,192 @@ +package edu.mit.broad.picard.genotype.caller; + +import edu.mit.broad.picard.sam.SamLocusIterator; +import edu.mit.broad.sam.SAMFileHeader; +import edu.mit.broad.picard.reference.ReferenceSequenceFile; +import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory; +import edu.mit.broad.picard.reference.ReferenceSequence; +import edu.mit.broad.picard.PicardException; + +import java.io.IOException; +import java.io.BufferedWriter; +import java.io.File; +import java.util.SortedSet; +import java.util.List; + +/** + * Base class for AlleleCallers. 
Handles efficient access to the reference, output of data to a + * standard file format, and application of priors + */ +public abstract class AbstractAlleleCaller { + // writer for output + private final BufferedWriter writer; + + // for providing access to reference data + private final ReferenceSequenceFile referenceSequenceFile; + private final SAMFileHeader samHeader; + private ReferenceSequence referenceSequence; + + public AbstractAlleleCaller(final File reference, final SAMFileHeader samHeader, final BufferedWriter writer) { + this.writer = writer; + this.referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(reference); + this.samHeader = samHeader; + } + + + /** + * emit allele calls to the writer specified in the constructor + * + * @param li Locus to call + */ + public void callAlleles(final SamLocusIterator.LocusInfo li) throws IOException { + + + cacheReferenceSequence(li.getSequenceIndex()); + + final char ref = Character.toUpperCase((char)(referenceSequence.getBases()[li.getPosition() - 1] & 0xff)); + + + // delegate to the specific implementation + final SortedSet likelihoods = call(ref, li.getBasesAsString(), li.getQualities()); + + + final GenotypeTheory bestTheory = likelihoods.first(); + GenotypeTheory nextBestTheory = null; + GenotypeTheory refTheory = null; + final String refString = new String(new char[]{ref,ref}); + final DiploidGenotype refGenotype = DiploidGenotype.valueOf(refString); + + + final StringBuilder theoryString = new StringBuilder(); + int k=0; + for(final GenotypeTheory t : likelihoods) { + if (k == 1) { nextBestTheory = t; } + if (t.getGenotype() == refGenotype) { refTheory = t; } + + theoryString.append(t.getGenotype()) + .append(":") + .append(String.format("%.2f",t.getLikelihood())) + .append(" "); + k++; + } + + final double btnb = bestTheory.getLikelihood() - nextBestTheory.getLikelihood(); + final double btr = bestTheory.getLikelihood() - refTheory.getLikelihood(); + + final DiploidGenotype gt = 
likelihoods.first().getGenotype(); + + final String type; + if (!gt.isHet() && gt.getAllele1() == ref) { + type = "homozygous"; + } else if (!gt.isHet() && gt.getAllele1() != ref) { + type = "homozygous-SNP"; + } else { + type = "heterozygous-SNP"; + } + + final String bases = li.getBasesAsString(); + int a = 0,c = 0,g = 0,t = 0; + for(int i=0; i= the arg in the previous + * call to this method. + */ + private void cacheReferenceSequence(int sequenceIndex) { + if (referenceSequence != null && referenceSequence.getContigIndex() == sequenceIndex) { + return; + } + referenceSequence = null; + for(referenceSequence = referenceSequenceFile.nextSequence(); + referenceSequence != null; + referenceSequence = referenceSequenceFile.nextSequence()) { + // Sanity check the sequence names against the sequence dictionary while scanning through. + if (!referenceSequence.getName().equals(samHeader.getSequence(referenceSequence.getContigIndex()).getSequenceName())) { + throw new PicardException("Sequence name mismatch at sequence index " + referenceSequence.getContigIndex() + + ": " + referenceSequence.getName() + " != " + + samHeader.getSequence(referenceSequence.getContigIndex()).getSequenceName()); + } + if (referenceSequence.getContigIndex() == sequenceIndex) { + break; + } + if (referenceSequence.getContigIndex() > sequenceIndex) { + throw new PicardException("Never found reference sequence with index " + sequenceIndex); + } + } + if (referenceSequence == null) { + throw new PicardException("Reference sequence with index " + sequenceIndex + " was not found"); + } + } + + /** + * Override this to implement a concrete genotype caller + * @param ref the reference base + * @param bases each element in the String is the base at current locus for a given read + * @param quals same length as bases. the ith element corresponds to the ith element of bases. 
+ * @return + */ + abstract protected SortedSet call(char ref, String bases, List quals); + + + /** + * Apply a general population-based prior to the likelihood: + *
+ * <ul>
+ * <li>ref is .999</li>
+ * <li>het is 10^-3</li>
+ * <li>homozygous, non-reference is 10^-5</li>
+ * </ul>
  • + * + * @param ref reference allele + * @return prior, given the reference and genotype alleles + */ + protected double getPrior(final char ref, final DiploidGenotype gt) { + final double prior; + if (gt.isHom() && gt.getAllele1() == ref) { + prior = 0.999; // reference + } else { + if (gt.getAllele1() != ref && gt.getAllele2() != ref) { + prior = 0.00001; // neither base is reference + } else { + prior = 0.001; // het, one base is reference + } + } + return prior; + } + + // -------------------------------------------------------------------------------------------- + // Helper methods below this point... + // -------------------------------------------------------------------------------------------- + + + public boolean isHet(final String alleles) { + return (alleles.charAt(0) != (alleles.charAt(1))); + } + + +} diff --git a/lib/edu/mit/broad/picard/genotype/caller/CallGenotypes.java b/lib/edu/mit/broad/picard/genotype/caller/CallGenotypes.java new file mode 100644 index 0000000000..06b5a42007 --- /dev/null +++ b/lib/edu/mit/broad/picard/genotype/caller/CallGenotypes.java @@ -0,0 +1,93 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.genotype.caller; + +import edu.mit.broad.picard.cmdline.CommandLineProgram; +import edu.mit.broad.picard.cmdline.Option; +import edu.mit.broad.picard.cmdline.Usage; +import edu.mit.broad.picard.directed.GenomeMaskFactory; +import edu.mit.broad.sam.SAMFileHeader; +import edu.mit.broad.sam.SAMFileReader; +import edu.mit.broad.picard.sam.SamLocusIterator; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; + +/** + * Call genotypes given a SAM file of aligned reads, reference sequences, and optionally a target map. + */ +public class CallGenotypes extends CommandLineProgram { + // Usage and parameters + @Usage(programVersion="1.0") public String USAGE = "Basic Allele Caller\n"; + @Option(shortName="I", doc="SAM or BAM file for calling") public File INPUT_FILE; + @Option(shortName="O", doc="Allele Call output GELI file") public File OUTPUT_FILE; + @Option(shortName="R", doc="Reference fasta or fasta.gz file") public File REF_FILE; + @Option(shortName="T", doc="IntervalList-format target map file", optional = true) public File TARGET_FILE; + @Option(shortName="Q", doc="Minimum quality score threshold to use in allele calling", optional = true) public Integer QUAL_SCORE_THRESHOLD; + + + /** Required main method implementation. */ + public static void main(final String[] argv) { + System.exit(new CallGenotypes().instanceMain(argv)); + } + + + protected int doWork() { + try { + final BufferedWriter writer = new BufferedWriter(new FileWriter(OUTPUT_FILE)); + + final SAMFileReader samReader = getSamReader(INPUT_FILE); + + // TODO -- parameterize, or create separate executables... 
+ // AbstractAlleleCaller caller = new FlatQualityAlleleCaller(reference, writer); + final AbstractAlleleCaller caller = new QualityScoreAlleleCaller(REF_FILE, samReader.getFileHeader(), writer); + final long startTime = System.currentTimeMillis(); + + final SamLocusIterator sli = new SamLocusIterator(samReader.iterator()); + + if (TARGET_FILE != null) { + sli.setGenomeMask(new GenomeMaskFactory().makeGenomeMaskFromIntervalList(TARGET_FILE)); + } + + if (QUAL_SCORE_THRESHOLD != null) { + System.out.println("Masking out bases with < Q"+QUAL_SCORE_THRESHOLD); + sli.setQualityScoreCutoff(QUAL_SCORE_THRESHOLD); + } + + for (final SamLocusIterator.LocusInfo li : sli) { + if (li != null) caller.callAlleles(li); + } + + final long elapsed = System.currentTimeMillis() - startTime; + System.out.println("Completed in " + elapsed + "ms"); + + writer.flush(); + writer.close(); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + return 0; + } + + private SAMFileReader getSamReader(final File samFile) { + final SAMFileReader samReader = new SAMFileReader(samFile); + + // ensure the file is sorted + if (samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) { + System.out.println("SAM Files must be coordinate-sorted, this is " + samReader.getFileHeader().getSortOrder()); + System.exit(1); + } + + return samReader; + } + +} \ No newline at end of file diff --git a/lib/edu/mit/broad/picard/genotype/caller/DiploidGenotype.java b/lib/edu/mit/broad/picard/genotype/caller/DiploidGenotype.java new file mode 100644 index 0000000000..1d9e01f583 --- /dev/null +++ b/lib/edu/mit/broad/picard/genotype/caller/DiploidGenotype.java @@ -0,0 +1,27 @@ +package edu.mit.broad.picard.genotype.caller; + +public enum DiploidGenotype { + AA('A','A'), + AC('A','C'), + AG('A','G'), + AT('A','T'), + CC('C','C'), + CG('C','G'), + CT('C','T'), + GG('G','G'), + GT('G','T'), + TT('T','T'); + + private final char allele1; + private final char allele2; + + private 
DiploidGenotype(final char allele1, final char allele2) { + this.allele1 = allele1; + this.allele2 = allele2; + } + + public char getAllele1() { return allele1; } + public char getAllele2() { return allele2; } + public boolean isHet() { return this.allele1 != this.allele2; } + public boolean isHom() { return this.allele1 == this.allele2; } +} diff --git a/lib/edu/mit/broad/picard/genotype/caller/FlatQualityAlleleCaller.java b/lib/edu/mit/broad/picard/genotype/caller/FlatQualityAlleleCaller.java new file mode 100644 index 0000000000..c437a911ee --- /dev/null +++ b/lib/edu/mit/broad/picard/genotype/caller/FlatQualityAlleleCaller.java @@ -0,0 +1,76 @@ +package edu.mit.broad.picard.genotype.caller; + +import edu.mit.broad.sam.SAMFileHeader; + +import java.io.IOException; +import java.io.BufferedWriter; +import java.io.File; +import java.util.*; +import static java.lang.Math.*; + + +/** + * Bayesian-based allele caller using flat qualities and a 1e-3 error rate, based on CRD algorithm + */ +public class FlatQualityAlleleCaller extends AbstractAlleleCaller { + + public FlatQualityAlleleCaller(final File fastbReference, SAMFileHeader samHeader, final BufferedWriter writer) { + super(fastbReference, samHeader, writer); + } + + + protected SortedSet call(final char ref, final String bases, final List quals) { + final float eps = 1e-3f; + + // count up the base by nucleotide and put them into a map + final int depth = bases.length(); + int a = 0,c = 0,g = 0,t = 0; + for(int i=0; i< bases.length(); i++) { + if (bases.charAt(i) == 'A') { a++; } + else if (bases.charAt(i) == 'C') { c++; } + else if (bases.charAt(i) == 'G') { g++; } + else if (bases.charAt(i) == 'T') { t++; } + else { throw new RuntimeException("Unknown Base " + bases.charAt(i)); } + } + + final Map counts = new HashMap(); + counts.put('A', a); + counts.put('C', c); + counts.put('G', g); + counts.put('T', t); + + + // for each of the 10 theories, calculate the likelihood + final SortedSet results = new 
TreeSet(); + for(final DiploidGenotype theory : DiploidGenotype.values()) { + final double likelihood; + final char allele1 = theory.getAllele1(); + final char allele2 = theory.getAllele2(); + + if (!theory.isHet()) { + likelihood = log10(1-eps)*counts.get(allele1) + log10(eps)*(depth - counts.get(allele1)); + } else { + final int major_allele_counts; + final int minor_allele_counts; + if (counts.get(allele1) > counts.get(allele2)) { + major_allele_counts = counts.get(allele1); + minor_allele_counts = counts.get(allele2); + } else { + major_allele_counts = counts.get(allele2); + minor_allele_counts = counts.get(allele1); + } + + likelihood = log10(0.5 - (eps/2.0) )*major_allele_counts + + log10(0.5 - (eps/2.0) )*minor_allele_counts + + log10(eps)*(depth - major_allele_counts - minor_allele_counts); + } + + final double prior = getPrior(ref, theory); + results.add(new GenotypeTheory(theory, likelihood + log10(prior))); + } + + + return results; + + } +} diff --git a/lib/edu/mit/broad/picard/genotype/caller/GenotypeTheory.java b/lib/edu/mit/broad/picard/genotype/caller/GenotypeTheory.java new file mode 100644 index 0000000000..a97e83a972 --- /dev/null +++ b/lib/edu/mit/broad/picard/genotype/caller/GenotypeTheory.java @@ -0,0 +1,46 @@ +package edu.mit.broad.picard.genotype.caller; + +/** + * Datastructure to hold a single genotype along with a likelihood. 
+ */ +public class GenotypeTheory implements Comparable { + private DiploidGenotype genotype; + private double likelihood; + + public GenotypeTheory(final DiploidGenotype genotype, final double likelihood) { + this.genotype = genotype; + this.likelihood = likelihood; + } + + public DiploidGenotype getGenotype() { + return genotype; + } + + public void setGenotype(final DiploidGenotype genotype) { + this.genotype = genotype; + } + + public double getLikelihood() { + return likelihood; + } + + public void setLikelihood(final double likelihood) { + this.likelihood = likelihood; + } + + /** + * Genotype Theories are sorted first by descending likelihood (ie + * the GenotypeTheory with biggest likelihood comes first). Ties are + * broken by lexical sorting of the genotypes themselves + * + */ + public int compareTo(final GenotypeTheory other) { + if (this.getLikelihood() == other.getLikelihood()) { + return this.getGenotype().compareTo(other.getGenotype()); + } else if (this.getLikelihood() > other.getLikelihood()) { + return -1; + } else { + return 1; + } + } +} diff --git a/lib/edu/mit/broad/picard/genotype/caller/QualityScoreAlleleCaller.java b/lib/edu/mit/broad/picard/genotype/caller/QualityScoreAlleleCaller.java new file mode 100644 index 0000000000..f9863546d2 --- /dev/null +++ b/lib/edu/mit/broad/picard/genotype/caller/QualityScoreAlleleCaller.java @@ -0,0 +1,82 @@ +package edu.mit.broad.picard.genotype.caller; + +import edu.mit.broad.sam.SAMFileHeader; + +import java.util.*; +import static java.lang.Math.log10; +import static java.lang.Math.pow; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.File; + +/** + * Bayesian-based allele caller using quality scores, based on CRD algorithm + */ +public class QualityScoreAlleleCaller extends AbstractAlleleCaller { + + public QualityScoreAlleleCaller(final File fastbReference, SAMFileHeader samHeader, final BufferedWriter writer) { + super(fastbReference, samHeader, writer); + } + + protected 
SortedSet call(final char ref, final String bases, final List quals) { + + // for each of the 10 theories, calculate the likelihood using quality scores + final SortedSet results = new TreeSet(); + for(final DiploidGenotype theory : DiploidGenotype.values()) { + double likelihood = 0; + + for(int i=0; i, Iterable, Closeable { + + private final File bustardDirectory; + private final int lane; + private final boolean pairedEnd; + private PasteParser parser; + private BustardReadData next = null; + private final FormatUtil formatter = new FormatUtil(); + private boolean iterating = false; + + /** + * Constructor + * + * @param bustardDirectory directory where the Bustard files can be located + * @param lane the lane to parse + * @param pairedEnd whether this is a paired-end run + */ + public BustardFileParser(File bustardDirectory, int lane, boolean pairedEnd) { + this.bustardDirectory = bustardDirectory; + this.lane = lane; + this.pairedEnd = pairedEnd; + initialize(); + } + + /** + * Finds the relevant files in the bustardDirectory, sorts them, and puts them into the + * sortedFiles iterator. Does some basic sanity checking to ensure that some files + * are found and that they are the expected multiple for paired-end or not. 
+ * + */ + private void initialize() + { + final String qseq1Regex = "s_" + lane + "_1_\\d{4}_qseq.txt(.gz)?"; + final String qseq2Regex = "s_" + lane + "_2_\\d{4}_qseq.txt(.gz)?"; + final String intensityRegex = "s_" + lane + "_\\d{4}_sig2.txt(.gz)?"; + + File read1files[] = bustardDirectory.listFiles( new FilenameFilter() { + public boolean accept(File dir, String name) { + return name.matches(qseq1Regex); + } + }); + + File read2files[] = bustardDirectory.listFiles( new FilenameFilter() { + public boolean accept(File dir, String name) { + return name.matches(qseq2Regex); + } + }); + + File intensityFiles[] = bustardDirectory.listFiles( new FilenameFilter() { + public boolean accept(File dir, String name) { + return name.matches(intensityRegex); + } + }); + + // Some basic sanity checking on file counts + if (read1files.length == 0 && read2files.length == 0 && intensityFiles.length == 0) { + throw new PicardException("No Bustard files found in " + + bustardDirectory.getAbsolutePath() + " for lane " + lane); + } + if (pairedEnd) { + if (read1files.length != read2files.length || read2files.length != intensityFiles.length) { + throw new PicardException("Incorrect number of Bustard files found in " + + bustardDirectory.getAbsolutePath() + " for lane " + lane + ". Found " + + read1files.length + " read 1 qseq files, " + read2files.length + " read 2 " + + "qseq files, and " + intensityFiles.length + " sig2 files. There should be " + + "the same number of each type of file"); + } + } + else { + if (read1files.length != intensityFiles.length) { + throw new PicardException("Incorrect number of Bustard files found in " + + bustardDirectory.getAbsolutePath() + " for lane " + lane + ". Found " + + read1files.length + " qseq files and " + intensityFiles.length + " sig2 files, " + + "which should be equal."); + } + if (read2files.length > 0) { + throw new PicardException("Read 2 Bustard files found in " + + bustardDirectory.getAbsolutePath() + " for lane " + lane + ". 
Lane " + + " was specified as a non-PE run, and so should not have any read 2 data."); + } + } + + // Sort each set of reads and create a text parser for it + SortedSet sortedRead1 = new TreeSet(new BustardFilenameComparator()); + sortedRead1.addAll(Arrays.asList(read1files)); + read1files = sortedRead1.toArray(read1files); + BasicTextFileParser read1Parser = new BasicTextFileParser(true, read1files); + + SortedSet sortedIntensity = new TreeSet(new BustardFilenameComparator()); + sortedIntensity.addAll(Arrays.asList(intensityFiles)); + intensityFiles = sortedIntensity.toArray(intensityFiles); + BasicTextFileParser intensityParser = new BasicTextFileParser(true, intensityFiles); + + // And create a paste parser for all of them + if (pairedEnd) { + SortedSet sortedRead2 = new TreeSet(new BustardFilenameComparator()); + sortedRead2.addAll(Arrays.asList(read2files)); + read2files = sortedRead2.toArray(read2files); + BasicTextFileParser read2Parser = new BasicTextFileParser(true, read2files); + + parser = new PasteParser(read1Parser, read2Parser, intensityParser); + } + else { + parser = new PasteParser(read1Parser, intensityParser); + } + } + + /** + * Parses the next line from the parser and constructs a BustardReadData object from it + * The first 11 fields are the read1 data, the second 11 are the read2 data, and the remaining + * values are the intensities data. Note that the first four values in the intensity file + * are not intensities but rather lane, tiles, x, and y for the given cluster. 
+ * + * @param validate whether to check that the expected number of intensity values are returned + * @return a fully populated BustardReadData object + */ + private BustardReadData readNext(boolean validate) { + if (!parser.hasNext()) { + return null; + } + String data[][] = parser.next(); + String machine = data[0][0]; + int run = formatter.parseInt(data[0][1]); + int lane = formatter.parseInt(data[0][2]); + int tile = formatter.parseInt(data[0][3]); + int x = formatter.parseInt(data[0][4]); + int y = formatter.parseInt(data[0][5]); + String firstSeq = data[0][8]; + String firstQual = data[0][9]; + boolean pf = formatter.parseInt(data[0][10]) == 1; + String secondSeq = null; + String secondQual = null; + + int intensityIndex = 1; + if (pairedEnd) { + secondSeq = data[1][8]; + secondQual = data[1][9]; + intensityIndex = 2; + } + + int numIntensities = firstSeq.length() * (pairedEnd ? 2 : 1); + + // Sanity check since some of those files look a little weird + if (validate) { + int remaining = data[intensityIndex].length - 4; + if ((remaining % 4 != 0) || (remaining/4) != numIntensities) { + throw new PicardException("Unexpected number of intensity fields for " + machine + "/" + run + + "/" + lane + "/" + tile + ": " + remaining); + } + } + + double intensities[][] = new double[numIntensities][4]; + int intensityArrayIndex = 4; + for (int i = 0; i < numIntensities; i++) { + for (int j = 0; j < 4; j++) { + intensities[i][j] = formatter.parseDouble(data[intensityIndex][intensityArrayIndex++]); + } + } + + return new BustardReadData( + machine, run, lane, tile, firstSeq, firstQual, secondSeq, secondQual, pf, intensities, x, y); + + } + + /** + * Returns an iterator over a set of elements of type BustardReadData. 
+ * + * @return an iterator over a set of elements of type BustardReadData + */ + public Iterator iterator() { + if (iterating) { + throw new IllegalStateException("iterator() method can only be called once, before the" + + "first call to hasNext()"); + } + next = readNext(true); + iterating = true; + return this; + } + + /** + * Returns true if the iteration has more elements. + * + * @return true if the iteration has more elements. Otherwise returns false. + */ + public boolean hasNext() { + if (!iterating) { + next = readNext(true); + iterating = true; + } + return next != null; + } + + /** + * Returns the next element in the iteration. + * + * @return the next element in the iteration + * @throws java.util.NoSuchElementException + */ + public BustardReadData next() { + + if (!hasNext()) { + throw new NoSuchElementException("Iteration has no more elements."); + } + + BustardReadData result = next; + next = readNext(false); + return result; + } + + /** + * Required method for Iterator API. + * + * @throws UnsupportedOperationException + */ + public void remove() { + throw new UnsupportedOperationException("Remove() not supported."); + } + + /** + * Closes the underlying PasteParser + */ + public void close() { + if (parser != null) { + parser.close(); + } + } + + public int getLane() { return this.lane; } + public boolean isPairedEnd() { return this.pairedEnd; } +} diff --git a/lib/edu/mit/broad/picard/illumina/BustardFilenameComparator.java b/lib/edu/mit/broad/picard/illumina/BustardFilenameComparator.java new file mode 100644 index 0000000000..ad92377f11 --- /dev/null +++ b/lib/edu/mit/broad/picard/illumina/BustardFilenameComparator.java @@ -0,0 +1,78 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. 
/**
 * Comparator for getting Bustard files in "sorted" order for use by the BustardFileParser.
 * Expected order is by lane in ascending order, then by tile in ascending order, then:
 *   the read 1 qseq file
 *   the read 2 qseq file
 *   the sig2 file
 *
 * IMPORTANT: Currently this class expects to receive ONLY qseq and sig2 files.
 *
 * @author Kathleen Tibbetts
 */
public class BustardFilenameComparator implements Comparator<File> {

    /**
     * Compares its two arguments for order. Returns a negative integer, zero, or a positive
     * integer as the first argument is less than, equal to, or greater than the second.
     *
     * @param file1 the first file to compare
     * @param file2 the second file to compare
     * @return a negative integer, zero, or a positive integer as
     * the first argument is less than, equal to, or greater than the second.
     */
    public int compare(File file1, File file2)
    {
        Integer parts1[] = parseFileNameParts(file1.getName());
        Integer parts2[] = parseFileNameParts(file2.getName());

        // BUG FIX: start at index 0 so lane is compared too (the documented primary key);
        // the previous loop started at 1 and ignored the lane entirely.
        for (int i = 0; i < parts1.length; i++)
        {
            final int cmp = compareNullable(parts1[i], parts2[i]);
            if (cmp != 0) {
                return cmp;
            }
        }
        return 0;
    }

    /**
     * Null-safe Integer comparison.  The read-number part is null for sig2 files, which
     * previously caused a NullPointerException when two sig2 names were compared.
     * A missing part sorts before a present one.
     */
    private static int compareNullable(final Integer a, final Integer b) {
        if (a == null && b == null) {
            return 0;
        }
        if (a == null) {
            return -1;
        }
        if (b == null) {
            return 1;
        }
        return a.compareTo(b);
    }

    /**
     * Utility method that returns an array of integers that represent, in order,
     * the lane, tile, type (0 for qseq files, 1 for sig2 files), and read (if any)
     * represented by the given file name.
     *
     * @param name the file name to dissect (underscore-separated Bustard naming)
     * @return an array of integers that represent, in order,
     * the lane, tile, type (0 for qseq files, 1 for sig2 files), and read (if any)
     * represented by the given file name
     */
    private Integer[] parseFileNameParts(String name)
    {
        Integer parts[] = new Integer[4]; // Lane, tile, type, read
        String src[] = name.split("_");
        parts[0] = Integer.valueOf(src[1]);      // Lane is always the second part
        if (src[2].length() == 4) {              // Tile is 3rd or 4th
            parts[1] = Integer.valueOf(src[2]);
        }
        else {
            parts[1] = Integer.valueOf(src[3]);
        }
        parts[2] = (src[src.length-1].equals("qseq.txt")) ? 0 : 1; // qseq sorts lower than sig2
        if (src[2].length() == 1) {              // read number, present only in qseq names
            parts[3] = Integer.valueOf(src[2]);
        }
        return parts;
    }
}
+ * + * @author Kathleen Tibbetts + */ +public class BustardReadData { + + private static final String PADDING ="00000"; + + final private String machineName; + final private int runNumber; + final private int laneNumber; + final private int tileNumber; + final private String firstReadSequence; + final private String firstReadQualities; + final private String secondReadSequence; + final private String secondReadQualities; + final private boolean pf; + final private double intensities[][]; + final private int xCoordinate; + final private int yCoordinate; + private final SolexaQualityConverter converter = new SolexaQualityConverter(); + + + /** + * Constructor that takes everything to populate this object + * + * @param machineName + * @param runNumber + * @param laneNumber + * @param tileNumber + * @param firstReadSequence + * @param firstReadQualities + * @param secondReadSequence + * @param secondReadQualities + * @param pf + * @param intensities + * @param xCoordinate + * @param yCoordinate + */ + public BustardReadData(String machineName, int runNumber, int laneNumber, int tileNumber, + String firstReadSequence, String firstReadQualities, + String secondReadSequence, String secondReadQualities, + boolean pf, double[][] intensities, int xCoordinate, int yCoordinate ) { + + this.machineName = machineName; + this.runNumber = runNumber; + this.laneNumber = laneNumber; + this.tileNumber = tileNumber; + this.firstReadSequence = firstReadSequence; + this.firstReadQualities = firstReadQualities; + this.secondReadSequence = secondReadSequence; + this.secondReadQualities = secondReadQualities; + this.pf = pf; + this.intensities = intensities; + this.xCoordinate = xCoordinate; + this.yCoordinate = yCoordinate; + } + + // TODO: Finalize read name -- ask Tim + /** + * Composes a name for this read from its values + * + * @return the read name + */ + public String getReadName() { + return this.machineName + ":" + this.laneNumber + ":" + this.tileNumber + + ":" + 
this.xCoordinate + ":" + this.yCoordinate; + } + + /** + * Gets Phred-style qualitites for the first read + * + * @return the String of qualities + */ + public String getFirstReadPhredQualities() { + return decodeSolexaQualitiesToPhred(getFirstReadQualities()); + } + + /** + * Gets Phred-style qualitites for the second read + * + * @return the String of qualities + */ + public String getSecondReadPhredQualities() { + return decodeSolexaQualitiesToPhred(getSecondReadQualities()); + } + + /** + * Converts a string of Solexa qualities to a Phred-style quality String + * + * @param qualities the Solexa qualities to decode + * @return the String of Phred qualities + */ + private String decodeSolexaQualitiesToPhred(String qualities) { + StringBuilder sb = new StringBuilder(); + for (char c : qualities.toCharArray()) { + // Quality char is phred score + 33 + sb.append((char)(converter.solexaToPhred((byte)c)+33)); + } + return sb.toString(); + } + + public String getMachineName() { return machineName; } + public int getRunNumber() { return runNumber; } + public int getLaneNumber() { return laneNumber; } + public int getTileNumber() { return tileNumber; } + public String getFirstReadSequence() { return firstReadSequence; } + public String getFirstReadQualities() { return firstReadQualities; } + public String getSecondReadSequence() { return secondReadSequence; } + public String getSecondReadQualities() { return secondReadQualities; } + public double[][] getIntensities() { return intensities; } + public boolean isPf() { return pf; } + public int getXCoordinate() { return xCoordinate; } + public int getYCoordinate() { return yCoordinate; } + +} diff --git a/lib/edu/mit/broad/picard/illumina/BustardToSam.java b/lib/edu/mit/broad/picard/illumina/BustardToSam.java new file mode 100644 index 0000000000..eb88e34651 --- /dev/null +++ b/lib/edu/mit/broad/picard/illumina/BustardToSam.java @@ -0,0 +1,58 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This 
/**
 * Command-line program that invokes BustardToSamWriter to convert the Bustard
 * basecall data for one lane into an unmapped BAM file.
 *
 * @author Kathleen Tibbetts
 */
public class BustardToSam extends CommandLineProgram {
    // The following attributes define the command-line arguments
    @Usage(programVersion="1.0")
    public String USAGE =
        "Usage: " + getClass().getName() + " [options]\n\n" +
        "Generate a BAM binary file from data in an illumina Bustard directory.\n";

    @Option(shortName = "B", doc = "Bustard directory to parse. ")
    public File BUSTARD_DIRECTORY;

    @Option(shortName = "F", doc = "The flowcell. ")
    public String FLOWCELL;

    @Option(shortName = "L", doc = "The lane for which to parse data. ")
    public Integer LANE;

    @Option(shortName = "P", doc = "Whether the lane was a paired-end run. ")
    public Boolean PE;

    @Option(shortName = "O", doc = "The directory for the binary output file. ")
    public File OUTPUT;

    // Streams the requested lane's Bustard data straight into a BAM writer.
    @Override
    protected int doWork() {
        BustardToSamWriter writer = new BustardToSamWriter(
            new BustardFileParser(BUSTARD_DIRECTORY, LANE, PE), OUTPUT, FLOWCELL);
        writer.writeBamFile();
        return 0; // success exit status
    }

    public static void main(String[] argv) {
        System.exit(new BustardToSam().instanceMain(argv));
    }


}
+*/ +package edu.mit.broad.picard.illumina; + +import edu.mit.broad.sam.*; +import edu.mit.broad.picard.io.IoUtil; +import edu.mit.broad.picard.util.Log; +import edu.mit.broad.picard.filter.AggregateFilter; +import edu.mit.broad.picard.filter.SamRecordFilter; +import edu.mit.broad.picard.filter.SolexaNoiseFilter; +import edu.mit.broad.picard.sam.ReservedTagConstants; + +import java.io.File; +import java.util.*; + +/** + * Writes the data from a BustardFileParser to a BAM file + */ +public class BustardToSamWriter { + + private final BustardFileParser parser; + private SAMFileWriter writer; + private final File outputFile; + private AggregateFilter filters; + private int recordsWritten = 0; + private Log log = Log.getInstance(BustardToSamWriter.class); + + /** + * Constructor + * + * @param parser The parser for the Bustard data + * @param outputDirectory The directory in which to write the BAM file + * @param flowcell The flowcell from which the data is drawn + */ + public BustardToSamWriter(BustardFileParser parser, File outputDirectory, String flowcell) { + this.parser = parser; + this.outputFile = getOutputFile(outputDirectory, flowcell); + initializeFilters(); + } + + /** + * Alternate constructor for testing + * + * @param parser The parser for the Bustard data + * @param outputFile The directory in which to write the BAM file + */ + BustardToSamWriter(BustardFileParser parser, File outputFile) { + this.parser = parser; + this.outputFile = outputFile; + initializeFilters(); + } + + private void initializeFilters() { + filters = new AggregateFilter(Arrays.asList( + (SamRecordFilter)new SolexaNoiseFilter() + )); + } + + + /** + * Writes all data from the BustardFileParser to a BAM file + */ + public void writeBamFile() { + SAMFileHeader header = new SAMFileHeader(); + header.setSortOrder(SAMFileHeader.SortOrder.unsorted); + writer = new SAMFileWriterFactory().makeBAMWriter(header, false, outputFile); + + while (parser.hasNext()) { + BustardReadData brd = 
parser.next(); + + SAMRecord sam = createSamRecord(brd, true); + writer.addAlignment(sam); + this.recordsWritten++; + + if (parser.isPairedEnd()) { + SAMRecord sam2 = createSamRecord(brd, false); + writer.addAlignment(sam2); + this.recordsWritten++; + } + + } + writer.close(); + + log.info("Wrote " + this.recordsWritten + " read records to BAM file " + + this.outputFile.getAbsolutePath()); + } + + /** + * Creates a SAMRecord from Bustard data + * + * @param brd The BustardReadData to use in populating the SAMRecord + * @param isFirstRead whether this is the first read of a pair + * @return SAMRecord fully populated SAMRecord + */ + private SAMRecord createSamRecord(BustardReadData brd, boolean isFirstRead) { + SAMRecord sam = new SAMRecord(); + sam.setReadName(brd.getReadName()); + sam.setReadString(isFirstRead ? brd.getFirstReadSequence() : brd.getSecondReadSequence()); + sam.setBaseQualityString(isFirstRead ? brd.getFirstReadPhredQualities() : brd.getSecondReadPhredQualities()); + + // Flag values + sam.setReadPairedFlag(parser.isPairedEnd()); + sam.setReadUmappedFlag(true); + sam.setReadFailsVendorQualityCheckFlag(!brd.isPf()); + sam.setMateUnmappedFlag(true); + if (parser.isPairedEnd()) { + sam.setFirstOfPairFlag(isFirstRead); + sam.setSecondOfPairFlag(!isFirstRead); + } + + if (filters.filterOut(sam)) { + sam.setAttribute(ReservedTagConstants.XN, 1); + } + return sam; + } + + /** + * Constructs the name for the output file, determines whether it is writeable, + * and returns the file + * + * @param outputDirectory the directory in which to write the BAM file + * @param flowcell the flowcell from which the data is drawn + * @return a new File object for the BAM file. + */ + private File getOutputFile(File outputDirectory, String flowcell) { + File result = new File(outputDirectory.getAbsolutePath() + "/" + + flowcell + "." 
+ parser.getLane() + ".unmapped.bam"); + IoUtil.assertFileIsWritable(result); + return result; + } +} diff --git a/lib/edu/mit/broad/picard/illumina/GeraldParser.java b/lib/edu/mit/broad/picard/illumina/GeraldParser.java new file mode 100644 index 0000000000..a72f90dbac --- /dev/null +++ b/lib/edu/mit/broad/picard/illumina/GeraldParser.java @@ -0,0 +1,235 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.illumina; + +import edu.mit.broad.picard.util.PasteParser; +import edu.mit.broad.picard.util.TabbedTextFileParser; +import edu.mit.broad.picard.PicardException; +import edu.mit.broad.sam.util.CloseableIterator; + +import java.io.File; +import java.util.Iterator; +import java.util.Arrays; +import java.util.regex.Pattern; +import java.text.ParsePosition; +import java.text.NumberFormat; + +/** + * Parse the pair of files (eland_extended.txt and export.txt) that correspond to an end of a Gerald run for a lane. 
+ */ +public class GeraldParser implements Iterable, CloseableIterator { + private static final int EXPECTED_ELAND_FIELDS = 4; + // Regex used to split apart multiple alignments in the eland output + private static final Pattern ALIGN_SPLITTER = Pattern.compile("\\,+"); + + // export.txt constants + private static final int PASSING_FILTER_COLUMN = 21; + private static final int QUALITIES_COLUMN = 9; + private static final int REQUIRED_EXPORT_COLUMNS = PASSING_FILTER_COLUMN + 1; + + private final NumberFormat integerFormat = NumberFormat.getIntegerInstance(); + + private final SquashedCoordinateMap geraldToArachne; + private final PasteParser pasteParser; + private final File elandExtended; + private final File export; + private boolean iteratorCalled = false; + private final byte[] solexaToPhredQualityConverter = new SolexaQualityConverter().getSolexaToPhredConversionTable(); + + /** + * @param geraldToArachne for converting btw Gerald coordinate and genomic coordinate + */ + public GeraldParser(final SquashedCoordinateMap geraldToArachne, final File elandExtended, final File export) { + this.geraldToArachne = geraldToArachne; + this.elandExtended = elandExtended; + this.export = export; + final TabbedTextFileParser[] parsers = { + new TabbedTextFileParser(false, elandExtended), + new TabbedTextFileParser(false, export) + }; + pasteParser = new PasteParser(parsers); + } + + public Iterator iterator() { + if (iteratorCalled) { + throw new IllegalStateException("iterator() cannot be called more than once on a GeraldParser instance."); + } + iteratorCalled = true; + return this; + } + + public void close() { + pasteParser.close(); + } + + public boolean hasNext() { + return pasteParser.hasNext(); + } + + public GeraldAlignment next() { + final GeraldAlignment ret = new GeraldAlignment(); + final String[][] fields = pasteParser.next(); + + // Parse eland_extended.txt fields + final String[] elandExtendedFields = fields[0]; + if (elandExtendedFields.length < 
EXPECTED_ELAND_FIELDS) { + throw new PicardException("Not enough fields in file: " + elandExtended); + } + + ret.readName = elandExtendedFields[0].substring(1); + ret.readBases = elandExtendedFields[1]; + ret.readLength = ret.readBases.length(); + final String[] alignCounts = elandExtendedFields[2].split(":"); + if (alignCounts.length == 3) { + ret.zeroMismatchPlacements = Short.parseShort(alignCounts[0]); + ret.oneMismatchPlacements = Short.parseShort(alignCounts[1]); + ret.twoMismatchPlacements = Short.parseShort(alignCounts[2]); + } + + final String[] alignments = ALIGN_SPLITTER.split(elandExtendedFields[3]); + if (alignments.length == 1 && !"-".equals(alignments[0])) { + final int lastDot = alignments[0].lastIndexOf("."); + final int colon = alignments[0].indexOf(':'); + + final String tmp = alignments[0].substring(colon + 1); + final ParsePosition pos = new ParsePosition(0); + final long start = integerFormat.parse(tmp, pos).longValue(); + if (pos.getIndex() == 0) { + throw new RuntimeException("Problem parsing eland extended alignment record: " + Arrays.toString(elandExtendedFields)); + } + + final SimpleMapping m = new SimpleMapping(alignments[0].substring(lastDot+1, colon).trim(), + start, start + ret.readLength - 1, null); + geraldToArachne.convertToArachneCoords(m); + ret.primaryChrom = m.getSequenceName(); + ret.primaryStart = m.getStartPos(); + ret.primaryStop = m.getEndPos(); + ret.orientation = tmp.substring(pos.getIndex(), pos.getIndex() + 1); + ret.mismatchString = tmp.substring(pos.getIndex() + 1); + + // Count the mismatches in the alignment + for (int i=pos.getIndex(); i readGroups = new ArrayList(); + readGroups.add(readGroup); + readGroup.setSample(SAMPLE); + if (LIBRARY != null) { + readGroup.setLibrary(LIBRARY); + } + setRGAttributeIfNotNull(readGroup, DESCRIPTION, "DS"); + setRGAttributeIfNotNull(readGroup, RUN, "PU"); + setRGAttributeIfNotNull(readGroup, PI, SAMReadGroupRecord.PREDICTED_MEDIAN_INSERT_SIZE_TAG); + 
            setRGAttributeIfNotNull(readGroup, CN, "CN");
            setRGAttributeIfNotNull(readGroup, RUN_DATE, SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG);
            setRGAttributeIfNotNull(readGroup, PL, "PL");
            header.setReadGroups(readGroups);
        }
    }

    /**
     * Sets the given attribute on the read group, but only when a value was supplied.
     *
     * @param readGroup the read group record to update
     * @param value     the attribute value; ignored when null
     * @param key       the SAM read-group tag to set
     */
    private void setRGAttributeIfNotNull(final SAMReadGroupRecord readGroup, final Object value, final String key) {
        if (value == null) {
            return;
        }
        readGroup.setAttribute(key, value);
    }

    /**
     * Iterate through the Gerald output and write alignments. eland_extended.txt and export.txt are
     * iterated together using PasteParser. If paired end lane, then two PasteParsers are iterated in tandem,
     * so that mate info is available when a SAMRecord is created.
     */
    private void writeAlignments() {
        final GeraldParserFactory geraldParserFactory = new GeraldParserFactory(GERALD_DIR, LANE, SQUASHED_MAP);
        paired = geraldParserFactory.isPairedRun();
        final GeraldParser firstEndIterator = geraldParserFactory.makeParser(paired ? 1: null);
        GeraldParser secondEndIterator = null;
        if (paired) {
            secondEndIterator = geraldParserFactory.makeParser(2);
        }
        int numAlignmentsOrPairsWritten = 0;
        while (firstEndIterator.hasNext()) {
            final GeraldParser.GeraldAlignment firstEnd = firstEndIterator.next();
            GeraldParser.GeraldAlignment secondEnd = null;
            if (paired) {
                // The two end iterators must stay in lockstep; a short second-end file is an error
                hasNextAssert(secondEndIterator);
                secondEnd = secondEndIterator.next();
            }
            final SAMRecord firstEndAlignment = createSAMRecordFromGerald(firstEnd);
            SAMRecord secondEndAlignment = null;
            if (paired) {
                secondEndAlignment = createSAMRecordFromGerald(secondEnd);
                // Each record carries its MATE's coordinates, hence the crossed arguments
                setMateInfo(secondEndAlignment, firstEnd);
                setMateInfo(firstEndAlignment, secondEnd);
                secondEndAlignment.setSecondOfPairFlag(true);
                firstEndAlignment.setFirstOfPairFlag(true);
                final boolean properPair = SamPairUtil.isProperPair(firstEndAlignment, secondEndAlignment, JUMPING_LIBRARY);
                firstEndAlignment.setProperPairFlag(properPair);
                secondEndAlignment.setProperPairFlag(properPair);
                // By convention the two ends carry insert sizes of opposite sign
                int insertSize = SamPairUtil.computeInsertSize(firstEndAlignment, secondEndAlignment);
                firstEndAlignment.setInferredInsertSize(insertSize);
                secondEndAlignment.setInferredInsertSize(-insertSize);
            }

            writer.addAlignment(firstEndAlignment);
            if (secondEndAlignment != null) {
                writer.addAlignment(secondEndAlignment);
            }
            ++numAlignmentsOrPairsWritten;
            // MAX_ALIGNMENTS is a debugging cap; counts pairs as one when paired
            if (MAX_ALIGNMENTS != null && numAlignmentsOrPairsWritten >= MAX_ALIGNMENTS) {
                break;
            }
            if (numAlignmentsOrPairsWritten % 500000 == 0) {
                log.info("Loaded " + numAlignmentsOrPairsWritten + " reads");
            }
        }
        // Only verify full consumption when we did not stop early via MAX_ALIGNMENTS
        if (MAX_ALIGNMENTS == null) {
            noMoreAssert(firstEndIterator);
            if (paired) {
                noMoreAssert(secondEndIterator);
            }
        }
        log.info("Done loading " + numAlignmentsOrPairsWritten + " reads");
    }

    /**
     * Write into the samRecord the mate info from the mate gerald alignment
     */
    private void setMateInfo(final SAMRecord samRecord, final GeraldParser.GeraldAlignment mateGeraldAlignment) {
        // A null chromosome means the mate did not align
        final boolean isMapped = mateGeraldAlignment.getPrimaryChrom() != null;
        if (isMapped) {
            samRecord.setMateReferenceName(mateGeraldAlignment.getPrimaryChrom());
            samRecord.setMateAlignmentStart((int)mateGeraldAlignment.getPrimaryStart());
            samRecord.setMateNegativeStrandFlag(isNegativeStrand(mateGeraldAlignment));
        } else {
            samRecord.setMateReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
            samRecord.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
            samRecord.setMateUnmappedFlag(true);
        }
    }

    // Translates Gerald's F/R orientation codes into a strand flag;
    // anything else indicates a malformed eland_extended file.
    private boolean isNegativeStrand(final GeraldParser.GeraldAlignment alignment) {
        final String orientation = alignment.getOrientation();
        if (orientation.equals("F")) {
            return false;
        } else if (orientation.equals("R")) {
            return true;
        } else {
            throw new RuntimeException("Strange orientation in eland_extended file");
        }
    }

    /**
     * Builds a SAMRecord from a single Gerald alignment (primary placement only).
     * Mate fields and pair flags are filled in separately by the caller.
     */
    private SAMRecord createSAMRecordFromGerald(final GeraldParser.GeraldAlignment alignment) {
        final SAMRecord samRecord = new SAMRecord();
        // Consider an alignment with a negative start (i.e. that hangs off the beginning of the contig)
        // to be unmapped.
        final boolean isMapped = alignment.getPrimaryChrom() != null && alignment.getPrimaryStart() >= 0;

        // Strip Gerald's /1 and /2 end suffixes so both ends share one read name
        String readName = alignment.getReadName();
        if (readName.endsWith("/1") || readName.endsWith("/2")) {
            readName = readName.substring(0, readName.length() - 2);
        }
        samRecord.setReadName(readName);

        // Set all the flags
        samRecord.setReadPairedFlag(paired);
        samRecord.setReadUmappedFlag(!isMapped); // (sic — method name as spelled in this SAM API)
        if (isMapped) {
            samRecord.setReadNegativeStrandFlag(isNegativeStrand(alignment));
        }
        // For now we are only taking the primary alignment
        samRecord.setNotPrimaryAlignmentFlag(false);
        // Bases and qualities are stored reverse-complemented / reversed for minus-strand reads
        String readBases = alignment.getReadBases();
        if (samRecord.getReadNegativeStrandFlag()) {
            readBases = SequenceUtil.reverseComplement(readBases);
        }
        samRecord.setReadString(readBases);
        final byte[] phredQualities = alignment.getPhredQualities();
        if (isMapped && samRecord.getReadNegativeStrandFlag()) {
            ArrayUtil.reverseArray(phredQualities);
        }
        samRecord.setBaseQualities(phredQualities);
        if (isMapped) {
            /*
            if ("23".equals(geraldReferenceName)) {
                geraldReferenceName = "X";
            } else if ("24".equals(geraldReferenceName)) {
                geraldReferenceName = "Y";
            }
            return REFERENCE_PREFIX + geraldReferenceName;
            */
            samRecord.setReferenceName(alignment.getPrimaryChrom());
            samRecord.setAlignmentStart((int)alignment.getPrimaryStart());
            samRecord.setMappingQuality(SAMRecord.UNKNOWN_MAPPING_QUALITY);
            // CIGAR is trivial because there are no indels or clipping in Gerald
            final String cigar = Integer.toString(alignment.getReadLength()) + "M";
            samRecord.setCigarString(cigar);
            // We've decided not to bother with this, and just load the reference
            // if we want to determine mismatches.
            // samRecord.setAttribute("MD", alignment.getMismatchString());
        } else {
            samRecord.setReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
            samRecord.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
            samRecord.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
            samRecord.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR);
        }

        if (SAMPLE != null) {
            // There is a read group (id = READ_GROUP_ID)
            samRecord.setAttribute("RG", READ_GROUP_ID);
        }

        samRecord.setAttribute("PG", PROGRAM_RECORD_ID);
        return samRecord;
    }

    // Fails fast when a gerald output file is shorter than its sibling
    private void hasNextAssert(final Iterator iterator) {
        if (!iterator.hasNext()) {
            throw new RuntimeException("gerald output file ends unexpectedly.");

        }
    }

    // Fails fast when a gerald output file is longer than expected
    private void noMoreAssert(final Iterator iterator) {
        if (iterator.hasNext()) {
            throw new RuntimeException("gerald output file has more lines than expected.");
        }
    }

}
+*/ +package edu.mit.broad.picard.illumina; + +import edu.mit.broad.sam.util.CoordMath; + +class SimpleMapping implements Comparable { + String arachneIndex; + long startPos; + long endPos; + String sequenceName; + + public SimpleMapping(final String arachneIndex, final long startPos, final long endPos, final String sequenceName) { + this.arachneIndex = arachneIndex; + this.startPos = startPos; + this.endPos = endPos; + this.sequenceName = sequenceName; + + if (this.endPos < this.startPos) throw new IllegalArgumentException("startPos must be less than endPos!"); + } + + public String getArachneIndex() { + return arachneIndex; + } + + public void setArachneIndex(final String arachneIndex) { + this.arachneIndex = arachneIndex; + } + + public long getStartPos() { + return startPos; + } + + public void setStartPos(final long startPos) { + this.startPos = startPos; + } + + public long getEndPos() { + return endPos; + } + + public void setEndPos(final long endPos) { + this.endPos = endPos; + } + + public String getSequenceName() { + return sequenceName; + } + + public void setSequenceName(final String sequenceName) { + this.sequenceName = sequenceName; + } + + public SimpleMapping intersection(final SimpleMapping other) { + if (this.intersects(other)) { + return new SimpleMapping(this.getArachneIndex(), + (this.getStartPos() >= other.getStartPos())?this.getStartPos():other.getStartPos(), + (this.getEndPos() <= other.getEndPos())?this.getEndPos():other.getEndPos(), this.getSequenceName()); + } + + return null; + } + + public boolean intersects(final SimpleMapping other) { + return (this.getArachneIndex().equals(other.getArachneIndex()) && + CoordMath.overlaps(this.getStartPos(), this.getEndPos(), other.getStartPos(), other.getEndPos())); + } + + public long length() { + return CoordMath.getLength(startPos, endPos); + } + + /** + * Sort based on sequence.compareTo, then start pos, then end pos + * with null objects coming lexically last + */ + public int compareTo(final 
SimpleMapping that) { + if (that == null) return -1; // nulls last + + int result = this.getArachneIndex().compareTo(that.getArachneIndex()); + if (result == 0) { + if (this.getStartPos() == that.getStartPos()) { + result = ((int) (this.getEndPos() - that.getEndPos())); + } else { + result = ((int) (this.getStartPos() - that.getStartPos())); + } + } + + // normalize to -1, 0, 1 + if (result > 1) result = 1; + else if (result < -1) result = -1; + return result; + } + + public boolean equals(final SimpleMapping that) { + return (this.compareTo(that) == 0); + } + + public int hashCode() { + int result; + result = arachneIndex.hashCode(); + result = 31 * result + (int) (startPos ^ (startPos >>> 32)); + result = 31 * result + (int) (endPos ^ (endPos >>> 32)); + return result; + } + + public String toString() { + return getArachneIndex() + ":" + getStartPos() + "-" + getEndPos(); + } +} diff --git a/lib/edu/mit/broad/picard/illumina/SolexaQualityConverter.java b/lib/edu/mit/broad/picard/illumina/SolexaQualityConverter.java new file mode 100644 index 0000000000..80633fb724 --- /dev/null +++ b/lib/edu/mit/broad/picard/illumina/SolexaQualityConverter.java @@ -0,0 +1,58 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.illumina; + +/** + * Optimized method for converting Solexa ASCII qualities into Phred scores. + * Pre-computes all values in order to eliminate repeated computation. 
/**
 * Optimized method for converting Solexa ASCII qualities into Phred scores.
 * Pre-computes all values in order to eliminate repeated computation.
 */
public class SolexaQualityConverter {

    /**
     * This value is added to a Solexa quality score to make it printable ASCII
     */
    private static final int SOLEXA_ADDEND = 64;

    /**
     * Mapping from ASCII value in Gerald export file to phred score
     */
    private final byte[] phredScore = new byte[256];

    public SolexaQualityConverter() {
        // Entries below the addend are invalid Solexa qualities and map to 0;
        // Java byte arrays are zero-initialized, so only the valid range needs computing.
        for (int i = SOLEXA_ADDEND; i < phredScore.length; ++i) {
            phredScore[i] = decodeSolexaQualityToPhred(i);
        }
    }


    /** Converts a solexa character quality into a phred numeric quality. */
    private byte decodeSolexaQualityToPhred(final int solexaQuality) {
        return (byte) Math.round(10d * Math.log10(1d+Math.pow(10d, (solexaQuality - SOLEXA_ADDEND)/10d)));
    }

    /**
     * Convert a solexa quality ASCII character into a phred score.
     *
     * @param solexaQuality the Solexa ASCII quality byte (valid values are >= 64)
     * @return the corresponding Phred score
     */
    public byte solexaToPhred(final byte solexaQuality) {
        // BUG FIX: mask to an unsigned index. ASCII values >= 128 are negative as a
        // Java byte and previously threw ArrayIndexOutOfBoundsException instead of
        // reaching table entries 128-255.
        return phredScore[solexaQuality & 0xff];
    }

    /**
     * @return a byte array that can be indexed by Solexa ASCII quality, with value
     * of corresponding Phred score. Elements 0-63 are invalid because Solexa qualities
     * should all be >= 64. Do not modify this array!
     */
    public byte[] getSolexaToPhredConversionTable() {
        return phredScore;
    }
}
+*/ +package edu.mit.broad.picard.illumina; + +import edu.mit.broad.sam.util.CoordMath; +import edu.mit.broad.picard.cmdline.CommandLineUtils; + +import java.util.Map; +import java.util.HashMap; +import java.io.File; +import java.io.BufferedReader; +import java.io.IOException; + +public class SquashedCoordinateMap { + private final Map geraldToArachne = new HashMap(); + private long genomeSize; + + public SquashedCoordinateMap(final File squashedMapFile) { + try { + final BufferedReader in = CommandLineUtils.getReader(squashedMapFile); + String line; + genomeSize = 0; + + while ((line = in.readLine()) != null) { + final String[] fields = CommandLineUtils.SPACE_SPLITTER.split(line); + final String arachneIndex = fields[0].trim().intern(); + final String squashedRefIndex = fields[1].trim().intern(); + final long squashedStart = Long.parseLong(fields[2]); + final long length = Long.parseLong(fields[3]); + final String sequenceName = fields[4]; + + final SimpleMapping mapping = new SimpleMapping(squashedRefIndex, squashedStart, + CoordMath.getEnd(squashedStart, length), sequenceName); + geraldToArachne.put(mapping, arachneIndex); + + genomeSize += length; + } + + in.close(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /* Converts a read's mapping from Gerald's vretarded space to arachne index + coords. 
*/ + public void convertToArachneCoords(final SimpleMapping read) { + if (this.geraldToArachne == null || this.geraldToArachne.isEmpty()) { + throw new IllegalStateException("Cannot invoke convertToArachneCoords before parseSquashedMapFile"); + } + + for (final Map.Entry entry : this.geraldToArachne.entrySet()) { + final SimpleMapping chunk = entry.getKey(); + if (chunk.intersects(read)) { + read.setArachneIndex(entry.getValue()); + read.setStartPos( read.getStartPos() - chunk.getStartPos() ); + read.setEndPos( read.getEndPos() - chunk.getStartPos() ); + read.setSequenceName(chunk.getSequenceName()); + return; + } + } + + throw new RuntimeException("Could not convert read: " + read); + } + + long getGenomeSize() { + return genomeSize; + } +} diff --git a/lib/edu/mit/broad/picard/importer/genotype/BedFileReader.java b/lib/edu/mit/broad/picard/importer/genotype/BedFileReader.java new file mode 100644 index 0000000000..8bd01c755b --- /dev/null +++ b/lib/edu/mit/broad/picard/importer/genotype/BedFileReader.java @@ -0,0 +1,82 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
/**
 * Reader for PLINK BED genotype files.  Genotypes are packed two bits each,
 * four genotypes per byte, in either individual-major or snp-major order
 * (selected by the mode byte that follows the magic number).
 *
 * @author Doug Voet
 */
public class BedFileReader implements Closeable {
    private static final int LOWEST_2_BIT_MASK = 3; // binary 11
    // 7020 == 0x1B6C; this matches the PLINK magic bytes 0x6C 0x1B only if
    // BinaryCodec reads the short little-endian -- TODO confirm byte order.
    private static final short BED_MAGIC_NUMBER = 7020;
//    private static final short BED_MAGIC_NUMBER = Short.parseShort("0110110000011011", 2);

    /** All genotypes for one individual are stored consecutively. */
    public static final byte MODE_INDIVIDUAL_MAJOR = 0;
    /** All individuals' genotypes for one snp are stored consecutively. */
    public static final byte MODE_SNP_MAJOR = 1;

    // The four possible 2-bit genotype codes returned by nextGenotype().
    public static final byte GENOTYPE_AA = 0; // binary 00
    public static final byte GENOTYPE_NO_CALL = 1; // binary 01
    public static final byte GENOTYPE_AB = 2; // binary 10
    public static final byte GENOTYPE_BB = 3; // binary 11

    private final byte mode;          // MODE_INDIVIDUAL_MAJOR or MODE_SNP_MAJOR
    private final BinaryCodec codec;  // underlying binary reader for the bed file
    private byte currentBlock;        // the byte currently being unpacked
    private int genotypeCount = 0;    // genotypes consumed from currentBlock so far

    /**
     * Opens the given BED file, validates its magic number and reads the mode byte.
     *
     * @param bedFile the bed file to read
     * @throws PicardException if the magic number does not match (codec is closed first)
     */
    public BedFileReader(File bedFile) {
        this.codec = new BinaryCodec(bedFile, false);
        short fileMagicNumber = this.codec.readShort();
        if (fileMagicNumber != BED_MAGIC_NUMBER) {
            this.codec.close();
            throw new PicardException("Given file [" + bedFile.getAbsolutePath() +
                "] is not in bed file format... magic number does not match");
        }
        this.mode = codec.readByte();
    }

    /** @return MODE_INDIVIDUAL_MAJOR or MODE_SNP_MAJOR, as read from the file header. */
    public byte getMode() {
        return mode;
    }

    @Override
    public void close() {
        this.codec.close();
    }

    /**
     * @return the next 2-bit genotype code, one of the GENOTYPE_* constants.
     */
    public byte nextGenotype() {
        // there are 4 genotypes per byte so get a new byte every 4 genotypes read
        if (this.genotypeCount++ % 4 == 0) {
            this.currentBlock = this.codec.readByte();
        }

        // the 2 lowest order bits of currentBlock are the next genotype, pop them off
        byte genotype = (byte) (LOWEST_2_BIT_MASK & this.currentBlock);
        // NOTE: >>>= on a byte promotes to int (sign-extending) before the shift and
        // narrows back, so a negative currentBlock gains stray 1-bits in its top
        // positions.  Those bits never reach the low two bits within the four reads
        // taken from one block, so the extracted genotypes remain correct.
        this.currentBlock >>>= 2;

        return genotype;
    }

    /**
     * Call this method when moving on to the next individual (in indiv-major mode) or next
     * snp (in snp-major mode).  Resetting the count forces nextGenotype() to fetch a fresh
     * byte, discarding any unused padding bits in the current block.
     */
    public void dropRemainingBlock() {
        this.genotypeCount = 0;
    }
}
The former lists all SNPs for the + * first individual then all SNPs for the second individual, etc. The latter list all individuals + * for first SNP then all individuals for second SNP, etc. The order for snps is dictated by + * the bim file and the order for individuals is dictated by the fam file. + *

    + * See this page for details + * of the format. + * + * @author Doug Voet + */ +public class BedToGeli extends CommandLineProgram { + static final float LIKELIHOOD = 500; + private static final Log log = Log.getInstance(BedToGeli.class); + + @Usage(programVersion="1.0") + public final String USAGE = ""; + + @Option(doc="The bed file name.", mutex="BFILE") + public File BED; + + @Option(doc="The bim file name.", mutex="BFILE") + public File BIM; + + @Option(doc="The fam file name.", mutex="BFILE") + public File FAM; + + @Option(doc="The root file name of the bed, bim & fam files.", mutex={"BED", "BIM", "FAM"}) + public String BFILE; + + @Option(doc="The directory to write the output GELI files", shortName="D") + public File OUTPUT_DIR; + + @Option(doc="Set to 'true' if the family name should be included in the output file names, default false", + shortName="F", + optional=true) + public Boolean USE_FAMILY = Boolean.FALSE; + + @Option(doc="Name of file containing sequence dictionary to embed in new GELI files", + shortName="DICT") + public File SEQUENCE_DICTIONARY; + + private List snpCache; + private List geliFileNames; + private List sequenceDictionary; + private Map referenceIndexes; + + public static void main(String[] argv) { + System.exit(new BedToGeli().instanceMain(argv)); + } + + @Override + protected int doWork() { + populateFileNames(); + IoUtil.assertFileIsReadable(this.BED); + IoUtil.assertFileIsReadable(this.BIM); + IoUtil.assertFileIsReadable(this.FAM); + IoUtil.assertFileIsReadable(this.SEQUENCE_DICTIONARY); + IoUtil.assertDirectoryIsWritable(this.OUTPUT_DIR); + + populateSequenceDictionary(); + + BedFileReader bedReader = new BedFileReader(this.BED); + if (bedReader.getMode() == BedFileReader.MODE_INDIVIDUAL_MAJOR) { + log.debug("Detected BED file in individual-major mode"); + parseIndividualMajor(bedReader); + } else { + log.debug("Detected BED file in snp-major mode"); + parseSnpMajor(bedReader); + } + + return 0; + } + + /** + * loads the 
SEQUENCE_DICTIONARY file + */ + private void populateSequenceDictionary() { + try { + final SAMFileHeader header = new SAMTextHeaderCodec().decode(new AsciiLineReader(new FileInputStream(this.SEQUENCE_DICTIONARY)), null); + this.sequenceDictionary = header.getSequences(); + + this.referenceIndexes = new HashMap(); + for (byte i = 0; i < sequenceDictionary.size(); i++) { + this.referenceIndexes.put(sequenceDictionary.get(i).getSequenceName().intern(), i); + } + } catch (FileNotFoundException e) { + throw new PicardException("Unexpected exception", e); + } + } + + private void parseIndividualMajor(BedFileReader bedReader) { + cacheSnps(); + BasicTextFileParser famReader = new BasicTextFileParser(true, this.FAM); + for (String[] famFields : famReader) { + GeliFileWriter geliWriter = getGeliFileWriter(getGeliFileName(famFields[0], famFields[1]), false); + for (SNP snp : this.snpCache) { + GenotypeLikelihoods genotypeLikelihoods = constructGenotypeLikelihoods( + bedReader, snp); + if (genotypeLikelihoods != null) { + geliWriter.addGenotypeLikelihoods(genotypeLikelihoods); + } + } + bedReader.dropRemainingBlock(); + geliWriter.close(); + } + famReader.close(); + } + + /** + * @return null if for a no-call or the snp has no position on the genome + */ + private char[] getNextGenotype(BedFileReader bedReader, SNP snp) { + char[] genotype = new char[2]; + byte genotypeCode = bedReader.nextGenotype(); + if (snp == null) { + // unplaced marker... we need to read the genotype off the reader so we don't lose + // our place, but we cannot put the marker in the geli file. 
+ return null; + } + switch (genotypeCode) { + case BedFileReader.GENOTYPE_AA: + genotype[0] = (char) snp.getAllele1(); + genotype[1] = (char) snp.getAllele1(); + break; + case BedFileReader.GENOTYPE_AB: + genotype[0] = (char) snp.getAllele1(); + genotype[1] = (char) snp.getAllele2(); + break; + case BedFileReader.GENOTYPE_BB: + genotype[0] = (char) snp.getAllele2(); + genotype[1] = (char) snp.getAllele2(); + break; + case BedFileReader.GENOTYPE_NO_CALL: + // don't record a genotype likelihood for a no call + return null; + default: + throw new PicardException("Unknown genotype code: " + Integer.toBinaryString(genotypeCode)); + } + return genotype; + } + + private void cacheSnps() { + BasicTextFileParser bimReader = null; + try { + bimReader = new BasicTextFileParser(true, this.BIM); + this.snpCache = new LinkedList(); + for (String[] bimFields : bimReader) { + SNP snp = constructSnp(bimFields); + snpCache.add(snp); + } + } finally { + try { + bimReader.close(); + } catch (Exception e) { + } + } + } + + private SNP constructSnp(String[] bimFields) { + byte referenceIndex = getReferenceIndex(bimFields[0]); + if (referenceIndex == -1) { + return null; + } + SNP snp = new SNP( + referenceIndex, + Integer.parseInt(bimFields[3]), + bimFields[4].toUpperCase().getBytes()[0], + bimFields[5].toUpperCase().getBytes()[0]); + return snp; + } + + /** + * determines the index in the sequence dictionary for the given chromosome + */ + private byte getReferenceIndex(String chromosome) { + final String referenceName; + int chromosomeNumber; + try { + chromosomeNumber = Integer.parseInt(chromosome); + } catch (NumberFormatException e) { + chromosomeNumber = -1; + } + + if (chromosomeNumber >= 1 && chromosomeNumber <= 22) { + referenceName = ("chr" + chromosome).intern(); + } else if (chromosomeNumber == 26 || chromosome.equalsIgnoreCase("MT")) { + referenceName = "chrM"; + } else if (chromosomeNumber == 23 || chromosomeNumber == 25 || + chromosome.equalsIgnoreCase("XY") || 
chromosome.equalsIgnoreCase("X")) { + referenceName = "chrX"; + } else if (chromosomeNumber == 24 || chromosome.equalsIgnoreCase("Y")) { + referenceName = "chrY"; + } else { + // unplaced marker + return -1; + } + + Byte referenceIndex = this.referenceIndexes.get(referenceName); + if (referenceIndex == null) { + throw new PicardException("Reference sequence [" + referenceName + "] not found in sequence dictionary"); + } + return referenceIndex; + } + + private void cacheGELIFileNames() { + BasicTextFileParser famReader = null; + try { + famReader = new BasicTextFileParser(true, this.FAM); + this.geliFileNames = new LinkedList(); + for (String[] fields : famReader) { + this.geliFileNames.add(getGeliFileName(fields[0], fields[1])); + } + } finally { + try { + famReader.close(); + } catch (Exception e) { + } + } + } + + private void parseSnpMajor(BedFileReader bedReader) { + cacheGELIFileNames(); + BasicTextFileParser bimReader = new BasicTextFileParser(true, this.BIM); + Map> likelihoodsByFile = + new HashMap>( + (int) Math.ceil(this.geliFileNames.size() * 1.34)); + + int maxRecordsInRam = calculateMaxRecordsInRam(); + for (String geliFileName : this.geliFileNames) { + likelihoodsByFile.put(geliFileName, SortingCollection.newInstance( + GenotypeLikelihoods.class, + new GenotypeLikelihoodsCodec(), + new GenotypeLikelihoodsComparator(), + maxRecordsInRam)); + } + + for (String[] bimFields : bimReader) { + for (String fileName : this.geliFileNames) { + SNP snp = constructSnp(bimFields); + GenotypeLikelihoods genotypeLikelihoods = constructGenotypeLikelihoods( + bedReader, snp); + if (genotypeLikelihoods != null) { + likelihoodsByFile.get(fileName).add(genotypeLikelihoods); + } + } + bedReader.dropRemainingBlock(); + } + bimReader.close(); + + writeGeliFiles(likelihoodsByFile); + } + + /** + * @return + */ + private int calculateMaxRecordsInRam() { + Runtime.getRuntime().gc(); + double memoryToUse = Runtime.getRuntime().maxMemory() * .8; // use up to 80% + int 
objectCountLimit = (int) (memoryToUse / GenotypeLikelihoods.OBJECT_SIZE_BYTES); + return objectCountLimit / this.geliFileNames.size(); + } + + /** + * @param likelihoodsByFile + */ + private void writeGeliFiles( + Map> likelihoodsByFile) { + + for (Map.Entry> entry : likelihoodsByFile.entrySet()) { + GeliFileWriter fileWriter = getGeliFileWriter(entry.getKey(), true); + for (GenotypeLikelihoods likelihoods : entry.getValue()) { + fileWriter.addGenotypeLikelihoods(likelihoods); + } + fileWriter.close(); + } + } + + private GeliFileWriter getGeliFileWriter( + String fileName, boolean presorted) { + File geliFile = new File(this.OUTPUT_DIR, fileName); + GeliFileWriter fileWriter = new GeliFileWriter(geliFile, presorted); + SAMFileHeader header = new SAMFileHeader(); + header.setAttribute(SAMFileHeader.VERSION_TAG, "1.0"); + header.setSequences(this.sequenceDictionary); + fileWriter.setHeader(header); + return fileWriter; + } + + /** + * @param bedReader + * @param snp + * @return + */ + private GenotypeLikelihoods constructGenotypeLikelihoods( + BedFileReader bedReader, SNP snp) { + char[] genotype = getNextGenotype(bedReader, snp); + if (genotype == null) { + // no call or unplaced marker + return null; + } + + GenotypeLikelihoods genotypeLikelihoods = new GenotypeLikelihoods(); + genotypeLikelihoods.setLikelihood( + GenotypeLikelihoods.getLikelihoodIndex(genotype), + LIKELIHOOD); + genotypeLikelihoods.setReferenceIndex(snp.getReferenceIndex()); + genotypeLikelihoods.setPosition(snp.getPosition()); + return genotypeLikelihoods; + } + + /** + * populates bed/bim/fam if bfile option is used + */ + private void populateFileNames() { + if (this.BFILE != null) { + this.BED = new File(this.BFILE + ".bed"); + this.BIM = new File(this.BFILE + ".bim"); + this.FAM = new File(this.BFILE + ".fam"); + } + } + + /** + * @return the appropriate name taking into account this.USE_FAMILY + */ + private String getGeliFileName(String family, String individual) { + StringBuilder fileName 
/**
 * Immutable holder for a single SNP: its position in the sequence dictionary
 * and its two alleles as ASCII bytes.
 *
 * @author Doug Voet
 */
public class SNP {
    private final byte refIndex;
    private final int pos;
    private final byte alleleA;
    private final byte alleleB;

    public SNP(byte chromosome, int position, byte allele1, byte allele2) {
        this.refIndex = chromosome;
        this.pos = position;
        this.alleleA = allele1;
        this.alleleB = allele2;
    }

    /** @return index of this SNP's chromosome in the sequence dictionary */
    public byte getReferenceIndex() {
        return this.refIndex;
    }

    /** @return 1-based position of this SNP on its chromosome */
    public int getPosition() {
        return this.pos;
    }

    /** @return ASCII byte of the first allele */
    public byte getAllele1() {
        return this.alleleA;
    }

    /** @return ASCII byte of the second allele */
    public byte getAllele2() {
        return this.alleleB;
    }
}
+* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.io; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +import edu.mit.broad.picard.PicardException; + +/** + * A class for utility methods that wrap or aggregate functionality in Java IO. + * + * @author Tim Fennell + */ +public class IoUtil { + /** + * Checks that a file is non-null, exists, is not a directory and is readable. If any + * condition is false then a runtime exception is thrown. + * + * @param file the file to check for readability + */ + public static void assertFileIsReadable(File file) { + if (file == null) { + throw new IllegalArgumentException("Cannot check readability of null file."); + } else if (!file.exists()) { + throw new PicardException("Cannot read non-existent file: " + file.getAbsolutePath()); + } + else if (file.isDirectory()) { + throw new PicardException("Cannot read file because it is a directory: " + file.getAbsolutePath()); + } + else if (!file.canRead()) { + throw new PicardException("File exists but is not readable: " + file.getAbsolutePath()); + } + } + + /** + * Checks that a file is non-null, and is either extent and writable, or non-existent but + * that the parent directory exists and is writable. If any + * condition is false then a runtime exception is thrown. 
+ * + * @param file the file to check for writability + */ + public static void assertFileIsWritable(File file) { + if (file == null) { + throw new IllegalArgumentException("Cannot check readability of null file."); + } else if (!file.exists()) { + // If the file doesn't exist, check that it's parent directory does and is writable + File parent = file.getAbsoluteFile().getParentFile(); + if (!parent.exists()) { + throw new PicardException("Cannot write file: " + file.getAbsolutePath() + ". " + + "Neither file nor parent directory exist."); + } + else if (!parent.isDirectory()) { + throw new PicardException("Cannot write file: " + file.getAbsolutePath() + ". " + + "File does not exist and parent is not a directory."); + } + else if (!parent.canWrite()) { + throw new PicardException("Cannot write file: " + file.getAbsolutePath() + ". " + + "File does not exist and parent directory is not writable.."); + } + } + else if (file.isDirectory()) { + throw new PicardException("Cannot write file because it is a directory: " + file.getAbsolutePath()); + } + else if (!file.canWrite()) { + throw new PicardException("File exists but is not writable: " + file.getAbsolutePath()); + } + } + + /** + * Checks that a directory is non-null, extent, writable and a directory + * otherwise a runtime exception is thrown. 
+ * + * @param dir the dir to check for writability + */ + public static void assertDirectoryIsWritable(File dir) { + if (dir == null) { + throw new IllegalArgumentException("Cannot check readability of null file."); + } + else if (!dir.exists()) { + throw new PicardException("Directory does not exist: " + dir.getAbsolutePath()); + } + else if (!dir.isDirectory()) { + throw new PicardException("Cannot write to directory because it is not a directory: " + dir.getAbsolutePath()); + } + else if (!dir.canWrite()) { + throw new PicardException("Directory exists but is not writable: " + dir.getAbsolutePath()); + } + } + + /** + * Opens a file for reading, decompressing it if necessary + * + * @param file The file to open + * @return the input stream to read from + */ + public static InputStream openFileForReading(File file) { + + try { + if (file.getName().endsWith(".gz") || + file.getName().endsWith(".bfq") || + file.getName().endsWith(".map")) { + return new GZIPInputStream(new FileInputStream(file)); + } + //TODO: Other compression formats + else { + return new FileInputStream(file); + } + } + catch (IOException ioe) { + throw new PicardException("File not found: " + file.getName(), ioe); + } + + } + + /** + * Opens a file for writing, overwriting the file if it already exists + * + * @param file the file to write to + * @return the output stream to write to + */ + public static OutputStream openFileForWriting(File file) { + return openFileForWriting(file, false); + } + + /** + * Opens a file for writing + * + * @param file the file to write to + * @param append whether to append to the file if it already exists (we overwrite it if false) + * @return the output stream to write to + */ + public static OutputStream openFileForWriting(File file, boolean append) { + + try { + if (file.getName().endsWith(".gz") || + file.getName().endsWith(".bfq") || + file.getName().endsWith(".map")) { + return new GZIPOutputStream(new FileOutputStream(file, append)); + } + //TODO: Other 
compression formats + else { + return new FileOutputStream(file, append); + } + } + catch (IOException ioe) { + throw new PicardException("Error opening file for writing: " + file.getName(), ioe); + } + } + + /** + * Utility method to copy the contents of input to output. The caller is responsible for + * opening and closing both streams. + * + * @param input contents to be copied + * @param output destination + */ + public static void copyStream(InputStream input, OutputStream output) { + try { + byte[] buffer = new byte[1024]; + int bytesRead = 0; + while((bytesRead = input.read(buffer)) > 0) { + output.write(buffer, 0, bytesRead); + } + } catch (IOException e) { + throw new PicardException("Exception copying stream", e); + } + } + +} diff --git a/lib/edu/mit/broad/picard/metrics/AggregateMetricCollector.java b/lib/edu/mit/broad/picard/metrics/AggregateMetricCollector.java new file mode 100644 index 0000000000..fa611ff091 --- /dev/null +++ b/lib/edu/mit/broad/picard/metrics/AggregateMetricCollector.java @@ -0,0 +1,50 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ + +package edu.mit.broad.picard.metrics; + +import edu.mit.broad.sam.SAMRecord; + +public class AggregateMetricCollector implements MetricCollector { + private final MetricCollector[] collectors; + + public AggregateMetricCollector(MetricCollector... 
collectors) { + if (collectors.length == 0) { + throw new IllegalArgumentException("Must supply at least one collector."); + } + this.collectors = collectors; + } + + @Override + public void addRecord(SAMRecord record) { + for (MetricCollector collector : this.collectors) { + collector.addRecord(record); + } + } + + @Override + public void onComplete() { + for (MetricCollector collector : this.collectors) { + collector.onComplete(); + } + } + + @Override + public void setMetrics(T metrics) { + for (MetricCollector collector : this.collectors) { + collector.setMetrics(metrics); + } + } + + @Override + public T getMetrics() { + return this.collectors[0].getMetrics(); + } +} \ No newline at end of file diff --git a/lib/edu/mit/broad/picard/metrics/Header.java b/lib/edu/mit/broad/picard/metrics/Header.java new file mode 100644 index 0000000000..3ae8f21794 --- /dev/null +++ b/lib/edu/mit/broad/picard/metrics/Header.java @@ -0,0 +1,17 @@ +package edu.mit.broad.picard.metrics; + +/** + * A header for a metrics file. A header simply consists of a type and some arbitrary + * data, but must be able to turn itself into a String and parse it's data back out + * of that String at a later date. + * + * @author Tim Fennell + */ +public interface Header { + /** Converts the header to a String for persisting to a file. */ + public String toString(); + + /** Parses the data contained in the String version of the header. */ + public void parse(String in); + +} diff --git a/lib/edu/mit/broad/picard/metrics/MetricBase.java b/lib/edu/mit/broad/picard/metrics/MetricBase.java new file mode 100644 index 0000000000..21c1226cd7 --- /dev/null +++ b/lib/edu/mit/broad/picard/metrics/MetricBase.java @@ -0,0 +1,77 @@ +package edu.mit.broad.picard.metrics; + +import edu.mit.broad.picard.PicardException; +import edu.mit.broad.picard.util.FormatUtil; + +import java.lang.reflect.Field; + +/** + * A base class from which all Metric classes should inherit. 
+ * + * @author Tim Fennell + */ +public class MetricBase { + /** + * An equals method that checks equality by asserting that the classes are of the exact + * same type and that all public fields are equal. + * + * @param o an instance to compare to + * @return true if they are equal, false otherwise + */ + public boolean equals(Object o) { + if (o == null) return false; + if (o.getClass() != getClass()) return false; + + // Loop through all the fields and check that they are either + // null in both objects or equal in both objects + for (Field f : getClass().getFields()) { + try { + Object lhs = f.get(this); + Object rhs = f.get(o); + + if (lhs == null) { + if (rhs == null) { + // keep going + } + else if (rhs != null) { + return false; + } + } + else { + if (lhs.equals(rhs)) { + // keep going + } + else { + return false; + } + } + } + catch (IllegalAccessException iae) { + throw new PicardException("Could not read field " + f.getName() + " from a " + getClass().getSimpleName()); + } + } + + // If we got this far all the fields are equal + return true; + } + + /** Converts the metric class to a human readable string. 
*/ + public String toString() { + StringBuilder buffer = new StringBuilder(); + FormatUtil formatter = new FormatUtil(); + + for (Field f : getClass().getFields()) { + try { + buffer.append(f.getName()); + buffer.append("\t"); + buffer.append(formatter.format(f.get(this))); + buffer.append("\n"); + } + catch (IllegalAccessException iae) { + throw new PicardException("Could not read field " + f.getName() + " from a " + getClass().getSimpleName()); + } + } + + return buffer.toString(); + } +} diff --git a/lib/edu/mit/broad/picard/metrics/MetricCollector.java b/lib/edu/mit/broad/picard/metrics/MetricCollector.java new file mode 100644 index 0000000000..e84fed450e --- /dev/null +++ b/lib/edu/mit/broad/picard/metrics/MetricCollector.java @@ -0,0 +1,24 @@ +package edu.mit.broad.picard.metrics; + +import edu.mit.broad.sam.SAMRecord; + +/** + * Interface for objects that collect metrics about SAMRecords. + */ +public interface MetricCollector { + T getMetrics(); + + /** Called after collector is constructed to populate the metrics object. */ + void setMetrics(T metrics); + + /** + * Called when collection is complete. Implementations can do any calculations + * that must wait until all records are visited at this time. + */ + void onComplete(); + + /** + * Visitor method called to have the record considered by the collector. 
+ */ + void addRecord(SAMRecord record); +} \ No newline at end of file diff --git a/lib/edu/mit/broad/picard/metrics/MetricsFile.java b/lib/edu/mit/broad/picard/metrics/MetricsFile.java new file mode 100644 index 0000000000..72c6da8423 --- /dev/null +++ b/lib/edu/mit/broad/picard/metrics/MetricsFile.java @@ -0,0 +1,370 @@ +package edu.mit.broad.picard.metrics; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Reader; +import java.io.Writer; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import edu.mit.broad.picard.PicardException; +import edu.mit.broad.picard.util.FormatUtil; +import edu.mit.broad.picard.util.Histogram; +import edu.mit.broad.picard.util.StringUtil; + +/** + * Contains a set of metrics that can be written to a file and parsed back + * again. The set of metrics is composed of zero or more instances of a class, + * BEAN, that extends {@link MetricBase} (all instances must be of the same type) + * and may optionally include a histogram of data. + * + * @author Tim Fennell + */ +public class MetricsFile { + public static final String MAJOR_HEADER_PREFIX = "## "; + public static final String MINOR_HEADER_PREFIX = "# "; + public static final String SEPARATOR = "\t"; + public static final String HISTO_HEADER = "## HISTOGRAM\t"; + public static final String METRIC_HEADER = "## METRICS CLASS\t"; + + private List

    headers = new ArrayList
    (); + private List metrics = new ArrayList(); + private Histogram histogram; + + /** Adds a header to the collection of metrics. */ + public void addHeader(Header h) { this.headers.add(h); } + + /** Returns the list of headers. */ + public List
    getHeaders() { return Collections.unmodifiableList(this.headers); } + + /** Adds a bean to the collection of metrics. */ + public void addMetric(BEAN bean) { this.metrics.add(bean); } + + /** Returns the list of headers. */ + public List getMetrics() { return Collections.unmodifiableList(this.metrics); } + + /** Returns the histogram contained in the metrics file if any. */ + public Histogram getHistogram() { return histogram; } + + /** Sets the histogram contained in the metrics file. */ + public void setHistogram(Histogram histogram) { this.histogram = histogram; } + + /** Returns the list of headers with the specified type. */ + public List
    getHeaders(Class type) { + List
    tmp = new ArrayList
    (); + for (Header h : this.headers) { + if (h.getClass().equals(type)) { + tmp.add(h); + } + } + + return tmp; + } + + /** + * Writes out the metrics file to the supplied file. The file is written out + * headers first, metrics second and histogram third. + * + * @param f a File into which to write the metrics + */ + public void write(File f) { + FileWriter w = null; + try { + w = new FileWriter(f); + write(w); + } + catch (IOException ioe) { + throw new PicardException("Could not write metrics to file: " + f.getAbsolutePath(), ioe); + } + finally { + if (w != null) { + try { + w.close(); + } catch (IOException e) { + } + } + } + } + + /** + * Writes out the metrics file to the supplied writer. The file is written out + * headers first, metrics second and histogram third. + * + * @param w a Writer into which to write the metrics + */ + public void write(Writer w) { + try { + FormatUtil formatter = new FormatUtil(); + BufferedWriter out = new BufferedWriter(w); + printHeaders(out); + out.newLine(); + + printBeanMetrics(out, formatter); + out.newLine(); + + printHistogram(out, formatter); + out.newLine(); + out.flush(); + } + catch (IOException ioe) { + throw new PicardException("Could not write metrics file.", ioe); + } + } + + /** Prints the headers into the provided PrintWriter. */ + private void printHeaders(BufferedWriter out) throws IOException { + for (Header h : this.headers) { + out.append(MAJOR_HEADER_PREFIX); + out.append(h.getClass().getName()); + out.newLine(); + out.append(MINOR_HEADER_PREFIX); + out.append(h.toString()); + out.newLine(); + } + } + + /** Prints each of the metrics entries into the provided PrintWriter. 
*/ + private void printBeanMetrics(BufferedWriter out, FormatUtil formatter) throws IOException { + if (this.metrics.isEmpty()) { + return; + } + + // Write out a header row with the type of the metric class + out.append(METRIC_HEADER + getBeanType().getName()); + out.newLine(); + + // Write out the column headers + Field[] fields = getBeanType().getFields(); + final int fieldCount = fields.length; + + for (int i=0; i.Bin bin : this.histogram.values()) { + out.append(StringUtil.assertCharactersNotInString(formatter.format(bin.getId()), '\t', '\n')); + out.append(MetricsFile.SEPARATOR); + out.append(formatter.format(bin.getValue())); + out.newLine(); + } + } + } + + /** Gets the type of the metrics bean being used. */ + private Class getBeanType() { + if (this.metrics == null || this.metrics.isEmpty()) { + return null; + } else { + return this.metrics.get(0).getClass(); + } + } + + /** Reads the Metrics in from the given reader. */ + public void read(Reader r) { + BufferedReader in = new BufferedReader(r); + FormatUtil formatter = new FormatUtil(); + String line = null; + + try { + // First read the headers + Header header = null; + boolean inHeader = true; + while ((line = in.readLine()) != null && inHeader) { + line = line.trim(); + // A blank line signals the end of the headers, otherwise parse out + // the header types and values and build the headers. 
+ if ("".equals(line)) { + inHeader = false; + } + else if (line.startsWith(MAJOR_HEADER_PREFIX)) { + if (header != null) { + throw new IllegalStateException("Consecutive header class lines encountered."); + } + + String className = line.substring(MAJOR_HEADER_PREFIX.length()).trim(); + try { + header = (Header) Class.forName(className).newInstance(); + } + catch (Exception e) { + throw new PicardException("Error load and/or instantiating an instance of " + className, e); + } + } + else if (line.startsWith(MINOR_HEADER_PREFIX)) { + if (header == null) { + throw new IllegalStateException("Header class must precede header value:" + line); + } + header.parse(line.substring(MINOR_HEADER_PREFIX.length())); + this.headers.add(header); + header = null; + } + else { + throw new PicardException("Illegal state. Found following string in metrics file header: " + line); + } + } + + // Then read the metrics if there are any + while (!line.startsWith(MAJOR_HEADER_PREFIX)) { + line = in.readLine().trim(); + } + if (line.startsWith(METRIC_HEADER)) { + // Get the metric class from the header + String className = line.split(SEPARATOR)[1]; + Class type = null; + try { + type = Class.forName(className); + } + catch (ClassNotFoundException cnfe) { + throw new PicardException("Could not locate class with name " + className, cnfe); + } + + // Read the next line with the column headers + String[] fieldNames = in.readLine().split(SEPARATOR); + Field[] fields = new Field[fieldNames.length]; + for (int i=0; i 0) { + value = formatter.parseObject(values[i], fields[i].getType()); + } + + try { fields[i].set(bean, value); } + catch (Exception e) { + throw new PicardException("Error setting field " + fields[i].getName() + + " on class of type " + type.getName(), e); + } + } + + this.metrics.add(bean); + } + } + } + + // Then read the histogram if it is present + while (line != null && !line.startsWith(MAJOR_HEADER_PREFIX)) { + line = in.readLine(); + } + if (line != null && 
line.startsWith(HISTO_HEADER)) { + // Get the key type of the histogram + String keyClassName = line.split(SEPARATOR)[1].trim(); + Class keyClass = null; + + try { keyClass = Class.forName(keyClassName); } + catch (ClassNotFoundException cnfe) { throw new PicardException("Could not load class with name " + keyClassName); } + + // Read the next line with the bin and value labels + String[] labels = in.readLine().split(SEPARATOR); + this.histogram = new Histogram(labels[0], labels[1]); + + // Read the entries in the histogram + while ((line = in.readLine()) != null && !"".equals(line)) { + String[] fields = line.trim().split(SEPARATOR); + HKEY key = (HKEY) formatter.parseObject(fields[0], keyClass); + double value = formatter.parseDouble(fields[1]); + this.histogram.increment(key, value); + } + } + } + catch (IOException ioe) { + throw new PicardException("Could not read metrics from reader.", ioe); + } + } + + /** Checks that the headers, metrics and histogram are all equal. */ + @Override + public boolean equals(Object o) { + if (getClass() != o.getClass()) { + return false; + } + MetricsFile that = (MetricsFile) o; + + if (!this.headers.equals(that.headers)) { + return false; + } + if (!this.metrics.equals(that.metrics)) { + return false; + } + if (this.histogram == null && that.histogram == null) { + return true; + } else if (this.histogram != null) { + return this.histogram.equals(that.histogram); + } else if (that.histogram != null) { + return that.histogram.equals(this.histogram); + } + + return true; + } +} diff --git a/lib/edu/mit/broad/picard/metrics/StringHeader.java b/lib/edu/mit/broad/picard/metrics/StringHeader.java new file mode 100644 index 0000000000..6798def882 --- /dev/null +++ b/lib/edu/mit/broad/picard/metrics/StringHeader.java @@ -0,0 +1,43 @@ +package edu.mit.broad.picard.metrics; + +import edu.mit.broad.picard.util.StringUtil; + +/** + * A simple header who's data type is a single String. 
Should not be used for anything other + * than comments or descriptive text. + * + * @author Tim Fennell + */ +public class StringHeader implements Header { + private String value; + + /** Default constructor. */ + public StringHeader() {} + + /** Constructor that uses the supplied value as the value of the header. */ + public StringHeader(String value) { + setValue(value); + } + + public void parse(String in) { value = in.trim(); } + public String toString() { return value; } + + public String getValue() { return value; } + public void setValue(String value) { this.value = StringUtil.assertCharactersNotInString(value, '\n'); } + + /** Checks equality on the value of the header. */ + public boolean equals(Object o) { + if (o != null && o instanceof StringHeader) { + StringHeader that = (StringHeader) o; + if (this.value == null) { + return that.value == null; + } + else { + return this.value.equals(that.value); + } + } + else { + return false; + } + } +} diff --git a/lib/edu/mit/broad/picard/metrics/VersionHeader.java b/lib/edu/mit/broad/picard/metrics/VersionHeader.java new file mode 100644 index 0000000000..665f39ecf9 --- /dev/null +++ b/lib/edu/mit/broad/picard/metrics/VersionHeader.java @@ -0,0 +1,50 @@ +package edu.mit.broad.picard.metrics; + +import edu.mit.broad.picard.util.StringUtil; + +/** + * Header that stores information about the version of some piece of software or + * data used to create the metrics file. Payload consists of a name or description + * of the versioned item and a version string. 
+ * + * @author Tim Fennell + */ +public class VersionHeader implements Header { + private String versionedItem; + private String versionString; + + public void parse(String in) { + String[] fields = in.split("\t"); + this.versionedItem = fields[0]; + this.versionString = fields[1]; + } + + public String toString() { + return this.versionedItem + "\t" + this.versionString; + } + + public String getVersionedItem() { return versionedItem; } + public void setVersionedItem(String versionedItem) { + this.versionedItem = StringUtil.assertCharactersNotInString(versionedItem, '\t', '\n'); + } + + public String getVersionString() { return versionString; } + public void setVersionString(String versionString) { + this.versionString = StringUtil.assertCharactersNotInString(versionString, '\t', '\n'); + } + + /** Equals method that checks that both the item and version string are equal. */ + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + VersionHeader that = (VersionHeader) o; + + if (versionString != null ? !versionString.equals(that.versionString) : that.versionString != null) + return false; + if (versionedItem != null ? 
!versionedItem.equals(that.versionedItem) : that.versionedItem != null) + return false; + + return true; + } +} diff --git a/lib/edu/mit/broad/picard/quality/CalibrateQualityScores.java b/lib/edu/mit/broad/picard/quality/CalibrateQualityScores.java new file mode 100644 index 0000000000..9aa59618f6 --- /dev/null +++ b/lib/edu/mit/broad/picard/quality/CalibrateQualityScores.java @@ -0,0 +1,148 @@ +package edu.mit.broad.picard.quality; + +import edu.mit.broad.picard.cmdline.CommandLineProgram; +import edu.mit.broad.picard.cmdline.Option; +import edu.mit.broad.picard.io.IoUtil; +import edu.mit.broad.picard.reference.ReferenceSequenceFile; +import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory; +import edu.mit.broad.picard.variation.DbSnpFileReader; +import edu.mit.broad.picard.util.Log; +import edu.mit.broad.sam.SAMFileReader; +import edu.mit.broad.sam.SAMFileWriter; +import edu.mit.broad.sam.SAMFileWriterFactory; +import edu.mit.broad.sam.SAMRecord; + +import java.io.File; +import java.io.PrintStream; + +/** + * Command line program to calibrate quality scores using alignment and dbsnp data. Calibrates + * qualities cycle by cycle and separately for reads one and two in a pair. Bases that fall + * within dbSNP loci are ignored otherwise the empircal mismatch rate is calculated for + * each quality at each cycle and used to calculate the calibrated quality value. + * + * @author Tim Fennell + */ +public class CalibrateQualityScores extends CommandLineProgram { + @Option(shortName="A", doc="A file of aligned reads in SAM or BAM format") + public File ALIGNED_SAM; + + @Option(shortName="I", doc="A SAM or BAM file to rewrite with calibrated qualities. 
If omitted ALIGNED_SAM is used.", optional=true) + public File INPUT; + + @Option(shortName="O", doc="The SAM or BAM file to write with updated qualities.") + public File OUTPUT; + + @Option(shortName="R", doc="Reference sequence file") + public File REFERENCE; + + @Option(shortName="SNP", doc="Binary file of dbSNP information", optional=true) + public File DBSNP_FILE; + + @Option(shortName="TABLE", doc="A file to output the calibration table(s) to.") + public File CALIBRATION_TABLE_OUT; + + @Option(doc="Optional limit to the number of aligned reads that should be procesed", optional=true) + public Integer READ_LIMIT = -1; + + /** Stock main method for a command line program. */ + public static void main(String[] argv) { + System.exit(new CalibrateQualityScores().instanceMain(argv)); + } + + /** + * Main method for the program. Checks that all input files are present and + * readable and that the output file can be written to. Then loads up all the + * data and calibrates the quality scores and proceeds to write an output file + * with calibrated quality scores instead of the input quality scores. 
+ */ + protected int doWork() { + final Log log = Log.getInstance(getClass()); + + // Some quick parameter checking + if (INPUT == null) INPUT = ALIGNED_SAM; + + IoUtil.assertFileIsReadable(ALIGNED_SAM); + IoUtil.assertFileIsReadable(REFERENCE); + IoUtil.assertFileIsReadable(INPUT); + IoUtil.assertFileIsWritable(OUTPUT); + IoUtil.assertFileIsWritable(CALIBRATION_TABLE_OUT); + + log.info("Reading input files and calculating calibration matrices."); + + // Load things up and calculate the quality score calibrations + SAMFileReader sam = new SAMFileReader(ALIGNED_SAM); + ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE); + DbSnpFileReader dbsnp = null; + + if (DBSNP_FILE != null) { + IoUtil.assertFileIsReadable(DBSNP_FILE); + dbsnp = new DbSnpFileReader(DBSNP_FILE); + } + + QualityScoreCalibrator calibrator = new QualityScoreCalibrator(sam, ref, dbsnp); + calibrator.calibrate(READ_LIMIT); + + // Dump the calibration tables + log.info("Writing out calibration table."); + PrintStream stream = new PrintStream(IoUtil.openFileForWriting(CALIBRATION_TABLE_OUT)); + stream.println("Read 1 Calibration Table:"); + print(stream, calibrator.getRead1Matrix().getCalibratedQualities()); + + if (!calibrator.getRead2Matrix().isEmpty()) { + stream.println(); + stream.println("Read 2 Calibration Table:"); + print(stream, calibrator.getRead2Matrix().getCalibratedQualities()); + } + + // And then load up the input and rewrite with calibrated qualities + log.info("Writing file with calibrated qualities."); + SAMFileReader in = new SAMFileReader(INPUT); + SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), true, OUTPUT); + + for (SAMRecord rec : in) { + byte[] quals = rec.getBaseQualities(); + byte[] calibrated = new byte[quals.length]; + QualityScoreMatrix matrix = rec.getFirstOfPairFlag() ? 
calibrator.getRead1Matrix() : calibrator.getRead2Matrix(); + + for (int i=0; i samIterator = this.sam.iterator(); + SAMRecord read = samIterator.next(); + int readsProcessed = 0; + + // Quality score matrixes for reads 1 and 2 separately + this.read1Matrix = new QualityScoreMatrix(); + this.read2Matrix = new QualityScoreMatrix(); + + + refloop: while ((reference = this.ref.nextSequence()) != null) { + final byte[] refBases = reference.getBases(); + final BitSet snps = getDbSnpMask(reference); + + while (read != null && read.getReferenceIndex(header) == reference.getContigIndex()) { + if (!read.getReadUnmappedFlag() && !read.getNotPrimaryAlignmentFlag()) { + final QualityScoreMatrix matrix = read.getFirstOfPairFlag() ? this.read1Matrix : this.read2Matrix; + final byte[] readBases = read.getReadBases(); + final byte[] qualities = read.getBaseQualities(); + + for (AlignmentBlock block : read.getAlignmentBlocks()) { + final int readIndex = block.getReadStart() - 1; + final int refIndex = block.getReferenceStart() - 1; + final int length = block.getLength(); + + for (int i=0; i 0 && ++readsProcessed >= readLimit) { + break refloop; + } + } + + // Advance the sam iterator + if (samIterator.hasNext()) { + read = samIterator.next(); + } + else { + read = null; + } + } + } + + this.read1Matrix.computeCalibratedQualities(); + if (!this.read2Matrix.isEmpty()) this.read2Matrix.computeCalibratedQualities(); + } + + /** Gets the calibration matrix for the first read. */ + public QualityScoreMatrix getRead1Matrix() { return read1Matrix; } + + /** Gets the calibration matrix for the second read. May be empty if there was no second read data. */ + public QualityScoreMatrix getRead2Matrix() { return read2Matrix; } + + /** + * Returns a BitSet that denotes whether a dbSNP entry is present at each + * base in the reference sequence. The set is reference.length() + 1 so that + * it can be indexed by 1-based reference base. True means dbSNP present, + * false means no dbSNP present. 
+ */ + private BitSet getDbSnpMask(ReferenceSequence reference) { + int index = reference.getContigIndex(); + BitSet bits = new BitSet(reference.length() + 1); + + /* Just return an all false bit set if we don't have dbsnp data. */ + if (this.dbsnp == null) { + return bits; + } + + /* Read off the next contig's worth of data. */ + while (this.dbsnp.hasNext()) { + KnownVariant variant = this.dbsnp.peek(); + + if (variant.getSequenceIndex() < index) { + this.dbsnp.next(); + } + else if (variant.getSequenceIndex() == index) { + variant = this.dbsnp.next(); + + for (int i=variant.getStartPos(); i<=variant.getEndPos(); ++i) { + bits.set(i, true); + } + } + else { + break; + } + } + + return bits; + } +} diff --git a/lib/edu/mit/broad/picard/quality/QualityScoreMatrix.java b/lib/edu/mit/broad/picard/quality/QualityScoreMatrix.java new file mode 100644 index 0000000000..c5c1674c68 --- /dev/null +++ b/lib/edu/mit/broad/picard/quality/QualityScoreMatrix.java @@ -0,0 +1,133 @@ +package edu.mit.broad.picard.quality; + +import edu.mit.broad.picard.util.Histogram; + +import java.util.TreeMap; +import java.util.Map; +import java.util.SortedMap; + +/** + *

    Holds all the information necessary to perform quality score calibration for a single + * end/read for a lane or run of sequencing. General usage is to construct an instance + * an call {@link #addObservation(int, int, boolean)} repeatedly and when all input data + * is consumed call {@link #computeCalibratedQualities()}.

    + * + *

    Once this is done then {@link #getCalibratedQualities()} can be called to get a matrix + * of quality score calibrations by cycle and input quality. However it is preferred to call + * {@link #getCalibratedQuality(int, int)} which will attempt to infer the correct value in the + * case that the input quality was not observed in the training data.

    + * + * @author Tim Fennell + */ +public class QualityScoreMatrix { + // Maps by cycle, histograms by quality + private SortedMap> observations = new TreeMap>(); + private SortedMap> errors = new TreeMap>(); + + private int[][] calibratedQualities = null; + + /** + * Adds an observation to the matrix. + * @param cycle the cycle in the read (1-based) + * @param quality the uncalibrated quality + * @param error true if the base did not match the reference, false otherwise + */ + public void addObservation(int cycle, int quality, boolean error) { + Histogram obs = this.observations.get(cycle); + if (obs == null) { + obs = new Histogram(); + this.observations.put(cycle, obs); + } + obs.increment(quality); + + if (error) { + Histogram errs = this.errors.get(cycle); + if (errs == null) { + errs = new Histogram(); + this.errors.put(cycle, errs); + } + errs.increment(quality); + } + } + + /** + * Takes the input observations so far and builds a matrix of input cycle and + * uncalibrated quality to calibrated quality value. + */ + public void computeCalibratedQualities() { + this.calibratedQualities = new int[this.observations.lastKey() + 1][]; + + for (int cycle=1; cycle obs = this.observations.get(cycle); + Histogram err = this.errors.get(cycle); + + this.calibratedQualities[cycle] = new int[obs.lastKey() + 1]; + + for (Integer qual : obs.keySet()) { + double o = obs.get(qual).getValue(); + Histogram.Bin errBin = err.get(qual); + double e = (errBin == null) ? 1 : errBin.getValue(); + + this.calibratedQualities[cycle][qual] = computePhredScore(e, o); + } + } + } + + /** + * Returns the set of calibrated quality scores from the training data. The array is + * indexed first by the cycle (1-based, index 0 is empty) and then by input quality + * (again, the actualy quality, not shifted). 
+ * + * @return an array of calibrated qualities for the read + */ + public int[][] getCalibratedQualities() { + return calibratedQualities; + } + + /** + * Accesses the calibrated quality for the given input cycle and quality. If the quality + * is outside the range given in the training data then the upper or lower bound of + * the calibrated qualities is used instead. + * + * @param cycle the input cycle (1-based) + * @param quality the uncalibrated quality + * @return the calibrated quality for the cycle and uncalibrated quality + */ + public final int getCalibratedQuality(int cycle, int quality) { + final int[] quals = this.calibratedQualities[cycle]; + + // TODO: proper iterpolation where we don't have the right quality + try { + int retval = quals[quality]; + + // If we didn't calibrate this quality value, search up and down for non-zero + for (int i=quality; i>0 && retval == 0; --i) { + if (quals[i] != 0) retval = quals[i]; + } + + for (int i=quality; i sequenceDictionary; + private String cachedLine = null; + private int index = -1; + + /** Constructs a FastaSequenceFile that reads from the specified file. 
*/ + FastaSequenceFile(File file) { + this.file = file; + this.in = new BufferedReader(new InputStreamReader(IoUtil.openFileForReading(file))); + + // Try and locate the dictionary + String dictionaryName = file.getAbsolutePath(); + dictionaryName = dictionaryName.substring(0, dictionaryName.lastIndexOf(".fasta")); + dictionaryName += ".dict"; + File dictionary = new File(dictionaryName); + if (dictionary.exists()) { + IoUtil.assertFileIsReadable(dictionary); + + try { + SAMTextHeaderCodec codec = new SAMTextHeaderCodec(); + SAMFileHeader header = codec.decode(new AsciiLineReader(new FileInputStream(dictionary)), dictionary); + if (header.getSequences() != null && header.getSequences().size() > 0) { + this.sequenceDictionary = header.getSequences(); + } + } + catch (Exception e) { + throw new PicardException("Could not open sequence dictionary file: " + dictionaryName, e); + } + } + } + + /** + * Returns the list of sequence records associated with the reference sequence if found + * otherwise null. + */ + public List getSequenceDictionary() { + return this.sequenceDictionary; + } + + public ReferenceSequence nextSequence() { + String line = null; + String name = null; + + // Scan forward to a header line + while ((line = readNextLine()) != null) { + if (line.startsWith(">")) { + name = line.substring(1).trim(); + this.index += 1; + break; + } + } + + // No more! + if (name == null) return null; + + // Read the sequence + int basesRead = 0; + byte[] bases = new byte[250000000]; // big enough to hold human chr1! 
+ while ((line = readNextLine()) != null) { + if (line.startsWith(">")) { + pushBackLine(line); + break; + } + else { + final byte[] nextBases = line.getBytes(ASCII); + final int lineLength = nextBases.length; + + // If the array isn't big enough to hold the next chunk, resize it + if (basesRead + lineLength > bases.length) { + byte[] tmp = new byte[bases.length * 2]; + System.arraycopy(bases, 0, tmp, 0, basesRead); + bases = tmp; + } + + // Now shunt the most recent bases onto the end of the array + System.arraycopy(nextBases, 0, bases, basesRead, lineLength); + basesRead += lineLength; + } + } + + // And lastly resize the array down to the right size + if (basesRead != bases.length) { + byte[] tmp = new byte[basesRead]; + System.arraycopy(bases, 0, tmp, 0, basesRead); + bases = tmp; + } + + return new ReferenceSequence(name, this.index, bases); + } + + /** + * Reads the next line from the file, or if we've saved a line earlier, returns that + * instead. + */ + private String readNextLine() { + // If we have a cached line use it + if (this.cachedLine != null) { + String tmp = this.cachedLine; + this.cachedLine = null; + return tmp; + } + else { + try { return this.in.readLine(); } + catch (IOException ioe) { + throw new PicardException("Error reading line from file: " + this.file.getAbsolutePath(), ioe); + } + } + } + + /** Pushed a line back so that the next call to readNextLine() will return it. */ + private void pushBackLine(String line) { + this.cachedLine = line; + } +} + diff --git a/lib/edu/mit/broad/picard/reference/ReferenceSequence.java b/lib/edu/mit/broad/picard/reference/ReferenceSequence.java new file mode 100644 index 0000000000..24aebc7b54 --- /dev/null +++ b/lib/edu/mit/broad/picard/reference/ReferenceSequence.java @@ -0,0 +1,48 @@ +package edu.mit.broad.picard.reference; + +/** + * Wrapper around a reference sequence that has been read from a reference file. 
/**
 * Wrapper around a reference sequence that has been read from a reference file.
 *
 * @author Tim Fennell
 */
public class ReferenceSequence {
    /** Name of the sequence as given in the source file. */
    private final String name;
    /** The sequence bases, one byte per character; case is preserved from the source. */
    private final byte[] bases;
    /** Zero-based index of this contig within the source file. */
    private final int contigIndex;
    /** Cached length of the bases array. */
    private final int length;

    /**
     * Package level constructor that creates a fully formed ReferenceSequence.
     *
     * @param name  the name of the sequence from the source file
     * @param index the zero based index of this contig in the source file
     * @param bases the bases themselves stored as one-byte characters
     */
    ReferenceSequence(String name, int index, byte[] bases) {
        this.name = name;
        this.contigIndex = index;
        this.bases = bases;
        this.length = bases.length;
    }

    /** Gets the name given to this sequence in the source file. */
    public String getName() { return name; }

    /**
     * Gets the array of bases that define this sequence. The bases can include any
     * letter and possibly include masking information in the form of lower case
     * letters. The returned array is the internal one, NOT a copy — do not modify it!
     */
    public byte[] getBases() { return bases; }

    /** Gets the 0-based index of this contig in the source file from which it came. */
    public int getContigIndex() { return contigIndex; }

    /** Gets the length of this reference sequence in bases. */
    public int length() { return length; }

    @Override
    public String toString() {
        return "ReferenceSequence " + getName();
    }
}
+ * + * @author Tim Fennell + */ +public interface ReferenceSequenceFile { + + /** + * Must return a sequence dictionary with at least the following fields completed + * for each sequence: name, length. + * + * @return a list of sequence records representing the sequences in this reference file + */ + public List getSequenceDictionary(); + + /** + * Retrieves the next whole sequences from the file. + * @return a ReferenceSequence or null if at the end of the file + */ + public ReferenceSequence nextSequence(); + +} diff --git a/lib/edu/mit/broad/picard/reference/ReferenceSequenceFileFactory.java b/lib/edu/mit/broad/picard/reference/ReferenceSequenceFileFactory.java new file mode 100644 index 0000000000..57b5907d1d --- /dev/null +++ b/lib/edu/mit/broad/picard/reference/ReferenceSequenceFileFactory.java @@ -0,0 +1,28 @@ +package edu.mit.broad.picard.reference; + +import java.io.File; + +/** + * Factory class for creating ReferenceSequenceFile instances for reading reference + * sequences store in various formats. + * + * @author Tim Fennell + */ +public class ReferenceSequenceFileFactory { + + /** + * Attempts to determine the type of the reference file and return an instance + * of ReferenceSequenceFile that is appropriate to read it. 
+ * + * @param file the reference sequence file on disk + */ + public static ReferenceSequenceFile getReferenceSequenceFile(File file) { + String name = file.getName(); + if (name.endsWith(".fasta") || name.endsWith("fasta.gz") || name.endsWith(".txt") || name.endsWith(".txt.gz")) { + return new FastaSequenceFile(file); + } + else { + throw new IllegalArgumentException("File is not a supported reference file type: " + file.getAbsolutePath()); + } + } +} diff --git a/lib/edu/mit/broad/picard/sam/CollectAlignmentSummaryMetrics.java b/lib/edu/mit/broad/picard/sam/CollectAlignmentSummaryMetrics.java new file mode 100644 index 0000000000..a3bc8fed8e --- /dev/null +++ b/lib/edu/mit/broad/picard/sam/CollectAlignmentSummaryMetrics.java @@ -0,0 +1,352 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+ */ + +package edu.mit.broad.picard.sam; + +import java.io.File; + +import edu.mit.broad.picard.PicardException; +import edu.mit.broad.picard.cmdline.CommandLineProgram; +import edu.mit.broad.picard.cmdline.Option; +import edu.mit.broad.picard.cmdline.Usage; +import edu.mit.broad.picard.io.IoUtil; +import edu.mit.broad.picard.metrics.AggregateMetricCollector; +import edu.mit.broad.picard.metrics.MetricBase; +import edu.mit.broad.picard.metrics.MetricCollector; +import edu.mit.broad.picard.metrics.MetricsFile; +import edu.mit.broad.picard.metrics.StringHeader; +import edu.mit.broad.picard.reference.ReferenceSequence; +import edu.mit.broad.picard.reference.ReferenceSequenceFile; +import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory; +import edu.mit.broad.picard.sam.CollectAlignmentSummaryMetrics.AlignmentSummaryMetrics.Type; +import edu.mit.broad.picard.util.CoordMath; +import edu.mit.broad.picard.util.Histogram; +import edu.mit.broad.picard.util.SequenceUtil; +import edu.mit.broad.sam.AlignmentBlock; +import edu.mit.broad.sam.SAMFileHeader; +import edu.mit.broad.sam.SAMFileReader; +import edu.mit.broad.sam.SAMRecord; +import edu.mit.broad.sam.util.CloseableIterator; + +/** + * A command line tool to read a BAM file and produce standard alignment metrics that would be applicable to any alignment. + * Metrics to include, but not limited to: + *
      + *
    • Total number of reads (total, period, no exclusions)
    • + *
    • Total number of PF reads (PF == does not fail vendor check flag)
    • + *
    • Number of PF noise reads (does not fail vendor check and has noise attr set)
    • + *
    • Total aligned PF reads (any PF read that has a sequence and position)
    • + *
    • High quality aligned PF reads (high quality == mapping quality >= 20)
    • + *
    • High quality aligned PF bases (actual aligned bases, calculate off alignment blocks)
    • + *
    • High quality aligned PF Q20 bases (subset of above where base quality >= 20)
    • + *
    • Median mismatches in HQ aligned PF reads (how many aligned bases != ref on average)
    • + *
    • Reads aligned in pairs (vs. reads aligned with mate unaligned/not present)
    • + *
    • Read length (how to handle mixed lengths?)
    • + *
    • Bad Cycles - how many machine cycles yielded combined no-call and mismatch rates of >= 80%
    • + *
    • Strand balance - reads mapped to positive strand / total mapped reads
    • + *
    + * Metrics are written for the first read of a pair, the second read, and combined for the pair. + * + * @author Doug Voet + */ +public class CollectAlignmentSummaryMetrics extends CommandLineProgram { + private static final int MAPPING_QUALITY_THRESHOLD = 20; + private static final int BASE_QUALITY_THRESHOLD = 20; + + // Usage and parameters + @Usage(programVersion="1.0") + public String USAGE = "Reads a SAM or BAM file and writes a file containing summary metrics.\n"; + @Option(shortName="I", doc="SAM or BAM file") public File INPUT; + @Option(shortName="O", doc="File to write insert size metrics to") public File OUTPUT; + @Option(shortName="R", doc="Reference sequence file") public File REFERENCE; + @Option(doc="If true (default), \"unsorted\" SAM/BAM files will be considerd coordinate sorted") + public Boolean ASSUME_COODINATE_SORTED = Boolean.TRUE; + + private ReferenceSequenceFile ref; + private ReferenceSequence refSequence; + private SAMFileHeader samFileHeader; + + /** Required main method implementation. 
*/ + public static void main(String[] argv) { + System.exit(new CollectAlignmentSummaryMetrics().instanceMain(argv)); + } + + @Override + protected int doWork() { + IoUtil.assertFileIsReadable(INPUT); + IoUtil.assertFileIsReadable(REFERENCE); + IoUtil.assertFileIsWritable(OUTPUT); + SAMFileReader in = new SAMFileReader(INPUT); + assertCoordinateSortOrder(in); + + this.ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE); + this.samFileHeader = in.getFileHeader(); + + MetricsFile> file = collectMetrics(in.iterator()); + in.close(); + + file.write(OUTPUT); + + return 0; + } + + private void assertCoordinateSortOrder(SAMFileReader in) { + switch (in.getFileHeader().getSortOrder()) { + case coordinate: + break; + case unsorted: + if (this.ASSUME_COODINATE_SORTED) { + break; + } + default: + throw new PicardException("Cannot collect summary statistics in file " + INPUT.getAbsoluteFile() + + " because it is not sorted in coordinate order."); + } + } + + private ReferenceSequence getReference(SAMRecord record) { + while (refSequence == null || + record.getReferenceIndex(samFileHeader) > refSequence.getContigIndex()) { + + refSequence = ref.nextSequence(); + } + + if (refSequence == null || record.getReferenceIndex() != refSequence.getContigIndex()) { + throw new PicardException("Cannot find reference sequence [" + + record.getReferenceIndex() + "] in reference file"); + } + + return refSequence; + } + + /** + * Does all the work of iterating through the sam file and collecting summary alignment metrics. 
+ */ + private MetricsFile> collectMetrics( + CloseableIterator samIterator) { + + final MetricCollector unpairedCollector = + constructCollector(Type.UNPAIRED); + final MetricCollector firstOfPairCollector = + constructCollector(Type.FIRST_OF_PAIR); + final MetricCollector secondOfPairCollector = + constructCollector(Type.SECOND_OF_PAIR); + final MetricCollector pairCollector = + constructCollector(Type.PAIR); + + while (samIterator.hasNext()) { + SAMRecord record = samIterator.next(); + + if (record.getReadPairedFlag()) { + if (record.getFirstOfPairFlag()) { + firstOfPairCollector.addRecord(record); + } else { + secondOfPairCollector.addRecord(record); + } + pairCollector.addRecord(record); + } else { + unpairedCollector.addRecord(record); + } + } + + firstOfPairCollector.onComplete(); + secondOfPairCollector.onComplete(); + pairCollector.onComplete(); + unpairedCollector.onComplete(); + + MetricsFile> file = getMetricsFile(); + file.addHeader(new StringHeader("Input file: " + INPUT.getAbsolutePath())); + file.addHeader(new StringHeader("Output file: " + OUTPUT.getAbsolutePath())); + file.addHeader(new StringHeader("Reference file: " + REFERENCE.getAbsolutePath())); + + if (firstOfPairCollector.getMetrics().TOTAL_READS > 0) { + file.addMetric(firstOfPairCollector.getMetrics()); + // override how bad cycle is determined for paired reads, it should be + // the sum of first and second reads + pairCollector.getMetrics().BAD_CYCLES = + firstOfPairCollector.getMetrics().BAD_CYCLES + + secondOfPairCollector.getMetrics().BAD_CYCLES; + file.addMetric(secondOfPairCollector.getMetrics()); + file.addMetric(pairCollector.getMetrics()); + } + if (unpairedCollector.getMetrics().TOTAL_READS > 0) { + file.addMetric(unpairedCollector.getMetrics()); + } + + return file; + } + + private MetricCollector constructCollector(Type type) { + MetricCollector collector = + new AggregateMetricCollector(new ReadCounter(), new QualityMappingCounter()); + collector.setMetrics(new 
AlignmentSummaryMetrics()); + collector.getMetrics().TYPE = type; + return collector; + } + + public static class AlignmentSummaryMetrics extends MetricBase { + public enum Type { UNPAIRED, FIRST_OF_PAIR, SECOND_OF_PAIR, PAIR } + public Type TYPE; + public long TOTAL_READS; + public long PF_READS; + public long PF_NOISE_READS; + public long PF_READS_ALIGNED; + public long PF_HQ_ALIGNED_READS; + public long PF_HQ_ALIGNED_BASES; + public long PF_HQ_ALIGNED_Q20_BASES; + public double PF_HQ_MEDIAN_MISMATCHES; + public double MEAN_READ_LENGTH; + public long READS_ALIGNED_IN_PAIRS; + public long BAD_CYCLES; + public double STRAND_BALANCE; + } + + /** counts reads that match various conditions */ + private class ReadCounter implements MetricCollector { + private long numPositiveStrand = 0; + private Histogram readLengthHistogram = new Histogram(); + private AlignmentSummaryMetrics metrics; + + @Override + public void addRecord(SAMRecord record) { + if (record.getNotPrimaryAlignmentFlag()) { + // only want 1 count per read so skip non primary alignments + return; + } + + metrics.TOTAL_READS++; + readLengthHistogram.increment(record.getReadBases().length); + + if (!record.getReadFailsVendorQualityCheckFlag()) { + metrics.PF_READS++; + + if (isNoiseRead(record)) { + metrics.PF_NOISE_READS++; + } + if (!record.getReadUnmappedFlag()) { + metrics.PF_READS_ALIGNED++; + } + } + + if (!record.getReadUnmappedFlag() && + record.getReadPairedFlag() && + !record.getMateUnmappedFlag()) { + metrics.READS_ALIGNED_IN_PAIRS++; + } + + if (!record.getReadNegativeStrandFlag()) { + numPositiveStrand++; + } + } + + @Override + public void onComplete() { + metrics.MEAN_READ_LENGTH = readLengthHistogram.getMean(); + metrics.STRAND_BALANCE = numPositiveStrand / (double) metrics.TOTAL_READS; + } + + private boolean isNoiseRead(SAMRecord record) { + final Object noiseAttribute = record.getAttribute(ReservedTagConstants.XN); + return (noiseAttribute != null && noiseAttribute.equals(1)); + } + + 
@Override + public void setMetrics(AlignmentSummaryMetrics metrics) { + this.metrics = metrics; + } + + @Override + public AlignmentSummaryMetrics getMetrics() { + return this.metrics; + } + } + + /** counts quality mappings & base calls that match various conditions */ + private class QualityMappingCounter implements MetricCollector { + private Histogram mismatchHistogram = new Histogram(); + private Histogram badCycleHistogram = new Histogram(); + private AlignmentSummaryMetrics metrics; + + @Override + public void addRecord(SAMRecord record) { + if (record.getNotPrimaryAlignmentFlag()) { + return; + } + if (record.getReadUnmappedFlag()) { + final byte[] readBases = record.getReadBases(); + for (int i = 0; i < readBases.length; i++) { + if (SequenceUtil.isNoCall(readBases[i])) { + badCycleHistogram.increment(CoordMath.getCycle(record.getReadNegativeStrandFlag(), readBases.length, i)); + } + } + } else { + boolean highQualityMapping = isHighQualityMapping(record); + if (highQualityMapping) metrics.PF_HQ_ALIGNED_READS++; + + final byte[] readBases = record.getReadBases(); + final byte[] refBases = getReference(record).getBases(); + final byte[] qualities = record.getBaseQualities(); + long mismatchCount = 0; + + for (AlignmentBlock alignmentBlock : record.getAlignmentBlocks()) { + final int readIndex = alignmentBlock.getReadStart() - 1; + final int refIndex = alignmentBlock.getReferenceStart() - 1; + final int length = alignmentBlock.getLength(); + if (highQualityMapping) metrics.PF_HQ_ALIGNED_BASES += alignmentBlock.getLength(); + + for (int i=0; i= BASE_QUALITY_THRESHOLD) { + metrics.PF_HQ_ALIGNED_Q20_BASES++; + } + if (mismatch) { + mismatchCount++; + } + } + if (mismatch || SequenceUtil.isNoCall(readBases[readBaseIndex])) { + badCycleHistogram.increment(CoordMath.getCycle(record.getReadNegativeStrandFlag(), readBases.length, i)); + } + } + } + mismatchHistogram.increment(mismatchCount); + } + } + + private boolean isHighQualityMapping(SAMRecord record) { + 
return !record.getReadFailsVendorQualityCheckFlag() && + record.getMappingQuality() >= MAPPING_QUALITY_THRESHOLD; + } + + @Override + public void onComplete() { + metrics.PF_HQ_MEDIAN_MISMATCHES = mismatchHistogram.getMedian(); + metrics.BAD_CYCLES = 0; + + for (Histogram.Bin cycleBin : badCycleHistogram.values()) { + double badCyclePercentage = cycleBin.getValue() / metrics.TOTAL_READS; + if (badCyclePercentage >= .8) { + metrics.BAD_CYCLES++; + } + } + } + + @Override + public void setMetrics(AlignmentSummaryMetrics metrics) { + this.metrics = metrics; + } + + @Override + public AlignmentSummaryMetrics getMetrics() { + return this.metrics; + } + } +} diff --git a/lib/edu/mit/broad/picard/sam/CollectInsertSizeMetrics.java b/lib/edu/mit/broad/picard/sam/CollectInsertSizeMetrics.java new file mode 100644 index 0000000000..c25d88cc93 --- /dev/null +++ b/lib/edu/mit/broad/picard/sam/CollectInsertSizeMetrics.java @@ -0,0 +1,154 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ + +package edu.mit.broad.picard.sam; + +import java.io.File; + +import edu.mit.broad.picard.PicardException; +import edu.mit.broad.picard.cmdline.CommandLineProgram; +import edu.mit.broad.picard.cmdline.Option; +import edu.mit.broad.picard.cmdline.Usage; +import edu.mit.broad.picard.io.IoUtil; +import edu.mit.broad.picard.metrics.MetricsFile; +import edu.mit.broad.picard.util.Histogram; +import edu.mit.broad.picard.util.Log; +import edu.mit.broad.picard.util.RExecutor; +import edu.mit.broad.sam.SAMFileReader; +import edu.mit.broad.sam.SAMRecord; +import edu.mit.broad.sam.util.CloseableIterator; + +/** + * Command line program to read non-duplicate insert sizes, create a histogram + * and report distribution statistics. + * + * @author Doug Voet + */ +public class CollectInsertSizeMetrics extends CommandLineProgram { + private static Log log = Log.getInstance(CollectInsertSizeMetrics.class); + private static final String HISTOGRAM_R_SCRIPT = "edu/mit/broad/picard/sam/insertSizeHistogram.R"; + // Usage and parameters + @Usage(programVersion="1.0") + public String USAGE = "Reads a SAM or BAM file and writes a file containing metrics about " + + "the statistical distribution of insert size (excluding duplicates) " + + "and generates a histogram plot.\n"; + @Option(shortName="I", doc="SAM or BAM file") public File INPUT; + @Option(shortName="O", doc="File to write insert size metrics to") public File OUTPUT; + @Option(shortName="H", doc="File to write insert size histogram chart to") public File HISTOGRAM_FILE; + + /** Required main method implementation. 
*/ + public static void main(String[] argv) { + System.exit(new CollectInsertSizeMetrics().instanceMain(argv)); + } + + @Override + protected int doWork() { + IoUtil.assertFileIsReadable(INPUT); + IoUtil.assertFileIsWritable(OUTPUT); + IoUtil.assertFileIsWritable(HISTOGRAM_FILE); + + SAMFileReader in = new SAMFileReader(INPUT); + MetricsFile file = collectMetrics(in.iterator()); + in.close(); + + file.write(OUTPUT); + + if (file.getMetrics().get(0).READ_PAIRS == 0) { + log.warn("Input file did not contain any records with insert size information."); + } else { + int rResult = RExecutor.executeFromClasspath( + HISTOGRAM_R_SCRIPT, + OUTPUT.getAbsolutePath(), + HISTOGRAM_FILE.getAbsolutePath(), + INPUT.getName()); + + if (rResult != 0) { + throw new PicardException("R script " + HISTOGRAM_R_SCRIPT + " failed with return code " + rResult); + } + } + + return 0; + } + + /** + * Does all the work of iterating through the sam file and collecting insert size metrics. + */ + MetricsFile collectMetrics(CloseableIterator samIterator) { + Histogram insertSizeHistogram = new Histogram("insert_size", "count"); + while (samIterator.hasNext()) { + SAMRecord record = samIterator.next(); + if (skipRecord(record)) { + continue; + } + + int insertSize = Math.abs(record.getInferredInsertSize()); + insertSizeHistogram.increment(insertSize); + } + + MetricsFile file = new MetricsFile(); + file.setHistogram(insertSizeHistogram); + InsertSizeMetrics metrics = new InsertSizeMetrics(); + metrics.READ_PAIRS = (long) insertSizeHistogram.getCount(); + metrics.MAX_INSERT_SIZE = (int) insertSizeHistogram.getMax(); + metrics.MIN_INSERT_SIZE = (int) insertSizeHistogram.getMin(); + metrics.MEAN_INSERT_SIZE = insertSizeHistogram.getMean(); + metrics.STANDARD_DEVIATION = insertSizeHistogram.getStandardDeviation(); + metrics.MEDIAN_INSERT_SIZE = insertSizeHistogram.getMedian(); + + final double total = insertSizeHistogram.getCount(); + final double median = insertSizeHistogram.getMedian(); + double 
covered = 0; + double low = median; + double high = median; + + while (low >= insertSizeHistogram.getMin() || high <= insertSizeHistogram.getMax()) { + Histogram.Bin lowBin = insertSizeHistogram.get((int) low); + if (lowBin != null) covered += lowBin.getValue(); + + if (low != high) { + Histogram.Bin highBin = insertSizeHistogram.get((int) high); + if (highBin != null) covered += highBin.getValue(); + } + + double percentCovered = covered / total; + int distance = (int) (high - low) + 1; + if (percentCovered >= 0.1 && metrics.WIDTH_OF_10_PERCENT == 0) metrics.WIDTH_OF_10_PERCENT = distance; + if (percentCovered >= 0.2 && metrics.WIDTH_OF_20_PERCENT == 0) metrics.WIDTH_OF_20_PERCENT = distance; + if (percentCovered >= 0.3 && metrics.WIDTH_OF_30_PERCENT == 0) metrics.WIDTH_OF_30_PERCENT = distance; + if (percentCovered >= 0.4 && metrics.WIDTH_OF_40_PERCENT == 0) metrics.WIDTH_OF_40_PERCENT = distance; + if (percentCovered >= 0.5 && metrics.WIDTH_OF_50_PERCENT == 0) metrics.WIDTH_OF_50_PERCENT = distance; + if (percentCovered >= 0.6 && metrics.WIDTH_OF_60_PERCENT == 0) metrics.WIDTH_OF_60_PERCENT = distance; + if (percentCovered >= 0.7 && metrics.WIDTH_OF_70_PERCENT == 0) metrics.WIDTH_OF_70_PERCENT = distance; + if (percentCovered >= 0.8 && metrics.WIDTH_OF_80_PERCENT == 0) metrics.WIDTH_OF_80_PERCENT = distance; + if (percentCovered >= 0.9 && metrics.WIDTH_OF_90_PERCENT == 0) metrics.WIDTH_OF_90_PERCENT = distance; + if (percentCovered >= 0.99 && metrics.WIDTH_OF_99_PERCENT == 0) metrics.WIDTH_OF_99_PERCENT = distance; + + --low; + ++high; + } + + file.addMetric(metrics); + + return file; + } + + /** + * Figures out whether or not the record should be included in the counting of insert sizes + */ + private boolean skipRecord(SAMRecord record) { + return !record.getReadPairedFlag() || + record.getMateUnmappedFlag() || + record.getFirstOfPairFlag() || + record.getNotPrimaryAlignmentFlag() || + record.getDuplicateReadFlag() || + record.getInferredInsertSize() == 0; + } 
+ +} diff --git a/lib/edu/mit/broad/picard/sam/ComparableSamRecordIterator.java b/lib/edu/mit/broad/picard/sam/ComparableSamRecordIterator.java new file mode 100644 index 0000000000..819811720c --- /dev/null +++ b/lib/edu/mit/broad/picard/sam/ComparableSamRecordIterator.java @@ -0,0 +1,64 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright Jan 22, 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + */ +package edu.mit.broad.picard.sam; + +import edu.mit.broad.picard.util.PeekableIterator; +import edu.mit.broad.sam.SAMFileReader; +import edu.mit.broad.sam.SAMRecord; + +import java.util.Comparator; + +/** + * Iterator for SAM records that implements comparable to enable sorting of iterators. + * The comparison is performed by comparing the next record in the iterator to the next + * record in another iterator and returning the ordering between those SAM records. + */ +class ComparableSamRecordIterator extends PeekableIterator implements Comparable { + private Comparator comparator; + private SAMFileReader reader; + + /** + * Constructs an iterator for iteration over the supplied SAM file that will be + * able to compare itself to other ComparableSAMRecordIterator instances using + * the supplied comparator for ordering SAMRecords. + * + * @param sam the SAM file to read records from + * @param comparator the Comparator to use to provide ordering fo SAMRecords + */ + public ComparableSamRecordIterator(SAMFileReader sam, Comparator comparator) { + super(sam.iterator()); + this.reader = sam; + this.comparator = comparator; + } + + /** Returns the reader from which this iterator was constructed. 
*/ + public SAMFileReader getReader() { + return reader; + } + + /** + * Compares this iterator to another comparable iterator based on the next record + * available in each iterator. If the two comparable iterators have different + * comparator types internally an exception is thrown. + * + * @param that another iterator to compare to + * @return a negative, 0 or positive number as described in the Comparator interface + */ + public int compareTo(ComparableSamRecordIterator that) { + if (this.comparator.getClass() != that.comparator.getClass()) { + throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " + + "have different orderings internally"); + } + + SAMRecord record = this.peek(); + SAMRecord record2 = that.peek(); + return comparator.compare(record, record2); + } +} diff --git a/lib/edu/mit/broad/picard/sam/CreateSequenceDictionary.java b/lib/edu/mit/broad/picard/sam/CreateSequenceDictionary.java new file mode 100644 index 0000000000..01a71fd856 --- /dev/null +++ b/lib/edu/mit/broad/picard/sam/CreateSequenceDictionary.java @@ -0,0 +1,145 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.sam; + +import edu.mit.broad.sam.SAMSequenceRecord; +import edu.mit.broad.sam.SAMFileWriter; +import edu.mit.broad.sam.SAMFileWriterFactory; +import edu.mit.broad.sam.SAMFileHeader; +import edu.mit.broad.picard.reference.ReferenceSequenceFile; +import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory; +import edu.mit.broad.picard.reference.ReferenceSequence; +import edu.mit.broad.picard.cmdline.CommandLineProgram; +import edu.mit.broad.picard.cmdline.Option; +import edu.mit.broad.picard.cmdline.Usage; +import edu.mit.broad.picard.PicardException; + +import java.util.List; +import java.util.ArrayList; +import java.io.File; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.math.BigInteger; + +/** + * Create a SAM/BAM file from a fasta containing reference sequence. The output SAM file contains a header but no + * SAMRecords, and the header contains only sequence records. + */ +public class CreateSequenceDictionary extends CommandLineProgram { + + private static final String PROGRAM_VERSION = "1.0"; + + // The following attributes define the command-line arguments + @Usage(programVersion=PROGRAM_VERSION) + public String USAGE = + "Usage: " + getClass().getName() + " [options]\n\n" + + "Read fasta or fasta.gz containing reference sequences, and write as a SAM or BAM file with only sequence dictionary.\n"; + + @Option(doc = "Input reference fasta or fasta.gz") + public File REFERENCE; + + @Option(doc = "Output SAM or BAM file containing only the sequence dictionary") + public File OUTPUT; + + @Option(doc = "Put into AS field of sequence dictionary entry if supplied", optional = true) + public String GENOME_ASSEMBLY; + + @Option(doc = "Put into UIR field of sequence dictionary entry. 
If not supplied, input reference file is used", + optional = true) + public String URI; + + @Option(doc = "Put into SP field of sequence dictionary entry", optional = true) + public String SPECIES; + + private final MessageDigest md5; + + public CreateSequenceDictionary() { + try { + md5 = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + throw new PicardException("MD5 algorithm not found", e); + } + } + + public static void main(final String[] argv) { + System.exit(new CreateSequenceDictionary().instanceMain(argv)); + } + + /** + * Use reference filename to create URI to go into header if URI was not passed on cmd line. + */ + protected boolean customCommandLineValidation() { + if (URI == null) { + URI = "file:" + REFERENCE.getAbsolutePath(); + } + return true; + } + + /** + * Do the work after command line has been parsed. + * RuntimeException may be thrown by this method, and are reported appropriately. + * + * @return program exit status. + */ + protected int doWork() { + final List sequences = makeSequenceDictionary(REFERENCE); + final SAMFileHeader samHeader = new SAMFileHeader(); + samHeader.setSequences(sequences); + final SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(samHeader, false, OUTPUT); + samWriter.close(); + return 0; + } + + + /** + * Read all the sequences from the given reference file, and convert into SAMSequenceRecords + * @param referenceFile fasta or fasta.gz + * @return SAMSequenceRecords containing info from the fasta, plus from cmd-line arguments. 
+ */ + List makeSequenceDictionary(final File referenceFile) { + final ReferenceSequenceFile refSeqFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceFile); + ReferenceSequence refSeq; + final List ret = new ArrayList(); + while ((refSeq = refSeqFile.nextSequence()) != null) { + ret.add(makeSequenceRecord(refSeq)); + } + return ret; + } + + /** + * Create one SAMSequenceRecord from a single fasta sequence + */ + private SAMSequenceRecord makeSequenceRecord(final ReferenceSequence refSeq) { + final SAMSequenceRecord ret = new SAMSequenceRecord(refSeq.getName()); + ret.setSequenceLength(refSeq.length()); + + // Compute MD5 of upcased bases + final byte[] bases = refSeq.getBases(); + for (int i = 0; i < bases.length; ++i) { + bases[i] = (byte) (Character.toUpperCase(bases[i]) & 0xff); + } + + ret.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(bases)); + if (GENOME_ASSEMBLY != null) { + ret.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, GENOME_ASSEMBLY); + } + ret.setAttribute(SAMSequenceRecord.URI_TAG, URI); + if (SPECIES != null) { + ret.setAttribute(SAMSequenceRecord.SPECIES_TAG, SPECIES); + } + return ret; + } + + private String md5Hash(final byte[] bytes) { + md5.reset(); + md5.update(bytes); + return new BigInteger(1, md5.digest()).toString(16); + } +} diff --git a/lib/edu/mit/broad/picard/sam/DuplicationMetrics.java b/lib/edu/mit/broad/picard/sam/DuplicationMetrics.java new file mode 100644 index 0000000000..689e2b806f --- /dev/null +++ b/lib/edu/mit/broad/picard/sam/DuplicationMetrics.java @@ -0,0 +1,116 @@ +package edu.mit.broad.picard.sam; + +import edu.mit.broad.picard.metrics.MetricBase; +import edu.mit.broad.picard.util.Histogram; + +/** + * Metrics that are calculated during the process of marking duplicates + * within a stream of SAMRecords. + */ +public class DuplicationMetrics extends MetricBase { + /** The number of mapped reads examined which did not have a mapped mate pair. 
*/ + public long UNPAIRED_READS_EXAMINED; + + /** The number of mapped read pairs examined. */ + public long READ_PAIRS_EXAMINED; + + /** The total number of unmapped reads examined. */ + public long UNMAPPED_READS; + + /** The number of fragments that were marked as duplicates. */ + public long UNPAIRED_READ_DUPLICATES; + + /** The number of read pairs that were marked as duplicates. */ + public long READ_PAIR_DUPLICATES; + + /** The percentage of mapped sequence that is marked as duplicate. */ + public Double PERCENT_DUPLICATION; + + /** The estimated number of unique molecules in the library based on PE duplication. */ + public Long ESTIMATED_LIBRARY_SIZE; + + /** + * Fills in the ESTIMATED_LIBRARY_SIZE based on the paired read data examined where + * possible and the PERCENT_DUPLICATION. + */ + public void calculateDerivedMetrics() { + if (READ_PAIRS_EXAMINED > 0) { + // Following code "borrowed" from CRD codebase + long n = READ_PAIRS_EXAMINED; + long c = READ_PAIRS_EXAMINED - READ_PAIR_DUPLICATES; + + double m = 1.0, M = 100.0; + + if (c >= n || f(m*c, c, n) <= 0) { + throw new IllegalStateException("Invalid values for pairs and unique pairs: " + + n + ", " + c); + + } + + while( f(M*c, c, n) >= 0 ) M *= 10.0; + + for (int i=0; i<40; i++ ) { + double r = (m+M)/2.0; + double u = f( r * c, c, n ); + if ( u == 0 ) break; + else if ( u > 0 ) m = r; + else if ( u < 0 ) M = r; + } + + this.ESTIMATED_LIBRARY_SIZE = (long) (c * (m+M)/2.0); + } + + PERCENT_DUPLICATION = (UNPAIRED_READ_DUPLICATES + READ_PAIR_DUPLICATES *2) /(double) (UNPAIRED_READS_EXAMINED + READ_PAIRS_EXAMINED *2); + } + + /** Method that is used in the computation of estimated library size. */ + private double f(double x, double c, double n) { + return c/x - 1 + Math.exp(-n/x); + } + + /** + * Estimates the ROI (return on investment) that one would see if a library was sequenced to + * x higher coverage than the observed coverage. 
+ * + * @param estimatedLibrarySize the estimated number of molecules in the library + * @param x the multiple of sequencing to be simulated (i.e. how many X sequencing) + * @param pairs the number of pairs observed in the actual sequencing + * @param uniquePairs the number of unique pairs observed in the actual sequencing + * @return a number z <= x that estimates if you had pairs*x as your sequencing then you + * would observe uniquePairs*z unique pairs. + */ + private double estimateRoi(long estimatedLibrarySize, double x, long pairs, long uniquePairs) { + return estimatedLibrarySize * ( 1 - Math.exp(-(x*pairs)/estimatedLibrarySize) ) / uniquePairs; + } + + /** + * Calculates a histogram using the estimateRoi method to estimate the effective yield + * doing x sequencing for x=1..10. + */ + public Histogram calculateRoiHistogram() { + if (ESTIMATED_LIBRARY_SIZE == null) { + try { calculateDerivedMetrics(); } + catch (IllegalStateException ise) { return null; } + } + + long uniquePairs = READ_PAIRS_EXAMINED - READ_PAIR_DUPLICATES; + Histogram histo = new Histogram(); + + for (double x=1; x<=10; x+=1) { + histo.increment(x, estimateRoi(ESTIMATED_LIBRARY_SIZE, x, READ_PAIRS_EXAMINED, uniquePairs)); + } + + return histo; + } + + // Main method used for debugging the derived metrics +// public static void main(String[] args) { +// DuplicationMetrics m = new DuplicationMetrics(); +// m.PAIRS_EXAMINED = Integer.parseInt(args[0]); +// m.DUPLICATE_PAIRS = m.PAIRS_EXAMINED - Integer.parseInt(args[1]); +// m.calculateDerivedMetrics(); +// System.out.println("Percent Duplication: " + m.PERCENT_DUPLICATION); +// System.out.println("Est. 
Library Size : " + m.ESTIMATED_LIBRARY_SIZE); +// System.out.println(m.calculateRoiHistogram()); +// } +} diff --git a/lib/edu/mit/broad/picard/sam/InsertSizeMetrics.java b/lib/edu/mit/broad/picard/sam/InsertSizeMetrics.java new file mode 100644 index 0000000000..fdc9c47075 --- /dev/null +++ b/lib/edu/mit/broad/picard/sam/InsertSizeMetrics.java @@ -0,0 +1,38 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ + +package edu.mit.broad.picard.sam; + +import edu.mit.broad.picard.metrics.MetricBase; + +/** + * Metrics class for insert size statistics + * + * @author Doug Voet + */ +public class InsertSizeMetrics extends MetricBase { + public double MEDIAN_INSERT_SIZE; + public int MIN_INSERT_SIZE; + public int MAX_INSERT_SIZE; + public double MEAN_INSERT_SIZE; + public double STANDARD_DEVIATION; + public long READ_PAIRS; + + public int WIDTH_OF_10_PERCENT; + public int WIDTH_OF_20_PERCENT; + public int WIDTH_OF_30_PERCENT; + public int WIDTH_OF_40_PERCENT; + public int WIDTH_OF_50_PERCENT; + public int WIDTH_OF_60_PERCENT; + public int WIDTH_OF_70_PERCENT; + public int WIDTH_OF_80_PERCENT; + public int WIDTH_OF_90_PERCENT; + public int WIDTH_OF_99_PERCENT; +} diff --git a/lib/edu/mit/broad/picard/sam/MarkDuplicates.java b/lib/edu/mit/broad/picard/sam/MarkDuplicates.java new file mode 100644 index 0000000000..75321bf82d --- /dev/null +++ b/lib/edu/mit/broad/picard/sam/MarkDuplicates.java @@ -0,0 +1,461 @@ +package edu.mit.broad.picard.sam; + +import edu.mit.broad.picard.cmdline.CommandLineProgram; +import edu.mit.broad.picard.cmdline.Option; +import edu.mit.broad.picard.cmdline.Usage; +import 
edu.mit.broad.picard.PicardException; +import edu.mit.broad.picard.metrics.MetricsFile; +import edu.mit.broad.picard.util.Log; +import edu.mit.broad.sam.util.SortingCollection; +import edu.mit.broad.sam.*; + +import java.io.*; +import java.util.*; + +/** + * A better duplication marking algorithm that handles all cases including clipped + * and gapped alignments. + * + * @author Tim Fennell + */ +public class MarkDuplicates extends CommandLineProgram { + private static final Log log = Log.getInstance(MarkDuplicates.class); + + @Usage public final String USAGE = + "Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. " + + "All records are then written to the output file with the duplicate records flagged."; + @Option(shortName="I", doc="The input SAM or BAM file to analyze") public File INPUT; + @Option(shortName="O", doc="The output file to right marked records to") public File OUTPUT; + @Option(shortName="M", doc="File to write duplication metrics to") public File METRICS_FILE; + + private SortingCollection pairSort; + private SortingCollection fragSort; + private long[] duplicateIndexes = new long[1000000]; + private int nextIndex = 0; // The next offset into duplicateIndexes to use + + + /** Stock main method. */ + public static void main(String[] args) { + new MarkDuplicates().instanceMain(args); + } + + /** Little struct-like class to hold read pair (and fragment) end data. 
*/ + private static class ReadEnds { + public static final int SIZE_OF = (1*1) + (2*1) + (4*4) + (8*2) + 8; // last 8 == reference overhead + public static final byte F=0, R=1, FF=2, FR=3, RR=4, RF=5; + + short score = 0; + byte orientation; + int read1Sequence = -1; + int read1Coordinate = -1; + long read1IndexInFile = -1; + int read2Sequence = -1; + int read2Coordinate = -1; + long read2IndexInFile = -1; + + boolean isPaired() { return this.read2Sequence != -1; } + } + + /** Comparator for ReadEnds that orders by read1 position then pair orientation then read2 position. */ + private static class ReadEndsComparator implements Comparator { + public int compare(ReadEnds lhs, ReadEnds rhs) { + int retval = lhs.read1Sequence - rhs.read1Sequence; + if (retval == 0) retval = lhs.read1Coordinate - rhs.read1Coordinate; + if (retval == 0) retval = lhs.orientation - rhs.orientation; + if (retval == 0) retval = lhs.read2Sequence - rhs.read2Sequence; + if (retval == 0) retval = lhs.read2Coordinate - rhs.read2Coordinate; + if (retval == 0) retval = (int) (lhs.read1IndexInFile - rhs.read1IndexInFile); + if (retval == 0) retval = (int) (lhs.read2IndexInFile - rhs.read2IndexInFile); + + return retval; + } + } + + /** Coded for ReadEnds that just outputs the primitive fields and reads them back. 
*/ + private static class ReadEndsCodec implements SortingCollection.Codec { + private DataInputStream in; + private DataOutputStream out; + + public SortingCollection.Codec clone() { + return new ReadEndsCodec(); + } + + public void setOutputStream(OutputStream os) { this.out = new DataOutputStream(os); } + public void setInputStream(InputStream is) { this.in = new DataInputStream(is); } + + public void encode(ReadEnds read) { + try { + this.out.writeShort(read.score); + this.out.writeByte(read.orientation); + this.out.writeInt(read.read1Sequence); + this.out.writeInt(read.read1Coordinate); + this.out.writeLong(read.read1IndexInFile); + this.out.writeInt(read.read2Sequence); + + if (read.orientation > ReadEnds.R) { + this.out.writeInt(read.read2Coordinate); + this.out.writeLong(read.read2IndexInFile); + } + this.out.flush(); + } + catch (IOException ioe) { + throw new PicardException("Exception writing ReadEnds to file.", ioe); + } + } + + public ReadEnds decode() { + ReadEnds read = new ReadEnds(); + try { + // If the first read results in an EOF we've exhausted the stream + try { read.score = this.in.readShort(); } + catch (EOFException eof) { return null; } + + read.orientation = this.in.readByte(); + read.read1Sequence = this.in.readInt(); + read.read1Coordinate = this.in.readInt(); + read.read1IndexInFile = this.in.readLong(); + read.read2Sequence = this.in.readInt(); + + if (read.orientation > ReadEnds.R) { + read.read2Coordinate = this.in.readInt(); + read.read2IndexInFile = this.in.readLong(); + } + return read; + } + catch (IOException ioe) { + throw new PicardException("Exception writing ReadEnds to file.", ioe); + } + } + } + + /** + * Main work method. Reads the BAM file once and collects sorted information about + * the 5' ends of both ends of each read (or just one end in the case of pairs). + * Then makes a pass through those determining duplicates before re-reading the + * input file and writing it out with duplication flags set correctly. 
+ */ + protected int doWork() { + log.info("Reading input file and constructing read end information."); + buildSortedReadEndLists(); + generateDuplicateIndexes(); + log.info("Marking " + this.duplicateIndexes.length + " records as duplicates."); + DuplicationMetrics metrics = new DuplicationMetrics(); + SAMFileReader in = new SAMFileReader(INPUT); + SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), + true, + OUTPUT); + + // Now copy over the file while marking all the necessary indexes as duplicates + long recordInFileIndex = 0; + long nextDuplicateIndex = (this.duplicateIndexes.length == 0 ? -1 : this.duplicateIndexes[0]); + int arrayIndex = 1; + + for (SAMRecord rec : in) { + // First bring the simple metrics up to date + if (rec.getReadUnmappedFlag()) { + ++metrics.UNMAPPED_READS; + } + else if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) { + ++metrics.UNPAIRED_READS_EXAMINED; + } + else if (rec.getFirstOfPairFlag()){ + ++metrics.READ_PAIRS_EXAMINED; + } + + + if (recordInFileIndex++ == nextDuplicateIndex) { + rec.setDuplicateReadFlag(true); + + // Update the duplication metrics + if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) { + ++metrics.UNPAIRED_READ_DUPLICATES; + } + else if (rec.getFirstOfPairFlag()) { + ++metrics.READ_PAIR_DUPLICATES; + } + + // Now try and figure out the next duplicate index + try { + nextDuplicateIndex = this.duplicateIndexes[arrayIndex++]; + } + catch (ArrayIndexOutOfBoundsException e) { + // Only happens once we've marked all the duplicates + nextDuplicateIndex = -1; + arrayIndex = -1; + } + } + + out.addAlignment(rec); + } + + out.close(); + + + // Write out the metrics + metrics.calculateDerivedMetrics(); + MetricsFile file = getMetricsFile(); + file.addMetric(metrics); + file.setHistogram(metrics.calculateRoiHistogram()); + file.write(METRICS_FILE); + + return 0; + } + + /** + * Goes through all the records in a file and generates a set of ReadEnds objects that + * hold 
the necessary information (reference sequence, 5' read coordinate) to do + * duplication, caching to disk as necssary to sort them. + */ + private void buildSortedReadEndLists() { + // TODO: take into account clipping/padding? + int maxInMemory = (int) ((Runtime.getRuntime().maxMemory() * 0.25) / ReadEnds.SIZE_OF); + this.pairSort = SortingCollection.newInstance(ReadEnds.class, + new ReadEndsCodec(), + new ReadEndsComparator(), + maxInMemory); + + this.fragSort = SortingCollection.newInstance(ReadEnds.class, + new ReadEndsCodec(), + new ReadEndsComparator(), + maxInMemory); + + Map tmp = new HashMap(); + SAMFileReader sam = new SAMFileReader(INPUT); + SAMFileHeader header = sam.getFileHeader(); + long index = 0; + + for (SAMRecord rec : sam) { + if (rec.getReadUnmappedFlag()) { + continue; + } + + ReadEnds fragmentEnd = buildReadEnds(header, index, rec); + this.fragSort.add(fragmentEnd); + + if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) { + String key = rec.getAttribute(ReservedTagConstants.READ_GROUP_ID) + ":" + rec.getReadName(); + ReadEnds pairedEnds = tmp.remove(key); + + // See if we've already seen the first end or not + if (pairedEnds == null) { + pairedEnds = buildReadEnds(header, index, rec); + tmp.put(key, pairedEnds); + } + else { + int sequence = fragmentEnd.read1Sequence; + int coordinate = fragmentEnd.read1Coordinate; + + // If the second read is actually later, just add the second read data, else flip the reads + if (sequence > pairedEnds.read1Sequence || (sequence == pairedEnds.read1Sequence && coordinate >= pairedEnds.read1Coordinate)) { + pairedEnds.read2Sequence = sequence; + pairedEnds.read2Coordinate = coordinate; + pairedEnds.read2IndexInFile = index; + pairedEnds.orientation = getOrientationByte(pairedEnds.orientation == ReadEnds.R, rec.getReadNegativeStrandFlag()); + } + else { + pairedEnds.read2Sequence = pairedEnds.read1Sequence; + pairedEnds.read2Coordinate = pairedEnds.read1Coordinate; + pairedEnds.read2IndexInFile = 
pairedEnds.read1IndexInFile; + pairedEnds.read1Sequence = sequence; + pairedEnds.read1Coordinate = coordinate; + pairedEnds.read1IndexInFile = index; + pairedEnds.orientation = getOrientationByte(rec.getReadNegativeStrandFlag(), pairedEnds.orientation == ReadEnds.R); + } + + pairedEnds.score += getScore(rec); + this.pairSort.add(pairedEnds); + } + } + + ++index; + } + } + + /** Builds a read ends object that represents a single read. */ + private ReadEnds buildReadEnds(SAMFileHeader header, long index, SAMRecord rec) { + ReadEnds ends = new ReadEnds(); + ends.read1Sequence = rec.getReferenceIndex(header); + ends.read1Coordinate = rec.getReadNegativeStrandFlag() ? rec.getUnclippedEnd() : rec.getUnclippedStart(); + ends.orientation = rec.getReadNegativeStrandFlag() ? ReadEnds.R : ReadEnds.F; + ends.read1IndexInFile = index; + ends.score = getScore(rec); + + // Doing this lets the ends object know that it's part of a pair + if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) { + ends.read2Sequence = rec.getMateReferenceIndex(header); + } + + return ends; + } + + /** + * Returns a single byte that encodes the orientation of the two reads in a pair. + */ + private byte getOrientationByte(boolean read1NegativeStrand, boolean read2NegativeStrand) { + if (read1NegativeStrand) { + if (read2NegativeStrand) return ReadEnds.RR; + else return ReadEnds.RF; + } + else { + if (read2NegativeStrand) return ReadEnds.FR; + else return ReadEnds.FF; + } + } + + + + /** Calculates a score for the read which is the sum of scores over Q20. */ + private short getScore(SAMRecord rec) { + short score = 0; + for (byte b : rec.getBaseQualities()) { + if (b >= 15) score += b; + } + + return score; + } + + /** + * Goes through the accumulated ReadEnds objects and determines which of them are + * to be marked as duplicates. 
+ * + * @return an array with an ordered list of indexes into the source file + */ + private void generateDuplicateIndexes() { + ReadEnds firstOfNextChunk = null; + List nextChunk = new ArrayList(200); + + // First just do the pairs + log.info("Traversing read pair information and detecting duplicates."); + for (ReadEnds next : this.pairSort) { + if (firstOfNextChunk == null) { + firstOfNextChunk = next; + nextChunk.add(firstOfNextChunk); + } + else if (areComparableForDuplicates(firstOfNextChunk, next, true)) { + nextChunk.add(next); + } + else { + if (nextChunk.size() > 1) { + markDuplicatePairs(nextChunk); + } + + nextChunk.clear(); + nextChunk.add(next); + firstOfNextChunk = next; + } + } + markDuplicatePairs(nextChunk); + this.pairSort = null; + + // Now deal with the fragments + log.info("Traversing fragment information and detecting duplicates."); + boolean containsPairs = false; + boolean containsFrags = false; + + for (ReadEnds next : this.fragSort) { + if (firstOfNextChunk != null && areComparableForDuplicates(firstOfNextChunk, next, false)) { + nextChunk.add(next); + containsPairs = containsPairs || next.isPaired(); + containsFrags = containsFrags || !next.isPaired(); + } + else { + if (nextChunk.size() > 1 && containsFrags) { + markDuplicateFragments(nextChunk, containsPairs); + } + + nextChunk.clear(); + nextChunk.add(next); + firstOfNextChunk = next; + containsPairs = next.isPaired(); + containsFrags = !next.isPaired(); + } + } + markDuplicateFragments(nextChunk, containsPairs); + this.fragSort = null; + + // Now shrink down the array and sort it + log.info("Sorting list of duplicate records."); + long[] tmp = new long[this.nextIndex]; + System.arraycopy(this.duplicateIndexes, 0, tmp, 0, this.nextIndex); + this.duplicateIndexes = tmp; + Arrays.sort(this.duplicateIndexes); + } + + private boolean areComparableForDuplicates(final ReadEnds lhs, final ReadEnds rhs, final boolean compareRead2) { + boolean retval = lhs.read1Sequence == rhs.read1Sequence && 
+ lhs.read1Coordinate == rhs.read1Coordinate && + lhs.orientation == rhs.orientation; + + if (compareRead2) { + retval = lhs.read2Sequence == rhs.read2Sequence && + lhs.read2Coordinate == rhs.read2Coordinate; + } + + return retval; + } + + private void addIndexAsDuplicate(final long bamIndex) { + if (this.nextIndex > this.duplicateIndexes.length - 1) { + long[] tmp = new long[this.duplicateIndexes.length * 2]; + System.arraycopy(this.duplicateIndexes, 0, tmp, 0, this.nextIndex); + this.duplicateIndexes = tmp; + } + + this.duplicateIndexes[this.nextIndex++] = bamIndex; + } + + /** + * Takes a list of ReadEnds objects and removes from it all objects that should + * not be marked as duplicates. + * + * @param list + */ + private void markDuplicatePairs(final List list) { + short maxScore = 0; + ReadEnds best = null; + + for (final ReadEnds end : list) { + if (end.score > maxScore || best == null) { + maxScore = end.score; + best = end; + } + } + + for (final ReadEnds end : list) { + if (end != best) { + addIndexAsDuplicate(end.read1IndexInFile); + addIndexAsDuplicate(end.read2IndexInFile); + } + } + } + + /** + * Takes a list of ReadEnds objects and removes from it all objects that should + * not be marked as duplicates. 
+ * + * @param list + */ + private void markDuplicateFragments(final List list, final boolean containsPairs) { + if (containsPairs) { + for (final ReadEnds end : list) { + if (!end.isPaired()) addIndexAsDuplicate(end.read1IndexInFile); + } + } + else { + short maxScore = 0; + ReadEnds best = null; + for (final ReadEnds end : list) { + if (end.score > maxScore || best == null) { + maxScore = end.score; + best = end; + } + } + + for (final ReadEnds end : list) { + if (end != best) { + addIndexAsDuplicate(end.read1IndexInFile); + } + } + } + } +} diff --git a/lib/edu/mit/broad/picard/sam/MarkDuplicates2.java b/lib/edu/mit/broad/picard/sam/MarkDuplicates2.java new file mode 100644 index 0000000000..908f27f7d9 --- /dev/null +++ b/lib/edu/mit/broad/picard/sam/MarkDuplicates2.java @@ -0,0 +1,461 @@ +package edu.mit.broad.picard.sam; + +import edu.mit.broad.picard.cmdline.CommandLineProgram; +import edu.mit.broad.picard.cmdline.Option; +import edu.mit.broad.picard.cmdline.Usage; +import edu.mit.broad.picard.PicardException; +import edu.mit.broad.picard.metrics.MetricsFile; +import edu.mit.broad.picard.util.Log; +import edu.mit.broad.sam.util.SortingCollection; +import edu.mit.broad.sam.*; + +import java.io.*; +import java.util.*; + +/** + * A better duplication marking algorithm that handles all cases including clipped + * and gapped alignments. + * + * @author Tim Fennell + */ +public class MarkDuplicates2 extends CommandLineProgram { + private static final Log log = Log.getInstance(MarkDuplicates2.class); + + @Usage public final String USAGE = + "Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. 
" + + "All records are then written to the output file with the duplicate records flagged."; + @Option(shortName="I", doc="The input SAM or BAM file to analyze") public File INPUT; + @Option(shortName="O", doc="The output file to right marked records to") public File OUTPUT; + @Option(shortName="M", doc="File to write duplication metrics to") public File METRICS_FILE; + + private SortingCollection pairSort; + private SortingCollection fragSort; + private long[] duplicateIndexes = new long[1000000]; + private int nextIndex = 0; // The next offset into duplicateIndexes to use + + + /** Stock main method. */ + public static void main(String[] args) { + new MarkDuplicates2().instanceMain(args); + } + + /** Little struct-like class to hold read pair (and fragment) end data. */ + private static class ReadEnds { + public static final int SIZE_OF = (1*1) + (2*1) + (4*4) + (8*2) + 8; // last 8 == reference overhead + public static final byte F=0, R=1, FF=2, FR=3, RR=4, RF=5; + + short score = 0; + byte orientation; + int read1Sequence = -1; + int read1Coordinate = -1; + long read1IndexInFile = -1; + int read2Sequence = -1; + int read2Coordinate = -1; + long read2IndexInFile = -1; + + boolean isPaired() { return this.read2Sequence != -1; } + } + + /** Comparator for ReadEnds that orders by read1 position then pair orientation then read2 position. 
*/ + private static class ReadEndsComparator implements Comparator { + public int compare(ReadEnds lhs, ReadEnds rhs) { + int retval = lhs.read1Sequence - rhs.read1Sequence; + if (retval == 0) retval = lhs.read1Coordinate - rhs.read1Coordinate; + if (retval == 0) retval = lhs.orientation - rhs.orientation; + if (retval == 0) retval = lhs.read2Sequence - rhs.read2Sequence; + if (retval == 0) retval = lhs.read2Coordinate - rhs.read2Coordinate; + if (retval == 0) retval = (int) (lhs.read1IndexInFile - rhs.read1IndexInFile); + if (retval == 0) retval = (int) (lhs.read2IndexInFile - rhs.read2IndexInFile); + + return retval; + } + } + + /** Coded for ReadEnds that just outputs the primitive fields and reads them back. */ + private static class ReadEndsCodec implements SortingCollection.Codec { + private DataInputStream in; + private DataOutputStream out; + + public SortingCollection.Codec clone() { + return new ReadEndsCodec(); + } + + public void setOutputStream(OutputStream os) { this.out = new DataOutputStream(os); } + public void setInputStream(InputStream is) { this.in = new DataInputStream(is); } + + public void encode(ReadEnds read) { + try { + this.out.writeShort(read.score); + this.out.writeByte(read.orientation); + this.out.writeInt(read.read1Sequence); + this.out.writeInt(read.read1Coordinate); + this.out.writeLong(read.read1IndexInFile); + this.out.writeInt(read.read2Sequence); + + if (read.orientation > ReadEnds.R) { + this.out.writeInt(read.read2Coordinate); + this.out.writeLong(read.read2IndexInFile); + } + this.out.flush(); + } + catch (IOException ioe) { + throw new PicardException("Exception writing ReadEnds to file.", ioe); + } + } + + public ReadEnds decode() { + ReadEnds read = new ReadEnds(); + try { + // If the first read results in an EOF we've exhausted the stream + try { read.score = this.in.readShort(); } + catch (EOFException eof) { return null; } + + read.orientation = this.in.readByte(); + read.read1Sequence = this.in.readInt(); + 
read.read1Coordinate = this.in.readInt(); + read.read1IndexInFile = this.in.readLong(); + read.read2Sequence = this.in.readInt(); + + if (read.orientation > ReadEnds.R) { + read.read2Coordinate = this.in.readInt(); + read.read2IndexInFile = this.in.readLong(); + } + return read; + } + catch (IOException ioe) { + throw new PicardException("Exception writing ReadEnds to file.", ioe); + } + } + } + + /** + * Main work method. Reads the BAM file once and collects sorted information about + * the 5' ends of both ends of each read (or just one end in the case of pairs). + * Then makes a pass through those determining duplicates before re-reading the + * input file and writing it out with duplication flags set correctly. + */ + protected int doWork() { + log.info("Reading input file and constructing read end information."); + buildSortedReadEndLists(); + generateDuplicateIndexes(); + log.info("Marking " + this.duplicateIndexes.length + " records as duplicates."); + DuplicationMetrics metrics = new DuplicationMetrics(); + SAMFileReader in = new SAMFileReader(INPUT); + SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), + true, + OUTPUT); + + // Now copy over the file while marking all the necessary indexes as duplicates + long recordInFileIndex = 0; + long nextDuplicateIndex = (this.duplicateIndexes.length == 0 ? 
-1 : this.duplicateIndexes[0]); + int arrayIndex = 1; + + for (SAMRecord rec : in) { + // First bring the simple metrics up to date + if (rec.getReadUnmappedFlag()) { + ++metrics.UNMAPPED_READS; + } + else if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) { + ++metrics.UNPAIRED_READS_EXAMINED; + } + else if (rec.getFirstOfPairFlag()){ + ++metrics.READ_PAIRS_EXAMINED; + } + + + if (recordInFileIndex++ == nextDuplicateIndex) { + rec.setDuplicateReadFlag(true); + + // Update the duplication metrics + if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) { + ++metrics.UNPAIRED_READ_DUPLICATES; + } + else if (rec.getFirstOfPairFlag()) { + ++metrics.READ_PAIR_DUPLICATES; + } + + // Now try and figure out the next duplicate index + try { + nextDuplicateIndex = this.duplicateIndexes[arrayIndex++]; + } + catch (ArrayIndexOutOfBoundsException e) { + // Only happens once we've marked all the duplicates + nextDuplicateIndex = -1; + arrayIndex = -1; + } + } + + out.addAlignment(rec); + } + + out.close(); + + + // Write out the metrics + metrics.calculateDerivedMetrics(); + MetricsFile file = getMetricsFile(); + file.addMetric(metrics); + file.setHistogram(metrics.calculateRoiHistogram()); + file.write(METRICS_FILE); + + return 0; + } + + /** + * Goes through all the records in a file and generates a set of ReadEnds objects that + * hold the necessary information (reference sequence, 5' read coordinate) to do + * duplication, caching to disk as necssary to sort them. + */ + private void buildSortedReadEndLists() { + // TODO: take into account clipping/padding? 
+ int maxInMemory = (int) ((Runtime.getRuntime().maxMemory() * 0.25) / ReadEnds.SIZE_OF); + this.pairSort = SortingCollection.newInstance(ReadEnds.class, + new ReadEndsCodec(), + new ReadEndsComparator(), + maxInMemory); + + this.fragSort = SortingCollection.newInstance(ReadEnds.class, + new ReadEndsCodec(), + new ReadEndsComparator(), + maxInMemory); + + Map tmp = new HashMap(); + SAMFileReader sam = new SAMFileReader(INPUT); + SAMFileHeader header = sam.getFileHeader(); + long index = 0; + + for (SAMRecord rec : sam) { + if (rec.getReadUnmappedFlag()) { + continue; + } + + ReadEnds fragmentEnd = buildReadEnds(header, index, rec); + this.fragSort.add(fragmentEnd); + + if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) { + String key = rec.getAttribute(ReservedTagConstants.READ_GROUP_ID) + ":" + rec.getReadName(); + ReadEnds pairedEnds = tmp.remove(key); + + // See if we've already seen the first end or not + if (pairedEnds == null) { + pairedEnds = buildReadEnds(header, index, rec); + tmp.put(key, pairedEnds); + } + else { + int sequence = fragmentEnd.read1Sequence; + int coordinate = fragmentEnd.read1Coordinate; + + // If the second read is actually later, just add the second read data, else flip the reads + if (sequence > pairedEnds.read1Sequence || (sequence == pairedEnds.read1Sequence && coordinate >= pairedEnds.read1Coordinate)) { + pairedEnds.read2Sequence = sequence; + pairedEnds.read2Coordinate = coordinate; + pairedEnds.read2IndexInFile = index; + pairedEnds.orientation = getOrientationByte(pairedEnds.orientation == ReadEnds.R, rec.getReadNegativeStrandFlag()); + } + else { + pairedEnds.read2Sequence = pairedEnds.read1Sequence; + pairedEnds.read2Coordinate = pairedEnds.read1Coordinate; + pairedEnds.read2IndexInFile = pairedEnds.read1IndexInFile; + pairedEnds.read1Sequence = sequence; + pairedEnds.read1Coordinate = coordinate; + pairedEnds.read1IndexInFile = index; + pairedEnds.orientation = getOrientationByte(rec.getReadNegativeStrandFlag(), 
pairedEnds.orientation == ReadEnds.R); + } + + pairedEnds.score += getScore(rec); + this.pairSort.add(pairedEnds); + } + } + + ++index; + } + } + + /** Builds a read ends object that represents a single read. */ + private ReadEnds buildReadEnds(SAMFileHeader header, long index, SAMRecord rec) { + ReadEnds ends = new ReadEnds(); + ends.read1Sequence = rec.getReferenceIndex(header); + ends.read1Coordinate = rec.getReadNegativeStrandFlag() ? rec.getUnclippedEnd() : rec.getUnclippedStart(); + ends.orientation = rec.getReadNegativeStrandFlag() ? ReadEnds.R : ReadEnds.F; + ends.read1IndexInFile = index; + ends.score = getScore(rec); + + // Doing this lets the ends object know that it's part of a pair + if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) { + ends.read2Sequence = rec.getMateReferenceIndex(header); + } + + return ends; + } + + /** + * Returns a single byte that encodes the orientation of the two reads in a pair. + */ + private byte getOrientationByte(boolean read1NegativeStrand, boolean read2NegativeStrand) { + if (read1NegativeStrand) { + if (read2NegativeStrand) return ReadEnds.RR; + else return ReadEnds.RF; + } + else { + if (read2NegativeStrand) return ReadEnds.FR; + else return ReadEnds.FF; + } + } + + + + /** Calculates a score for the read which is the sum of scores over Q20. */ + private short getScore(SAMRecord rec) { + short score = 0; + for (byte b : rec.getBaseQualities()) { + if (b >= 15) score += b; + } + + return score; + } + + /** + * Goes through the accumulated ReadEnds objects and determines which of them are + * to be marked as duplicates. 
+ * + * @return an array with an ordered list of indexes into the source file + */ + private void generateDuplicateIndexes() { + ReadEnds firstOfNextChunk = null; + List nextChunk = new ArrayList(200); + + // First just do the pairs + log.info("Traversing read pair information and detecting duplicates."); + for (ReadEnds next : this.pairSort) { + if (firstOfNextChunk == null) { + firstOfNextChunk = next; + nextChunk.add(firstOfNextChunk); + } + else if (areComparableForDuplicates(firstOfNextChunk, next, true)) { + nextChunk.add(next); + } + else { + if (nextChunk.size() > 1) { + markDuplicatePairs(nextChunk); + } + + nextChunk.clear(); + nextChunk.add(next); + firstOfNextChunk = next; + } + } + markDuplicatePairs(nextChunk); + this.pairSort = null; + + // Now deal with the fragments + log.info("Traversing fragment information and detecting duplicates."); + boolean containsPairs = false; + boolean containsFrags = false; + + for (ReadEnds next : this.fragSort) { + if (firstOfNextChunk != null && areComparableForDuplicates(firstOfNextChunk, next, false)) { + nextChunk.add(next); + containsPairs = containsPairs || next.isPaired(); + containsFrags = containsFrags || !next.isPaired(); + } + else { + if (nextChunk.size() > 1 && containsFrags) { + markDuplicateFragments(nextChunk, containsPairs); + } + + nextChunk.clear(); + nextChunk.add(next); + firstOfNextChunk = next; + containsPairs = next.isPaired(); + containsFrags = !next.isPaired(); + } + } + markDuplicateFragments(nextChunk, containsPairs); + this.fragSort = null; + + // Now shrink down the array and sort it + log.info("Sorting list of duplicate records."); + long[] tmp = new long[this.nextIndex]; + System.arraycopy(this.duplicateIndexes, 0, tmp, 0, this.nextIndex); + this.duplicateIndexes = tmp; + Arrays.sort(this.duplicateIndexes); + } + + private boolean areComparableForDuplicates(final ReadEnds lhs, final ReadEnds rhs, final boolean compareRead2) { + boolean retval = lhs.read1Sequence == rhs.read1Sequence && 
+ lhs.read1Coordinate == rhs.read1Coordinate && + lhs.orientation == rhs.orientation; + + if (compareRead2) { + retval = lhs.read2Sequence == rhs.read2Sequence && + lhs.read2Coordinate == rhs.read2Coordinate; + } + + return retval; + } + + private void addIndexAsDuplicate(final long bamIndex) { + if (this.nextIndex > this.duplicateIndexes.length - 1) { + long[] tmp = new long[this.duplicateIndexes.length * 2]; + System.arraycopy(this.duplicateIndexes, 0, tmp, 0, this.nextIndex); + this.duplicateIndexes = tmp; + } + + this.duplicateIndexes[this.nextIndex++] = bamIndex; + } + + /** + * Takes a list of ReadEnds objects and removes from it all objects that should + * not be marked as duplicates. + * + * @param list + */ + private void markDuplicatePairs(final List list) { + short maxScore = 0; + ReadEnds best = null; + + for (final ReadEnds end : list) { + if (end.score > maxScore || best == null) { + maxScore = end.score; + best = end; + } + } + + for (final ReadEnds end : list) { + if (end != best) { + addIndexAsDuplicate(end.read1IndexInFile); + addIndexAsDuplicate(end.read2IndexInFile); + } + } + } + + /** + * Takes a list of ReadEnds objects and removes from it all objects that should + * not be marked as duplicates. 
+ * + * @param list + */ + private void markDuplicateFragments(final List list, final boolean containsPairs) { + if (containsPairs) { + for (final ReadEnds end : list) { + if (!end.isPaired()) addIndexAsDuplicate(end.read1IndexInFile); + } + } + else { + short maxScore = 0; + ReadEnds best = null; + for (final ReadEnds end : list) { + if (end.score > maxScore || best == null) { + maxScore = end.score; + best = end; + } + } + + for (final ReadEnds end : list) { + if (end != best) { + addIndexAsDuplicate(end.read1IndexInFile); + } + } + } + } +} diff --git a/lib/edu/mit/broad/picard/sam/MergeSamFiles.java b/lib/edu/mit/broad/picard/sam/MergeSamFiles.java new file mode 100644 index 0000000000..cae476956b --- /dev/null +++ b/lib/edu/mit/broad/picard/sam/MergeSamFiles.java @@ -0,0 +1,95 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. +* Neither the Broad Institute nor MIT can be responsible for its use, misuse, or +* functionality. 
+*/ +package edu.mit.broad.picard.sam; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +import edu.mit.broad.picard.cmdline.CommandLineProgram; +import edu.mit.broad.picard.cmdline.Option; +import edu.mit.broad.picard.cmdline.Usage; +import edu.mit.broad.picard.io.IoUtil; +import edu.mit.broad.sam.SAMFileHeader; +import static edu.mit.broad.sam.SAMFileHeader.SortOrder; +import edu.mit.broad.sam.SAMFileReader; +import edu.mit.broad.sam.SAMFileWriter; +import edu.mit.broad.sam.SAMFileWriterFactory; +import edu.mit.broad.sam.SAMRecord; + +/** + * Reads a SAM or BAM file and combines the output to one file + * + * @author Dave Tefft + */ +public class MergeSamFiles extends CommandLineProgram { + // Usage and parameters + @Usage(programVersion="1.0") + public String USAGE = "Merges multiple SAM/BAM files into one file.\n"; + + @Option(shortName="I", doc="SAM or BAM input file", minElements=1) + public List INPUT = new ArrayList(); + + @Option(shortName="O", doc="SAM or BAM file to write merged result to") + public File OUTPUT; + + @Option(shortName="SO", doc="Sort order of output file", optional=true) + public SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate; + + /** Required main method implementation. */ + public static void main(String[] argv) { + System.exit(new MergeSamFiles().instanceMain(argv)); + } + + /** Combines multiple SAM/BAM files into one. 
*/ + @Override + protected int doWork() { + boolean matchedSortOrders = true; + + // Open the files for reading and writing + List readers = new ArrayList(); + for (File inFile : INPUT) { + IoUtil.assertFileIsReadable(inFile); + SAMFileReader in = new SAMFileReader(inFile); + readers.add(in); + matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER; + } + + // If all the input sort orders match the output sort order then just merge them and + // write on the fly, otherwise setup to merge and sort before writing out the final file + IoUtil.assertFileIsWritable(OUTPUT); + MergingSamRecordIterator iterator = null; + SAMFileWriter out = null; + + if (matchedSortOrders || SORT_ORDER == SortOrder.unsorted) { + SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers, SORT_ORDER); + iterator = new MergingSamRecordIterator(headerMerger); + out = new SAMFileWriterFactory().makeSAMOrBAMWriter(headerMerger.getMergedHeader(), true, OUTPUT); + } + else { + SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers, SortOrder.unsorted); + iterator = new MergingSamRecordIterator(headerMerger); + SAMFileHeader header = headerMerger.getMergedHeader(); + header.setSortOrder(SORT_ORDER); + out = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, OUTPUT); + } + + // Lastly loop through and write out the records + while (iterator.hasNext()) { + SAMRecord record = iterator.next(); + out.addAlignment(record); + } + + out.close(); + return 0; + } + +} \ No newline at end of file diff --git a/lib/edu/mit/broad/picard/sam/MergingSamRecordIterator.java b/lib/edu/mit/broad/picard/sam/MergingSamRecordIterator.java new file mode 100644 index 0000000000..5641512af1 --- /dev/null +++ b/lib/edu/mit/broad/picard/sam/MergingSamRecordIterator.java @@ -0,0 +1,136 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of 
Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. +* Neither the Broad Institute nor MIT can be responsible for its use, misuse, or +* functionality. +*/ +package edu.mit.broad.picard.sam; + +import edu.mit.broad.sam.*; +import static edu.mit.broad.sam.SAMFileHeader.SortOrder; +import edu.mit.broad.picard.PicardException; + +import java.util.*; +import java.lang.reflect.Constructor; + +/** + * Provides an iterator interface for merging multiple underlying iterators into a single + * iterable stream. The underlying iterators/files must all have the same sort order unless + * the requested output format is unsorted, in which case any combination is valid. + */ +public class MergingSamRecordIterator implements Iterator { + private final PriorityQueue pq; + private final SamFileHeaderMerger samHeaderMerger; + private final SAMFileHeader.SortOrder sortOrder; + + /** + * Constructs a new merging iterator with the same set of readers and sort order as + * provided by the header merger parameter. + */ + public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger) { + this.samHeaderMerger = headerMerger; + this.sortOrder = headerMerger.getMergedHeader().getSortOrder(); + final SAMRecordComparator comparator = getComparator(); + + final Collection readers = headerMerger.getReaders(); + this.pq = new PriorityQueue(readers.size()); + + for (final SAMFileReader reader : readers) { + if (this.sortOrder != SortOrder.unsorted && reader.getFileHeader().getSortOrder() != this.sortOrder){ + throw new PicardException("Files are not compatible with sort order"); + } + + final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, comparator); + addIfNotEmpty(iterator); + } + } + + /** Returns true if any of the underlying iterators has more records, otherwise false. 
*/ + public boolean hasNext() { + return !this.pq.isEmpty(); + } + + /** Returns the next record from the top most iterator during merging. */ + public SAMRecord next() { + final ComparableSamRecordIterator iterator = this.pq.poll(); + final SAMRecord record = iterator.next(); + addIfNotEmpty(iterator); + + if (this.samHeaderMerger.hasGroupIdDuplicates()) { + final String id = (String) record.getAttribute(ReservedTagConstants.READ_GROUP_ID); + final String newId = this.samHeaderMerger.getReadGroupId(iterator.getReader(), id); + record.setAttribute(ReservedTagConstants.READ_GROUP_ID, newId); + } + final String oldProgramGroupId = (String) record.getAttribute(SAMTag.PG.toString()); + if (oldProgramGroupId != null) { + final String newProgramGroupId = this.samHeaderMerger.getProgramGroupId(iterator.getReader(), oldProgramGroupId); + record.setAttribute(SAMTag.PG.toString(), newProgramGroupId); + } + + return record; + } + + /** + * Adds iterator to priority queue. If the iterator has more records it is added + * otherwise it is closed and not added. + */ + private void addIfNotEmpty(final ComparableSamRecordIterator iterator) { + if (iterator.hasNext()) { + pq.offer(iterator); + } + else { + iterator.close(); + } + } + + /** Unsupported operation. */ + public void remove() { + throw new UnsupportedOperationException("MergingSAMRecorderIterator.remove()"); + } + + /** + * Get the right comparator for a given sort order (coordinate, alphabetic). In the + * case of "unsorted" it will return a comparator that gives an arbitrary but reflexive + * ordering. 
+ */ + private SAMRecordComparator getComparator() { + // For unsorted build a fake comparator that compares based on object ID + if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) { + return new SAMRecordComparator() { + public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) { + return System.identityHashCode(lhs) - System.identityHashCode(rhs); + } + + public int compare(final SAMRecord lhs, final SAMRecord rhs) { + return fileOrderCompare(lhs, rhs); + } + }; + } + + // Otherwise try and figure out what kind of comparator to return and build it + final Class type = this.sortOrder.getComparator(); + + try { + final Constructor ctor = type.getConstructor(SAMFileHeader.class); + return ctor.newInstance(this.samHeaderMerger.getMergedHeader()); + } + catch (Exception e) { + try { + final Constructor ctor = type.getConstructor(); + return ctor.newInstance(); + } + catch (Exception e2) { + throw new PicardException("Could not instantiate a comparator for sort order: " + this.sortOrder, e2); + } + } + } + + /** Returns the merged header that the merging iterator is working from. */ + public SAMFileHeader getMergedHeader() { + return this.samHeaderMerger.getMergedHeader(); + } +} diff --git a/lib/edu/mit/broad/picard/sam/ReservedTagConstants.java b/lib/edu/mit/broad/picard/sam/ReservedTagConstants.java new file mode 100644 index 0000000000..2f4d3ef91d --- /dev/null +++ b/lib/edu/mit/broad/picard/sam/ReservedTagConstants.java @@ -0,0 +1,18 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.sam; + +/** + * Constants for tags used in our SAM/BAM files + */ +public class ReservedTagConstants { + public static final String READ_GROUP_ID = "RG"; // Specified in the SAM spec doc + public static final String XN = "XN"; // Present and set to 1 if a read is a noise read +} diff --git a/lib/edu/mit/broad/picard/sam/SamFileHeaderMerger.java b/lib/edu/mit/broad/picard/sam/SamFileHeaderMerger.java new file mode 100644 index 0000000000..6c69678ad6 --- /dev/null +++ b/lib/edu/mit/broad/picard/sam/SamFileHeaderMerger.java @@ -0,0 +1,286 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. +* Neither the Broad Institute nor MIT can be responsible for its use, misuse, or +* functionality. +*/ +package edu.mit.broad.picard.sam; + +import edu.mit.broad.sam.*; +import edu.mit.broad.picard.PicardException; + +import java.util.*; + +/** + * Merges SAMFileHeaders that have the same sequences into a single merged header + * object while providing read group translation for cases where read groups + * clash across input headers. 
+ * + * @author Dave Tefft + */ +public class SamFileHeaderMerger { + //Super Header to construct + private final SAMFileHeader mergedHeader; + private final Collection readers; + + //Translation of old group ids to new group ids + private final Map> samGroupIdTranslation = + new HashMap>(); + + //the groups from different files use the same group ids + private boolean hasGroupIdDuplicates = false; + + //Translation of old program group ids to new program group ids + private final Map> samProgramGroupIdTranslation = + new HashMap>(); + + //Letters to construct new ids from a counter + private static final String ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + + /** + * Create SAMFileHeader with additional information + * + * @param readers same file readers to combine + * @param sortOrder sort order new header should have + */ + public SamFileHeaderMerger(final Collection readers, final SAMFileHeader.SortOrder sortOrder) { + this.readers = readers; + this.mergedHeader = new SAMFileHeader(); + + // Set sequences first because if it throws exception there is no need to continue + final List sequences = getSAMSequences(readers); + this.mergedHeader.setSequences(sequences); + + // Set program that creates input alignments + for (final SAMProgramRecord program : mergeSAMProgramRecordLists(readers)) { + this.mergedHeader.addProgramRecord(program); + } + + // Set read groups for merged header + final List readGroups = getReadGroups(readers); + this.mergedHeader.setReadGroups(readGroups); + this.mergedHeader.setGroupOrder(SAMFileHeader.GroupOrder.none); + + this.mergedHeader.setSortOrder(sortOrder); + } + + /** + * Checks to see if there are clashes where different readers are using the same read + * group IDs. If they are then a new set of unique read group IDs are generated (across all + * read groups) otherwise the original read group headers are returned. 
+ * + * @param readers readers to combine + * @return new list of readgroups constructed from all the readers + */ + private List getReadGroups(final Collection readers) { + // Read groups as read from the readers + final List orginalReadGroups = new ArrayList(); + + // Read group with new ids that don't confict + final List modifiedReadGroups = new ArrayList(); + + //set to see if there are duplicate group ids and whether or not we need to modify them + final Set groupIdsSeenBefore = new HashSet(); + + int x = 0; + this.hasGroupIdDuplicates = false; + + for (final SAMFileReader reader : readers) { + final SAMFileHeader header = reader.getFileHeader(); + final Map idTranslation = new HashMap(); + + // Iterate over read groups to find conflicting ids + for (final SAMReadGroupRecord readGroup : header.getReadGroups()) { + final String groupId = readGroup.getReadGroupId(); + final String newGroupId = createNewId(x++); + + // Check to see if same group id is used in two different readers + if (groupIdsSeenBefore.contains(groupId)) { + hasGroupIdDuplicates = true; + } + groupIdsSeenBefore.add(groupId); + + // Creates a new read group with the new id and copies all it's attributes + final SAMReadGroupRecord groupRecordWithNewId = copyReadGroup(readGroup, newGroupId); + + orginalReadGroups.add(readGroup); + modifiedReadGroups.add(groupRecordWithNewId); + + idTranslation.put(groupId, newGroupId); + } + + // Add id tranlation for updating SamRecords with new ids if neccessary + this.samGroupIdTranslation.put(reader, idTranslation); + } + + // return approriate readgroups whether or not the new ids have to be used + if (this.hasGroupIdDuplicates) { + return modifiedReadGroups; + } + else { + return orginalReadGroups; + } + } + + /** + * Get the sequences off the SAMFileReader header. Throws runtime exception if the sequence + * are different from one another + * + * @param readers readers to pull sequences from + * @return sequences from files. 
Each file should have the same sequence + */ + private List getSAMSequences(final Collection readers) { + List sequences = null; + for (final SAMFileReader reader : readers) { + final SAMFileHeader header = reader.getFileHeader(); + + if (sequences == null) { + sequences = header.getSequences(); + } + else { + final List currentSequences = header.getSequences(); + if (!sequenceListsEqual(sequences, currentSequences)) { + throw new PicardException("Files are not compatible with each other. They can not be combined"); + } + } + } + return sequences; + } + + /** + * Checks the equality of two lists of sequence records using the isSameSequence + * method instead of the equals method which is a more strict identity check. + * @param s1 a list of sequence headers + * @param s2 a second list of sequence headers + * @return true if the two lists match otherwise false + */ + private boolean sequenceListsEqual(final List s1, final List s2) { + if (s1.size() != s2.size()) { + return false; + } + for (int i = 0; i < s1.size(); ++i) { + if (!s1.get(i).isSameSequence(s2.get(i))) { + return false; + } + } + return true; + } + + /** + * Find the alignment program that produced the readers. 
If there are more than one + * generate a new program represents that + * + * @param readers SAMFileReaders to pull program information from + * @return SAMProgram record that represents all the readers + */ + // TODO: this needs to be fixed up to support multiple program records (PIC-15) + private List mergeSAMProgramRecordLists(final Collection readers) { + final boolean programMixed = false; + final List ret = new ArrayList(); + int nextProgramGroupId = 0; + for (final SAMFileReader reader : readers) { + final SAMFileHeader header = reader.getFileHeader(); + final Map idTranslation = new HashMap(); + for (final SAMProgramRecord oldProgramRecord : header.getProgramRecords()) { + boolean foundMatch = false; + for (final SAMProgramRecord newProgramRecord : ret) { + if (newProgramRecord.equivalent(oldProgramRecord)) { + idTranslation.put(oldProgramRecord.getProgramGroupId(), newProgramRecord.getProgramGroupId()); + foundMatch = true; + break; + } + } + if (!foundMatch) { + final SAMProgramRecord newProgramRecord = new SAMProgramRecord(Integer.toString(nextProgramGroupId++)); + copyProgramGroupAttributes(oldProgramRecord, newProgramRecord); + ret.add(newProgramRecord); + idTranslation.put(oldProgramRecord.getProgramGroupId(), newProgramRecord.getProgramGroupId()); + } + } + samProgramGroupIdTranslation.put(reader, idTranslation); + } + return ret; + } + + private void copyProgramGroupAttributes(final SAMProgramRecord oldProgramRecord, final SAMProgramRecord newProgramRecord) { + for (final Map.Entry entry : oldProgramRecord.getAttributes()) { + newProgramRecord.setAttribute(entry.getKey(), entry.getValue()); + } + } + + + /** + * Copies all the attribute of a readgroup to a new readgroup with a new id + * + * @param readGroup the group to be copied + * @param modifiedId the id for the new readgroup + * @return new read group + */ + private SAMReadGroupRecord copyReadGroup(final SAMReadGroupRecord readGroup, final String modifiedId) { + final SAMReadGroupRecord retval 
= new SAMReadGroupRecord(modifiedId); + retval.setLibrary(readGroup.getLibrary()); + retval.setSample(readGroup.getSample()); + + for (final Map.Entry attr : readGroup.getAttributes()) { + retval.setAttribute(attr.getKey(), attr.getValue()); + } + + return retval; + } + + + /** + * Creates a base 26 representation of an int + * + * @param n int to covert to letter representation + * @return string rep for an int eg 0 = A 27 = AB + */ + protected static String createNewId(int n) { + final int base = ALPHABET.length(); + + String s = ""; + while (true) { + final int r = n % base; + s = ALPHABET.charAt(r) + s; + n = n / base; + if (n == 0) { + return s; + } + n -= 1; + } + } + + /** Returns the read group id that should be used for the input read and RG id. */ + public String getReadGroupId(final SAMFileReader reader, final String originalReadGroupId) { + return this.samGroupIdTranslation.get(reader).get(originalReadGroupId); + } + + /** + * @param reader one of the input files + * @param originalProgramGroupId a program group ID from the above input file + * @return new ID from the merged list of program groups in the output file + */ + public String getProgramGroupId(final SAMFileReader reader, final String originalProgramGroupId) { + return this.samProgramGroupIdTranslation.get(reader).get(originalProgramGroupId); + } + + /** Returns true if there are read group duplicates within the merged headers. */ + public boolean hasGroupIdDuplicates() { + return this.hasGroupIdDuplicates; + } + + /** Returns the merged header that should be written to any output merged file. */ + public SAMFileHeader getMergedHeader() { + return this.mergedHeader; + } + + /** Returns the collection of readers that this header merger is working with. 
*/ + public Collection getReaders() { + return this.readers; + } +} diff --git a/lib/edu/mit/broad/picard/sam/SamLocusIterator.java b/lib/edu/mit/broad/picard/sam/SamLocusIterator.java new file mode 100644 index 0000000000..f7a52ae909 --- /dev/null +++ b/lib/edu/mit/broad/picard/sam/SamLocusIterator.java @@ -0,0 +1,280 @@ +package edu.mit.broad.picard.sam; + +import edu.mit.broad.sam.util.CloseableIterator; +import edu.mit.broad.sam.SAMRecord; +import edu.mit.broad.sam.NotPrimarySkippingIterator; +import edu.mit.broad.picard.directed.GenomeMask; + +import java.util.*; + +/** + * Iterator that traverses a SAM File, accumulating information on a per-locus basis + */ +public class SamLocusIterator implements Iterable, CloseableIterator { + + /** + * The unit of iteration. Holds the locus, plus the base, quality and strand for each read at that locus. + */ + public static class LocusInfo { + protected final int sequenceIndex; + protected final int position; + protected final List bases = new ArrayList(100); + protected final List qualities = new ArrayList(100); + protected final List negativeStrandFlags = new ArrayList(100); + + LocusInfo(final int sequenceIndex, final int position) { + this.sequenceIndex = sequenceIndex; + this.position = position; + } + + /** + * Accumulate info for one read at the locus. 
+ */ + public void add(final Byte readBase, final Byte baseQuality, final boolean strand) { + bases.add(readBase); + qualities.add(baseQuality); + negativeStrandFlags.add(strand); + } + + public int getSequenceIndex() { return sequenceIndex; } + public int getPosition() { return position; } + public List getBases() { return bases; } + public List getQualities() { return qualities; } + public List getNegativeStrandFlags() { return negativeStrandFlags; } + + public String getBasesAsString() { return bytesToString(bases); } + + private static String bytesToString(final List data) { + if (data == null || data.size() == 0) { + return ""; + } + + final char[] chars = new char[data.size()]; + for (int i = 0; i < data.size(); i++) { + chars[i] = (char) (data.get(i) & 0xFF); + } + return new String(chars); + } + } + + + + + private final CloseableIterator underlyingIterator; + private final NotPrimarySkippingIterator it; + private final LinkedList complete = new LinkedList(); + private final LinkedList accumulator = new LinkedList(); + + private boolean includeNonPfReads = false; + private boolean includeDuplicates = false; + private int qualityScoreCutoff = -Integer.MAX_VALUE; + + private GenomeMask mask; + private int lastContig = 0; + private int lastPosition = 0; + + private boolean finishedAlignedReads = false; + + + // this should probably take a SAM + public SamLocusIterator(final CloseableIterator samIterator) { + this.underlyingIterator = samIterator; + this.it = new NotPrimarySkippingIterator(samIterator); + } + + public Iterator iterator() { + return this; + } + + public void close() { + this.underlyingIterator.close(); + } + + private boolean samHasMore() { + return !finishedAlignedReads && it.hasCurrent(); + } + public boolean hasNext() { + return ((complete.size() > 0) || (accumulator.size() > 0) || (samHasMore()) || hasRemainingMaskBases()); + } + + private boolean hasRemainingMaskBases() { + if (mask == null) return false; + + // if there are more contigs in 
the mask, by definition some of them must have + // marked bases otherwise if we're in the last contig, but we're not at the last marked position, + // there is also more in the mask + return (lastContig <= mask.getMaxContig() || + (lastContig == mask.getMaxContig() && lastPosition <= mask.get(lastContig).nextSetBit(lastPosition+1))); + } + + public LocusInfo next() { + + // if we don't have any completed entries to return, try and make some! + while(complete.size() == 0 && samHasMore()) { + final SAMRecord rec = it.getCurrent(); + final String cigar = rec.getCigarString(); + + // as soon as we hit our first non-aligned read, we can stop! + if (cigar.equals("*")) { + this.finishedAlignedReads = true; + continue; + } + + // skip dupe reads, if so requested + if (!isIncludeDuplicates() && rec.getDuplicateReadFlag()) { it.advance(); continue; } + + // skip non-PF reads, if so requested + if (!isIncludeNonPfReads() && rec.getReadFailsVendorQualityCheckFlag()) { it.advance(); continue; } + + // when we switch contigs, emit everything in the accumulator + if (accumulator.size() > 0 && accumulator.getFirst().sequenceIndex != rec.getReferenceIndex()) { + while (accumulator.size() > 0) { + popLocus(); + } + } + + // pop off things we're not going to accumulate more coverage at the locus in question + while(accumulator.size() > 0 && accumulator.getFirst().position < rec.getAlignmentStart()) { + popLocus(); + } + + // check that it's a non-gapped alignment for now! + // TODO: handle gapped and clipped alignments + if (!cigar.matches("[0-9]+M")) { + System.out.println("Cannot deal with clipped or gapped alignments. 
CIGAR="+cigar); + System.exit(1); + } + + // at this point, either the list is empty or the head should + // be the same position as the first base of the read + + // interpret the CIGAR string and add the base info + for(int j=0; j < rec.getReadBases().length; j++) { + // if the position is empty, initialize it + if (j > accumulator.size() - 1) { + accumulator.add(new LocusInfo(rec.getReferenceIndex(), rec.getAlignmentStart() + j)); + } + + // if the quality score cutoff is met, accumulate the base info + if (rec.getBaseQualities()[j] >= getQualityScoreCutoff()) { + accumulator.get(j).add(rec.getReadBases()[j], rec.getBaseQualities()[j], rec.getReadNegativeStrandFlag()); + } + } + + + it.advance(); + } + + // if we have nothing to return to the user, and we're at the end of the SAM iterator, + // push everything into the complete queue + if (complete.size() == 0 && !samHasMore()) { + while(accumulator.size() > 0) { + popLocus(); + } + } + + // if there are completed entries, return those + if (complete.size() > 0) { + return complete.removeFirst(); + } else { + + // In this case... we're past the last read from SAM so see if we can + // fill out any more (zero coverage) entries from the mask + LocusInfo zeroResult = null; + while (zeroResult == null && lastContig <= mask.getMaxContig()) { + final int nextbit = mask.get(lastContig).nextSetBit(lastPosition+1); + + // try the next contig + if (nextbit == -1) { + lastContig++; + lastPosition = 0; + } else { + lastPosition = nextbit; + zeroResult = new LocusInfo(lastContig, lastPosition); + } + } + + return zeroResult; + } + } + + /** + * Pop the first entry from the LocusInfo accumulator into the complete queue. In addition, + * check the GenomeMask and if there are intervening mask positions between the last popped base and the one + * about to be popped, put those on the complete queue as well. 
+ */ + private void popLocus() { + final LocusInfo li = accumulator.removeFirst(); + + // fill in any gaps based on our genome mask + final int liContig = li.getSequenceIndex(); + + // if we're not on the same contig, fill in the rest of the bits for the previous contig first... + if (lastContig < liContig) { + while (lastContig < liContig) { + int nextbit = 0; + + if (mask != null && mask.get(lastContig) != null) { + while (nextbit != -1) { + nextbit = mask.get(lastContig).nextSetBit(lastPosition + 1); + if (nextbit > -1) { + complete.addLast(new LocusInfo(lastContig, nextbit)); + lastPosition = nextbit; + } + } + } + lastPosition=0; + lastContig++; + } + } + + // now that we're on the same contig, fill in any unfilled positions + // if we have some bits in the mask to fill in... + if (mask != null && mask.get(lastContig) != null && lastPosition + 1 < li.getPosition()) { + while (lastPosition + 1 < li.getPosition()) { + + final int nextbit = mask.get(lastContig).nextSetBit(lastPosition + 1); + + // if there are no more mask bits, or the next mask bit is + // at or after the current data, just continue on + if (nextbit == -1 || nextbit >= li.getPosition()) { break; } + + // otherwise, pop on the desired empty locus info + complete.addLast(new LocusInfo(lastContig, nextbit)); + lastPosition = nextbit; + } + } + + // only add to the complete queue if it's in the mask (or we have no mask!) + if (mask == null || mask.get(li.getSequenceIndex(), li.getPosition())) { + complete.addLast(li); + } + + lastContig = liContig; + lastPosition = li.getPosition(); + + + } + + public void remove() { + throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); + } + + // -------------------------------------------------------------------------------------------- + // Helper methods below this point... 
+ // -------------------------------------------------------------------------------------------- + + public void setGenomeMask(final GenomeMask mask) { this.mask = mask; } + public GenomeMask getGenomeMask() { return this.mask; } + + public boolean isIncludeNonPfReads() { return includeNonPfReads; } + public void setIncludeNonPfReads(final boolean includeNonPfReads) { this.includeNonPfReads = includeNonPfReads; } + + public boolean isIncludeDuplicates() { return includeDuplicates; } + public void setIncludeDuplicates(final boolean includeDuplicates) { this.includeDuplicates = includeDuplicates; } + + public int getQualityScoreCutoff() { return qualityScoreCutoff; } + public void setQualityScoreCutoff(final int qualityScoreCutoff) { this.qualityScoreCutoff = qualityScoreCutoff; } + + +} diff --git a/lib/edu/mit/broad/picard/util/AbstractTextFileParser.java b/lib/edu/mit/broad/picard/util/AbstractTextFileParser.java new file mode 100644 index 0000000000..74dd1e12ae --- /dev/null +++ b/lib/edu/mit/broad/picard/util/AbstractTextFileParser.java @@ -0,0 +1,203 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.util; + +import edu.mit.broad.picard.PicardException; +import edu.mit.broad.sam.util.CloseableIterator; + +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.io.Closeable; + +/** + * Class for parsing text files where each line consists of fields separated by whitespace. + * Code is abstracted into this class so that we can optimize its performance over time. 
+ * + * This class assumes that every line will have the same number of whitespace-separated "words" + * and that lines that start with "#" are comments and should be ignored. + * + * Classes that extend this parser can do so simply by implementing their own constructors and the + * readNextLine(), close(), and getFileName() methods. + * + * @author Kathleen Tibbetts + */ +public abstract class AbstractTextFileParser implements Iterable, CloseableIterator { + + private boolean treatGroupedDelimitersAsOne = true; // Whether multiple delimiters in succession should be treated as one + private byte nextLine[] = null; + private int wordCount = 0; /* The number of delimiter-separated "words" per line of the file. + We can save a little caclulation, or handle files with varying numbers of + words per line, by specifying this if known in advance */ + private boolean iterating = false; + + /** + * Closes this stream and releases any system resources associated with it. + */ + public abstract void close(); + + /** + * @return the next line of text from the underlying stream(s) or null if there is no next line + */ + protected abstract byte[] readNextLine(); + + /** + * @return the name(s) of the file(s) being parsed, or null if no name is available + */ + protected abstract String getFileName(); + + /** + * @return an iterator over a set of elements of type String[] + */ + public Iterator iterator() { + if (iterating) { + throw new IllegalStateException("iterator() method can only be called once, before the" + + "first call to hasNext()"); + } + nextLine = readNextLine(); + iterating = true; + return this; + } + + /** + * Returns true if the iteration has more elements. + * + * @return true if the iteration has more elements. Otherwise returns false. 
+ */ + public boolean hasNext() { + // If this is the start of iteration, queue up the first item + if(!iterating) { + nextLine = readNextLine(); + iterating = true; + } + return nextLine != null; + } + + /** + * Returns the next element in the iteration. + * + * @return the next tlement in the iteration + * @throws java.util.NoSuchElementException + */ + public String[] next() { + + if (!hasNext()) { + throw new NoSuchElementException("Iteration from text file(s) " + + getFileName() + " has no more elements."); + } + + String[] result = parseLine(nextLine); + do { + nextLine = readNextLine(); + } + while (nextLine != null && isComment(nextLine)); + return result; + } + + /** + * This method represents the most efficient way (so far) to parse a line of whitespace-delimited text + * + * @param line the line to parse + * @return an array of all the "words" + */ + private String[] parseLine(byte line[]) { + + if (getWordCount() == 0) { + calculateWordCount(line); + } + String parts[] = new String[getWordCount()]; + boolean delimiter = true; + int index=0; + int start = 0; + + try + { + for (int i = 0; i < line.length; i++) { + if (isDelimiter(line[i])) { + if (!delimiter) { + parts[index++] = new String(line,start,i-start); + } + else if(!isTreatGroupedDelimitersAsOne()) { + parts[index++] = null; + } + delimiter=true; + } + else { + if (delimiter) start = i; + delimiter = false; + } + } + if (!delimiter) { + parts[index] = new String(line,start,line.length-start); + } + } + catch (ArrayIndexOutOfBoundsException e) { + throw new PicardException("Unexpected number of elements found when parsing file " + + this.getFileName() + ": " + index + ". 
Expected a maximum of " + + this.getWordCount() + " elements per line."); + } + return parts; + } + + /** + * Calculates the number of delimiter-separated "words" in a line and sets the value of wordCount + * + * @param line representative line from the file + */ + protected void calculateWordCount(byte line[]) { + int words = 0; + boolean delimiter = true; + for (byte b : line) { + if (isDelimiter(b)) { + if (delimiter && !isTreatGroupedDelimitersAsOne()) words++; + delimiter = true; + } else { + if (delimiter) words++; + delimiter = false; + } + } + setWordCount(words); + } + + /** + * Required method for Iterator API. + * + * @throws UnsupportedOperationException + */ + public void remove() { + throw new UnsupportedOperationException("Remove() not supported."); + } + + /** + * Determines whether a given line is a comment + * + * @param line the line to evaluate + * @return true if the line is a comment (and should be ignored) otherwise false + */ + protected boolean isComment(byte line[]) { + return line[0] == '#'; + } + + /** + * Determines whether a given character is a delimiter + * + * @param b the character to evaluate + * @return true if b is a delimiter; otherwise false + */ + protected boolean isDelimiter(byte b) { + return b == ' ' || b == '\t'; + } + + protected int getWordCount() { return wordCount; } + protected void setWordCount(int wordCount) { this.wordCount = wordCount; } + protected boolean isTreatGroupedDelimitersAsOne() { return treatGroupedDelimitersAsOne; } + protected void setTreatGroupedDelimitersAsOne(boolean treatGroupedDelimitersAsOne) { + this.treatGroupedDelimitersAsOne = treatGroupedDelimitersAsOne; + } +} diff --git a/lib/edu/mit/broad/picard/util/ArrayUtil.java b/lib/edu/mit/broad/picard/util/ArrayUtil.java new file mode 100644 index 0000000000..7ca7e38836 --- /dev/null +++ b/lib/edu/mit/broad/picard/util/ArrayUtil.java @@ -0,0 +1,33 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its 
documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.util; + +public class ArrayUtil { + + /** + * Reverse the elements of the given array in place + */ + public static void reverseArray(T[] array) { + for (int left=0, right=array.length-1; left files = new ArrayList(); + String currentFileName = null; + + /** + * Constructor. Opens up a buffered reader and reads the first line. + * + * @param files the file(s) to parse, in order + */ + public BasicTextFileParser(boolean treatGroupedDelimitersAsOne, File... files) { + if (files.length == 0) { + throw new IllegalArgumentException("At least one file must be specified."); + } + this.files.addAll(Arrays.asList(files)); + File f = this.files.remove(0); + currentFileName = f.getAbsolutePath(); + reader = new AsciiLineReader(IoUtil.openFileForReading(f)); + this.setTreatGroupedDelimitersAsOne(treatGroupedDelimitersAsOne); + } + + /** + * Constructor. In addition to opening and priming the files, it sets the number of + * whitespace-separated "words" per line. + * + * @param files the file(s) to parse + * @param wordCount number of whitespace-separated "words" per line + */ + public BasicTextFileParser(boolean treatGroupedDelimitersAsOne, int wordCount, File... 
files) { + this(treatGroupedDelimitersAsOne, files); + setWordCount(wordCount); + } + /** + * Workhorse method that reads the next line from the underlying reader + * + * @return String or null if there is no next line + */ + protected byte[] readNextLine() + { + try { + String line = reader.readLine(); + if (line != null) { + return line.getBytes(); + } + if (files.size() > 0) { + currentFileName = files.get(0).getAbsolutePath(); + reader = new AsciiLineReader(IoUtil.openFileForReading(files.remove(0))); + return readNextLine(); + } + return null; + } + catch(RuntimeIOException ioe) { + throw new PicardException("Error reading from file " + currentFileName, ioe); + } + } + + /** + * Closes the underlying stream + */ + public void close() { + if (reader != null) { + reader.close(); + } + } + + /** + * Gets the name of the file being parsed + * + * @return the name of the file being parsed + */ + protected String getFileName() { + return this.currentFileName; + } +} diff --git a/lib/edu/mit/broad/picard/util/CloseableIteratorWrapper.java b/lib/edu/mit/broad/picard/util/CloseableIteratorWrapper.java new file mode 100644 index 0000000000..9099016521 --- /dev/null +++ b/lib/edu/mit/broad/picard/util/CloseableIteratorWrapper.java @@ -0,0 +1,42 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ + +package edu.mit.broad.picard.util; + +import java.util.Iterator; + +import edu.mit.broad.sam.util.CloseableIterator; + +public class CloseableIteratorWrapper implements CloseableIterator { + Iterator wrappedIterator; + + public CloseableIteratorWrapper(Iterator wrappedIterator) { + this.wrappedIterator = wrappedIterator; + } + + @Override + public boolean hasNext() { + return wrappedIterator.hasNext(); + } + + @Override + public T next() { + return wrappedIterator.next(); + } + + @Override + public void remove() { + wrappedIterator.remove(); + } + + @Override + public void close() { + } +} \ No newline at end of file diff --git a/lib/edu/mit/broad/picard/util/CloserUtil.java b/lib/edu/mit/broad/picard/util/CloserUtil.java new file mode 100644 index 0000000000..8b5f702ef3 --- /dev/null +++ b/lib/edu/mit/broad/picard/util/CloserUtil.java @@ -0,0 +1,50 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.util; + +import java.util.List; +import java.util.Arrays; +import java.io.Closeable; +import java.io.IOException; + +/** + * Utility to close things that implement Closeable + * + * @author Kathleen Tibbetts + */ +public class CloserUtil { + + /** + * Calls close() on obj if it implements Closeable + * + * @param obj The potentially closeable object + */ + public static void close(Object obj) { + close(Arrays.asList(obj)); + } + + /** + * Calls close() on all elements of objs that implement Closeable + * + * @param objs A list of potentially closeable objects + */ + public static void close(List objs) { + for (Object o : objs) { + if (o instanceof Closeable) { + try { + ((Closeable)o).close(); + } + catch (IOException ioe) { + // Do nothing + } + } + } + } +} diff --git a/lib/edu/mit/broad/picard/util/CoordMath.java b/lib/edu/mit/broad/picard/util/CoordMath.java new file mode 100644 index 0000000000..981b494c07 --- /dev/null +++ b/lib/edu/mit/broad/picard/util/CoordMath.java @@ -0,0 +1,59 @@ +/* + The Broad Institute + SOFTWARE COPYRIGHT NOTICE AGREEMENT + This software and its documentation are copyright 2005 by the + Broad Institute/Massachusetts Institute of Technology. All rights are + reserved. + + This software is supplied without any warranty or guaranteed support + whatsoever. Neither the Broad Institute nor MIT can be responsible for its + use, misuse, or functionality. +*/ +package edu.mit.broad.picard.util; + + +/** + * Basic coordinate-based math utils, so it's encapsulated in one place! Assumes + * a one-based coordinate system and then 'end' is always inclusive + */ +public class CoordMath { + + /** Gets the length of an interval given the start and the end. */ + public static int getLength(int start, int end) { return (end - start) + 1; } + + /** Gets the start of an interval given the end and the length. 
*/
    public static int getStart(int end, int length) { return end - length + 1; }

    /** Gets the end of an interval given the start and the length. */
    public static int getEnd(int start, int length) { return start + length - 1; }

    /**
     * Checks to see if the two sets of coordinates have any overlap.
     * True when either endpoint of [start2, end2] falls inside [start, end],
     * or when [start, end] is entirely enclosed by [start2, end2].
     */
    public static boolean overlaps(int start, int end, int start2, int end2) {
        return (start2 >= start && start2 <= end) || (end2 >= start && end2 <= end) ||
               encloses(start2, end2, start, end);
    }

    /** Returns true if the "inner" coords are totally enclosed by the "outer" coords. */
    public static boolean encloses(int outerStart, int outerEnd, int innerStart, int innerEnd) {
        return innerStart >= outerStart && innerEnd <= outerEnd;
    }

    /**
     * Determines the amount of overlap between two coordinate ranges. Assumes that the two ranges
     * actually do overlap and therefore may produce strange results when they do not!
     * (Length of the intersection: [max(start, start2), min(end, end2)], inclusive.)
     */
    public static int getOverlap(int start, int end, int start2, int end2) {
        return getLength(Math.max(start, start2), Math.min(end, end2));
    }

    /**
     * Determines the read cycle number for the base.
     *
     * For a positive-strand read the cycle is just readBaseIndex + 1 (1-based);
     * for a negative-strand read the cycle is counted from the far end.
     *
     * @param isNegativeStrand true if the read is negative strand
     * @param readLength       the total read length
     * @param readBaseIndex    the 0-based index of the read base in question
     */
    public static int getCycle(boolean isNegativeStrand, int readLength, final int readBaseIndex) {
        return isNegativeStrand ? readLength - readBaseIndex : readBaseIndex + 1;
    }
}
+ * + * @author Tim Fennell + */ +public class Coverage { + private Interval interval; + private short[] depths; + + /** Constructs a new coverage object for the provided mapping with the desired padding either side. */ + public Coverage(Interval i, int padding) { + this.interval = i; + this.depths = new short[interval.length() + 2*padding]; + } + + /** Adds a single point of depth at the desired offset into the coverage array. */ + public void addBase(int offset) { + if (offset >= 0 && offset < this.depths.length) { + this.depths[offset] += 1; + } + } + + /** Returns true if any base in the range has coverage of > 1 */ + public boolean hasCoverage() { + for (short s : depths) { + if (s > 1) return true; + } + + return false; + } + + /** Gets the coverage depths as an array of shorts. */ + public short[] getDepths() { return this.depths; } +} diff --git a/lib/edu/mit/broad/picard/util/CreateAnalysisDirectory.java b/lib/edu/mit/broad/picard/util/CreateAnalysisDirectory.java new file mode 100644 index 0000000000..c7ba6c6262 --- /dev/null +++ b/lib/edu/mit/broad/picard/util/CreateAnalysisDirectory.java @@ -0,0 +1,88 @@ +package edu.mit.broad.picard.util; + +import edu.mit.broad.picard.cmdline.CommandLineProgram; +import edu.mit.broad.picard.cmdline.Usage; +import edu.mit.broad.picard.cmdline.Option; +import edu.mit.broad.picard.io.IoUtil; + +import java.io.File; +import java.util.Date; +import java.text.SimpleDateFormat; + +/** + * CommandLineProgram to create Picard analysis directory + * + * @author Kathleen Tibbetts + */ +public class CreateAnalysisDirectory extends CommandLineProgram { + + public static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy_MM_dd"); + + // The following attributes define the command-line arguments + @Usage(programVersion="1.0") + public String USAGE = + "Usage: " + getClass().getName() + " [options]\n\n" + + "Create a new Picard analysis directory.\n"; + + + @Option(shortName = "P", doc = "Analysis directory prefix. 
    public String PREFIX = "/seq/picard";  // @Option(shortName = "P") declaration is split onto the preceding line

    @Option(shortName = "F", doc = "The flowcell. ")
    public String FLOWCELL;

    @Option(shortName = "A", doc = "The first cycle being analyzed. ")
    public Integer FIRST_CYCLE = 1;

    @Option(shortName = "O", doc = "The last cycle being analyzed. ")
    public Integer LAST_CYCLE;

    @Option(shortName = "R", doc = "The run date in the format MM/dd/yyyy. ")
    public Date RUNDATE;

    @Option(shortName = "L", doc = "Lane number. ")
    public Integer LANE;

    @Option(shortName="LIB", doc = "Library this analysis is for (e.g. 'Solexa-1234'). ")
    public String LIBRARY;

    @Option(shortName="S", doc = "Analysis start date in the format MM/dd/yyyy")
    public Date ANALYSIS_START_DATE;

    /**
     * Builds the nested analysis directory path
     * PREFIX/FLOWCELL/C{first}-{last}_{rundate}_{analysisdate}/LANE/LIBRARY,
     * creating each level that does not yet exist, and prints the final path
     * (no trailing newline) to stdout so callers can capture it.
     *
     * @return 0 on success, 1 if any directory could not be created
     */
    @Override
    protected int doWork() {
        // Strips only ONE trailing slash; "PREFIX//" would survive. NOTE(review): confirm callers never pass doubled slashes.
        if (PREFIX.charAt(PREFIX.length()-1) == '/') {
            PREFIX = PREFIX.substring(0, PREFIX.length()-1);
        }
        IoUtil.assertDirectoryIsWritable(new File(PREFIX));
        // Path components, outermost first. "dateFormat" is a shared static
        // SimpleDateFormat declared on the class — not thread-safe, but this is
        // a single-threaded command-line program.
        String parts[] = { PREFIX, FLOWCELL, "C" + FIRST_CYCLE + "-" + LAST_CYCLE + "_" +
                dateFormat.format(RUNDATE) + "_" + dateFormat.format(ANALYSIS_START_DATE),
                String.valueOf(LANE), LIBRARY };
        String directory = null;

        // Starts at i = 1 because parts[0] (PREFIX) must already exist (asserted above).
        // Each iteration re-joins parts[0..i] and creates that single level with mkdir().
        for (int i = 1; i < parts.length; i++) {
            StringBuilder sb = new StringBuilder();
            for (int j=0; j <= i; j++) {
                sb.append(parts[j]).append("/");
            }
            directory = sb.toString();
            File dir = new File(directory);
            if (!dir.exists()) {
                if (!dir.mkdir()) {
                    System.err.println("Unable to create directory " + directory);
                    return 1;
                }
            }
        }
        // print (not println): the bare path is the program's machine-readable output
        System.out.print(directory);
        return 0;
    }

    public static void main(String[] argv) {
        CreateAnalysisDirectory cmd = new CreateAnalysisDirectory();
        cmd.QUIET = true;  // suppress chatter so stdout carries only the directory path
        System.exit(cmd.instanceMain(argv));
    }


}
edu.mit.broad.picard.util; + +import edu.mit.broad.picard.PicardException; + +import java.security.InvalidParameterException; +import java.text.DateFormat; +import java.text.NumberFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.math.RoundingMode; + +/** + * Simple class used to format object values into a standard format for printing. + * + * @author Tim Fennell + */ +public class FormatUtil { + private DateFormat dateFormat; + private NumberFormat integerFormat; + private NumberFormat floatFormat; + + /** Constructs a new FormatUtil and initializes various internal formatters. */ + public FormatUtil() { + this.dateFormat = new SimpleDateFormat("yyyy-MM-dd"); + + this.integerFormat = NumberFormat.getIntegerInstance(); + this.integerFormat.setGroupingUsed(false); + + this.floatFormat = NumberFormat.getNumberInstance(); + this.floatFormat.setGroupingUsed(false); + this.floatFormat.setMaximumFractionDigits(6); + this.floatFormat.setRoundingMode(RoundingMode.HALF_DOWN); + } + + /** Formats a short to an integer string. */ + public String format(short value) { return this.integerFormat.format(value); } + + /** Formats an int to an integer string. */ + public String format(int value) { return this.integerFormat.format(value); } + + /** Formats a long to an integer string. */ + public String format(long value) { return this.integerFormat.format(value); } + + /** Formats a float to a floating point string. */ + public String format(float value) {return this.floatFormat.format(value); } + + /** Formats a double to a floating point string. */ + public String format(double value) {return this.floatFormat.format(value); } + + /** Formats an enum to the String representation of an enum. */ + public String format(Enum value) { return value.name(); } + + /** Formats a date to a date string without time. 
*/ + public String format(Date value) { return this.dateFormat.format(value); } + + /** Formats a boolean value to a String. */ + public String format(boolean value) { if (value) return "Y"; else return "N"; } + + /** Attempts to determine the type of value and format it appropriately. */ + public String format(Object value) { + if (value == null) return ""; + if (value instanceof Short) return format( ((Short) value).shortValue() ); + if (value instanceof Integer) return format( ((Integer) value).intValue() ); + if (value instanceof Long) return format( ((Long) value).longValue() ); + if (value instanceof Float) return format( ((Float) value).floatValue() ); + if (value instanceof Double) return format( ((Double) value).doubleValue() ); + if (value instanceof Enum) return format( ((Enum) value) ); + if (value instanceof Date) return format( ((Date) value) ); + if (value instanceof Boolean) return format( ((Boolean) value).booleanValue() ); + return value.toString(); + } + + /////////////////////////////////////////////////////////////////////////// + // Parsing methods + /////////////////////////////////////////////////////////////////////////// + + /** Parses a String into a short. */ + public short parseShort(String value) { return Short.parseShort(value); } + + /** Parses a String into an int. */ + public int parseInt(String value) { return Integer.parseInt(value); } + + /** Parses a String into a long. */ + public long parseLong(String value) { return Long.parseLong(value); } + + /** Parses a String into a float. */ + public float parseFloat(String value) { return Float.parseFloat(value); } + + /** Parses a String into a double. */ + public double parseDouble(String value) { return Double.parseDouble(value); } + + /** Parses a String into an Enum of the given type. */ + public E parseEnum(String value, Class type) { return (E) Enum.valueOf(type, value); } + + /** Parses a String into a date. 
*/ + public Date parseDate(String value) { + try { + return this.dateFormat.parse(value); + } + catch (ParseException pe) { + throw new PicardException("Could not parse value as date: " + value, pe); + } + } + + /** Parses a String into a boolean. */ + public boolean parseBoolean(String value) { + if (value == null || value.length() == 0) return false; + char ch = Character.toUpperCase(value.charAt(0)); + + return (ch == 'Y'); + } + + /** + * Attempts to determine the correct parse method to call based on the desired + * return type and then parses the String and returns the value. + * + * @param value the String value to be parsed + * @param returnType the desired return type + * @return an object of the returnType + */ + public Object parseObject(String value, Class returnType) { + if (returnType == Short.class || returnType == Short.TYPE) return parseShort(value); + if (returnType == Integer.class || returnType == Integer.TYPE) return parseInt(value); + if (returnType == Long.class || returnType == Long.TYPE) return parseLong(value); + if (returnType == Float.class || returnType == Float.TYPE) return parseFloat(value); + if (returnType == Double.class || returnType == Double.TYPE) return parseDouble(value); + if (returnType == Boolean.class || returnType == Boolean.TYPE) return parseBoolean(value); + if (returnType == Date.class) return parseDate(value); + if (Enum.class.isAssignableFrom(returnType)) return parseEnum(value, (Class)returnType); + if (returnType == String.class) return value; + + throw new InvalidParameterException("Don't know how to convert a String to a " + returnType.getName()); + } +} diff --git a/lib/edu/mit/broad/picard/util/Histogram.java b/lib/edu/mit/broad/picard/util/Histogram.java new file mode 100644 index 0000000000..3d1f3f8078 --- /dev/null +++ b/lib/edu/mit/broad/picard/util/Histogram.java @@ -0,0 +1,152 @@ +package edu.mit.broad.picard.util; + +import edu.mit.broad.picard.util.Histogram.Bin; + +import java.util.TreeMap; + +/** + * 
Class for computing and accessing histogram type data. Stored internally in + * a sorted Map so that keys can be iterated in order. + * + * @author Tim Fennell + */ +public class Histogram extends TreeMap { + private String binLabel = "BIN"; + private String valueLabel = "VALUE"; + private double count = 0; + private Double mean; + + /** Constructs a new Histogram with default bin and value labels. */ + public Histogram() { } + + /** Constructs a new Histogram with supplied bin and value labels. */ + public Histogram(String binLabel, String valueLabel) { + this.binLabel = binLabel; + this.valueLabel = valueLabel; + } + + /** Represents a bin in the Histogram. */ + public class Bin { + private final K id; + private double value = 0; + + /** Constructs a new bin with the given ID. */ + private Bin(K id) { this.id = id; } + + /** Gets the ID of this bin. */ + public K getId() { return id; } + + /** Gets the value in the bin. */ + public double getValue() { return value; } + + /** Returns the String format for the value in the bin. */ + public String toString() { return String.valueOf(this.value); } + + /** Checks the equality of the bin by ID and value. */ + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + Bin bin = (Bin) o; + + if (Double.compare(bin.value, value) != 0) return false; + if (!id.equals(bin.id)) return false; + + return true; + } + + public double getIdValue() { + if (id instanceof Number) { + return ((Number) id).doubleValue(); + } else { + throw new UnsupportedOperationException("getIdValue only supported for Histogram"); + } + } + } + + /** Prefill the histogram with the supplied set of bins. */ + public void prefillBins(K... ids) { + for (K id : ids) { + put(id, new Bin(id)); + } + } + + /** Increments the value in the designated bin by 1. 
*/ + public void increment(K id) { + increment(id, 1d); + } + + /** Increments the value in the designated bin by the supplied increment. */ + public void increment(K id, double increment) { + Bin bin = get(id); + if (bin == null) { + bin = new Bin(id); + put(id, bin); + } + + bin.value += increment; + count += increment; + mean = null; + } + + public String getBinLabel() { return binLabel; } + public void setBinLabel(String binLabel) { this.binLabel = binLabel; } + + public String getValueLabel() { return valueLabel; } + public void setValueLabel(String valueLabel) { this.valueLabel = valueLabel; } + + /** Checks that the labels and values in the two histograms are identical. */ + public boolean equals(Object o) { + return o != null && + (o instanceof Histogram) && + ((Histogram) o).binLabel.equals(this.binLabel) && + ((Histogram) o).valueLabel.equals(this.valueLabel) && + super.equals(o); + } + + public double getMean() { + if (mean == null) { + double total = 0; + for (Bin bin : values()) { + total += bin.getValue() * bin.getIdValue(); + } + + mean = total / count; + } + + return mean; + } + + public double getStandardDeviation() { + double total = 0; + for (Bin bin : values()) { + total += bin.getValue() * bin.getIdValue() * bin.getIdValue(); + } + + return Math.sqrt((total / count) - (getMean() * getMean())); + } + + public double getMedian() { + double total = 0; + double halfCount = count / 2; + for (Bin bin : values()) { + total += bin.getValue(); + if (total >= halfCount) { + return bin.getIdValue(); + } + } + return 0; + } + + public double getMin() { + return firstEntry().getValue().getIdValue(); + } + + public double getMax() { + return lastEntry().getValue().getIdValue(); + } + + public double getCount() { + return count; + } +} diff --git a/lib/edu/mit/broad/picard/util/Interval.java b/lib/edu/mit/broad/picard/util/Interval.java new file mode 100644 index 0000000000..79a0918073 --- /dev/null +++ b/lib/edu/mit/broad/picard/util/Interval.java @@ -0,0 
package edu.mit.broad.picard.util;

import edu.mit.broad.picard.PicardException;

import java.util.List;
import java.util.Collection;

/**
 * Represents a simple interval on a sequence. Coordinates are 1-based closed ended.
 *
 * Ordering and equality consider sequence, start and end only — strand and name
 * are deliberately ignored.
 *
 * @author Tim Fennell
 */
public class Interval implements Comparable<Interval>, Cloneable {
    private String sequence;
    private int start;
    private int end;
    private boolean negativeStrand;
    private String name;

    /**
     * Constructs an interval with the supplied sequence and start and end. If the end
     * position is less than the start position an exception is thrown.
     *
     * @param sequence the name of the sequence
     * @param start the start position of the interval on the sequence
     * @param end the end position of the interval on the sequence
     */
    public Interval(String sequence, int start, int end) {
        this.sequence = sequence;
        this.start = start;
        this.end = end;

        if (this.end < this.start) throw new IllegalArgumentException("start must be less than or equal to end!");
    }

    /**
     * Constructs an interval with the supplied sequence and start, end, strand and name.
     * If the end position is less than the start position an exception is thrown.
     *
     * @param sequence the name of the sequence
     * @param start the start position of the interval on the sequence
     * @param end the end position of the interval on the sequence
     * @param negative true to indicate negative strand, false otherwise
     * @param name the name (possibly null) of the interval
     */
    public Interval(String sequence, int start, int end, boolean negative, String name) {
        this(sequence, start, end);
        this.negativeStrand = negative;
        this.name = name;
    }

    /** Gets the name of the sequence on which the interval resides. */
    public String getSequence() { return sequence; }

    /** Gets the 1-based start position of the interval on the sequence. */
    public int getStart() { return start; }

    /** Gets the 1-based closed-ended end position of the interval on the sequence. */
    public int getEnd() { return end; }

    /** Returns true if the interval is on the negative strand, otherwise false. */
    public boolean isNegativeStrand() { return this.negativeStrand; }

    /** Returns true if the interval is on the positive strand, otherwise false. */
    public boolean isPositiveStrand() { return !this.negativeStrand; }

    /** Returns the name of the interval, possibly null. */
    public String getName() { return this.name; }

    /** Returns true if this interval overlaps the other interval, otherwise false. */
    public boolean intersects(Interval other) {
        return (this.getSequence().equals(other.getSequence()) &&
                CoordMath.overlaps(this.start, this.end, other.start, other.end));
    }

    /** Returns true if this interval is directly adjacent to (but not overlapping) the other interval. */
    public boolean abuts(Interval other) {
        return this.getSequence().equals(other.getSequence()) &&
               (this.start == other.end + 1 || other.start == this.end + 1);
    }

    /** Gets the length of this interval (inclusive of both endpoints). */
    public int length() { return this.end - this.start + 1; }

    /** Counts the total number of bases in a collection of intervals. */
    public static long countBases(Collection<Interval> intervals) {
        long total = 0;
        for (Interval i : intervals) {
            total += i.length();
        }

        return total;
    }


    /**
     * Sort based on sequence.compareTo, then start pos, then end pos
     * with null objects coming lexically last.
     */
    public int compareTo(Interval that) {
        if (that == null) return -1; // nulls last

        int result = this.getSequence().compareTo(that.getSequence());
        if (result == 0) {
            if (this.start == that.start) {
                // Integer.compare avoids overflow of plain subtraction on extreme coordinates
                result = Integer.compare(this.end, that.end);
            }
            else {
                result = Integer.compare(this.start, that.start);
            }
        }

        return result;
    }

    /** Equals method that agrees with {@link #compareTo(Interval)}. */
    public boolean equals(Interval that) {
        return (this.compareTo(that) == 0);
    }

    /**
     * Override of Object.equals so hash-based collections behave correctly;
     * the previous equals(Interval) overload never overrode Object.equals.
     */
    @Override
    public boolean equals(Object other) {
        return other instanceof Interval && compareTo((Interval) other) == 0;
    }

    @Override
    public int hashCode() {
        // Previous code used (start ^ (start >>> 32)) — a no-op recipe meant for
        // longs that made start/end contribute nothing to the hash for int fields.
        int result = sequence.hashCode();
        result = 31 * result + start;
        result = 31 * result + end;
        return result;
    }

    public String toString() {
        return getSequence() + ":" + start + "-" + end;
    }

    @Override
    public Interval clone() {
        try { return (Interval) super.clone(); }
        catch (CloneNotSupportedException cnse) { throw new PicardException("That's unpossible", cnse); }
    }
}
    // NOTE(review): interior fragment of IntervalTree (the class header is outside
    // this hunk). Generic type arguments (e.g. Node<V>, Iterator<Node<V>>) appear to
    // have been stripped from this copy by the extraction process; the code below is
    // reproduced as-is, with documentation only.

    /**
     * Remove all entries.
     */
    public void clear()
    {
        mRoot = null;
    }

    /**
     * Put a new interval into the tree (or update the value associated with an existing interval).
     * If the interval is novel, the special sentinel value is returned.
     * @param interval The interval.
     * @param value The associated value.
     * @return The old value associated with that interval, or the sentinel.
     */
    public V put( HalfOpenInterval interval, V value )
    {
        return put(interval.getStart(),interval.getEnd(),value);
    }

    /**
     * Put a new interval into the tree (or update the value associated with an existing interval).
     * If the interval is novel, the special sentinel value is returned.
     * @param start The interval's start.
     * @param end The interval's end.
     * @param value The associated value.
     * @return The old value associated with that interval, or the sentinel.
     */
    @SuppressWarnings("null")
    public V put( int start, int end, V value )
    {
        if ( start > end )
            throw new IllegalArgumentException("Start cannot exceed end.");

        V result = mSentinel;

        if ( mRoot == null )
        {
            mRoot = new Node(start,end,value);
        }
        else
        {
            Node parent = null;
            Node node = mRoot;
            int cmpVal = 0;

            // standard BST descent; parent/cmpVal retain the last non-null node and its comparison
            while ( node != null )
            {
                parent = node; // last non-null node
                cmpVal = node.compare(start,end);
                if ( cmpVal == 0 )
                {
                    break;
                }

                node = cmpVal < 0 ? node.getLeft() : node.getRight();
            }

            if ( cmpVal == 0 )
            {
                // exact interval already present: replace its value
                result = parent.setValue(value);
            }
            else
            {
                // novel interval: insert below parent; insertLeft/insertRight rebalance and return the new root
                if ( cmpVal < 0 )
                {
                    mRoot = parent.insertLeft(start,end,value,mRoot);
                }
                else
                {
                    mRoot = parent.insertRight(start,end,value,mRoot);
                }
            }
        }

        return result;
    }

    /**
     * Remove an interval from the tree. If the interval does not exist in the tree the
     * special sentinel value is returned.
     * @param interval The interval to remove.
     * @return The value associated with that interval, or the sentinel.
     */
    public V remove( HalfOpenInterval interval )
    {
        return remove(interval.getStart(),interval.getEnd());
    }

    /**
     * Remove an interval from the tree. If the interval does not exist in the tree the
     * special sentinel value is returned.
     * @param start The interval's start.
     * @param end The interval's end.
     * @return The value associated with that interval, or the sentinel.
     */
    public V remove( int start, int end )
    {
        V result = mSentinel;
        Node node = mRoot;

        while ( node != null )
        {
            int cmpVal = node.compare(start,end);
            if ( cmpVal == 0 )
            {
                result = node.getValue();
                mRoot = node.remove(mRoot); // remove() rebalances and returns the new root
                break;
            }

            node = cmpVal < 0 ? node.getLeft() : node.getRight();
        }

        return result;
    }

    /**
     * Find an interval.
     * @param interval The interval sought.
     * @return The Node that represents that interval, or null.
     */
    public Node find( HalfOpenInterval interval )
    {
        return find(interval.getStart(),interval.getEnd());
    }

    /**
     * Find an interval.
     * @param start The interval's start.
     * @param end The interval's end.
     * @return The Node that represents that interval, or null.
     */
    public Node find( int start, int end )
    {
        Node node = mRoot;

        while ( node != null )
        {
            int cmpVal = node.compare(start,end);
            if ( cmpVal == 0 )
            {
                break;
            }

            node = cmpVal < 0 ? node.getLeft() : node.getRight();
        }

        return node;
    }

    /**
     * Find the nth interval in the tree.
     * @param idx The rank of the interval sought (from 0 to size()-1).
     * @return The Node that represents the nth interval.
     */
    public Node findByIndex( int idx )
    {
        return Node.findByRank(mRoot,idx+1); // ranks are 1-based internally
    }

    /**
     * Find the rank of the specified interval. If the specified interval is not in the
     * tree, then -1 is returned.
     * @param interval The interval for which the index is sought.
     * @return The rank of that interval, or -1.
     */
    public int getIndex( HalfOpenInterval interval )
    {
        return getIndex(interval.getStart(),interval.getEnd());
    }

    /**
     * Find the rank of the specified interval. If the specified interval is not in the
     * tree, then -1 is returned.
     * @param start The interval's start.
     * @param end The interval's end.
     * @return The rank of that interval, or -1.
     */
    public int getIndex( int start, int end )
    {
        return Node.getRank(mRoot,start,end) - 1;
    }

    /**
     * Find the least interval in the tree.
     * @return The earliest interval, or null if the tree is empty.
     */
    public Node min()
    {
        Node result = null;
        Node node = mRoot;

        while ( node != null )
        {
            result = node;
            node = node.getLeft();
        }

        return result;
    }

    /**
     * Find the earliest interval in the tree greater than or equal to the specified interval.
     * @param interval The interval sought.
     * @return The earliest &gt;= interval, or null if there is none.
     */
    public Node min( HalfOpenInterval interval )
    {
        return min(interval.getStart(),interval.getEnd());
    }

    /**
     * Find the earliest interval in the tree greater than or equal to the specified interval.
     * @param start The interval's start.
     * @param end The interval's end.
     * @return The earliest &gt;= interval, or null if there is none.
     */
    @SuppressWarnings("null")
    public Node min( int start, int end )
    {
        Node result = null;
        Node node = mRoot;
        int cmpVal = 0;

        while ( node != null )
        {
            result = node;
            cmpVal = node.compare(start,end);
            if ( cmpVal == 0 )
            {
                break;
            }

            node = cmpVal < 0 ? node.getLeft() : node.getRight();
        }

        // descent ended on a node that sorts before the query: step to its in-order successor
        if ( cmpVal > 0 )
        {
            result = result.getNext();
        }

        return result;
    }

    /**
     * Find the earliest interval in the tree that overlaps the specified interval.
     * @param interval The interval sought.
     * @return The earliest overlapping interval, or null if there is none.
     */
    public Node minOverlapper( HalfOpenInterval interval )
    {
        return minOverlapper(interval.getStart(),interval.getEnd());
    }

    /**
     * Find the earliest interval in the tree that overlaps the specified interval.
     * Uses the per-node max-end augmentation (getMaxEnd) to prune subtrees that
     * cannot contain an overlapper.
     * @param start The interval's start.
     * @param end The interval's end.
     * @return The earliest overlapping interval, or null if there is none.
     */
    public Node minOverlapper( int start, int end )
    {
        Node result = null;
        Node node = mRoot;

        if ( node != null && node.getMaxEnd() >= start )
        {
            while ( true )
            {
                if ( node.getStart() <= end && start <= node.getEnd() )
                { // this node overlaps. there might be a lesser overlapper down the left sub-tree.
                  // no need to consider the right sub-tree: even if there's an overlapper, if won't be minimal
                    result = node;
                    node = node.getLeft();
                    if ( node == null || node.getMaxEnd() < start )
                        break; // no left sub-tree or all nodes end too early
                }
                else
                { // no overlap. if there might be a left sub-tree overlapper, consider the left sub-tree.
                    Node left = node.getLeft();
                    if ( left != null && left.getMaxEnd() >= start )
                    {
                        node = left;
                    }
                    else
                    { // left sub-tree cannot contain an overlapper. consider the right sub-tree.
                        if ( node.getStart() > end )
                            break; // everything in the right sub-tree is past the end of the query interval

                        node = node.getRight();
                        if ( node == null || node.getMaxEnd() < start )
                            break; // no right sub-tree or all nodes end too early
                    }
                }
            }
        }

        return result;
    }

    /**
     * Find the greatest interval in the tree.
     * @return The latest interval, or null if the tree is empty.
     */
    public Node max()
    {
        Node result = null;
        Node node = mRoot;

        while ( node != null )
        {
            result = node;
            node = node.getRight();
        }

        return result;
    }

    /**
     * Find the latest interval in the tree less than or equal to the specified interval.
     * @param interval The interval sought.
     * @return The latest &lt;= interval, or null if there is none.
     */
    public Node max( HalfOpenInterval interval )
    {
        return max(interval.getStart(),interval.getEnd());
    }

    /**
     * Find the latest interval in the tree less than or equal to the specified interval.
     * @param start The interval's start.
     * @param end The interval's end.
     * @return The latest &lt;= interval, or null if there is none.
     */
    @SuppressWarnings("null")
    public Node max( int start, int end )
    {
        Node result = null;
        Node node = mRoot;
        int cmpVal = 0;

        while ( node != null )
        {
            result = node;
            cmpVal = node.compare(start,end);
            if ( cmpVal == 0 )
            {
                break;
            }

            node = cmpVal < 0 ? node.getLeft() : node.getRight();
        }

        // descent ended on a node that sorts after the query: step to its in-order predecessor
        if ( cmpVal < 0 )
        {
            result = result.getPrev();
        }

        return result;
    }

    /**
     * Return an iterator over the entire tree.
     * @return An iterator.
     */
    public Iterator> iterator()
    {
        return new FwdIterator(min());
    }

    /**
     * Return an iterator over all intervals greater than or equal to the specified interval.
     * @param interval The minimum interval.
     * @return An iterator.
     */
    public Iterator> iterator( HalfOpenInterval interval )
    {
        return new FwdIterator(min(interval.getStart(),interval.getEnd()));
    }

    /**
     * Return an iterator over all intervals greater than or equal to the specified interval.
     * @param start The interval's start.
     * @param end The interval's end.
     * @return An iterator.
     */
    public Iterator> iterator( int start, int end )
    {
        return new FwdIterator(min(start,end));
    }

    /**
     * Return an iterator over all intervals overlapping the specified range.
     * @param start The range start.
     * @param end The range end.
     * @return An iterator.
     */
    public Iterator> overlappers( int start, int end )
    {
        return new OverlapIterator(start,end);
    }

    /**
     * Return an iterator over the entire tree that returns intervals in reverse order.
     * @return An iterator.
     */
    public Iterator> reverseIterator()
    {
        return new RevIterator(max());
    }

    /**
     * Return an iterator over all intervals less than or equal to the specified interval, in reverse order.
     * @param interval The maximum interval.
     * @return An iterator.
     */
    public Iterator> reverseIterator( HalfOpenInterval interval )
    {
        return new RevIterator(max(interval.getStart(),interval.getEnd()));
    }

    /**
     * Return an iterator over all intervals less than or equal to the specified interval, in reverse order.
     * @param start The interval's start.
     * @param end The interval's end.
     * @return An iterator.
     */
    public Iterator> reverseIterator( int start, int end )
    {
        return new RevIterator(max(start,end));
    }

    /**
     * Get the special sentinel value that will be used to signal novelty when putting a new interval
     * into the tree, or to signal "not found" when removing an interval. This is null by default.
     * @return The sentinel value.
     */
    public V getSentinel()
    {
        return mSentinel;
    }

    /**
     * Set the special sentinel value that will be used to signal novelty when putting a new interval
     * into the tree, or to signal "not found" when removing an interval.
     * @param sentinel The new sentinel value.
     * @return The old sentinel value.
     */
    public V setSentinel( V sentinel )
    {
        V result = mSentinel;
        mSentinel = sentinel;
        return result;
    }

    // package-private hook used by the iterators to remove the current node
    void removeNode( Node node )
    {
        mRoot = node.remove(mRoot);
    }

    private Node mRoot;
    private V mSentinel;

    /**
     * A node of the red-black interval tree. Carries the interval, its value,
     * subtree size (for rank queries), and the max end over its subtree (for
     * overlap queries).
     */
    public static class Node
        implements HalfOpenInterval
    {
        // Root constructor: root of a red-black tree is black.
        Node( int start, int end, V1 value )
        {
            mStart = start;
            mEnd = end;
            mValue = value;
            mSize = 1;
            mMaxEnd = mEnd;
            mIsBlack = true;
        }

        // Child constructor: new nodes are inserted red (mIsBlack defaults to false).
        Node( Node parent, int start, int end, V1 value )
        {
            mParent = parent;
            mStart = start;
            mEnd = end;
            mValue = value;
            mMaxEnd = mEnd;
            mSize = 1;
        }

        public int getStart()
        {
            return mStart;
        }

        public int getEnd()
        {
            return mEnd;
        }

        public int getLength()
        {
            return mEnd - mStart;
        }

        /** Bit-flags describing how this node's interval relates to the given interval. */
        public int getRelationship( HalfOpenInterval interval )
        {
            int result = 0;
            if ( mStart < interval.getStart() )
                result = HalfOpenInterval.HAS_LESSER_PART;
            if ( mEnd > interval.getEnd() )
                result |= HalfOpenInterval.HAS_GREATER_PART;
            if ( mStart < interval.getEnd() && interval.getStart() < mEnd )
                result |= HalfOpenInterval.HAS_OVERLAPPING_PART;
            return result;
        }

        public boolean isAdjacent( HalfOpenInterval interval )
        {
            return mStart == interval.getEnd() || mEnd == interval.getStart();
        }

        public V1 getValue()
        {
            return mValue;
        }

        public V1 setValue( V1 value )
        {
            V1 result = mValue;
            mValue = value;
            return result;
        }

        // Number of nodes in the subtree rooted here (used for rank queries).
        int getSize()
        {
            return mSize;
        }

        // Maximum interval end over the subtree rooted here (used for overlap pruning).
        int getMaxEnd()
        {
            return mMaxEnd;
        }

        Node getLeft()
        {
            return mLeft;
        }

        // Insert a new red node as this node's left child, then restore red-black invariants.
        Node insertLeft( int start, int end, V1 value, Node root )
        {
            mLeft = new Node(this,start,end,value);
            return insertFixup(mLeft,root);
        }

        Node getRight()
        {
            return mRight;
        }

        // Insert a new red node as this node's right child, then restore red-black invariants.
        Node insertRight( int start, int end, V1 value, Node root )
        {
            mRight = new Node(this,start,end,value);
            return insertFixup(mRight,root);
        }

        /** In-order successor: leftmost of the right subtree, else first right-turn ancestor. */
        Node getNext()
        {
            Node result;

            if ( mRight != null )
            {
                result = mRight;
                while ( result.mLeft != null )
                {
                    result = result.mLeft;
                }
            }
            else
            {
                Node node = this;
                result = mParent;
                while ( result != null && node == result.mRight )
                {
                    node = result;
                    result = result.mParent;
                }
            }

            return result;
        }

        /** In-order predecessor: rightmost of the left subtree, else first left-turn ancestor. */
        Node getPrev()
        {
            Node result;

            if ( mLeft != null )
            {
                result = mLeft;
                while ( result.mRight != null )
                {
                    result = result.mRight;
                }
            }
            else
            {
                Node node = this;
                result = mParent;
                while ( result != null && node == result.mLeft )
                {
                    node = result;
                    result = result.mParent;
                }
            }

            return result;
        }

        // mSize == 0 marks a node that has already been unlinked from the tree.
        boolean wasRemoved()
        {
            return mSize == 0;
        }

        /**
         * Unlink this node from the tree, restoring red-black invariants.
         * @param root the current tree root
         * @return the (possibly new) tree root
         */
        Node remove( Node root )
        {
            if ( mSize == 0 )
            {
                throw new IllegalStateException("Entry was already removed.");
            }

            if ( mLeft == null )
            {
                if ( mRight == null )
                { // no children
                    if ( mParent == null )
                    {
                        root = null;
                    }
                    else if ( mParent.mLeft == this )
                    {
                        mParent.mLeft = null;
                        fixup(mParent);

                        // removing a black leaf unbalances black-height: rebalance
                        if ( mIsBlack )
                            root = removeFixup(mParent,null,root);
                    }
                    else
                    {
                        mParent.mRight = null;
                        fixup(mParent);

                        if ( mIsBlack )
                            root = removeFixup(mParent,null,root);
                    }
                }
                else
                { // single child on right
                    root = spliceOut(mRight,root);
                }
            }
            else if ( mRight == null )
            { // single child on left
                root = spliceOut(mLeft,root);
            }
            else
            { // two children: remove the in-order successor, then graft it into this node's place
                Node next = getNext();
                root = next.remove(root);

                // put next into tree in same position as this, effectively removing this
                if ( (next.mParent = mParent) == null )
                    root = next;
                else if ( mParent.mLeft == this )
                    mParent.mLeft = next;
                else
                    mParent.mRight = next;

                if ( (next.mLeft = mLeft) != null )
                {
                    mLeft.mParent = next;
                }

                if ( (next.mRight = mRight) != null )
                {
                    mRight.mParent = next;
                }

                // successor inherits this node's color and subtree size
                next.mIsBlack = mIsBlack;
                next.mSize = mSize;
            }

            mSize = 0; // mark as removed
            return root;
        }

        // backwards comparison! compares start+end to this.
+ int compare( int start, int end ) + { + int result = 0; + + if ( start > mStart ) + result = 1; + else if ( start < mStart ) + result = -1; + else if ( end > mEnd ) + result = 1; + else if ( end < mEnd ) + result = -1; + + return result; + } + + @SuppressWarnings("null") + static Node getNextOverlapper( Node node, int start, int end ) + { + do + { + Node nextNode = node.mRight; + if ( nextNode != null && nextNode.mMaxEnd >= start ) + { + node = nextNode; + while ( (nextNode = node.mLeft) != null && nextNode.mMaxEnd >= start ) + node = nextNode; + } + else + { + nextNode = node; + while ( (node = nextNode.mParent) != null && node.mRight == nextNode ) + nextNode = node; + } + + if ( node != null && node.mStart > end ) + node = null; + } + while ( node != null && !(node.mStart <= end && start <= node.mEnd) ); + + return node; + } + + static Node findByRank( Node node, int rank ) + { + while ( node != null ) + { + int nodeRank = node.getRank(); + if ( rank == nodeRank ) + break; + + if ( rank < nodeRank ) + { + node = node.mLeft; + } + else + { + node = node.mRight; + rank -= nodeRank; + } + } + + return node; + } + + static int getRank( Node node, int start, int end ) + { + int rank = 0; + + while ( node != null ) + { + int cmpVal = node.compare(start,end); + if ( cmpVal < 0 ) + { + node = node.mLeft; + } + else + { + rank += node.getRank(); + if ( cmpVal == 0 ) + return rank; // EARLY RETURN!!! 
+ + node = node.mRight; + } + } + + return 0; + } + + private int getRank() + { + int result = 1; + if ( mLeft != null ) + result = mLeft.mSize + 1; + return result; + } + + private Node spliceOut( Node child, Node root ) + { + if ( (child.mParent = mParent) == null ) + { + root = child; + child.mIsBlack = true; + } + else + { + if ( mParent.mLeft == this ) + mParent.mLeft = child; + else + mParent.mRight = child; + fixup(mParent); + + if ( mIsBlack ) + root = removeFixup(mParent,child,root); + } + + return root; + } + + private Node rotateLeft( Node root ) + { + Node child = mRight; + + int childSize = child.mSize; + child.mSize = mSize; + mSize -= childSize; + + if ( (mRight = child.mLeft) != null ) + { + mRight.mParent = this; + mSize += mRight.mSize; + } + + if ( (child.mParent = mParent) == null ) + root = child; + else if ( this == mParent.mLeft ) + mParent.mLeft = child; + else + mParent.mRight = child; + + child.mLeft = this; + mParent = child; + + setMaxEnd(); + child.setMaxEnd(); + + return root; + } + + private Node rotateRight( Node root ) + { + Node child = mLeft; + + int childSize = child.mSize; + child.mSize = mSize; + mSize -= childSize; + + if ( (mLeft = child.mRight) != null ) + { + mLeft.mParent = this; + mSize += mLeft.mSize; + } + + if ( (child.mParent = mParent) == null ) + root = child; + else if ( this == mParent.mLeft ) + mParent.mLeft = child; + else + mParent.mRight = child; + + child.mRight = this; + mParent = child; + + setMaxEnd(); + child.setMaxEnd(); + + return root; + } + + private void setMaxEnd() + { + mMaxEnd = mEnd; + if ( mLeft != null ) + mMaxEnd = Math.max(mMaxEnd,mLeft.mMaxEnd); + if ( mRight != null ) + mMaxEnd = Math.max(mMaxEnd,mRight.mMaxEnd); + } + + private static void fixup( Node node ) + { + do + { + node.mSize = 1; + node.mMaxEnd = node.mEnd; + if ( node.mLeft != null ) + { + node.mSize += node.mLeft.mSize; + node.mMaxEnd = Math.max(node.mMaxEnd,node.mLeft.mMaxEnd); + } + if ( node.mRight != null ) + { + node.mSize 
+= node.mRight.mSize; + node.mMaxEnd = Math.max(node.mMaxEnd,node.mRight.mMaxEnd); + } + } + while ( (node = node.mParent) != null ); + } + + private static Node insertFixup( Node daughter, Node root ) + { + Node mom = daughter.mParent; + fixup(mom); + + while( mom != null && !mom.mIsBlack ) + { + Node gramma = mom.mParent; + Node auntie = gramma.mLeft; + if ( auntie == mom ) + { + auntie = gramma.mRight; + if ( auntie != null && !auntie.mIsBlack ) + { + mom.mIsBlack = true; + auntie.mIsBlack = true; + gramma.mIsBlack = false; + daughter = gramma; + } + else + { + if ( daughter == mom.mRight ) + { + root = mom.rotateLeft(root); + mom = daughter; + } + mom.mIsBlack = true; + gramma.mIsBlack = false; + root = gramma.rotateRight(root); + break; + } + } + else + { + if ( auntie != null && !auntie.mIsBlack ) + { + mom.mIsBlack = true; + auntie.mIsBlack = true; + gramma.mIsBlack = false; + daughter = gramma; + } + else + { + if ( daughter == mom.mLeft ) + { + root = mom.rotateRight(root); + mom = daughter; + } + mom.mIsBlack = true; + gramma.mIsBlack = false; + root = gramma.rotateLeft(root); + break; + } + } + mom = daughter.mParent; + } + root.mIsBlack = true; + return root; + } + + private static Node removeFixup( Node parent, Node node, Node root ) + { + do + { + if ( node == parent.mLeft ) + { + Node sister = parent.mRight; + if ( !sister.mIsBlack ) + { + sister.mIsBlack = true; + parent.mIsBlack = false; + root = parent.rotateLeft(root); + sister = parent.mRight; + } + if ( (sister.mLeft == null || sister.mLeft.mIsBlack) && (sister.mRight == null || sister.mRight.mIsBlack) ) + { + sister.mIsBlack = false; + node = parent; + } + else + { + if ( sister.mRight == null || sister.mRight.mIsBlack ) + { + sister.mLeft.mIsBlack = true; + sister.mIsBlack = false; + root = sister.rotateRight(root); + sister = parent.mRight; + } + sister.mIsBlack = parent.mIsBlack; + parent.mIsBlack = true; + sister.mRight.mIsBlack = true; + root = parent.rotateLeft(root); + node = root; + } 
+ } + else + { + Node sister = parent.mLeft; + if ( !sister.mIsBlack ) + { + sister.mIsBlack = true; + parent.mIsBlack = false; + root = parent.rotateRight(root); + sister = parent.mLeft; + } + if ( (sister.mLeft == null || sister.mLeft.mIsBlack) && (sister.mRight == null || sister.mRight.mIsBlack) ) + { + sister.mIsBlack = false; + node = parent; + } + else + { + if ( sister.mLeft == null || sister.mLeft.mIsBlack ) + { + sister.mRight.mIsBlack = true; + sister.mIsBlack = false; + root = sister.rotateLeft(root); + sister = parent.mLeft; + } + sister.mIsBlack = parent.mIsBlack; + parent.mIsBlack = true; + sister.mLeft.mIsBlack = true; + root = parent.rotateRight(root); + node = root; + } + } + parent = node.mParent; + } + while ( parent != null && node.mIsBlack ); + + node.mIsBlack = true; + return root; + } + + private Node mParent; + private Node mLeft; + private Node mRight; + private int mStart; + private int mEnd; + private V1 mValue; + private int mSize; + private int mMaxEnd; + private boolean mIsBlack; + } + + public class FwdIterator + implements Iterator> + { + public FwdIterator( Node node ) + { + mNext = node; + } + + public boolean hasNext() + { + return mNext != null; + } + + public Node next() + { + if ( mNext == null ) + { + throw new NoSuchElementException("No next element."); + } + + if ( mNext.wasRemoved() ) + { + mNext = min(mNext.getStart(),mNext.getEnd()); + if ( mNext == null ) + throw new ConcurrentModificationException("Current element was removed, and there are no more elements."); + } + mLast = mNext; + mNext = mNext.getNext(); + return mLast; + } + + public void remove() + { + if ( mLast == null ) + { + throw new IllegalStateException("No entry to remove."); + } + + removeNode(mLast); + mLast = null; + } + + private Node mNext; + private Node mLast; + } + + public class RevIterator + implements Iterator> + { + public RevIterator( Node node ) + { + mNext = node; + } + + public boolean hasNext() + { + return mNext != null; + } + + public 
Node next() + { + if ( mNext == null ) + throw new NoSuchElementException("No next element."); + if ( mNext.wasRemoved() ) + { + mNext = max(mNext.getStart(),mNext.getEnd()); + if ( mNext == null ) + throw new ConcurrentModificationException("Current element was removed, and there are no more elements."); + } + mLast = mNext; + mNext = mNext.getPrev(); + return mLast; + } + + public void remove() + { + if ( mLast == null ) + { + throw new IllegalStateException("No entry to remove."); + } + + removeNode(mLast); + mLast = null; + } + + private Node mNext; + private Node mLast; + } + + public class OverlapIterator + implements Iterator> + { + public OverlapIterator( int start, int end ) + { + mNext = minOverlapper(start,end); + mStart = start; + mEnd = end; + } + + public boolean hasNext() + { + return mNext != null; + } + + public Node next() + { + if ( mNext == null ) + { + throw new NoSuchElementException("No next element."); + } + + if ( mNext.wasRemoved() ) + { + throw new ConcurrentModificationException("Current element was removed."); + } + + mLast = mNext; + mNext = Node.getNextOverlapper(mNext,mStart,mEnd); + return mLast; + } + + public void remove() + { + if ( mLast == null ) + { + throw new IllegalStateException("No entry to remove."); + } + + removeNode(mLast); + mLast = null; + } + + private Node mNext; + private Node mLast; + private int mStart; + private int mEnd; + } + + public static class ValuesIterator + implements Iterator + { + public ValuesIterator( Iterator> itr ) + { + mItr = itr; + } + + public boolean hasNext() + { + return mItr.hasNext(); + } + + public V1 next() + { + return mItr.next().getValue(); + } + + public void remove() + { + mItr.remove(); + } + + private Iterator> mItr; + } +} + +/** + * Semi-open interval on the integer number line. + * Turf covered runs from the start value inclusive, up to, but not including, the end value. 
 *
 * @author tsharpe
 * @version $Revision: 51146 $
 */
interface HalfOpenInterval
{
    // Bit flags from which the composite IS_* constants below are assembled.
    static final int HAS_LESSER_PART = 1;
    static final int HAS_OVERLAPPING_PART = 2;
    static final int HAS_GREATER_PART = 4;

    static final int IS_ADJACENT_AND_EMPTY = 0;
    static final int IS_STRICTLY_LESS = HAS_LESSER_PART; // 1
    static final int IS_SUBSET = HAS_OVERLAPPING_PART; // 2
    static final int IS_LEFT_OVERHANGING_OVERLAPPER = HAS_LESSER_PART | HAS_OVERLAPPING_PART; // 3
    static final int IS_STRICTLY_GREATER = HAS_GREATER_PART; // 4
    // there is no value that equals 5, since that would imply overhanging on left and right without overlapping
    static final int IS_RIGHT_OVERHANGING_OVERLAPPER = HAS_GREATER_PART | HAS_OVERLAPPING_PART; // 6
    static final int IS_SUPERSET = HAS_LESSER_PART | HAS_OVERLAPPING_PART | HAS_GREATER_PART; // 7

    /**
     * Returns the starting point of the interval (inclusive).
     * @return The start.
     */
    int getStart();

    /**
     * Returns the ending point of the interval.
     * The interval is not regarded as including this point (half-open).
     * @return The end.
     */
    int getEnd();

    /**
     * The interval's length: end - start.
     */
    int getLength();

    /**
     * Returns a constant that describes the relationship of this interval
     * to a specified interval with regard to position on the number line.
     * @param interval The interval to compare this one to.
     * @return One of the IS_* constants defined above.
     */
    int getRelationship( HalfOpenInterval interval );

    /**
     * Returns true if this interval ends where the specified interval starts,
     * or vice versa.
     * @param interval The interval to compare this one to.
     * @return True, if adjacent.
+ */ + boolean isAdjacent( HalfOpenInterval interval ); +} diff --git a/lib/edu/mit/broad/picard/util/ListMap.java b/lib/edu/mit/broad/picard/util/ListMap.java new file mode 100644 index 0000000000..bee27cc18d --- /dev/null +++ b/lib/edu/mit/broad/picard/util/ListMap.java @@ -0,0 +1,24 @@ +package edu.mit.broad.picard.util; + +import java.util.List; +import java.util.HashMap; +import java.util.ArrayList; + +/** + * A Map class that holds a list of entries under each key instead of a single entry, and + * provides utility methods for adding an entry under a key. + * + * @author Tim Fennell + */ +public class ListMap extends HashMap> { + /** Adds a single value to the list stored under a key. */ + public void add(K key, V value) { + List values = get(key); + if (values == null) { + values = new ArrayList(); + put(key, values); + } + + values.add(value); + } +} diff --git a/lib/edu/mit/broad/picard/util/Log.java b/lib/edu/mit/broad/picard/util/Log.java new file mode 100644 index 0000000000..43a628bdb1 --- /dev/null +++ b/lib/edu/mit/broad/picard/util/Log.java @@ -0,0 +1,182 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.util; + +import java.io.PrintStream; +import java.util.Arrays; + +/** + *

    A wafer thin wrapper around System.out that uses var-args to make it + * much more efficient to call the logging methods in without having to + * surround every call site with calls to Log.isXXXEnabled(). All the methods on this + * class take a variable length list of arguments and, only if logging is enabled for + * the level and channel being logged to, will those arguments be toString()'d and + * appended together.

    + * + * @author Tim Fennell + */ +public final class Log { + /** Enumeration for setting log levels. */ + public static enum LogLevel { ERROR, WARNING, INFO, DEBUG } + + private static LogLevel globalLogLevel = LogLevel.DEBUG; + + private final Class clazz; + private final String className; + private final LogLevel level = globalLogLevel; + private final PrintStream out = System.out; + + /** + * Private constructor + */ + private Log(final Class clazz) { + this.clazz = clazz; + this.className = clazz.getSimpleName(); + } + + /** + * Get a Log instance to perform logging within the Class specified. Returns an instance + * of this class which wraps an instance of the commons logging Log class. + * @param clazz the Class which is going to be doing the logging + * @return a Log instance with which to log + */ + public static Log getInstance(final Class clazz) { + return new Log(clazz); + } + + public static void setGlobalLogLevel(final LogLevel logLevel) { + globalLogLevel = logLevel; + } + + /** Returns true if the specified log level is enabled otherwise false. */ + public final boolean isEnabled(final LogLevel level) { + return level.ordinal() <= this.level.ordinal(); + } + + /** + * Private method that does the actual printing of messages to a PrintWriter. Outputs the log level, + * class name and parts followed by the stack trace if a throwable is provided. + * + * @param level the Log level being logged at + * @param throwable a Throwable if one is available otherwise null + * @param parts the parts of the message to be concatenated + */ + private final void emit(final LogLevel level, final Throwable throwable, final Object... 
parts) { + if (isEnabled(level)) { + this.out.print(level.name()); + this.out.print('\t'); + this.out.print(this.className); + this.out.print('\t'); + + for (final Object part : parts) { + if (part != null && part.getClass().isArray()) { + final Class component = part.getClass().getComponentType(); + if (component.equals(Boolean.TYPE)) this.out.print(Arrays.toString( (boolean[]) part)); + else if (component.equals(Byte.TYPE)) this.out.print(Arrays.toString( (byte[]) part)); + else if (component.equals(Character.TYPE)) this.out.print(Arrays.toString( (char[]) part)); + else if (component.equals(Double.TYPE)) this.out.print(Arrays.toString( (double[]) part)); + else if (component.equals(Float.TYPE)) this.out.print(Arrays.toString( (float[]) part)); + else if (component.equals(Integer.TYPE)) this.out.print(Arrays.toString( (int[]) part)); + else if (component.equals(Long.TYPE)) this.out.print(Arrays.toString( (long[]) part)); + else if (component.equals(Short.TYPE)) this.out.print(Arrays.toString( (short[]) part)); + else this.out.print(Arrays.toString( (Object[]) part)); + } + else { + this.out.print(part); + } + } + + this.out.println(); + + // Print out the exception if there is one + if (throwable != null) { + throwable.printStackTrace(this.out); + } + } + } + + /** + * Logs a Throwable and optional message parts at level error. + * @param throwable an instance of Throwable that should be logged with stack trace + * @param messageParts zero or more objects which should be combined, by calling toString() + * to form the log message. + */ + public final void error(final Throwable throwable, final Object... messageParts) { + emit(LogLevel.ERROR, throwable, messageParts); + } + + /** + * Logs a Throwable and optional message parts at level warn. + * @param throwable an instance of Throwable that should be logged with stack trace + * @param messageParts zero or more objects which should be combined, by calling toString() + * to form the log message. 
+ */ + public final void warn(final Throwable throwable, final Object... messageParts) { + emit(LogLevel.WARNING, throwable, messageParts); + } + + /** + * Logs a Throwable and optional message parts at level info. + * @param throwable an instance of Throwable that should be logged with stack trace + * @param messageParts zero or more objects which should be combined, by calling toString() + * to form the log message. + */ + public final void info(final Throwable throwable, final Object... messageParts) { + emit(LogLevel.INFO, throwable, messageParts); + } + + /** + * Logs a Throwable and optional message parts at level debug. + * @param throwable an instance of Throwable that should be logged with stack trace + * @param messageParts zero or more objects which should be combined, by calling toString() + * to form the log message. + */ + public final void debug(final Throwable throwable, final Object... messageParts) { + emit(LogLevel.DEBUG, throwable, messageParts); + } + + // Similar methods, but without Throwables, follow + + /** + * Logs one or more message parts at level error. + * @param messageParts one or more objects which should be combined, by calling toString() + * to form the log message. + */ + public final void error(final Object... messageParts) { + emit(LogLevel.ERROR, null, messageParts); + } + + /** + * Logs one or more message parts at level warn. + * @param messageParts one or more objects which should be combined, by calling toString() + * to form the log message. + */ + public final void warn(final Object... messageParts) { + emit(LogLevel.WARNING, null, messageParts); + } + + /** + * Logs one or more message parts at level info. + * @param messageParts one or more objects which should be combined, by calling toString() + * to form the log message. + */ + public final void info(final Object... messageParts) { + emit(LogLevel.INFO, null, messageParts); + } + + /** + * Logs one or more message parts at level debug. 
+ * @param messageParts one or more objects which should be combined, by calling toString() + * to form the log message. + */ + public final void debug(final Object... messageParts) { + emit(LogLevel.DEBUG, null, messageParts); + } +} diff --git a/lib/edu/mit/broad/picard/util/MathUtil.java b/lib/edu/mit/broad/picard/util/MathUtil.java new file mode 100644 index 0000000000..0cf6de4b7e --- /dev/null +++ b/lib/edu/mit/broad/picard/util/MathUtil.java @@ -0,0 +1,33 @@ +package edu.mit.broad.picard.util; + +/** + * General math utilities + * + * @author Tim Fennell + */ +public class MathUtil { + /** Calculated the mean of an array of doubles. */ + public static double mean(double[] in, int start, int stop) { + double total = 0; + for (int i=start; i { + private Map>> cache = new HashMap>>(); + private final int lhsBuffer; + private final int rhsBuffer; + + /** + * Constructs an overlap detector. + * @param lhsBuffer the amount by which to "trim" coordinates of mappings on the left + * hand side when calculating overlaps + * @param rhsBuffer the amount by which to "trim" coordinates of mappings on the right + * hand side when calculating overlaps + */ + public OverlapDetector(int lhsBuffer, int rhsBuffer) { + this.lhsBuffer = lhsBuffer; + this.rhsBuffer = rhsBuffer; + } + + /** Adds a mapping to the set of mappings against which to match candidates. 
*/ + public void addLhs(T object, Interval interval) { + Object seqId = interval.getSequence(); + + IntervalTree> tree = this.cache.get(seqId); + if (tree == null) { + tree = new IntervalTree>(); + this.cache.put(seqId, tree); + } + + int start = interval.getStart() + this.lhsBuffer; + int end = interval.getEnd() - this.lhsBuffer; + + Set objects = new HashSet(); + objects.add(object); + if (start <= end) // Don't put in sequences that have no overlappable bases + { + Set alreadyThere = tree.put(start, end, objects); + if (alreadyThere != null) + { + alreadyThere.add(object); + tree.put(start, end, alreadyThere); + } + } + } + + /** Adds all items to the overlap detector. */ + public void addAll(List objects, List intervals) { + if (objects.size() != intervals.size()) { + throw new IllegalArgumentException("Objects and intervals must be the same size."); + } + + for (int i=0; i getOverlaps(Interval rhs) { + Collection matches = new ArrayList(); + + Object seqId = rhs.getSequence(); + IntervalTree> tree = this.cache.get(seqId); + int start = rhs.getStart() + this.rhsBuffer; + int end = rhs.getEnd() - this.rhsBuffer; + + if (tree != null && start <= end) + { + Iterator>> it = tree.overlappers(start, end); + while (it.hasNext()) + { + IntervalTree.Node> node = it.next(); + matches.addAll(node.getValue()); + } + } + + return matches; + } + + /** Gets all the objects that could be returned by the overlap detector. 
*/ + public Collection getAll() { + Collection all = new HashSet(); + for (IntervalTree> tree : this.cache.values()) { + for (IntervalTree.Node> node : tree) { + all.addAll(node.getValue()); + } + } + + return all; + } +} diff --git a/lib/edu/mit/broad/picard/util/PasteParser.java b/lib/edu/mit/broad/picard/util/PasteParser.java new file mode 100644 index 0000000000..2b785a52f4 --- /dev/null +++ b/lib/edu/mit/broad/picard/util/PasteParser.java @@ -0,0 +1,132 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.util; + +import edu.mit.broad.picard.PicardException; +import edu.mit.broad.sam.util.CloseableIterator; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +/** + * Class to merge files horizontally (like the Unix paste command), so that the first line of each file + * is merged together in one big line, then the second lines, etc. + * + * @author Kathleen Tibbetts + */ +public class PasteParser implements Iterable, CloseableIterator{ + + private final CloseableIterator[] iterators; + private boolean iterating = false; + private String[][] next = null; + + /** + * Constructor + * + * @param iterators The iterators containing the files to merge together + */ + public PasteParser(CloseableIterator... iterators) { + this.iterators = iterators; + } + + /** + * Merges the "next" line from each of the underying iterators and returns an array of the results. 
+ * + * @return An array of the lines from each iterator + * @throws PicardException if the files are not exhausted at the same time + */ + protected String[][] readNextLine() { + String result[][] = new String[iterators.length][]; + boolean oneFinished = false; + boolean oneNotFinished = false; + + for (int i = 0; i < iterators.length; i++) { + if (!iterators[i].hasNext()) { + oneFinished = true; + } + else { + result[i] = iterators[i].next(); + oneNotFinished = true; + } + } + if (oneFinished) { + if (oneNotFinished) { + throw new PicardException("Mismatched file lengths in PasteParser"); + } + else { + return null; + } + } + return result; + } + + /** + * Closes the underlying iterators. + */ + public void close() { + for (CloseableIterator iterator : iterators) { + iterator.close(); + } + } + + /** + * Required method for Iterator API. + * + * @throws UnsupportedOperationException + */ + public void remove() { + throw new UnsupportedOperationException("Remove() not supported."); + } + + /** + * Returns an iterator over a set of elements of type BustardReadData. + * + * @return an iterator over a set of elements of type BustardReadData + */ + public Iterator iterator() { + if (iterating) { + throw new IllegalStateException("iterator() method can only be called once, before the" + + "first call to hasNext()"); + } + next = readNextLine(); + iterating = true; + return this; + } + + /** + * Returns true if the iteration has more elements. + * + * @return true if the iteration has more elements. Otherwise returns false. + */ + public boolean hasNext() { + if (!iterating) { + next = readNextLine(); + iterating = true; + } + return next != null; + } + + /** + * Returns the next element in the iteration. 
+ * + * @return the next element in the iteration + * @throws java.util.NoSuchElementException + */ + public String[][] next() { + + if (!hasNext()) { + throw new NoSuchElementException("Iteration has no more elements."); + } + + String[][] result = next; + next = readNextLine(); + return result; + } +} diff --git a/lib/edu/mit/broad/picard/util/PeekableIterator.java b/lib/edu/mit/broad/picard/util/PeekableIterator.java new file mode 100644 index 0000000000..eae31253df --- /dev/null +++ b/lib/edu/mit/broad/picard/util/PeekableIterator.java @@ -0,0 +1,65 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright Jan 22, 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + */ +package edu.mit.broad.picard.util; + +import edu.mit.broad.sam.util.CloseableIterator; + +/** + * Generic Closable Iterator that allows you to peek at the next value before calling next + */ +public class PeekableIterator implements CloseableIterator { + private CloseableIterator iterator; + private Object nextObject; + + /** Constructs a new iterator that wraps the supplied iterator. */ + public PeekableIterator(CloseableIterator iterator) { + this.iterator = iterator; + advance(); + } + + /** Closes the underlying iterator. */ + public void close() { + this.iterator.close(); + } + + /** True if there are more items, in which case both next() and peek() will return a value. */ + public boolean hasNext() { + return this.nextObject != null; + } + + /** Returns the next object and advances the iterator. */ + public Object next() { + Object retval = this.nextObject; + advance(); + return retval; + } + + /** + * Returns the next object but does not advance the iterator. 
Subsequent calls to peek() + * and next() will return the same object. + */ + public Object peek(){ + return this.nextObject; + } + + private void advance(){ + if (this.iterator.hasNext()) { + this.nextObject = iterator.next(); + } + else { + this.nextObject = null; + } + } + + /** Unsupported Operation. */ + public void remove() { + throw new UnsupportedOperationException("Not supported: remove"); + } +} diff --git a/lib/edu/mit/broad/picard/util/ProcessExecutor.java b/lib/edu/mit/broad/picard/util/ProcessExecutor.java new file mode 100644 index 0000000000..6655e37cd2 --- /dev/null +++ b/lib/edu/mit/broad/picard/util/ProcessExecutor.java @@ -0,0 +1,121 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ + +package edu.mit.broad.picard.util; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadFactory; + +import edu.mit.broad.picard.PicardException; + +/** + * Utility class that will execute sub processes via Runtime.getRuntime().exec(...) and read + * off the output from stderr and stdout of the sub process. This implementation uses a different + * thread to read each stream: the current thread for stdout and another, internal thread for + * stderr. This utility is able to handle concurrent executions, spawning as many threads as + * are required to handle the concurrent load. 
+ * + * @author Doug Voet + */ +public class ProcessExecutor { + private static Log log = Log.getInstance(ProcessExecutor.class); + private static ExecutorService executorService = Executors.newCachedThreadPool(new ThreadFactory() { + @Override + public Thread newThread(Runnable r) { + return new Thread(r, "ProcessExecutor Thread"); + } + }); + + /** + * Executes the command via Runtime.getRuntime().exec() then writes stderr to log.error + * and stdout to log.info and blocks until the command is complete. + * + * @see Runtime#exec(String) + * + * @param command command string + * @return return code of command + */ + public static int execute(String command) { + try { + Process process = Runtime.getRuntime().exec(command); + return readStreamsAndWaitFor(process); + } catch (Throwable t) { + throw new PicardException("Unexpected exception executing [" + StringUtil.join(" ", command) + "]", t); + } + } + + /** + * Executes the command via Runtime.getRuntime().exec() then writes stderr to log.error + * and stdout to log.info and blocks until the command is complete. + * + * @see Runtime#exec(String[]) + * + * @param commandParts command string + * @return return code of command + */ + public static int execute(String[] commandParts) { + try { + Process process = Runtime.getRuntime().exec(commandParts); + return readStreamsAndWaitFor(process); + } catch (Throwable t) { + throw new PicardException("Unexpected exception executing [" + StringUtil.join(" ", commandParts) + "]", t); + } + } + + private static int readStreamsAndWaitFor(Process process) + throws InterruptedException, ExecutionException { + Future stderrReader = executorService.submit(new LogErrorProcessOutputReader(process.getErrorStream())); + new LogInfoProcessOutputReader(process.getInputStream()).run(); + // wait for stderr reader to be done + stderrReader.get(); + return process.waitFor(); + } + + /** + * Runnable that reads off the given stream and logs it somewhere. 
+ */ + private static abstract class ProcessOutputReader implements Runnable { + private BufferedReader reader; + public ProcessOutputReader(InputStream stream) { + reader = new BufferedReader(new InputStreamReader(stream)); + } + + @Override + public void run() { + try { + String line; + while ((line = reader.readLine()) != null) { + log(line); + } + } catch (IOException e) { + throw new PicardException("Unexpected exception reading from process stream", e); + } + } + + protected abstract void log(String message); + } + + private static class LogErrorProcessOutputReader extends ProcessOutputReader { + public LogErrorProcessOutputReader(InputStream stream) { super(stream); } + @Override protected void log(String message) { log.error(message); } + } + + private static class LogInfoProcessOutputReader extends ProcessOutputReader { + public LogInfoProcessOutputReader(InputStream stream) { super(stream); } + @Override protected void log(String message) { log.info(message); } + } +} diff --git a/lib/edu/mit/broad/picard/util/RExecutor.java b/lib/edu/mit/broad/picard/util/RExecutor.java new file mode 100644 index 0000000000..7faa23a9c6 --- /dev/null +++ b/lib/edu/mit/broad/picard/util/RExecutor.java @@ -0,0 +1,93 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + */ + +package edu.mit.broad.picard.util; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import edu.mit.broad.picard.PicardException; +import edu.mit.broad.picard.io.IoUtil; + +/** + * Util class for executing R scripts. 
+ * + * @author Doug Voet + */ +public class RExecutor { + private static final String R_EXE = "Rscript"; + + /** + * Executes the given R script that is stored in a file on the classpath. The script file + * is read from the classpath and written to a temp file then executed by a call to Rscript. + * Blocks until the R script is complete. + * + * @param rScriptName the fully qualified name of the classpath resource of the script + * @param arguments any arguments required by the script + * @return the return code of the R process + */ + public static int executeFromClasspath(String rScriptName, String... arguments) { + File scriptFile = writeScriptFile(rScriptName); + int returnCode = executeFromFile(scriptFile, arguments); + scriptFile.delete(); + return returnCode; + } + + /** + * Executes the given R script that is stored in a file by a call to Rscript. + * Blocks until the R script is complete. + * + * @param scriptFile the file object for the script + * @param arguments any arguments required by the script + * @return the return code of the R process + */ + public static int executeFromFile(File scriptFile, String... arguments) { + String[] command = new String[arguments.length + 2]; + command[0] = R_EXE; + command[1] = scriptFile.getAbsolutePath(); + System.arraycopy(arguments, 0, command, 2, arguments.length); + return ProcessExecutor.execute(command); + } + + /** + * Writes the classpath resource named by rScriptName to the temp dir. 
+ */ + private static File writeScriptFile(String rScriptName) { + InputStream scriptStream = null; + OutputStream scriptFileStream = null; + try { + scriptStream = RExecutor.class.getClassLoader().getResourceAsStream(rScriptName); + if (scriptStream == null) { + throw new IllegalArgumentException("Script [" + rScriptName + "] not found in classpath"); + } + File scriptFile = File.createTempFile("script", ".R"); + scriptFileStream = IoUtil.openFileForWriting(scriptFile); + IoUtil.copyStream(scriptStream, scriptFileStream); + return scriptFile; + } catch (IOException e) { + throw new PicardException("Unexpected exception creating R script file", e); + } finally { + if (scriptStream != null) { + try { + scriptStream.close(); + } catch (IOException e) { + } + } + if (scriptFileStream != null) { + try { + scriptFileStream.close(); + } catch (IOException e) { + } + } + } + } +} diff --git a/lib/edu/mit/broad/picard/util/SamPairUtil.java b/lib/edu/mit/broad/picard/util/SamPairUtil.java new file mode 100644 index 0000000000..4d78019dbc --- /dev/null +++ b/lib/edu/mit/broad/picard/util/SamPairUtil.java @@ -0,0 +1,74 @@ +package edu.mit.broad.picard.util; + +import edu.mit.broad.sam.SAMRecord; + +/** + * Utility mthods for pairs of SAMRecords + */ +public class SamPairUtil { + + // TODO: KT and TF say this is more complicated than what I have here + public static boolean isProperPair(final SAMRecord firstEnd, final SAMRecord secondEnd, boolean jumpingLibrary) { + if (firstEnd.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) { + return false; + } + if (!firstEnd.getReferenceName().equals(secondEnd.getReferenceName())) { + return false; + } + if (firstEnd.getReadNegativeStrandFlag() == secondEnd.getReadNegativeStrandFlag()) { + return false; + } + final SAMRecord positiveEnd; + final SAMRecord negativeEnd; + if (firstEnd.getReadNegativeStrandFlag()) { + positiveEnd = secondEnd; + negativeEnd = firstEnd; + } else { + positiveEnd = firstEnd; + negativeEnd = 
    /**
     * Computes the insert size as the signed distance between the 5'-most positions of
     * the two ends: (second end 5' position) - (first end 5' position). The 5' position
     * of a negative-strand read is its alignment end.
     *
     * @return the signed insert size, or 0 when the first end is unaligned or the two
     *         ends map to different reference sequences
     */
    public static int computeInsertSize(final SAMRecord firstEnd, final SAMRecord secondEnd) {
        if (firstEnd.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
            return 0;
        }
        if (!firstEnd.getReferenceName().equals(secondEnd.getReferenceName())) {
            return 0;
        }
        int firstEnd5PrimePosition = firstEnd.getReadNegativeStrandFlag()? firstEnd.getAlignmentEnd(): firstEnd.getAlignmentStart();
        int secondEnd5PrimePosition = secondEnd.getReadNegativeStrandFlag()? secondEnd.getAlignmentEnd(): secondEnd.getAlignmentStart();
        return secondEnd5PrimePosition - firstEnd5PrimePosition;
    }

    /**
     * Write the mate info for two SAMRecords: each record receives the other's reference
     * name, alignment start and strand, or the NO_ALIGNMENT sentinels when the mate is
     * recorded as unmapped.
     *
     * NOTE(review): both branches test the record's own mate-unmapped flag
     * (samRecord.getMateUnmappedFlag() / mate.getMateUnmappedFlag()) rather than the other
     * record's read-unmapped flag, i.e. this method assumes the mate-unmapped flags were
     * already set correctly by the caller before this is invoked — confirm against callers.
     */
    public static void setMateInfo(final SAMRecord samRecord, final SAMRecord mate) {
        if (!samRecord.getMateUnmappedFlag()) {
            samRecord.setMateReferenceName(mate.getReferenceName());
            samRecord.setMateAlignmentStart(mate.getAlignmentStart());
            samRecord.setMateNegativeStrandFlag(mate.getReadNegativeStrandFlag());
        } else {
            samRecord.setMateReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
            samRecord.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
            samRecord.setMateUnmappedFlag(true);
        }
        if (!mate.getMateUnmappedFlag()) {
            mate.setMateReferenceName(samRecord.getReferenceName());
            mate.setMateAlignmentStart(samRecord.getAlignmentStart());
            mate.setMateNegativeStrandFlag(samRecord.getReadNegativeStrandFlag());
        } else {
            mate.setMateReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
            mate.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
            mate.setMateUnmappedFlag(true);
        }
    }


}
100644 index 0000000000..d0a7937f8b --- /dev/null +++ b/lib/edu/mit/broad/picard/util/SequenceUtil.java @@ -0,0 +1,62 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.util; + +public class SequenceUtil { + /** + * Calculate the reverse complement of the specified sequence + * (Stolen from Reseq) + * + * @param sequenceData + * @return reverse complement + */ + public static String reverseComplement(String sequenceData) { + + final char[] original = sequenceData.toCharArray(); + final char[] complement = new char[original.length]; + + for (int i=0, j=complement.length-1; i 90) lhs -= 32; + if (rhs > 90) rhs -= 32; + } + + return lhs == rhs; + } + + /** + * returns true if the value of base represents a no call + */ + public static boolean isNoCall(byte base) { + return base == 'N' || base == 'n' || base == '.'; + } + +} diff --git a/lib/edu/mit/broad/picard/util/StringSortingCollectionFactory.java b/lib/edu/mit/broad/picard/util/StringSortingCollectionFactory.java new file mode 100644 index 0000000000..fbc4798b92 --- /dev/null +++ b/lib/edu/mit/broad/picard/util/StringSortingCollectionFactory.java @@ -0,0 +1,121 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.util; + +import edu.mit.broad.sam.util.SortingCollection; +import edu.mit.broad.sam.util.RuntimeIOException; + +import java.util.Comparator; +import java.nio.ByteBuffer; +import java.io.OutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.File; + +/** + * Factory to create new String SortingCollections + * + * @author Kathleen Tibbetts + */ +public class StringSortingCollectionFactory { + + private static final File TEMP_DIR = new File(System.getProperty("java.io.tmpdir"), "StringSortingCollectionFactory"); + private static final int MAX_RECORDS_IN_RAM = 20000; + + private StringSortingCollectionFactory() { + } + + public static SortingCollection newCollection() { + return SortingCollection.newInstance( + String.class, new StringCodec(), new StringComparator(), MAX_RECORDS_IN_RAM, TEMP_DIR); + } + + static class StringCodec implements SortingCollection.Codec { + ByteBuffer byteBuffer = ByteBuffer.allocate(4); + OutputStream os; + InputStream is; + + /** Returns a new StringCodec. */ + public SortingCollection.Codec clone() { + return new StringCodec(); + } + + /** + * Where to write encoded output + * + * @param os the output stream to encode output + */ + public void setOutputStream(final OutputStream os) { + this.os = os; + } + + /** + * Where to read encoded input from + * + * @param is where to read encoded input from + */ + public void setInputStream(final InputStream is) { + this.is = is; + } + + /** + * Write object to file + * + * @param val what to write + */ + public void encode(final String val) { + try { + byteBuffer.clear(); + byteBuffer.putInt(val.length()); + os.write(byteBuffer.array()); + os.write(val.getBytes()); + } catch (IOException e) { + throw new RuntimeIOException(e); + } + } + + /** + * Read the next record from the input stream and convert into a java object. + * + * @return null if no more records. 
Should throw exception if EOF is encountered in the middle of + * a record. + */ + public String decode() { + try { + byteBuffer.clear(); + int bytesRead = is.read(byteBuffer.array()); + if (bytesRead == -1) { + return null; + } + if (bytesRead != 4) { + throw new RuntimeException("Unexpected EOF in middle of record"); + } + byteBuffer.limit(4); + final int length = byteBuffer.getInt(); + final byte[] buf = new byte[length]; + bytesRead = is.read(buf); + if (bytesRead != length) { + throw new RuntimeException("Unexpected EOF in middle of record"); + } + return new String(buf); + } catch (IOException e) { + throw new RuntimeIOException(e); + } + } + } + + static class StringComparator implements Comparator { + + public int compare(final String s, final String s1) { + return s.compareTo(s1); + } + } + +} diff --git a/lib/edu/mit/broad/picard/util/StringUtil.java b/lib/edu/mit/broad/picard/util/StringUtil.java new file mode 100644 index 0000000000..2cf15de820 --- /dev/null +++ b/lib/edu/mit/broad/picard/util/StringUtil.java @@ -0,0 +1,108 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.util; + +/** + * Utilities that are useful when dealing with Strings. + * + * @author Tim Fennell + */ +public class StringUtil { + /** + * Return input string with newlines inserted to ensure that all lines + * have length <= maxLineLength. if a word is too long, it is simply broken + * at maxLineLength. Does not handle tabs intelligently (due to implementer laziness). 
    /**
     * Return input string with newlines inserted to ensure that all lines
     * have length <= maxLineLength. If a word is too long, it is simply broken
     * at maxLineLength. Does not handle tabs intelligently (due to implementer laziness).
     *
     * NOTE(review): String.split("\n") discards trailing empty strings, so a run of
     * trailing newlines in the input collapses to the single "\n" appended below —
     * confirm callers don't rely on preserving multiple trailing newlines.
     *
     * @param s the text to wrap; may contain embedded newlines, each line is wrapped independently
     * @param maxLineLength maximum length of any output line
     * @return the wrapped text
     */
    public static String wordWrap(String s, int maxLineLength) {
        String[] lines = s.split("\n");
        StringBuilder sb = new StringBuilder();
        for (String line: lines) {
            if (sb.length() > 0) {
                sb.append("\n");
            }
            sb.append(wordWrapSingleLine(line, maxLineLength));
        }
        // split() drops the trailing separator, so restore a final newline if the input had one.
        if (s.endsWith("\n")) {
            sb.append("\n");
        }
        return sb.toString();
    }

    /**
     * Wraps a single line (no embedded newlines) so that each output line is at most
     * maxLineLength characters, breaking at whitespace when possible and mid-word otherwise.
     *
     * @param s a single line of text
     * @param maxLineLength maximum length of any output line
     * @return the wrapped line
     */
    public static String wordWrapSingleLine(String s, int maxLineLength) {
        if (s.length() <= maxLineLength) {
            return s;
        }
        StringBuilder sb = new StringBuilder();
        int startCopyFrom = 0;
        while (startCopyFrom < s.length()) {
            // lastSpaceIndex tracks the last whitespace seen inside the current window;
            // equal to startCopyFrom means no break point was found.
            int lastSpaceIndex = startCopyFrom;
            int i;
            // Find break point (if it exists)
            for (i = startCopyFrom; i < s.length() && i - startCopyFrom < maxLineLength; ++i) {
                if (Character.isWhitespace(s.charAt(i))) {
                    lastSpaceIndex = i;
                }
            }
            // The remainder fits entirely: copy it all.
            if (i - startCopyFrom < maxLineLength) {
                lastSpaceIndex = i;
            }
            // Include any trailing whitespace
            for (; lastSpaceIndex < s.length() && Character.isWhitespace(s.charAt(lastSpaceIndex)); ++lastSpaceIndex) {}
            if (sb.length() > 0) {
                sb.append("\n");
            }
            // Handle situation in which there is no word break. Just break the word in the middle.
            if (lastSpaceIndex == startCopyFrom) {
                lastSpaceIndex = i;
            }
            sb.append(s.substring(startCopyFrom, lastSpaceIndex));
            startCopyFrom = lastSpaceIndex;
        }
        return sb.toString();
    }

    /**
     * Joins the given strings with the separator between each adjacent pair.
     *
     * @param separator String to interject between each string in strings arg
     * @param strings List of strings to be joined.
     * @return String that concatenates each item of strings arg, with separator btw each of them;
     *         the empty string when no strings are supplied
     */
    public static String join(String separator, String... strings) {
        if (strings.length == 0) {
            return "";
        }
        StringBuilder ret = new StringBuilder(strings[0]);
        for (int i = 1; i < strings.length; ++i) {
            ret.append(separator);
            ret.append(strings[i]);
        }
        return ret.toString();
    }
+ * + * @param s the String to check + * @param chars the characters to check for + * @return String the input String for convenience + * @throws IllegalArgumentException if the String contains one or more of the characters + */ + public static String assertCharactersNotInString(final String s, final char... chars) { + for (char ch : s.toCharArray()) { + for (int i=0; ib is a delimiter; otherwise false + */ + protected boolean isDelimiter(byte b) { + return b == '\t'; + } +} diff --git a/lib/edu/mit/broad/picard/variation/DbSnpFileGenerator.java b/lib/edu/mit/broad/picard/variation/DbSnpFileGenerator.java new file mode 100644 index 0000000000..5f44c972a2 --- /dev/null +++ b/lib/edu/mit/broad/picard/variation/DbSnpFileGenerator.java @@ -0,0 +1,172 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.variation; + +import java.io.*; +import java.util.*; +import edu.mit.broad.sam.SAMSequenceRecord; +import edu.mit.broad.sam.SAMFileReader; +import edu.mit.broad.sam.util.BinaryCodec; +import edu.mit.broad.picard.io.IoUtil; +import edu.mit.broad.picard.util.TabbedTextFileParser; +import edu.mit.broad.picard.util.Log; + +/** + * Generates a binary version of the data for all dbSnps from a UCSU snp###.txt file. Files with SNP data + * can be downloaded here: http://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/. See KnownVariantCodec.java + * for binary file format. + */ +public class DbSnpFileGenerator { + // Codes from the DbSnp file that we will handle. All others are ignored. + // Package visibility for testing purposes. 
+ static final String snp = "single"; // code in DbSnp file for a SNP + static final String insertion = "insertion"; // code in DbSnp file for an insertion + static final String deletion = "deletion"; // code in DbSnp file for a deletion + static final String indel = "in-del"; // code in DbSnp file for an insertion/deletion + + private File snpFile; + private File seqDictionaryFile; + private Map sequenceToIndex = new HashMap(); + private List dictionary; + private BinaryCodec codec; + private KnownVariantCodec kvCodec = new KnownVariantCodec(); + private Map> sequenceToSnps; + + private final Log log = Log.getInstance(DbSnpFileGenerator.class); + + /** + * Protected constructor so we can use a temporary file during testing + * @param snpFile The UCSC dbSnp file + * @param seqDictionaryFile The Sequence Dictionary + * @param tempOutputFile The binary file to write to + */ + DbSnpFileGenerator(File snpFile, File seqDictionaryFile, File tempOutputFile) { + this.snpFile = snpFile; + this.seqDictionaryFile = seqDictionaryFile; + this.codec = new BinaryCodec(new DataOutputStream(IoUtil.openFileForWriting(tempOutputFile))); + } + + /** + * Writes the full binary dbSnp file and calls close on the BinaryCodec. + */ + public void writeDbSnpFile() { + kvCodec.encode(KnownVariantCodec.MAGIC_NUMBER, codec); + writeReferenceSequences(); + writeDbSnpRecords(); + codec.close(); + } + + /** + * Writes the number of reference sequences and then the sequences themselves + */ + private void writeReferenceSequences() { + SAMFileReader sam = new SAMFileReader(this.seqDictionaryFile); + this.dictionary = sam.getFileHeader().getSequences(); + kvCodec.encode(this.dictionary, codec); + } + + /** + * Writes all the dbSnp records to the file in the order of the reference sequences + * in the sequence dictionary file. 
+ */ + private void writeDbSnpRecords() { + sequenceToSnps = new HashMap>(); + int count = 0; + + TabbedTextFileParser parser = new TabbedTextFileParser(true, snpFile); + while(parser.hasNext()) { + String parts[] = parser.next(); + String sequence = parts[1]; + + // If we don't have this sequence in our dictionary, ignore it + if (!getSequenceToIndex().containsKey(sequence)) { + continue; + } + + int start = Integer.parseInt(parts[2]) + 1; // We go from a zero-based to a 1-based system. + int end = Integer.parseInt(parts[3]); + + String var = parts[11]; + + // We only care about SNPs, insertions, and deletions; otherwise skip it + VariantType type = null; + if (var.equals(snp)) { + type = VariantType.SNP; + end = start; // For SNPs, we mark the start and end as the same location + } + // For insertions and deletions, we mark the base on either side of the affected reference sequence + else if (var.equals(insertion)) { + type = VariantType.insertion; + end = start + 1; // Insertions are always length 1 + } + else if (var.equals(deletion)) { + type = VariantType.deletion; + start = start - 1; + end++; + } + else if (var.equals(indel)) { // For indels, we do one each of an insertion (here) and a deletion (below) + type = VariantType.insertion; + start = start - 1; + end = start + 1; + } + else { + continue; + } + + if (!sequenceToSnps.containsKey(sequence)) { + sequenceToSnps.put(sequence, new TreeSet()); + } + SortedSet sequenceVars = sequenceToSnps.get(sequence); + + boolean validated = !parts[12].equals("unknown"); + String name = parts[4]; + + sequenceVars.add(new KnownVariant(name, getSequenceToIndex().get(sequence), start, end, type, validated)); + count++; + + // If it's an in-del, we add it as a deletion (in addition to the insertion we also added) so we + // will have two records in our binary format for the one record in the text file + if (var.equals(indel)) { + sequenceVars.add(new KnownVariant(name, getSequenceToIndex().get(sequence), start, + 
Integer.parseInt(parts[3])+1, VariantType.deletion, validated)); + count++; + } + } + + codec.writeInt(count); + // Loop through the sequences from the sequence dictionary in order + for (int i = 0; i < dictionary.size(); i++) { + // And write their known variants in order + if (sequenceToSnps.containsKey(dictionary.get(i).getSequenceName())) { + for (Iterator it = sequenceToSnps.get(dictionary.get(i).getSequenceName()).iterator(); + it.hasNext();) { + kvCodec.encode(it.next(), codec); + } + } + } + log.info("Wrote " + count + " dbSnp records."); + } + + /** + * Returns the map of sequences to their index in the reference dictionary, + * creating it if it does not already exist + * + * @return the map of sequences to their index in the reference dictionary + */ + private Map getSequenceToIndex() { + if (sequenceToIndex.keySet().size() == 0) { + for (int i = 0; i < dictionary.size(); i++) { + sequenceToIndex.put(dictionary.get(i).getSequenceName(), i); + } + } + return sequenceToIndex; + } + +} \ No newline at end of file diff --git a/lib/edu/mit/broad/picard/variation/DbSnpFileReader.java b/lib/edu/mit/broad/picard/variation/DbSnpFileReader.java new file mode 100644 index 0000000000..dbee370d29 --- /dev/null +++ b/lib/edu/mit/broad/picard/variation/DbSnpFileReader.java @@ -0,0 +1,149 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.variation; + +import java.io.*; +import java.util.*; +import edu.mit.broad.sam.SAMSequenceRecord; +import edu.mit.broad.sam.util.BinaryCodec; +import edu.mit.broad.picard.PicardException; +import edu.mit.broad.picard.io.IoUtil; + +/** + * Reader for DbSnp binary files. See DbSnpFileGenerator for file format. + */ +public class DbSnpFileReader implements KnownVariantIterator +{ + private BinaryCodec codec = null; + private KnownVariantCodec kvCodec = new KnownVariantCodec(); + List dictionary; + private Map refIndexToName = new HashMap(); + private KnownVariant next = null; + private int dbSnpCount = -1; + + /** + * Constructor + * + * @param dbSnpFile The binary dbSnp file to read + */ + public DbSnpFileReader(File dbSnpFile) + { + codec = new BinaryCodec(new DataInputStream(IoUtil.openFileForReading(dbSnpFile))); + readHeader(); + next = readNextDbSnp(); + } + + /** + * Returns an iterator over a set of elements of type KnownVariant. + * + * @return an Iterator + */ + public Iterator iterator() + { + return this; + } + + /** + * Returns true if the iteration has more elements. + * + * @return true if the iterator has more elements. + */ + public boolean hasNext() + { + return next != null; + } + + /** + * Returns the next element in the iteration. + * + * @return the next KnownVariant in the iteratoion + */ + public KnownVariant next() + { + if (!hasNext()) throw new NoSuchElementException(); + KnownVariant result = next; + next = readNextDbSnp(); + return result; + } + + /** Allows peeking at the next value without advaning the iterator. */ + public KnownVariant peek() { + return this.next; + } + + /** + * Not supported. 
+ * + * @throws UnsupportedOperationException + */ + public void remove() + { + throw new UnsupportedOperationException("Remove() not supported."); + } + + /** + * Closes the underlying stream, via the BinaryCodec's close() method + */ + public void close() + { + codec.close(); + } + + /** + * Reads the header data from the binary file, validates the version, and populates refIndexToName + * + * @throws IOException + */ + private void readHeader() + { + // Verify that we are using the correct version + String ver = kvCodec.decodeMagicNumber(codec); + if (!ver.equals(KnownVariantCodec.MAGIC_NUMBER)) + { + throw new RuntimeException("Unsupported dbSnp file version: " + ver); + } + + // Read the number of reference sequences and then the sequences themselves + dictionary = kvCodec.decodeSequenceDictionary(codec); + for (int i = 0; i < dictionary.size(); i++) + { + refIndexToName.put(i, dictionary.get(i)); + } + + dbSnpCount = codec.readInt(); + } + + /** + * Reads the next dbSnp record from the binary file + * + * @return the populated KnownVariant object + */ + private KnownVariant readNextDbSnp() { + KnownVariant kv = kvCodec.decodeKnownVariant(codec); + if (kv != null) { + kv.setRefrenceSequence(refIndexToName.get(kv.getSequenceIndex()).getSequenceName()); + } + return kv; + } + + /** + * Returns the SequenceDictionary for this file in SAM format + * + * @return an ordered List of SAMSequenceRecords + */ + public List getSequenceDictionary() { return dictionary; } + + /** + * Returns the total number of dbSnp records encoded in the file + * + * @return total dbSnps encoded in the file + */ + public int getCountDbSnpRecords() { return dbSnpCount; } +} \ No newline at end of file diff --git a/lib/edu/mit/broad/picard/variation/GenerateDbSnpFile.java b/lib/edu/mit/broad/picard/variation/GenerateDbSnpFile.java new file mode 100644 index 0000000000..65c8570b6e --- /dev/null +++ b/lib/edu/mit/broad/picard/variation/GenerateDbSnpFile.java @@ -0,0 +1,51 @@ +/* +* The Broad 
/**
 * CommandLineProgram that invokes DbSnpFileGenerator to convert a UCSC dbSnp text file
 * into the binary KnownVariant format.
 *
 * @author Kathleen Tibbetts
 */
public class GenerateDbSnpFile extends CommandLineProgram
{
    // The following attributes define the command-line arguments, bound by the
    // CommandLineProgram framework.
    @Usage(programVersion="1.0")
    public String USAGE =
        "Usage: " + getClass().getName() + " [options]\n\n" +
        "Generate a KnownVariant binary file from a UCSC DbSnp text file.\n";

    @Option(shortName = "S", doc = "UCSC SNP file. ")
    public File SNP_FILE;

    @Option(shortName = "D", doc = "Sequence Dictionary for the genome in SAM or BAM format. ")
    public File SEQUENCE_DICTIONARY;

    @Option(shortName = "O", doc = "The binary output file. ")
    public File OUTPUT;

    /** Runs the generator against the supplied files; returns 0 on success. */
    @Override
    protected int doWork() {
        DbSnpFileGenerator generator = new DbSnpFileGenerator(SNP_FILE, SEQUENCE_DICTIONARY, OUTPUT);
        generator.writeDbSnpFile();
        return 0;
    }

    public static void main(String[] argv) {
        System.exit(new GenerateDbSnpFile().instanceMain(argv));
    }

}
+ * + * IMPORTANT! Regardless of the coordinate system of the data from which it is drawn, the data + * in this class should be 1-based. Start and end coordinates should be as follows: + * For SNPs, start and end should be the same base. + * For insertions and deletions, the base on either side of the affected reference sequence + * will be the start and end. For insertions, this means they will always be 1 base apart. + */ +public class KnownVariant implements Comparable +{ + private final String name; + private final int sequenceIndex; + private final int startPos; + private final int endPos; + private final VariantType type; + private final boolean validated; + private transient String referenceSequence; + + /** + * Constructor + * + * @param name + * @param sequenceIndex + * @param startPos + * @param endPos + * @param type + * @param validated + */ + public KnownVariant(String name, int sequenceIndex, int startPos, int endPos, + VariantType type, boolean validated) + { + this.name = name; + this.sequenceIndex = sequenceIndex; + this.startPos = startPos; + this.endPos = endPos; + this.type = type; + this.validated = validated; + } + + /** + * Compares this object with the specified object for order. Returns a negative integer, zero, or a positive + * integer as this object is less than, equal to, or greater than the specified object. + * + * @param that The KnownVariant to compare + * @return a negative integer, zero, or a positive integer as this object is less than, equal to, + * or greater than the specified object + */ + public int compareTo(KnownVariant that) + { + if (this.getSequenceIndex() != that.getSequenceIndex()) + { + return (this.getSequenceIndex() > that.getSequenceIndex()) ? 1 : -1; + } + else if (this.getStartPos() != that.getStartPos()) + { + return (this.getStartPos() > that.getStartPos()) ? 1 : -1; + } + else if (this.getEndPos() != that.getEndPos()) + { + return (this.getEndPos() > that.getEndPos()) ? 
1 : -1; + } + else if (!this.getName().equals(that.getName())) + { + return this.getName().compareTo(that.getName()); + } + else if (this.getType() != that.getType()) + { + return this.getType().compareTo(that.getType()); + } + else if (this.isValidated() != that.isValidated()) + { + return this.isValidated() ? 1 : -1; + } + return 0; + } + + public boolean equals(Object o) + { + if (!(o instanceof KnownVariant)) { + return false; + } + KnownVariant that = (KnownVariant)o; + return (this.name.equals(that.name) && + this.sequenceIndex == that.sequenceIndex && + this.startPos == that.startPos && + this.endPos == that.endPos && + this.type == that.type && + this.validated == that.validated); + } + + public int hasCode() + { + int result = 17; + result = 37*result + name.hashCode(); + result = 37*result + sequenceIndex; + result = 37*result + startPos; + result = 37*result + endPos; + result = 37*result + type.hashCode(); + result = 37*result + (validated ? 1 : 0); + return result; + } + + public String getName() { return name; } + public int getSequenceIndex() { return sequenceIndex; } + public String getRefrenceSequence() { return referenceSequence; } + public void setRefrenceSequence(String referenceSequence) { this.referenceSequence = referenceSequence; } + public int getStartPos() { return startPos; } + public int getEndPos() { return endPos; } + public VariantType getType() { return type; } + public boolean isValidated() { return validated; } + +} diff --git a/lib/edu/mit/broad/picard/variation/KnownVariantCodec.java b/lib/edu/mit/broad/picard/variation/KnownVariantCodec.java new file mode 100644 index 0000000000..2258e756cb --- /dev/null +++ b/lib/edu/mit/broad/picard/variation/KnownVariantCodec.java @@ -0,0 +1,179 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. 
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
package edu.mit.broad.picard.variation;

import edu.mit.broad.sam.SAMSequenceRecord;
import edu.mit.broad.sam.util.BinaryCodec;
import edu.mit.broad.sam.util.RuntimeEOFException;

import java.util.ArrayList;
import java.util.List;

/**
 * Class for encoding and decoding binary data about KnownVariants
 *
 * IMPORTANT! This class assumes that a KnownVariant instance is 1-based and end-inclusive
 * and that the binary format is 0-based and end-exclusive.
 *
 * The format for the binary dbSnp file is as follows:
 *
 * Field        Description                                             Type        Value
 * -----        -----------                                             ----        -----
 * magic        Known variant magic number                              char[4]     DBS\1
 * n_ref        # reference sequences                                   int32
 *
 *     -- List of references information (n = n_ref)
 *     l_name   length of the reference name plus 1 (including NULL)    int32
 *     name     Name; NULL terminated                                   char[l_name]
 *     t_ref    Length of the reference sequence                        int32
 *
 * n_snps       # of Known Variant records                              int32
 *
 *     -- List of DBSnps
 *     block_size   Length of the remainder of the block
 *     rID          Reference sequence ID (-1 <= rId <= n_ref)          int32
 *     pos          0-based leftmost coordinate                         int32
 *     snp_len      Length of the dbSnp                                 int32
 *     type         type of SNP                                         int8        0 = deletion
 *                                                                                  1 = het
 *                                                                                  2 = in-del
 *                                                                                  3 = insertion
 *                                                                                  4 = microsatellite
 *                                                                                  5 = mixed
 *                                                                                  6 = mnp
 *                                                                                  7 = named
 *                                                                                  8 = single
 *                                                                                  9 = unknown
 *     validated    whether the SNP has been validated                  int8        1 | 0
 *     name         name of the dbSnp; NULL terminated                  char[block_size-15]
 *
 * NOTE(review): the type table above lists ten codes, but what is actually written is
 * VariantType.ordinal(), and VariantType currently declares only three constants
 * (SNP, insertion, deletion) -- confirm which mapping is authoritative.
 *
 * @author Kathleen Tibbetts
 **/
public class KnownVariantCodec
{
    public static final String MAGIC_NUMBER = "DBS\1";
    // rID(4) + pos(4) + snp_len(4) + type(1) + validated(1) + trailing NULL(1) = 15
    private static final int KV_RECORD_LENGTH_LESS_NAME = 15;

    /**
     * Reads data about a known variant from the BinaryCodec and instantiates a KnownVariant
     * object with those values
     *
     * @param codec The BinaryCodec from which to read
     * @return a populated KnownVariant object, or null at end of stream
     */
    public KnownVariant decodeKnownVariant(BinaryCodec codec)
    {
        int blockSize;
        try {
            blockSize = codec.readInt();
        }
        catch (RuntimeEOFException e) {
            // A clean EOF at a block boundary signals the end of the record list.
            return null;
        }
        int seqIndex = codec.readInt();
        int startPos = codec.readInt() + 1;     // Switch to 1-based
        int endPos = codec.readInt();
        byte[] buffer = new byte[1];
        codec.readBytes(buffer);
        VariantType type = VariantType.getVariantTypeFromOrdinal((int) buffer[0]);
        codec.readBytes(buffer);
        boolean validated = ((int) buffer[0]) == 1;
        String name = codec.readString(blockSize - KV_RECORD_LENGTH_LESS_NAME);
        codec.readBytes(buffer);                // Skip the null terminator
        return new KnownVariant(name, seqIndex, startPos, endPos, type, validated);
    }

    /**
     * Writes data from a KnownVariant in the expected format to the BinaryCodec
     *
     * @param variant The KnownVariant to encode
     * @param codec The BinaryCodec to which to write
     */
    public void encode(KnownVariant variant, BinaryCodec codec)
    {
        codec.writeInt(variant.getName().length() + KV_RECORD_LENGTH_LESS_NAME); // Length of the rest of the block
        codec.writeInt(variant.getSequenceIndex());     // Index of the reference sequence
        codec.writeInt(variant.getStartPos() - 1);      // Switch to 0-based leftmost coordinate
        codec.writeInt(variant.getEndPos());            // end position, exclusive
        byte b[] = new byte[1];
        b[0] = (byte)variant.getType().ordinal();       // Type
        codec.writeBytes(b);
        b[0] = (byte)(variant.isValidated() ? 1 : 0);   // Validated
        codec.writeBytes(b);
        codec.writeString(variant.getName(), false, true); // The null-terminated name
    }

    /**
     * Reads data about the Sequence Dictionary from the BinaryCodec and instantiates a List of
     * SAMSequenceRecords with those values
     *
     * @param codec The BinaryCodec from which to read
     * @return a populated List of SAMSequenceRecords
     */
    public List<SAMSequenceRecord> decodeSequenceDictionary(BinaryCodec codec)
    {
        int total = codec.readInt();
        List<SAMSequenceRecord> dictionary = new ArrayList<SAMSequenceRecord>(total);
        for (int i = 0; i < total; i++)
        {
            int len = codec.readInt();
            // Read the name, leaving off and then skipping the null terminator
            String name = codec.readString(len - 1);
            byte[] buffer = new byte[1];
            codec.readBytes(buffer);
            int seqLength = codec.readInt();
            SAMSequenceRecord rec = new SAMSequenceRecord(name);
            rec.setSequenceLength(seqLength);
            dictionary.add(rec);
        }
        return dictionary;
    }

    /**
     * Writes a Sequence Dictionary in the format expected to the BinaryCodec
     *
     * @param dictionary The list of SAMSequenceRecords to encode
     * @param codec The BinaryCodec to which to write
     */
    public void encode(List<SAMSequenceRecord> dictionary, BinaryCodec codec)
    {
        codec.writeInt(dictionary.size());
        for (SAMSequenceRecord sequence : dictionary)
        {
            codec.writeString(sequence.getSequenceName(), true, true);
            codec.writeInt(sequence.getSequenceLength());
        }
    }

    /**
     * Reads data about the Magic Number from the BinaryCodec and returns a string with its value
     *
     * @param codec The BinaryCodec from which to read
     * @return a Magic Number
     */
    public String decodeMagicNumber(BinaryCodec codec)
    {
        return codec.readString(4);
    }

    /**
     * Writes a Magic Number in the format expected to the BinaryCodec
     *
     * @param magicNumber The magic number to encode
     * @param codec The BinaryCodec to which to write
     */
    public void encode(String magicNumber, BinaryCodec codec)
    {
        codec.writeString(magicNumber, false, false);
    }
}
diff --git a/lib/edu/mit/broad/picard/variation/KnownVariantIterator.java b/lib/edu/mit/broad/picard/variation/KnownVariantIterator.java new file mode 100644 index 0000000000..6cb0712e14 --- /dev/null +++ b/lib/edu/mit/broad/picard/variation/KnownVariantIterator.java @@ -0,0 +1,31 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.picard.variation; + +import edu.mit.broad.sam.SAMSequenceRecord; + +import java.util.Iterator; +import java.util.List; + +/** + * API for iterating over records representing known variations + * + * @author Kathleen Tibbetts + */ +public interface KnownVariantIterator extends Iterable, Iterator +{ + /** + * Return the list of sequence dictionary (list of SAMSequenceRecords in order) + * for this KnownVariantIterator + * + * @return The SAMSequenceRecords that comprise the sequence dictionary for this iterator, in order + */ + public List getSequenceDictionary(); +} diff --git a/lib/edu/mit/broad/picard/variation/VariantType.java b/lib/edu/mit/broad/picard/variation/VariantType.java new file mode 100644 index 0000000000..354e047230 --- /dev/null +++ b/lib/edu/mit/broad/picard/variation/VariantType.java @@ -0,0 +1,30 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.picard.variation; + +/** + * Enum to hold the possible types of dbSnps. Note that these correspsond to the names used + * in the dbSnp database with the exception of indel (which is in-del in dbSnp). + */ +public enum VariantType +{ + SNP, insertion, deletion; + + /** + * Gets the enum for a given ordinal + * + * @param ordinal + * @return VariantType + */ + public static VariantType getVariantTypeFromOrdinal(int ordinal) + { + return VariantType.class.getEnumConstants()[ordinal]; + } +} diff --git a/lib/edu/mit/broad/sam/AlignmentBlock.java b/lib/edu/mit/broad/sam/AlignmentBlock.java new file mode 100644 index 0000000000..ef1ec841c2 --- /dev/null +++ b/lib/edu/mit/broad/sam/AlignmentBlock.java @@ -0,0 +1,31 @@ +package edu.mit.broad.sam; + +/** + * Represents the contiguous alignment of a subset of read bases to a reference + * sequence. Simply put an alignment block tells you that read bases from + * readStart are aligned to the reference (matching or mismatching) from + * referenceStart for length bases. + * + * @author Tim Fennell + */ +public class AlignmentBlock { + private int readStart; + private int referenceStart; + private int length; + + /** Constructs a new alignment block with the supplie read and ref starts and length. */ + AlignmentBlock(int readStart, int referenceStart, int length) { + this.readStart = readStart; + this.referenceStart = referenceStart; + this.length = length; + } + + /** The first, 1-based, base in the read that is aligned to the reference reference. */ + public int getReadStart() { return readStart; } + + /** The first, 1-based, position in the reference to which the read is aligned. */ + public int getReferenceStart() { return referenceStart; } + + /** The number of contiguous bases aligned to the reference. 
*/ + public int getLength() { return length; } +} diff --git a/lib/edu/mit/broad/sam/BAMFileConstants.java b/lib/edu/mit/broad/sam/BAMFileConstants.java new file mode 100644 index 0000000000..7b5cf6c70c --- /dev/null +++ b/lib/edu/mit/broad/sam/BAMFileConstants.java @@ -0,0 +1,33 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam; + +class BAMFileConstants { + /** + * The beginning of a BAMRecord is a fixed-size block of 8 int32s + */ + static final int FIXED_BLOCK_SIZE = 8 * 4; + + /** + * Sanity check -- we never expect BAMRecords to be as big as this. + */ + static final int MAXIMUM_RECORD_LENGTH = 1024 * 1024; + + /** + * BAM file magic number. This is what is present in the gunzipped version of the file, + * which never exists on disk. + */ + + static final byte[] BAM_MAGIC = "BAM\1".getBytes(); + /** + * BAM index file magic number. + */ + static final byte[] BAM_INDEX_MAGIC = "BAI\1".getBytes(); +} diff --git a/lib/edu/mit/broad/sam/BAMFileIndex.java b/lib/edu/mit/broad/sam/BAMFileIndex.java new file mode 100644 index 0000000000..d6624b76d1 --- /dev/null +++ b/lib/edu/mit/broad/sam/BAMFileIndex.java @@ -0,0 +1,277 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. 
+ */ +package edu.mit.broad.sam; + + +import edu.mit.broad.sam.util.RuntimeEOFException; +import edu.mit.broad.sam.util.RuntimeIOException; + +import java.io.*; +import java.nio.*; +import java.nio.channels.*; +import java.util.*; + +/** + * Internal class for reading BAM file indexes. + */ +class BAMFileIndex +{ + private static final int MAX_BINS = 37450; // =(8^6-1)/7+1 + private static final int BAM_LIDX_SHIFT = 16; + + private File mFile = null; + private FileInputStream mFileStream = null; + private MappedByteBuffer mFileBuffer = null; + + + BAMFileIndex(final File file) { + mFile = file; + } + + void close() { + closeFileStream(); + } + + long[] getSearchBins(int referenceIndex, int startPos, int endPos) { + + openIndex(); + seek(4); + + int sequenceCount = readInteger(); + // System.out.println("# Sequence count: " + sequenceCount); + if (referenceIndex >= sequenceCount) { + return null; + } + + BitSet regionBins = regionToBins(startPos, endPos); + if (regionBins == null) { + return null; + } + + for (int i = 0; i < referenceIndex; i++) { + // System.out.println("# Sequence TID: " + i); + int nBins = readInteger(); + // System.out.println("# nBins: " + nBins); + for (int j = 0; j < nBins; j++) { + int bin = readInteger(); + int nChunks = readInteger(); + // System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks); + skipBytes(16 * nChunks); + } + int nLinearBins = readInteger(); + // System.out.println("# nLinearBins: " + nLinearBins); + skipBytes(8 * nLinearBins); + } + + // System.out.println("# Sequence target TID: " + referenceIndex); + int nIndexBins = readInteger(); + // System.out.println("# nBins: " + nIndexBins); + if (nIndexBins == 0) { + return null; + } + + List chunkList = new ArrayList(); + for (int i = 0; i < nIndexBins; i++) { + int indexBin = readInteger(); + int nChunks = readInteger(); + // System.out.println("# bin[" + i + "] = " + indexBin + ", nChunks = " + nChunks); + if (regionBins.get(indexBin)) { + for (int ci = 
0; ci < nChunks; ci++) { + long chunkBegin = readLong(); + long chunkEnd = readLong(); + chunkList.add(new Chunk(chunkBegin, chunkEnd)); + } + } else { + skipBytes(16 * nChunks); + } + } + + if (chunkList.isEmpty()) { + return null; + } + + int start = (startPos <= 0) ? 0 : startPos-1; + int regionLinearBin = start >> BAM_LIDX_SHIFT; + int nLinearBins = readInteger(); + // System.out.println("# nLinearBins: " + nLinearBins); + // System.out.println("# regionLinearBin: " + regionLinearBin); + long minimumOffset = 0; + if (regionLinearBin < nLinearBins) { + skipBytes(8 * regionLinearBin); + minimumOffset = readLong(); + } + chunkList = optimizeChunkList(chunkList, minimumOffset); + return convertToArray(chunkList); + } + + private List optimizeChunkList(List chunkList, long minimumOffset) { + Chunk lastChunk = null; + Collections.sort(chunkList); + List result = new ArrayList(); + for (Chunk chunk : chunkList) { + if (chunk.getChunkEnd() <= minimumOffset) { + continue; + } + if (result.isEmpty()) { + result.add(chunk); + lastChunk = chunk; + continue; + } + // Coalesce chunks that are in adjacent file blocks. + // This is a performance optimization. + long lastFileBlock = getFileBlock(lastChunk.getChunkEnd()); + long chunkFileBlock = getFileBlock(chunk.getChunkStart()); + if (chunkFileBlock - lastFileBlock > 1) { + result.add(chunk); + lastChunk = chunk; + } else { + if (chunk.getChunkEnd() > lastChunk.getChunkEnd()) { + lastChunk.setChunkEnd(chunk.getChunkEnd()); + } + } + } + return result; + } + + private long[] convertToArray(List chunkList) { + int count = chunkList.size() * 2; + if (count == 0) { + return null; + } + int index = 0; + long[] result = new long[count]; + for (Chunk chunk : chunkList) { + result[index++] = chunk.getChunkStart(); + result[index++] = chunk.getChunkEnd(); + } + return result; + } + + private BitSet regionToBins(int startPos, int endPos) { + int maxPos = 0x1FFFFFFF; + int start = (startPos <= 0) ? 
0 : (startPos-1) & maxPos; + int end = (endPos <= 0) ? maxPos : (endPos-1) & maxPos; + if (start > end) { + return null; + } + int k; + BitSet bitSet = new BitSet(MAX_BINS); + bitSet.set(0); + for (k = 1 + (start>>26); k <= 1 + (end>>26); ++k) bitSet.set(k); + for (k = 9 + (start>>23); k <= 9 + (end>>23); ++k) bitSet.set(k); + for (k = 73 + (start>>20); k <= 73 + (end>>20); ++k) bitSet.set(k); + for (k = 585 + (start>>17); k <= 585 + (end>>17); ++k) bitSet.set(k); + for (k = 4681 + (start>>14); k <= 4681 + (end>>14); ++k) bitSet.set(k); + return bitSet; + } + + private long getFileBlock(long bgzfOffset) { + return ((bgzfOffset >> 16L) & 0xFFFFFFFFFFFFL); + } + + private void openIndex() { + if (mFileBuffer != null) { + return; + } + openFileStream(); + seek(0); + byte[] buffer = new byte[4]; + readBytes(buffer); + if (!Arrays.equals(buffer, BAMFileConstants.BAM_INDEX_MAGIC)) { + closeFileStream(); + throw new RuntimeException("Invalid file header in BAM index " + mFile + + ": " + new String(buffer)); + } + } + + private void readBytes(byte[] buffer) { + mFileBuffer.get(buffer); + } + + private int readInteger() { + return mFileBuffer.getInt(); + } + + private long readLong() { + return mFileBuffer.getLong(); + } + + private void skipBytes(int count) { + mFileBuffer.position(mFileBuffer.position() + count); + } + + private void seek(int position) { + mFileBuffer.position(position); + } + + private void openFileStream() { + if (mFileStream != null) { + return; + } + try { + mFileStream = new FileInputStream(mFile); + FileChannel channel = mFileStream.getChannel(); + mFileBuffer = channel.map(FileChannel.MapMode.READ_ONLY, 0L, channel.size()); + mFileBuffer.order(ByteOrder.LITTLE_ENDIAN); + } catch (IOException exc) { + throw new RuntimeIOException(exc.getMessage(), exc); + } + } + + private void closeFileStream() { + if (mFileStream == null) { + return; + } + try { + mFileStream.close(); + } catch (IOException exc) { + throw new RuntimeIOException(exc.getMessage(), 
exc); + } + mFileStream = null; + mFileBuffer = null; + } + + private static class Chunk + implements Comparable { + + private long mChunkStart; + private long mChunkEnd; + + Chunk(long start, long end) { + mChunkStart = start; + mChunkEnd = end; + } + + long getChunkStart() { + return mChunkStart; + } + + void setChunkStart(long value) { + mChunkStart = value; + } + + long getChunkEnd() { + return mChunkEnd; + } + + void setChunkEnd(long value) { + mChunkEnd = value; + } + + public int compareTo(Chunk chunk) { + int result = Long.signum(mChunkStart - chunk.mChunkStart); + if (result == 0) { + result = Long.signum(mChunkEnd - chunk.mChunkEnd); + } + return result; + } + } +} diff --git a/lib/edu/mit/broad/sam/BAMFileReader.java b/lib/edu/mit/broad/sam/BAMFileReader.java new file mode 100644 index 0000000000..4e81fc0170 --- /dev/null +++ b/lib/edu/mit/broad/sam/BAMFileReader.java @@ -0,0 +1,317 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.sam; + + +import edu.mit.broad.sam.util.BinaryCodec; +import edu.mit.broad.sam.util.BlockCompressedInputStream; +import edu.mit.broad.sam.util.CloseableIterator; +import edu.mit.broad.sam.util.StringLineReader; + +import java.io.DataInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Internal class for reading and querying BAM files. 
+ */ +class BAMFileReader + extends SAMFileReader.ReaderImplementation { + private boolean mIsSeekable = false; + private BinaryCodec mStream = null; + private final BlockCompressedInputStream mCompressedInputStream; + private SAMFileHeader mFileHeader = null; + private BAMFileIndex mFileIndex = null; + private long mFirstRecordPointer = 0; + private CloseableIterator mCurrentIterator = null; + private final boolean eagerDecode; + + + BAMFileReader(final InputStream stream, final boolean eagerDecode) + throws IOException { + mIsSeekable = false; + mCompressedInputStream = new BlockCompressedInputStream(stream); + mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); + this.eagerDecode = eagerDecode; + readHeader(null); + } + + BAMFileReader(final File file, final boolean eagerDecode) + throws IOException { + mIsSeekable = true; + mCompressedInputStream = new BlockCompressedInputStream(file); + mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); + this.eagerDecode = eagerDecode; + readHeader(file); + mFirstRecordPointer = mCompressedInputStream.getFilePointer(); + } + + void close() { + if (mStream != null) { + mStream.close(); + } + mStream = null; + mFileHeader = null; + mFileIndex = null; + } + + BAMFileIndex getFileIndex() { + return mFileIndex; + } + + void setFileIndex(final BAMFileIndex fileIndex) { + mFileIndex = fileIndex; + } + + SAMFileHeader getFileHeader() { + return mFileHeader; + } + + /** + * Currently this is ignored for BAM reading. Always do strict validation. 
+ */ + void setValidationStringency(final SAMFileReader.ValidationStringency validationStringency) { + } + + CloseableIterator getIterator() { + if (mStream == null) { + throw new IllegalStateException("File reader is closed"); + } + if (mCurrentIterator != null) { + throw new IllegalStateException("Iteration in progress"); + } + if (mIsSeekable) { + try { + mCompressedInputStream.seek(mFirstRecordPointer); + } catch (IOException exc) { + throw new RuntimeException(exc.getMessage(), exc); + } + } + mCurrentIterator = new BAMFileIterator(); + return mCurrentIterator; + } + + CloseableIterator query(final String sequence, final int start, final int end, final boolean contained) { + if (mStream == null) { + throw new IllegalStateException("File reader is closed"); + } + if (mCurrentIterator != null) { + throw new IllegalStateException("Iteration in progress"); + } + if (!mIsSeekable) { + throw new UnsupportedOperationException("Cannot query stream-based BAM file"); + } + if (mFileIndex == null) { + throw new IllegalStateException("No BAM file index is available"); + } + mCurrentIterator = new BAMFileIndexIterator(sequence, start, end, contained); + return mCurrentIterator; + } + + private void readHeader(final File file) + throws IOException { + + final byte[] buffer = new byte[4]; + mStream.readBytes(buffer); + if (!Arrays.equals(buffer, BAMFileConstants.BAM_MAGIC)) { + throw new IOException("Invalid BAM file header"); + } + + final int headerTextLength = mStream.readInt(); + final String textHeader = mStream.readString(headerTextLength); + mFileHeader = new SAMTextHeaderCodec().decode(new StringLineReader(textHeader), + file); + + final int sequenceCount = mStream.readInt(); + if (mFileHeader.getSequences().size() > 0) { + // It is allowed to have binary sequences but no text sequences, so only validate if both are present + if (sequenceCount != mFileHeader.getSequences().size()) { + throw new SAMFormatException("Number of sequences in text header (" + 
mFileHeader.getSequences().size() + + ") != number of sequences in binary header (" + sequenceCount + ") for file " + file); + } + for (int i = 0; i < sequenceCount; i++) { + final SAMSequenceRecord binarySequenceRecord = readSequenceRecord(file); + final SAMSequenceRecord sequenceRecord = mFileHeader.getSequence(i); + if (!sequenceRecord.getSequenceName().equals(binarySequenceRecord.getSequenceName())) { + throw new SAMFormatException("For sequence " + i + ", text and binary have different names in file " + + file); + } + if (sequenceRecord.getSequenceLength() != binarySequenceRecord.getSequenceLength()) { + throw new SAMFormatException("For sequence " + i + ", text and binary have different lengths in file " + + file); + } + } + } else { + // If only binary sequences are present, copy them into mFileHeader + final List sequences = new ArrayList(sequenceCount); + for (int i = 0; i < sequenceCount; i++) { + sequences.add(readSequenceRecord(file)); + } + mFileHeader.setSequences(sequences); + } + } + + private SAMSequenceRecord readSequenceRecord(final File file) { + final int nameLength = mStream.readInt(); + if (nameLength <= 1) { + throw new SAMFormatException("Invalid BAM file header: missing sequence name in file " + file); + } + final String sequenceName = mStream.readString(nameLength - 1); + // Skip the null terminator + mStream.readByte(); + final int sequenceLength = mStream.readInt(); + final SAMSequenceRecord record = new SAMSequenceRecord(sequenceName); + record.setSequenceLength(sequenceLength); + return record; + } + + private class BAMFileIterator + implements CloseableIterator { + + private SAMRecord mNextRecord = null; + private final BAMRecordCodec bamRecordCodec = new BAMRecordCodec(getFileHeader()); + + + BAMFileIterator() { + this(true); + } + + BAMFileIterator(final boolean advance) { + this.bamRecordCodec.setInputStream(BAMFileReader.this.mStream.getInputStream()); + + if (advance) { + advance(); + } + } + + public void close() { + if (this 
!= mCurrentIterator) { + throw new IllegalStateException("Attempt to close non-current iterator"); + } + mCurrentIterator = null; + } + + public boolean hasNext() { + return (mNextRecord != null); + } + + public SAMRecord next() { + final SAMRecord result = mNextRecord; + advance(); + return result; + } + + public void remove() { + throw new UnsupportedOperationException("Not supported: remove"); + } + + void advance() { + try { + mNextRecord = getNextRecord(); + if (eagerDecode && mNextRecord != null) { + mNextRecord.eagerDecode(); + } + } catch (IOException exc) { + throw new RuntimeException(exc.getMessage(), exc); + } + } + + SAMRecord getNextRecord() + throws IOException { + return bamRecordCodec.decode(); + } + } + + private class BAMFileIndexIterator + extends BAMFileIterator { + + private long[] mFilePointers = null; + private int mFilePointerIndex = 0; + private long mFilePointerLimit = -1; + private int mReferenceIndex = -1; + private int mRegionStart = 0; + private int mRegionEnd = 0; + private boolean mReturnContained = false; + + + BAMFileIndexIterator(final String sequence, final int start, final int end, final boolean contained) { + super(false); // delay advance() until after construction + final SAMFileHeader fileHeader = getFileHeader(); + mReferenceIndex = fileHeader.getSequenceIndex(sequence); + if (mReferenceIndex != -1) { + final BAMFileIndex fileIndex = getFileIndex(); + mFilePointers = fileIndex.getSearchBins(mReferenceIndex, start, end); + } + mRegionStart = start; + mRegionEnd = (end <= 0) ? 
Integer.MAX_VALUE : end; + mReturnContained = contained; + advance(); + } + + SAMRecord getNextRecord() + throws IOException { + while (true) { + // Advance to next file block if necessary + while (mCompressedInputStream.getFilePointer() >= mFilePointerLimit) { + if (mFilePointers == null || + mFilePointerIndex >= mFilePointers.length) { + return null; + } + final long startOffset = mFilePointers[mFilePointerIndex++]; + final long endOffset = mFilePointers[mFilePointerIndex++]; + mCompressedInputStream.seek(startOffset); + mFilePointerLimit = endOffset; + } + // Pull next record from stream + final SAMRecord record = super.getNextRecord(); + if (record == null) { + return null; + } + // If beyond the end of this reference sequence, end iteration + final int referenceIndex = record.getReferenceIndex(); + if (referenceIndex != mReferenceIndex) { + if (referenceIndex < 0 || + referenceIndex > mReferenceIndex) { + mFilePointers = null; + return null; + } + // If before this reference sequence, continue + continue; + } + if (mRegionStart == 0 && mRegionEnd == Integer.MAX_VALUE) { + // Quick exit to avoid expensive alignment end calculation + return record; + } + final int alignmentStart = record.getAlignmentStart(); + final int alignmentEnd = record.getAlignmentEnd(); + if (alignmentStart > mRegionEnd) { + // If scanned beyond target region, end iteration + mFilePointers = null; + return null; + } + // Filter for overlap with region + if (mReturnContained) { + if (alignmentStart >= mRegionStart && alignmentEnd <= mRegionEnd) { + return record; + } + } else { + if (alignmentEnd >= mRegionStart && alignmentStart <= mRegionEnd) { + return record; + } + } + } + } + } +} diff --git a/lib/edu/mit/broad/sam/BAMFileWriter.java b/lib/edu/mit/broad/sam/BAMFileWriter.java new file mode 100644 index 0000000000..6a7bf7d9ba --- /dev/null +++ b/lib/edu/mit/broad/sam/BAMFileWriter.java @@ -0,0 +1,64 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software 
and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam; + +import edu.mit.broad.sam.util.BinaryCodec; +import edu.mit.broad.sam.util.BlockCompressedOutputStream; + +import java.io.DataOutputStream; +import java.io.File; + +/** + * Concrete implementation of SAMFileWriter for writing gzipped BAM files. + */ +class BAMFileWriter extends SAMFileWriterImpl { + + private final BinaryCodec outputBinaryCodec; + private BAMRecordCodec bamRecordCodec = null; + + public BAMFileWriter(final File path) { + outputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(path))); + outputBinaryCodec.setOutputFileName(path.toString()); + } + + private void prepareToWriteAlignments() { + if (bamRecordCodec == null) { + bamRecordCodec = new BAMRecordCodec(getHeader()); + bamRecordCodec.setOutputStream(outputBinaryCodec.getOutputStream()); + } + } + + protected void writeAlignment(final SAMRecord alignment) { + prepareToWriteAlignments(); + bamRecordCodec.encode(alignment); + } + + protected void writeHeader(final String textHeader) { + outputBinaryCodec.writeBytes(BAMFileConstants.BAM_MAGIC); + + // calculate and write the length of the SAM file header text and the header text + outputBinaryCodec.writeString(textHeader, true, false); + + // write the sequences binarily. 
This is redundant with the text header + outputBinaryCodec.writeInt(getHeader().getSequences().size()); + for (final SAMSequenceRecord sequenceRecord: getHeader().getSequences()) { + outputBinaryCodec.writeString(sequenceRecord.getSequenceName(), true, true); + outputBinaryCodec.writeInt(sequenceRecord.getSequenceLength()); + } + } + + protected void finish() { + outputBinaryCodec.close(); + } + + protected String getFilename() { + return outputBinaryCodec.getOutputFileName(); + } +} diff --git a/lib/edu/mit/broad/sam/BAMRecord.java b/lib/edu/mit/broad/sam/BAMRecord.java new file mode 100644 index 0000000000..1ae5c0f3f8 --- /dev/null +++ b/lib/edu/mit/broad/sam/BAMRecord.java @@ -0,0 +1,280 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.sam; + +import edu.mit.broad.sam.util.BinaryCodec; +import edu.mit.broad.sam.util.StringUtil; + +import java.io.ByteArrayInputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + + +/** + * Wrapper class for binary BAM records. + * Delays unpacking all data binary until requested. 
+ */ +class BAMRecord + extends SAMRecord +{ + private static final int READ_NAME_OFFSET = 0; + + private byte[] mRestOfBinaryData = null; + private int mReadLength = 0; + private final short mReadNameLength; + private final int mCigarLen; + private boolean mAttributesDecoded = false; + private boolean mCigarDecoded = false; + + /** + * If any of the properties set from mRestOfBinaryData have been overridden by calls to setters, + * this is set to true, indicating that mRestOfBinaryData cannot be used to write this record to disk. + */ + private boolean mBinaryDataStale; + + BAMRecord(final SAMFileHeader header, final int referenceID, final int coordinate, final short readNameLength, final short mappingQuality, + final int indexingBin, final int cigarLen, final int flags, final int readLen, final int mateReferenceID, final int mateCoordinate, final int insertSize, + final byte[] restOfData) { + setReferenceIndex(referenceID, header); + setAlignmentStart(coordinate); + mReadNameLength = readNameLength; + setMappingQuality(mappingQuality); + setIndexingBin(indexingBin); + mCigarLen = cigarLen; + setFlags(flags); + mReadLength = readLen; + setMateReferenceIndex(mateReferenceID, header); + setMateAlignmentStart(mateCoordinate); + setInferredInsertSize(insertSize); + mRestOfBinaryData = restOfData; + + // Set these to null in order to mark them as being candidates for lazy initialization. + // If this is not done, they will have non-null defaults. + super.setReadName(null); + super.setCigarString(null); + super.setReadBases(null); + super.setBaseQualities(null); + + // Mark the binary block as being valid for writing back out to disk + mBinaryDataStale = false; + } + + protected void eagerDecode() { + // Force all the lazily-initialized attributes to be decoded. 
+ getReadName(); + getCigar(); + getReadBases(); + getBaseQualities(); + getAttributes(); + super.eagerDecode(); + mRestOfBinaryData = null; + } + + /** + * If this record has a valid binary representation of the variable-length portion of a binary record stored, + * return that byte array, otherwise return null. This will never be true for SAMRecords. It will be true + * for BAMRecords that have not been eagerDecoded(), and for which none of the data in the variable-length + * portion has been changed. + */ + @Override + public byte[] getVariableBinaryRepresentation() { + if (mBinaryDataStale) { + return null; + } + // This may have been set to null by eagerDecode() + return mRestOfBinaryData; + } + + /** + * Depending on the concrete implementation, the binary file size of attributes may be known without + * computing them all. + * + * @return binary file size of attribute, if known, else -1 + */ + @Override + public int getAttributesBinarySize() { + if (mBinaryDataStale || mRestOfBinaryData == null) { + return -1; + } + final int tagsOffset = readNameSize() + cigarSize() + basesSize() + qualsSize(); + return mRestOfBinaryData.length - tagsOffset; + } + + @Override + public void setReadName(final String value) { + super.setReadName(value); + mBinaryDataStale = true; + } + + @Override + public void setCigar(final Cigar cigar) { + super.setCigar(cigar); + mBinaryDataStale = true; + } + + @Override + public void setReadBases(final byte[] value) { + super.setReadBases(value); + mBinaryDataStale = true; + } + + @Override + public void setBaseQualities(final byte[] value) { + super.setBaseQualities(value); + mBinaryDataStale = true; + } + + @Override + public void setAttribute(final String key, final Object value) { + // populate all the attributes from the binary block before overwriting one + getAttributes(); + super.setAttribute(key, value); + mBinaryDataStale = true; + } + + /** + * Avoids decoding binary block to get read length + */ + @Override + public int 
getReadLength() { + return mReadLength; + } + + @Override + public String getReadName() { + String result = super.getReadName(); + if (mRestOfBinaryData != null && result == null) { + result = decodeReadName(); + super.setReadName(result); + } + return result; + } + + /** + * Do not include null terminator + */ + @Override + public int getReadNameLength() { + return mReadNameLength - 1; + } + + @Override + public Cigar getCigar() { + if (mRestOfBinaryData != null && !mCigarDecoded) { + final int cigarOffset = readNameSize(); + final ByteBuffer byteBuffer = ByteBuffer.wrap(mRestOfBinaryData, cigarOffset, cigarSize()); + byteBuffer.order(ByteOrder.LITTLE_ENDIAN); + super.setCigar(BinaryCigarCodec.getSingleton().decode(byteBuffer)); + mCigarDecoded = true; + } + return super.getCigar(); + } + + @Override + public int getCigarLength() { + return mCigarLen; + } + + @Override + public byte[] getReadBases() { + byte[] result = super.getReadBases(); + if (mRestOfBinaryData != null && result == null && mReadLength > 0) { + result = decodeReadBases(); + super.setReadBases(result); + } + return result; + } + + @Override + public byte[] getBaseQualities() { + byte[] ret = super.getBaseQualities(); + if (mRestOfBinaryData != null && ret == null && mReadLength > 0) { + ret = decodeBaseQualities(); + super.setBaseQualities(ret); + } + return ret; + } + + @Override + public Object getAttribute(final String key) { + if (!mAttributesDecoded) { + decodeAttributes(); + } + return super.getAttribute(key); + } + + @Override + public Set> getAttributes() { + if (!mAttributesDecoded) { + decodeAttributes(); + } + return super.getAttributes(); + } + + private void decodeAttributes() { + if (mAttributesDecoded) { + return; + } + mAttributesDecoded = true; + final Map attributes = new LinkedHashMap(); + final int tagsOffset = readNameSize() + cigarSize() + basesSize() + qualsSize(); + final int tagsSize = mRestOfBinaryData.length - tagsOffset; + final BinaryCodec byteBufferCodec = new 
BinaryCodec(new ByteArrayInputStream(mRestOfBinaryData, tagsOffset, tagsSize)); + new BinaryTagCodec(byteBufferCodec).readTags(attributes); + for (final Map.Entry entry : attributes.entrySet()) { + super.setAttribute(entry.getKey(), entry.getValue()); + } + } + + private byte[] decodeBaseQualities() { + if (mReadLength == 0) { + return null; + } + final int qualsOffset = readNameSize() + cigarSize() + basesSize(); + final byte[] ret = new byte[qualsSize()]; + System.arraycopy(mRestOfBinaryData, qualsOffset, ret, 0, qualsSize()); + return ret; + } + + private String decodeReadName() { + // Don't include terminating null + return StringUtil.bytesToString(mRestOfBinaryData, READ_NAME_OFFSET, mReadNameLength-1); + } + + private byte[] decodeReadBases() { + if (mReadLength == 0) { + return null; + } + final int basesOffset = readNameSize() + cigarSize(); + return SAMUtils.compressedBasesToBytes(mReadLength, mRestOfBinaryData, basesOffset); + } + + /* methods for computing size of variably-sizes elements */ + + private int readNameSize() { + return mReadNameLength; + } + + private int cigarSize() { + return mCigarLen * 4; + } + + private int basesSize() { + return (mReadLength + 1)/2; + } + + private int qualsSize() { + return mReadLength; + } +} diff --git a/lib/edu/mit/broad/sam/BAMRecordCodec.java b/lib/edu/mit/broad/sam/BAMRecordCodec.java new file mode 100644 index 0000000000..b73254b522 --- /dev/null +++ b/lib/edu/mit/broad/sam/BAMRecordCodec.java @@ -0,0 +1,163 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
/**
 * Codec that converts SAMRecord objects to and from the BAM binary on-disk format.
 * The field order in encode()/decode() mirrors the fixed BAM record layout exactly;
 * do not reorder the read/write calls.
 *
 * NOTE(review): the implements clause appears as a raw type here; the generic
 * parameter (likely SortingCollection.Codec&lt;SAMRecord&gt;) seems to have been lost
 * in this paste — confirm against SortingCollection.
 */
public class BAMRecordCodec implements SortingCollection.Codec {
    // Stateless helper for packing/unpacking cigar elements.
    private final BinaryCigarCodec cigarCodec = new BinaryCigarCodec();
    // Needed to resolve reference names to/from reference indices.
    private final SAMFileHeader header;
    private OutputStream os;
    private InputStream is;
    // Wraps whichever of os/is is in use; set by setOutputStream()/setInputStream().
    private BinaryCodec binaryCodec;
    private BinaryTagCodec binaryTagCodec;

    public BAMRecordCodec(final SAMFileHeader header) {
        this.header = header;
    }

    // Returns a fresh codec sharing the same header but with no stream attached,
    // as required by the SortingCollection.Codec contract.
    public BAMRecordCodec clone() {
        BAMRecordCodec other = new BAMRecordCodec(this.header);
        return other;
    }


    /** Sets the output stream that records will be written to. */
    public void setOutputStream(final OutputStream os) {
        this.os = os;
        this.binaryCodec = new BinaryCodec(this.os);
        this.binaryTagCodec = new BinaryTagCodec(this.binaryCodec);
    }

    /** Sets the input stream that records will be read from. */
    public void setInputStream(final InputStream is) {
        this.is = is;
        this.binaryCodec = new BinaryCodec(this.is);
        this.binaryTagCodec = new BinaryTagCodec(this.binaryCodec);
    }

    /**
     * Write object to OutputStream.
     * The SAMRecord must have a header set into it so reference indices can be resolved.
     *
     * @param alignment what to write
     */
    public void encode(final SAMRecord alignment) {
        // Compute block size, as it is the first element of the file representation of SAMRecord
        final int readLength = alignment.getReadLength();

        final int cigarLength = alignment.getCigarLength();

        int blockSize = BAMFileConstants.FIXED_BLOCK_SIZE + alignment.getReadNameLength() + 1 + // null terminated
                cigarLength * 4 +
                (readLength + 1) / 2 + // 2 bases per byte
                readLength;

        // If the record still carries its original binary tag block, its size is known
        // without decoding the tags; otherwise sum the encoded size of each attribute.
        final int attributesSize = alignment.getAttributesBinarySize();
        if (attributesSize != -1) {
            blockSize += attributesSize;
        } else {
            if (alignment.getAttributes() != null) {
                for (final Map.Entry attribute : alignment.getAttributes()) {
                    blockSize += (BinaryTagCodec.getTagSize(attribute.getValue()));
                }
            }
        }

        // Indexing bin: reuse a precomputed bin if present, otherwise derive it from the
        // 0-based alignment interval. Unmapped records (negative reference index) get bin 0.
        int indexBin = 0;
        if (alignment.getReferenceIndex(header) >= 0) {
            if (alignment.getIndexingBin() != null) {
                indexBin = alignment.getIndexingBin();
            } else {
                indexBin = SAMUtils.reg2bin(alignment.getAlignmentStart() - 1,
                        alignment.getAlignmentEnd() - 1);
            }
        }

        // Blurt out the elements
        this.binaryCodec.writeInt(blockSize);
        this.binaryCodec.writeInt(alignment.getReferenceIndex(header));
        // 0-based!!  (SAMRecord coordinates are 1-based; BAM stores 0-based.)
        this.binaryCodec.writeInt(alignment.getAlignmentStart() - 1);
        // +1 for the null terminator that getReadNameLength() excludes.
        this.binaryCodec.writeUByte((short) (alignment.getReadNameLength() + 1));
        this.binaryCodec.writeUByte((short) alignment.getMappingQuality());
        this.binaryCodec.writeUShort(indexBin);
        this.binaryCodec.writeUShort(cigarLength);
        this.binaryCodec.writeUShort(alignment.getFlags());
        this.binaryCodec.writeInt(alignment.getReadLength());
        this.binaryCodec.writeInt(alignment.getMateReferenceIndex(header));
        this.binaryCodec.writeInt(alignment.getMateAlignmentStart() - 1);
        this.binaryCodec.writeInt(alignment.getInferredInsertSize());
        // Fast path: if the variable-length portion is unchanged since it was read,
        // copy it verbatim instead of re-encoding each field.
        final byte[] variableLengthBinaryBlock = alignment.getVariableBinaryRepresentation();
        if (variableLengthBinaryBlock != null) {
            this.binaryCodec.writeBytes(variableLengthBinaryBlock);
        } else {
            this.binaryCodec.writeString(alignment.getReadName(), false, true);
            final int[] binaryCigar = cigarCodec.encode(alignment.getCigar());
            for (final int cigarElement : binaryCigar) {
                // Assumption that this will fit into an integer, despite the fact
                // that it is specced as a uint.
                this.binaryCodec.writeInt(cigarElement);
            }
            this.binaryCodec.writeBytes(SAMUtils.bytesToCompressedBases(alignment.getReadBases()));
            this.binaryCodec.writeBytes(alignment.getBaseQualities());
            if (alignment.getAttributes() != null) {
                for (final Map.Entry attribute : alignment.getAttributes()) {
                    this.binaryTagCodec.writeTag(attribute.getKey(), attribute.getValue());
                }
            }
        }
    }

    /**
     * Read the next record from the input stream and convert into a java object.
     *
     * @return null if no more records. Should throw exception if EOF is encountered in the middle of
     * a record.
     */
    public SAMRecord decode() {
        int recordLength = 0;
        try {
            recordLength = this.binaryCodec.readInt();
        }
        catch (RuntimeEOFException e) {
            // Clean EOF at a record boundary means no more records.
            return null;
        }

        if (recordLength < BAMFileConstants.FIXED_BLOCK_SIZE ||
                recordLength > BAMFileConstants.MAXIMUM_RECORD_LENGTH) {
            throw new SAMFormatException("Invalid record length: " + recordLength);
        }

        // Fixed-size fields, in exact on-disk order. BAM stores 0-based coordinates;
        // +1 converts to the 1-based convention used by SAMRecord.
        final int referenceID = this.binaryCodec.readInt();
        final int coordinate = this.binaryCodec.readInt() + 1;
        final short readNameLength = this.binaryCodec.readUByte();
        final short mappingQuality = this.binaryCodec.readUByte();
        final int bin = this.binaryCodec.readUShort();
        final int cigarLen = this.binaryCodec.readUShort();
        final int flags = this.binaryCodec.readUShort();
        final int readLen = this.binaryCodec.readInt();
        final int mateReferenceID = this.binaryCodec.readInt();
        final int mateCoordinate = this.binaryCodec.readInt() + 1;
        final int insertSize = this.binaryCodec.readInt();
        // Remainder of the record (name/cigar/bases/quals/tags) is kept packed;
        // BAMRecord decodes it lazily.
        final byte[] restOfRecord = new byte[recordLength - BAMFileConstants.FIXED_BLOCK_SIZE];
        this.binaryCodec.readBytes(restOfRecord);
        final BAMRecord ret = new BAMRecord(header, referenceID, coordinate, readNameLength, mappingQuality,
                bin, cigarLen, flags, readLen, mateReferenceID, mateCoordinate, insertSize, restOfRecord);
        ret.setHeader(header);
        return ret;
    }
}
+*/ +package edu.mit.broad.sam; + +import java.nio.ByteBuffer; + +/** + * Converter between binary and text CIGAR representation. + */ +class BinaryCigarCodec { + private static final BinaryCigarCodec singleton = new BinaryCigarCodec(); + + /** + * It is not necssary to get the singleton but it is preferrable to use the same one + * over and over vs. creating a new object for each BAMRecord. + */ + static BinaryCigarCodec getSingleton() { + return singleton; + } + + int[] encode(final Cigar cigar) { + if (cigar.numCigarElements() == 0) { + return new int[0]; + } + + // Binary rep can be no longer than 1/2 of text rep + // Although this is documented as uint, I think lengths will never get that long, + // and it's a pain in Java. + final int[] binaryCigar = new int[cigar.numCigarElements()]; + int binaryCigarLength = 0; + for (int i = 0; i < cigar.numCigarElements(); ++i) { + final CigarElement cigarElement = cigar.getCigarElement(i); + final int op = CigarOperator.enumToBinary(cigarElement.getOperator()); + binaryCigar[binaryCigarLength++] = cigarElement.getLength() << 4 | op; + } + return binaryCigar; + } + + Cigar decode(final ByteBuffer binaryCigar) { + final Cigar ret = new Cigar(); + while (binaryCigar.hasRemaining()) { + final int cigarette = binaryCigar.getInt(); + ret.add(binaryCigarToCigarElement(cigarette)); + } + return ret; + } + + Cigar decode(final int[] binaryCigar) { + final Cigar ret = new Cigar(); + for (final int cigarette : binaryCigar) { + ret.add(binaryCigarToCigarElement(cigarette)); + } + return ret; + } + + private static CigarElement binaryCigarToCigarElement(final int cigarette) { + final int binaryOp = cigarette & 0xf; + final int length = cigarette >> 4; + return new CigarElement(length, CigarOperator.binaryToEnum(binaryOp)); + } +} diff --git a/lib/edu/mit/broad/sam/BinaryTagCodec.java b/lib/edu/mit/broad/sam/BinaryTagCodec.java new file mode 100644 index 0000000000..fbb8711c50 --- /dev/null +++ 
b/lib/edu/mit/broad/sam/BinaryTagCodec.java @@ -0,0 +1,211 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam; + +import edu.mit.broad.sam.util.BinaryCodec; +import edu.mit.broad.sam.util.RuntimeEOFException; + +import java.util.Map; +import java.util.Collection; +import java.util.ArrayList; + +/** + * Parse & produce tag section of alignment record in BAM file. + */ +class BinaryTagCodec { + // Size of the fixed part of the binary representation of a tag, + // i.e. the number of bytes occupied by the tag name and tag type fields. + private static final int FIXED_TAG_SIZE = 3; + + private static final long MAX_INT = Integer.MAX_VALUE; + private static final long MAX_UINT = (MAX_INT + 1) * 2; + private static final long MAX_SHORT = Short.MAX_VALUE; + private static final long MAX_USHORT = (MAX_SHORT + 1) * 2; + private static final long MAX_BYTE = Byte.MAX_VALUE; + private static final long MAX_UBYTE = (MAX_BYTE + 1) * 2; + + final BinaryCodec binaryCodec; + + BinaryTagCodec(final BinaryCodec binaryCodec) { + this.binaryCodec = binaryCodec; + } + + private static int getBinaryValueSize(final Object attributeValue) { + switch (getTagValueType(attributeValue)) { + case 'Z': + return ((String)attributeValue).length() + 1; + case 'A': + return 1; + case 'I': + case 'i': + return 4; + case 's': + case 'S': + return 2; + case 'c': + case 'C': + return 1; + case 'f': + return 4; + case 'H': + final byte[] byteArray = (byte[])attributeValue; + return byteArray.length * 2 + 1; + default: + throw new IllegalArgumentException("When writing BAM, unrecognized tag type " + + 
attributeValue.getClass().getName()); + } + } + + static int getTagSize(final Object value) { + return FIXED_TAG_SIZE + getBinaryValueSize(value); + } + + static char getTagValueType(final Object value) { + if (value.getClass().equals(String.class)) { + return 'Z'; + } else if (value.getClass().equals(Character.class)) { + return 'A'; + } else if (value.getClass().equals(Integer.class)) { + return getIntegerType((Integer)value); + } else if (value.getClass().equals(Long.class)) { + return getIntegerType((Long)value); + } else if (value.getClass().equals(Float.class)) { + return 'f'; + } else if (value.getClass().isArray() && value.getClass().getComponentType().equals(Byte.class)) { + return 'H'; + } else { + throw new IllegalArgumentException("When writing BAM, unrecognized tag type " + + value.getClass().getName()); + } + } + + static private char getIntegerType(final long val) { + if (val > MAX_UINT) { + throw new IllegalArgumentException("Integer attribute value too large to be encoded in BAM"); + } + if (val > MAX_INT) { + return 'I'; + } + if (val > MAX_USHORT) { + return 'i'; + } + if (val > MAX_SHORT) { + return 'S'; + } + if (val > MAX_UBYTE) { + return 's'; + } + if (val > MAX_BYTE) { + return 'C'; + } + if (val >= Byte.MIN_VALUE) { + return 'c'; + } + if (val >= Short.MIN_VALUE) { + return 's'; + } + if (val >= Integer.MIN_VALUE) { + return 'i'; + } + throw new IllegalArgumentException("Integer attribute value too negative to be encoded in BAM"); + } + + void writeTag(final String key, final Object value) { + assert(key.length() == 2); + binaryCodec.writeString(key, false, false); + final char tagValueType = getTagValueType(value); + binaryCodec.writeByte(tagValueType); + + switch (tagValueType) { + case 'Z': + binaryCodec.writeString((String)value, false, true); + break; + case 'A': + binaryCodec.writeByte(((Character)value)); + break; + case 'I': + binaryCodec.writeUInt((Long)value); + break; + case 'i': + binaryCodec.writeInt((Integer)value); + break; 
+ case 's': + binaryCodec.writeShort(((Integer)value).shortValue()); + break; + case 'S': + binaryCodec.writeUShort((Integer)value); + break; + case 'c': + binaryCodec.writeByte((Integer)value); + break; + case 'C': + binaryCodec.writeUByte(((Integer)value).shortValue()); + break; + case 'f': + binaryCodec.writeFloat((Float)value); + break; + case 'H': + final byte[] byteArray = (byte[])value; + binaryCodec.writeString(SAMUtils.bytesToHexString(byteArray), false, true); + break; + default: + throw new IllegalArgumentException("When writing BAM, unrecognized tag type " + + value.getClass().getName()); + } + } + + /** + * Reads tags from the binaryCodec passed in the ctor + * @param tagCollection tags are stored in this Map + */ + void readTags(final Map tagCollection) { + while (true) { + final String key; + try { + // Only way to know at end is when out of input + key = binaryCodec.readString(2); + } catch (RuntimeEOFException e) { + break; + } + final byte tagType = binaryCodec.readByte(); + final Object value = readValue(tagType); + tagCollection.put(key, value); + } + } + + private Object readValue(final byte tagType) { + switch (tagType) { + case 'Z': + return binaryCodec.readNullTerminatedString(); + case 'A': + return (char)binaryCodec.readByte(); + case 'I': + return binaryCodec.readUInt(); + case 'i': + return binaryCodec.readInt(); + case 's': + return (int)binaryCodec.readShort(); + case 'S': + return binaryCodec.readUShort(); + case 'c': + return (int)binaryCodec.readByte(); + case 'C': + return (int)binaryCodec.readUByte(); + case 'f': + return binaryCodec.readFloat(); + case 'H': + final String hexRep = binaryCodec.readNullTerminatedString(); + return SAMUtils.hexStringToBytes(hexRep); + default: + throw new SAMFormatException("Unrecognized tag type: " + (char)tagType); + } + } + +} diff --git a/lib/edu/mit/broad/sam/Cigar.java b/lib/edu/mit/broad/sam/Cigar.java new file mode 100644 index 0000000000..fa98526573 --- /dev/null +++ 
b/lib/edu/mit/broad/sam/Cigar.java @@ -0,0 +1,93 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam; + +import java.util.List; +import java.util.ArrayList; +import java.util.Collections; + +/** + * A list of CigarElements, which describes how a read aligns with the reference. + * E.g. the Cigar string 10M1D25M means + * * match or mismatch for 10 bases + * * deletion of 1 base + * * match or mismatch for 25 bases + */ +public class Cigar { + private final List cigarElements = new ArrayList(); + + public Cigar() { + } + + public Cigar(final List cigarElements) { + this.cigarElements.addAll(cigarElements); + } + + public List getCigarElements() { + return Collections.unmodifiableList(cigarElements); + } + + public CigarElement getCigarElement(final int i) { + return cigarElements.get(i); + } + + public void add(final CigarElement cigarElement) { + cigarElements.add(cigarElement); + } + + public int numCigarElements() { + return cigarElements.size(); + } + + public int getReferenceLength() { + int length = 0; + for (CigarElement element : cigarElements) { + switch (element.getOperator()) { + case M: + case D: + case N: + length += element.getLength(); + } + } + return length; + } + + public int getPaddedReferenceLength() { + int length = 0; + for (CigarElement element : cigarElements) { + switch (element.getOperator()) { + case M: + case D: + case N: + case P: + length += element.getLength(); + } + } + return length; + } + + @Override + public boolean equals(final Object o) { + if (this == o) return true; + if (!(o instanceof Cigar)) return false; + + final Cigar cigar = (Cigar) o; + + if 
(cigarElements != null ? !cigarElements.equals(cigar.cigarElements) : cigar.cigarElements != null) + return false; + + return true; + } + + @Override + public int hashCode() { + return cigarElements != null ? cigarElements.hashCode() : 0; + } +} diff --git a/lib/edu/mit/broad/sam/CigarElement.java b/lib/edu/mit/broad/sam/CigarElement.java new file mode 100644 index 0000000000..eec99106b2 --- /dev/null +++ b/lib/edu/mit/broad/sam/CigarElement.java @@ -0,0 +1,52 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam; + +/** + * One component of a cigar string. The component comprises the operator, and the number of bases to which + * the operator applies. + */ +public class CigarElement { + private final int length; + private final CigarOperator operator; + + public CigarElement(final int length, final CigarOperator operator) { + this.length = length; + this.operator = operator; + } + + public int getLength() { + return length; + } + + public CigarOperator getOperator() { + return operator; + } + + @Override + public boolean equals(final Object o) { + if (this == o) return true; + if (!(o instanceof CigarElement)) return false; + + final CigarElement that = (CigarElement) o; + + if (length != that.length) return false; + if (operator != that.operator) return false; + + return true; + } + + @Override + public int hashCode() { + int result = length; + result = 31 * result + (operator != null ? 
operator.hashCode() : 0); + return result; + } +} diff --git a/lib/edu/mit/broad/sam/CigarOperator.java b/lib/edu/mit/broad/sam/CigarOperator.java new file mode 100644 index 0000000000..7445455e23 --- /dev/null +++ b/lib/edu/mit/broad/sam/CigarOperator.java @@ -0,0 +1,113 @@ +package edu.mit.broad.sam;/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ + +/** + * The operators that can appear in a cigar string. + */ +public enum CigarOperator { + M, + I, + D, + N, + S, + H, + P, + C; // I don't know what C means, but it is in the BAM spec + + // Readable synonyms of the above enums + public static final CigarOperator MATCH_OR_MISMATCH = M; + public static final CigarOperator INSERTION = I; + public static final CigarOperator DELETION = D; + public static final CigarOperator SKIPPED_REGION = N; + public static final CigarOperator SOFT_CLIP = S; + public static final CigarOperator HARD_CLIP = H; + public static final CigarOperator PADDING = P; + + // Representation of CigarOperator in BAM file + private static final byte OP_M = 0; + private static final byte OP_I = 1; + private static final byte OP_D = 2; + private static final byte OP_N = 3; + private static final byte OP_S = 4; + private static final byte OP_H = 5; + private static final byte OP_P = 6; + private static final byte OP_C = 7; + + + + public static CigarOperator characterToEnum(final int b) { + switch (b) { + case 'M': + return M; + case 'I': + return I; + case 'D': + return D; + case 'N': + return N; + case 'S': + return S; + case 'H': + return H; + case 'P': + return P; + case 'C': + return C; + default: + throw new IllegalArgumentException("Unrecognized 
CigarOperator: " + b); + } + } + + public static CigarOperator binaryToEnum(final int i) { + switch(i) { + case OP_M: + return M; + case OP_I: + return I; + case OP_D: + return D; + case OP_N: + return N; + case OP_S: + return S; + case OP_H: + return H; + case OP_P: + return P; + case OP_C: + return C; + default: + throw new IllegalArgumentException("Unrecognized CigarOperator: " + i); + } + } + + public static int enumToBinary(final CigarOperator e) { + switch(e) { + case M: + return OP_M; + case I: + return OP_I; + case D: + return OP_D; + case N: + return OP_N; + case S: + return OP_S; + case H: + return OP_H; + case P: + return OP_P; + case C: + return OP_C; + default: + throw new IllegalArgumentException("Unrecognized CigarOperator: " + e); + } + } +} diff --git a/lib/edu/mit/broad/sam/NotPrimarySkippingIterator.java b/lib/edu/mit/broad/sam/NotPrimarySkippingIterator.java new file mode 100644 index 0000000000..7191cc14dc --- /dev/null +++ b/lib/edu/mit/broad/sam/NotPrimarySkippingIterator.java @@ -0,0 +1,37 @@ +package edu.mit.broad.sam; + +import edu.mit.broad.sam.util.CloseableIterator; +import edu.mit.broad.sam.util.NonDestructiveIterator; + +/** + * Wrapper around SAMRecord iterator that skips over non-primary elements. 
+ */ +public class NotPrimarySkippingIterator { + private final NonDestructiveIterator> it; + + public NotPrimarySkippingIterator(final CloseableIterator underlyingIt) { + it = new NonDestructiveIterator>(underlyingIt); + skipAnyNotprimary(); + } + + public boolean hasCurrent() { + return it.hasCurrent(); + } + + public SAMRecord getCurrent() { + assert(hasCurrent()); + return it.getCurrent(); + } + + public boolean advance() { + it.advance(); + skipAnyNotprimary(); + return hasCurrent(); + } + + private void skipAnyNotprimary() { + while (it.hasCurrent() && it.getCurrent().getNotPrimaryAlignmentFlag()) { + it.advance(); + } + } +} diff --git a/lib/edu/mit/broad/sam/SAMFileHeader.java b/lib/edu/mit/broad/sam/SAMFileHeader.java new file mode 100644 index 0000000000..95d39f1202 --- /dev/null +++ b/lib/edu/mit/broad/sam/SAMFileHeader.java @@ -0,0 +1,191 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.sam; + + +import java.util.*; + +/** + * Header information from a SAM file. 
+ */ +public class SAMFileHeader +{ + public static final String VERSION_TAG = "VN"; + public static final String CURRENT_VERSION = "1.0"; + + public enum SortOrder { + + unsorted(null), + queryname(SAMRecordQueryNameComparator.class), + coordinate(SAMRecordCoordinateComparator.class); + + private Class comparator; + + SortOrder(final Class comparatorClass) { + this.comparator = comparatorClass; + } + + public Class getComparator() { + return comparator; + } + } + + public enum GroupOrder { + none, query, reference + } + + private final Map mAttributes = + new HashMap(); + private List mSequences = + new ArrayList(); + private List mReadGroups = + new ArrayList(); + private final List mProgramRecords = new ArrayList(); + private final Map mSequenceMap = + new HashMap(); + private final Map mReadGroupMap = + new HashMap(); + private Map mProgramRecordMap = new HashMap(); + + public SAMFileHeader() { + setAttribute(VERSION_TAG, CURRENT_VERSION); + } + + public String getVersion() { + return (String) getAttribute("VN"); + } + + public String getCreator() { + return (String) getAttribute("CR"); + } + + public Object getAttribute(final String key) { + return mAttributes.get(key); + } + + public Set> getAttributes() { + return mAttributes.entrySet(); + } + + public List getSequences() { + return mSequences; + } + + public List getReadGroups() { + return mReadGroups; + } + + public SAMSequenceRecord getSequence(final String name) { + return mSequenceMap.get(name); + } + + public SAMReadGroupRecord getReadGroup(final String name) { + return mReadGroupMap.get(name); + } + + public void setSequences(final List list) { + mSequences = list; + mSequenceMap.clear(); + int index = 0; + for (final SAMSequenceRecord record : list) { + record.setSequenceIndex(index++); + mSequenceMap.put(record.getSequenceName(), record); + } + } + + public SAMSequenceRecord getSequence(final int sequenceIndex) { + if (sequenceIndex < 0 || sequenceIndex >= mSequences.size()) { + return null; + } + 
return mSequences.get(sequenceIndex); + } + + public int getSequenceIndex(final String sequenceName) { + final SAMSequenceRecord record = mSequenceMap.get(sequenceName); + if (record == null) { + return -1; + } + return record.getSequenceIndex(); + } + + public void setAttribute(final String key, final String value) { + mAttributes.put(key, value); + } + + public void setReadGroups(final List readGroups) { + mReadGroups = readGroups; + mReadGroupMap.clear(); + for (final SAMReadGroupRecord readGroupRecord : readGroups) { + mReadGroupMap.put(readGroupRecord.getReadGroupId(), readGroupRecord); + } + } + + public List getProgramRecords() { + return Collections.unmodifiableList(mProgramRecords); + } + + public void addProgramRecord(final SAMProgramRecord programRecord) { + this.mProgramRecords.add(programRecord); + this.mProgramRecordMap.put(programRecord.getProgramGroupId(), programRecord); + } + + public SAMProgramRecord getProgramRecord(final String name) { + return this.mProgramRecordMap.get(name); + } + + public SortOrder getSortOrder() { + if (getAttribute("SO") == null) { + return SortOrder.unsorted; + } + return SortOrder.valueOf((String)getAttribute("SO")); + } + + public void setSortOrder(final SortOrder so) { + setAttribute("SO", so.name()); + } + + public GroupOrder getGroupOrder() { + if (getAttribute("GO") == null) { + return GroupOrder.none; + } + return GroupOrder.valueOf((String)getAttribute("GO")); + } + + public void setGroupOrder(final GroupOrder go) { + setAttribute("GO", go.name()); + } + + @Override + public boolean equals(final Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + final SAMFileHeader that = (SAMFileHeader) o; + + if (mAttributes != null ? !mAttributes.equals(that.mAttributes) : that.mAttributes != null) return false; + if (mProgramRecords != null ? !mProgramRecords.equals(that.mProgramRecords) : that.mProgramRecords != null) + return false; + if (mReadGroups != null ? 
!mReadGroups.equals(that.mReadGroups) : that.mReadGroups != null) return false; + if (mSequences != null ? !mSequences.equals(that.mSequences) : that.mSequences != null) return false; + + return true; + } + + @Override + public int hashCode() { + int result = mAttributes != null ? mAttributes.hashCode() : 0; + result = 31 * result + (mSequences != null ? mSequences.hashCode() : 0); + result = 31 * result + (mReadGroups != null ? mReadGroups.hashCode() : 0); + result = 31 * result + (mReadGroupMap != null ? mReadGroupMap.hashCode() : 0); + result = 31 * result + (mProgramRecords != null ? mProgramRecords.hashCode() : 0); + return result; + } +} diff --git a/lib/edu/mit/broad/sam/SAMFileReader.java b/lib/edu/mit/broad/sam/SAMFileReader.java new file mode 100644 index 0000000000..8c0e449191 --- /dev/null +++ b/lib/edu/mit/broad/sam/SAMFileReader.java @@ -0,0 +1,213 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.sam; + + +import edu.mit.broad.sam.util.CloseableIterator; +import edu.mit.broad.sam.util.RuntimeIOException; +import edu.mit.broad.sam.util.BlockCompressedInputStream; + +import java.io.*; + + +/** + * Class for reading and querying SAM/BAM files. 
+ */ +public class SAMFileReader implements Iterable +{ + private boolean mIsBinary = false; + private BAMFileIndex mFileIndex = null; + private ReaderImplementation mReader = null; + + public enum ValidationStringency { + STRICT, // Do the right thing, throw an exception if something looks wrong + LENIENT, // Emit warnings but keep going if possible + SILENT; // Like LENIENT, only don't emit warning messages + + public static ValidationStringency DEFAULT_STRINGENCY = STRICT; + } + + /** + * Internal interface for SAM/BAM file reader implementations. + * Implemented as an abstract class to enforce better access control. + */ + static abstract class ReaderImplementation { + abstract SAMFileHeader getFileHeader(); + abstract CloseableIterator getIterator(); + abstract CloseableIterator query(String sequence, int start, int end, boolean contained); + abstract void close(); + // If true, emit warnings about format errors rather than throwing exceptions; + abstract void setValidationStringency(final ValidationStringency validationStringency); + } + + + public SAMFileReader(final InputStream stream) { + this(stream, false); + } + + public SAMFileReader(final File file) { + this(file, null, false); + } + + public SAMFileReader(final File file, final File indexFile) { + this(file, indexFile, false); + } + + /** + * Read a SAM or BAM file + * @param stream input SAM or BAM + * @param eagerDecode if true, decode SAM record entirely when reading it + */ + public SAMFileReader(final InputStream stream, final boolean eagerDecode) { + init(stream, eagerDecode); + } + + /** + * Read a SAM or BAM file, possibly with an index file if present + * @param file where to read from + * @param eagerDecode if true, decode SAM record entirely when reading it + */ + public SAMFileReader(final File file, final boolean eagerDecode) { + init(file, null, eagerDecode); + } + + /** + * Read a SAM or BAM file, possibly with an index file + * @param file where to read from + * @param indexFile 
location of index file, or null in order to use the default index file (if present) + * @param eagerDecode eagerDecode if true, decode SAM record entirely when reading it + */ + public SAMFileReader(final File file, final File indexFile, final boolean eagerDecode){ + init(file, indexFile, eagerDecode); + } + + public void close() { + if (mReader != null) { + mReader.close(); + } + if (mFileIndex != null) { + mFileIndex.close(); + } + mReader = null; + mFileIndex = null; + } + + public boolean isBinary() { + return mIsBinary; + } + + public boolean hasIndex() { + return (mFileIndex != null); + } + + public SAMFileHeader getFileHeader() { + return mReader.getFileHeader(); + } + + public void setValidationStringency(final ValidationStringency validationStringency) { + mReader.setValidationStringency(validationStringency); + } + + public CloseableIterator iterator() { + return mReader.getIterator(); + } + + public CloseableIterator query(final String sequence, final int start, final int end, final boolean contained) { + return mReader.query(sequence, start, end, contained); + } + + public CloseableIterator queryOverlapping(final String sequence, final int start, final int end) { + return query(sequence, start, end, false); + } + + public CloseableIterator queryContained(final String sequence, final int start, final int end) { + return query(sequence, start, end, true); + } + + private void init(final InputStream stream, final boolean eagerDecode) { + + try { + final BufferedInputStream bufferedStream = toBufferedStream(stream); + if (isBAMFile(bufferedStream)) { + mIsBinary = true; + mReader = new BAMFileReader(bufferedStream, eagerDecode); + } else if (isSAMFile(bufferedStream)) { + mIsBinary = false; + mReader = new SAMTextReader(bufferedStream); + } else { + throw new SAMFormatException("Unrecognized file format"); + } + } catch (IOException e) { + throw new RuntimeIOException(e); + } + } + + private void init(final File file, File indexFile, final boolean 
eagerDecode) { + + try { + final BufferedInputStream bufferedStream = + new BufferedInputStream(new FileInputStream(file)); + if (isBAMFile(bufferedStream)) { + bufferedStream.close(); + mIsBinary = true; + final BAMFileReader reader = new BAMFileReader(file, eagerDecode); + mReader = reader; + if (indexFile == null) { + indexFile = findIndexFile(file); + } + if (indexFile != null) { + mFileIndex = new BAMFileIndex(indexFile); + reader.setFileIndex(mFileIndex); + } + } else if (isSAMFile(bufferedStream)) { + if (indexFile != null) { + bufferedStream.close(); + throw new RuntimeException("Cannot use index file with textual SAM file"); + } + mIsBinary = false; + mReader = new SAMTextReader(bufferedStream, file); + } else { + bufferedStream.close(); + throw new SAMFormatException("Unrecognized file format"); + } + } catch (IOException e) { + throw new RuntimeIOException(e); + } + } + + private File findIndexFile(final File dataFile) { + final File indexFile = + new File(dataFile.getParent(), dataFile.getName() + ".bai"); + if (indexFile.exists()) { + return indexFile; + } else { + return null; + } + } + + private boolean isBAMFile(final InputStream stream) + throws IOException { + return BlockCompressedInputStream.isValidFile(stream); + } + + private boolean isSAMFile(final InputStream stream) { + // For now, assume every non-binary file is a SAM text file. 
+ return true; + } + + private BufferedInputStream toBufferedStream(final InputStream stream) { + if (stream instanceof BufferedInputStream) { + return (BufferedInputStream) stream; + } else { + return new BufferedInputStream(stream); + } + } +} diff --git a/lib/edu/mit/broad/sam/SAMFileWriter.java b/lib/edu/mit/broad/sam/SAMFileWriter.java new file mode 100644 index 0000000000..2d57854b5b --- /dev/null +++ b/lib/edu/mit/broad/sam/SAMFileWriter.java @@ -0,0 +1,23 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam; + +/** + * Interface for SAMText and BAM file writers. Clients need not care which they write to, + * once the object is constructed. + */ +public interface SAMFileWriter { + void addAlignment(SAMRecord alignment); + + /** + * Must be called or file will likely be defective. + */ + void close(); +} diff --git a/lib/edu/mit/broad/sam/SAMFileWriterFactory.java b/lib/edu/mit/broad/sam/SAMFileWriterFactory.java new file mode 100644 index 0000000000..3d75948557 --- /dev/null +++ b/lib/edu/mit/broad/sam/SAMFileWriterFactory.java @@ -0,0 +1,64 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam; + +import java.io.File; + +/** + * Create a SAMFileWriter for writing SAM or BAM. 
+ */ +public class SAMFileWriterFactory { + + /** + * Create a BAMFileWriter that is ready to receive SAMRecords + * @param header entire header. Sort order is determined by the sortOrder property of this arg + * @param presorted if true, SAMRecords must be added to the SAMFileWriter in order that agrees with header.sortOrder + * @param outputFile where to write the output. + * @return + */ + public SAMFileWriter makeBAMWriter(final SAMFileHeader header, final boolean presorted, final File outputFile) { + final BAMFileWriter ret = new BAMFileWriter(outputFile); + ret.setSortOrder(header.getSortOrder(), presorted); + ret.setHeader(header); + return ret; + } + + /** + * Create a SAMTextWriter that is ready to receive SAMRecords + * @param header entire header. Sort order is determined by the sortOrder property of this arg + * @param presorted if true, SAMRecords must be added to the SAMFileWriter in order that agrees with header.sortOrder + * @param outputFile where to write the output. + * @return + */ + public SAMFileWriter makeSAMWriter(final SAMFileHeader header, final boolean presorted, final File outputFile) { + final SAMTextWriter ret = new SAMTextWriter(outputFile); + ret.setSortOrder(header.getSortOrder(), presorted); + ret.setHeader(header); + return ret; + } + + /** + * Create either a SAM or a BAM writer based on examination of the outputFile + * @param header entire header. Sort order is determined by the sortOrder property of this arg + * @param presorted presorted if true, SAMRecords must be added to the SAMFileWriter in order that agrees with header.sortOrder + * @param outputFile + * @return outputFile where to write the output. 
Must end with .sam or .bam + */ + public SAMFileWriter makeSAMOrBAMWriter(final SAMFileHeader header, final boolean presorted, final File outputFile) { + final String filename = outputFile.getName(); + if (filename.endsWith(".bam")) { + return makeBAMWriter(header, presorted, outputFile); + } + if (filename.endsWith(".sam")) { + return makeSAMWriter(header, presorted, outputFile); + } + throw new IllegalArgumentException("SAM/BAM file should end with .sam or .bam: " + outputFile); + } +} diff --git a/lib/edu/mit/broad/sam/SAMFileWriterImpl.java b/lib/edu/mit/broad/sam/SAMFileWriterImpl.java new file mode 100644 index 0000000000..78521af447 --- /dev/null +++ b/lib/edu/mit/broad/sam/SAMFileWriterImpl.java @@ -0,0 +1,157 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam; + +import edu.mit.broad.sam.util.SortingCollection; + +import java.io.File; +import java.io.StringWriter; + +/** + * Base class for implementing SAM writer with any underlying format. + * Mostly this manages accumulation & sorting of SAMRecords when appropriate, + * and produces the text version of the header, since that seems to be a popular item + * in both text and binary file formats. + */ +abstract class SAMFileWriterImpl implements SAMFileWriter +{ + private static final int MAX_RECORDS_IN_RAM = 500000; + private SAMFileHeader.SortOrder sortOrder; + private SAMFileHeader header; + private SortingCollection alignmentSorter; + + // If true, records passed to addAlignment are already in the order specified by sortOrder + private boolean presorted; + + // These two fields are for validating presorted records. 
+ private SAMRecord prevAlignment; + private SAMRecordComparator presortedComparator; + + /** + * Must be called before calling writeHeader(). SortOrder value in the header passed + * to writeHeader() is ignored. If setSortOrder is not called, default is SortOrder.unsorted + * @param sortOrder + */ + public void setSortOrder(final SAMFileHeader.SortOrder sortOrder, final boolean presorted) { + if (header != null) { + throw new IllegalStateException("Cannot call SAMFileWriterImpl.setSortOrder after setHeader for " + + getFilename()); + } + this.sortOrder = sortOrder; + this.presorted = presorted; + } + + /** + * Must be called before addAlignment. + * @param header + */ + public void setHeader(final SAMFileHeader header) + { + this.header = header; + if (sortOrder == null) { + sortOrder = SAMFileHeader.SortOrder.unsorted; + } + header.setSortOrder(sortOrder); + final StringWriter headerTextBuffer = new StringWriter(); + new SAMTextHeaderCodec().encode(headerTextBuffer, header); + final String headerText = headerTextBuffer.toString(); + + writeHeader(headerText); + + if (presorted) { + if (sortOrder.equals(SAMFileHeader.SortOrder.unsorted)) { + presorted = false; + } else { + presortedComparator = makeComparator(); + } + } else if (!sortOrder.equals(SAMFileHeader.SortOrder.unsorted)) { + alignmentSorter = SortingCollection.newInstance(SAMRecord.class, + new BAMRecordCodec(header), makeComparator(), MAX_RECORDS_IN_RAM); + } + } + + protected SAMFileHeader getHeader() { + return header; + } + + private SAMRecordComparator makeComparator() { + switch (sortOrder) { + case coordinate: + return new SAMRecordCoordinateComparator(header); + case queryname: + return new SAMRecordQueryNameComparator(); + case unsorted: + return null; + } + throw new IllegalStateException("sortOrder should not be null"); + } + + public void addAlignment(final SAMRecord alignment) + { + if (sortOrder.equals(SAMFileHeader.SortOrder.unsorted)) { + if 
(!header.getGroupOrder().equals(SAMFileHeader.GroupOrder.none)) { + throw new UnsupportedOperationException("GroupOrder " + header.getGroupOrder() + " is not supported"); + } + writeAlignment(alignment); + } else if (presorted) { + assertPresorted(alignment); + writeAlignment(alignment); + } else { + alignmentSorter.add(alignment); + } + } + + private void assertPresorted(final SAMRecord alignment) { + if (prevAlignment != null) { + if (presortedComparator.fileOrderCompare(prevAlignment, alignment) > 0) { + throw new IllegalArgumentException("Alignments added out of order in SAMFileWriterImpl.addAlignment for " + + getFilename() + ". Sort order is " + this.sortOrder + ". Offending records are at [" + + prevAlignment.getReferenceName() + ":" + prevAlignment.getAlignmentStart() + "] and [" + + alignment.getReferenceName() + ":" + alignment.getAlignmentStart() + "]"); + } + } + prevAlignment = alignment; + } + + public final void close() + { + if (alignmentSorter != null) { + for (final SAMRecord alignment : alignmentSorter) { + writeAlignment(alignment); + } + alignmentSorter.cleanup(); + } + finish(); + } + + /** + * Writes the record to disk. Sort order has been taken care of by the time + * this method is called. + * @param alignment + */ + abstract protected void writeAlignment(SAMRecord alignment); + + /** + * Write the header to disk. Header object is available via getHeader(). + * @param textHeader for convenience if the implementation needs it. + */ + abstract protected void writeHeader(String textHeader); + + /** + * Do any required flushing here. + */ + abstract protected void finish(); + + /** + * For producing error messages. + * @return Output filename, or null if there isn't one. 
+ */ + abstract protected String getFilename(); +} diff --git a/lib/edu/mit/broad/sam/SAMFormatException.java b/lib/edu/mit/broad/sam/SAMFormatException.java new file mode 100644 index 0000000000..f055d10758 --- /dev/null +++ b/lib/edu/mit/broad/sam/SAMFormatException.java @@ -0,0 +1,30 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam; + +/** + * Thrown when a SAM file being read (text or binary) looks bad. + */ +public class SAMFormatException extends RuntimeException { + public SAMFormatException() { + } + + public SAMFormatException(final String s) { + super(s); + } + + public SAMFormatException(final String s, final Throwable throwable) { + super(s, throwable); + } + + public SAMFormatException(final Throwable throwable) { + super(throwable); + } +} diff --git a/lib/edu/mit/broad/sam/SAMLocusIterator.java b/lib/edu/mit/broad/sam/SAMLocusIterator.java new file mode 100644 index 0000000000..e494d389a2 --- /dev/null +++ b/lib/edu/mit/broad/sam/SAMLocusIterator.java @@ -0,0 +1,308 @@ +package edu.mit.broad.sam; + +import edu.mit.broad.sam.util.CloseableIterator; +import edu.mit.broad.arachne.GenomeMask; + +import java.util.*; + +/** + * Iterator that traverses a SAM File, accumulating information on a per-locus basis + */ +public class SAMLocusIterator implements Iterable, CloseableIterator { + public static class LocusInfo { + protected final String chrom; + protected final int position; + protected final List bases = new ArrayList(100); + protected final List qualities = new ArrayList(100); + protected final List negativeStrandFlags = new ArrayList(100); + + LocusInfo(final String chrom, 
final int position) { + this.chrom = chrom; + this.position = position; + } + + public void add(final Byte readBase, final Byte baseQuality, final boolean strand) { + bases.add(readBase); + qualities.add(baseQuality); + negativeStrandFlags.add(strand); + } + + public String getChrom() { return chrom; } + public int getPosition() { return position; } + public List getBases() { return bases; } + public List getQualities() { return qualities; } + public List getNegativeStrandFlags() { return negativeStrandFlags; } + + public String getBasesAsString() { return bytesToString(bases); } + + private static String bytesToString(final List data) { + if (data == null || data.size() == 0) { + return ""; + } + + final char[] chars = new char[data.size()]; + for (int i = 0; i < data.size(); i++) { + chars[i] = (char) (data.get(i) & 0xFF); + } + return new String(chars); + } + } + + + + + private final CloseableIterator underlyingIterator; + private final NotPrimarySkippingIterator it; + private final LinkedList complete = new LinkedList(); + private final LinkedList accumulator = new LinkedList(); + + private boolean includeNonPfReads = false; + private boolean includeDuplicates = false; + private int qualityScoreCutoff = -Integer.MAX_VALUE; + + private GenomeMask mask; + private int lastContig = 0; + private int lastPosition = 0; + + private boolean finishedAlignedReads = false; + + + // this should probably take a SAM + public SAMLocusIterator(final CloseableIterator samIterator) { + this.underlyingIterator = samIterator; + this.it = new NotPrimarySkippingIterator(samIterator); + } + + public Iterator iterator() { + return this; + } + + public void close() { + this.underlyingIterator.close(); + } + + private boolean samHasMore() { + return !finishedAlignedReads && it.hasCurrent(); + } + public boolean hasNext() { + return ((complete.size() > 0) || (accumulator.size() > 0) || (samHasMore()) || hasRemainingMaskBases()); + } + + private boolean hasRemainingMaskBases() { + if 
(mask == null) return false; + + // if there are more contigs in the mask, by definition some of them must have + // marked bases otherwise if we're in the last contig, but we're not at the last marked position, + // there is also more in the mask + return (lastContig <= mask.getMaxContig() || + (lastContig == mask.getMaxContig() && lastPosition <= mask.get(lastContig).nextSetBit(lastPosition+1))); + } + + public LocusInfo next() { + + // if we don't have any completed entries to return, try and make some! + while(complete.size() == 0 && samHasMore()) { + final SAMRecord rec = it.getCurrent(); + final String cigar = rec.getCigarString(); + + // as soon as we hit our first non-aligned read, we can stop! + if (cigar.equals("*")) { + this.finishedAlignedReads = true; + continue; + } + + // skip dupe reads, if so requested + if (!isIncludeDuplicates() && rec.getDuplicateReadFlag()) { it.advance(); continue; } + + // skip non-PF reads, if so requested + if (!isIncludeNonPfReads() && rec.getReadFailsVendorQualityCheckFlag()) { it.advance(); continue; } + + // when we switch contigs, emit everything in the accumulator + if (accumulator.size() > 0 && !accumulator.getFirst().chrom.equals(rec.getReferenceName())) { + while (accumulator.size() > 0) { + popLocus(); + } + } + + // pop off things we're not going to accumulate more coverage at the locus in question + while(accumulator.size() > 0 && accumulator.getFirst().position < rec.getAlignmentStart()) { + popLocus(); + } + + // check that it's a non-gapped alignment for now! + // TODO: handle gapped and clipped alignments + if (!cigar.matches("[0-9]+M")) { + System.out.println("Cannot deal with clipped or gapped alignments. 
CIGAR="+cigar); + System.exit(1); + } + + // at this point, either the list is empty or the head should + // be the same position as the first base of the read + + // interpret the CIGAR string and add the base info + for(int j=0; j < rec.getReadBases().length; j++) { + // if the position is empty, initialize it + if (j > accumulator.size() - 1) { + accumulator.add(new LocusInfo(rec.getReferenceName(), rec.getAlignmentStart() + j)); + } + + // if the quality score cutoff is met, accumulate the base info + if (rec.getBaseQualities()[j] >= getQualityScoreCutoff()) { + accumulator.get(j).add(rec.getReadBases()[j], rec.getBaseQualities()[j], rec.getReadNegativeStrandFlag()); + } + } + + + it.advance(); + } + + // if we have nothing to return to the user, and we're at the end of the SAM iterator, + // push everything into the complete queue + if (complete.size() == 0 && !samHasMore()) { + while(accumulator.size() > 0) { + popLocus(); + } + } + + // if there are completed entries, return those + if (complete.size() > 0) { + return complete.removeFirst(); + } else { + + // In this case... we're past the last read from SAM so see if we can + // fill out any more (zero coverage) entries from the mask + LocusInfo zeroResult = null; + while (zeroResult == null && lastContig <= mask.getMaxContig()) { + final int nextbit = mask.get(lastContig).nextSetBit(lastPosition+1); + + // try the next contig + if (nextbit == -1) { + lastContig++; + lastPosition = 0; + } else { + lastPosition = nextbit; + zeroResult = new LocusInfo(contigToChrom[lastContig], lastPosition); + } + } + + return zeroResult; + } + } + + /** + * Pop the first entry from the LocusInfo accumulator into the complete queue. In addition, + * check the GenomeMask and if there are intervening mask positions between the last popped base and the one + * about to be popped, put those on the complete queue as well. 
+ */ + private void popLocus() { + final LocusInfo li = accumulator.removeFirst(); + + // fill in any gaps based on our genome mask + final int liContig = chromToContig.get(li.getChrom()); + + // if we're not on the same contig, fill in the rest of the bits for the previous contig first... + if (lastContig < liContig) { + while (lastContig < liContig) { + int nextbit = 0; + + if (mask != null && mask.get(lastContig) != null) { + while (nextbit != -1) { + nextbit = mask.get(lastContig).nextSetBit(lastPosition + 1); + if (nextbit > -1) { + complete.addLast(new LocusInfo(contigToChrom[lastContig], nextbit)); + lastPosition = nextbit; + } + } + } + lastPosition=0; + lastContig++; + } + } + + // now that we're on the same contig, fill in any unfilled positions + // if we have some bits in the mask to fill in... + if (mask != null && mask.get(lastContig) != null && lastPosition + 1 < li.getPosition()) { + while (lastPosition + 1 < li.getPosition()) { + + final int nextbit = mask.get(lastContig).nextSetBit(lastPosition + 1); + + // if there are no more mask bits, or the next mask bit is + // at or after the current data, just continue on + if (nextbit == -1 || nextbit >= li.getPosition()) { break; } + + // otherwise, pop on the desired empty locus info + complete.addLast(new LocusInfo(contigToChrom[lastContig], nextbit)); + lastPosition = nextbit; + } + } + + // only add to the complete queue if it's in the mask (or we have no mask!) + if (mask == null || mask.get(chromToContig.get(li.getChrom()), li.getPosition())) { + complete.addLast(li); + } + + lastContig = liContig; + lastPosition = li.getPosition(); + + + } + + public void remove() { + throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); + } + + // -------------------------------------------------------------------------------------------- + // Helper methods below this point... 
+ // -------------------------------------------------------------------------------------------- + + public void setGenomeMask(final GenomeMask mask) { this.mask = mask; } + public GenomeMask getGenomeMask() { return this.mask; } + + public boolean isIncludeNonPfReads() { return includeNonPfReads; } + public void setIncludeNonPfReads(final boolean includeNonPfReads) { this.includeNonPfReads = includeNonPfReads; } + + public boolean isIncludeDuplicates() { return includeDuplicates; } + public void setIncludeDuplicates(final boolean includeDuplicates) { this.includeDuplicates = includeDuplicates; } + + public int getQualityScoreCutoff() { return qualityScoreCutoff; } + public void setQualityScoreCutoff(final int qualityScoreCutoff) { this.qualityScoreCutoff = qualityScoreCutoff; } + + + // TODO: once we have a foundation method for access to reference data, this should all change + // to be based on that, rather than this strange mashup of contig and chrom + private static final Map chromToContig = new HashMap(); + { + for(int i=1; i<=22; i++) { + chromToContig.put("chr"+i, i); + } + chromToContig.put("chrM", 0); + chromToContig.put("chrX", 23); + chromToContig.put("chrY", 24); + chromToContig.put("chr1_random", 25); + chromToContig.put("chr2_random", 26); + chromToContig.put("chr3_random", 27); + chromToContig.put("chr4_random", 28); + chromToContig.put("chr5_random", 29); + chromToContig.put("chr6_random", 30); + chromToContig.put("chr7_random", 31); + chromToContig.put("chr8_random", 32); + chromToContig.put("chr9_random", 33); + chromToContig.put("chr10_random", 34); + chromToContig.put("chr11_random", 35); + chromToContig.put("chr13_random", 36); + chromToContig.put("chr15_random", 37); + chromToContig.put("chr16_random", 38); + chromToContig.put("chr17_random", 39); + chromToContig.put("chr18_random", 40); + chromToContig.put("chr19_random", 41); + chromToContig.put("chr21_random", 42); + chromToContig.put("chr22_random", 43); + 
chromToContig.put("chrX_random", 44); + } + + private static final String[] contigToChrom = new String[] { "chrM","chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22","chrX","chrY", + "chr1_random","chr2_random","chr3_random","chr4_random","chr5_random","chr6_random","chr7_random","chr8_random","chr9_random","chr10_random","chr11_random","chr13_random","chr15_random","chr16_random","chr17_random","chr18_random","chr19_random","chr21_random","chr22_random","chrX_random" }; + + + +} diff --git a/lib/edu/mit/broad/sam/SAMProgramRecord.java b/lib/edu/mit/broad/sam/SAMProgramRecord.java new file mode 100644 index 0000000000..d2597adb35 --- /dev/null +++ b/lib/edu/mit/broad/sam/SAMProgramRecord.java @@ -0,0 +1,85 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/
+package edu.mit.broad.sam;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * In-memory representation of a SAM @PG (program group) header record.
+ *
+ * NOTE(review): the patch text had all generic type arguments stripped by
+ * markup sanitization (e.g. "Map mAttributes", "Set> getAttributes()").
+ * They are reconstructed here from the surrounding method signatures, which
+ * use String keys and String values throughout.
+ */
+public class SAMProgramRecord {
+    public static final String PROGRAM_GROUP_ID_TAG = "ID";
+    private static final String PROGRAM_VERSION_TAG = "VN";
+    private static final String COMMAND_LINE_TAG = "CL";
+    private final String mProgramGroupId;
+    private final Map<String, String> mAttributes = new HashMap<String, String>();
+
+    public SAMProgramRecord(final String programGroupId) {
+        this.mProgramGroupId = programGroupId;
+    }
+
+    public String getProgramGroupId() {
+        return mProgramGroupId;
+    }
+
+    public String getAttribute(final String key) {
+        return mAttributes.get(key);
+    }
+
+    public void setAttribute(final String key, final String value) {
+        mAttributes.put(key, value);
+    }
+
+    public Set<Map.Entry<String, String>> getAttributes() {
+        return mAttributes.entrySet();
+    }
+
+    public String getProgramVersion() {
+        return getAttribute(PROGRAM_VERSION_TAG);
+    }
+
+    public void setProgramVersion(final String version) {
+        setAttribute(PROGRAM_VERSION_TAG, version);
+    }
+
+    public String getCommandLine() {
+        return getAttribute(COMMAND_LINE_TAG);
+    }
+
+    public void setCommandLine(final String commandLine) {
+        setAttribute(COMMAND_LINE_TAG, commandLine);
+    }
+
+    /**
+     * @return true if this == that except for the program group ID, which is arbitrary
+     */
+    public boolean equivalent(final SAMProgramRecord that) {
+        return mAttributes.equals(that.mAttributes);
+    }
+
+    @Override
+    public boolean equals(final Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        final SAMProgramRecord that = (SAMProgramRecord) o;
+
+        if (mAttributes != null ? !mAttributes.equals(that.mAttributes) : that.mAttributes != null) return false;
+        if (mProgramGroupId != null ? !mProgramGroupId.equals(that.mProgramGroupId) : that.mProgramGroupId != null) return false;
+
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        int result = mProgramGroupId != null ? mProgramGroupId.hashCode() : 0;
+        result = 31 * result + (mAttributes != null ? mAttributes.hashCode() : 0);
+        return result;
+    }
+}
diff --git a/lib/edu/mit/broad/sam/SAMReadGroupRecord.java b/lib/edu/mit/broad/sam/SAMReadGroupRecord.java
new file mode 100644
index 0000000000..3bdf1f6bb9
--- /dev/null
+++ b/lib/edu/mit/broad/sam/SAMReadGroupRecord.java
@@ -0,0 +1,84 @@
+/*
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2008 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever.
+ * Neither the Broad Institute nor MIT can be responsible for its use, misuse,
+ * or functionality.
+ */
+package edu.mit.broad.sam;
+
+
+import java.util.*;
+
+/**
+ * Header information about a read group (SAM @RG header record).
+ *
+ * NOTE(review): generic type arguments reconstructed as String keys / Object
+ * values -- getAttribute() returns Object and getSample()/getLibrary() cast
+ * the result to String.
+ */
+public class SAMReadGroupRecord
+{
+    private String mReadGroupId = null;
+    private final Map<String, Object> mAttributes = new HashMap<String, Object>();
+    public static final String READ_GROUP_ID_TAG = "ID";
+    public static final String READ_GROUP_SAMPLE_TAG = "SM";
+    public static final String PREDICTED_MEDIAN_INSERT_SIZE_TAG = "PI";
+    public static final String DATE_RUN_PRODUCED_TAG = "DT";
+
+    public SAMReadGroupRecord(final String id) {
+        mReadGroupId = id;
+    }
+
+    public String getReadGroupId() {
+        return mReadGroupId;
+    }
+
+    public String getSample() {
+        return (String) getAttribute("SM");
+    }
+
+    public void setSample(final String value) {
+        setAttribute("SM", value);
+    }
+
+    public String getLibrary() {
+        return (String) getAttribute("LB");
+    }
+
+    public void setLibrary(final String value) {
+        setAttribute("LB", value);
+    }
+
+    public Object getAttribute(final String key) {
+        return mAttributes.get(key);
+    }
+
+    public void setAttribute(final String key, final Object value) {
+        mAttributes.put(key, value);
+    }
+
+    public Set<Map.Entry<String, Object>> getAttributes() {
+        return mAttributes.entrySet();
+    }
+
+    @Override
+    public boolean equals(final Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        final SAMReadGroupRecord that = (SAMReadGroupRecord) o;
+
+        if (mAttributes != null ? !mAttributes.equals(that.mAttributes) : that.mAttributes != null) return false;
+        if (mReadGroupId != null ? !mReadGroupId.equals(that.mReadGroupId) : that.mReadGroupId != null) return false;
+
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        int result = mReadGroupId != null ? mReadGroupId.hashCode() : 0;
+        result = 31 * result + (mAttributes != null ? mAttributes.hashCode() : 0);
+        return result;
+    }
+}
+
diff --git a/lib/edu/mit/broad/sam/SAMRecord.java b/lib/edu/mit/broad/sam/SAMRecord.java
new file mode 100644
index 0000000000..ca603994df
--- /dev/null
+++ b/lib/edu/mit/broad/sam/SAMRecord.java
@@ -0,0 +1,732 @@
+/*
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2008 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever.
+ * Neither the Broad Institute nor MIT can be responsible for its use, misuse,
+ * or functionality.
+ */
+package edu.mit.broad.sam;
+
+
+import edu.mit.broad.sam.util.StringUtil;
+
+import java.util.*;
+
+/**
+ * Java binding for a SAM file record.
+ */
+public class SAMRecord
+{
+    public static final int UNKNOWN_MAPPING_QUALITY = 255;
+    public static final int NO_MAPPING_QUALITY = 0;
+    public static final String NO_ALIGNMENT_REFERENCE_NAME = "*";
+    public static final String NO_ALIGNMENT_CIGAR = "*";
+    public static final int NO_ALIGNMENT_START = 0;
+    public static final byte[] NULL_SEQUENCE = "*".getBytes();
+    public static final byte[] NULL_QUALS = "*".getBytes();
+    private static final int READ_PAIRED_FLAG = 0x1;
+    private static final int PROPER_PAIR_FLAG = 0x2;
+    private static final int READ_UNMAPPED_FLAG = 0x4;
+    private static final int MATE_UNMAPPED_FLAG = 0x8;
+    private static final int READ_STRAND_FLAG = 0x10;
+    private static final int MATE_STRAND_FLAG = 0x20;
+    private static final int FIRST_OF_PAIR_FLAG = 0x40;
+    private static final int SECOND_OF_PAIR_FLAG = 0x80;
+    private static final int NOT_PRIMARY_ALIGNMENT_FLAG = 0x100;
+    private static final int READ_FAILS_VENDOR_QUALITY_CHECK_FLAG = 0x200;
+    private static final int DUPLICATE_READ_FLAG = 0x400;
+
+
+    private String mReadName = null;
+    private byte[] mReadBases = NULL_SEQUENCE;
+    private byte[] mBaseQualities = NULL_QUALS;
+    private String mReferenceName = NO_ALIGNMENT_REFERENCE_NAME;
+    private int mAlignmentStart = NO_ALIGNMENT_START;
+    private int mMappingQuality = NO_MAPPING_QUALITY;
+    private String mCigarString = NO_ALIGNMENT_CIGAR;
+    private Cigar mCigar = null;
+    // NOTE(review): generic type arguments on the collections below were
+    // stripped by markup sanitization; reconstructed from usage in
+    // getAlignmentBlocks(), getAttributes() and setAttribute().
+    private List<AlignmentBlock> mAlignmentBlocks = null;
+    private int mFlags = 0;
+    private String mMateReferenceName = NO_ALIGNMENT_REFERENCE_NAME;
+    private int mMateAlignmentStart = 0;
+    private int mInferredInsertSize = 0;
+    private Map<String, Object> mAttributes = null;
+    private Integer mReferenceIndex = null;
+    private Integer mMateReferenceIndex = null;
+    private Integer mIndexingBin = null;
+
+    // Optional, but handy for looking of reference indices
+    private SAMFileHeader mHeader = null;
+
+
+    public SAMRecord() {
+    }
+
+    public String getReadName() {
+        return mReadName;
+    }
+
+    /**
+     * This method is preferred over getReadName().length(), because for BAMRecord
+     * it may be faster.
+     * @return length not including a null terminator
+     */
+    public int getReadNameLength() {
+        return mReadName.length();
+    }
+
+    public void setReadName(final String value) {
+        mReadName = value;
+    }
+
+    public String getReadString() {
+        return StringUtil.bytesToString(getReadBases());
+    }
+
+    public void setReadString(final String value) {
+        mReadBases = StringUtil.stringToBytes(value);
+    }
+
+    // Read bases, as bytes
+    public byte[] getReadBases() {
+        return mReadBases;
+    }
+
+    public void setReadBases(final byte[] value) {
+        mReadBases = value;
+    }
+
+    /**
+     * This method is preferred over getReadBases().length, because for BAMRecord it may be faster.
+     * @return number of bases in the read
+     */
+    public int getReadLength() {
+        return getReadBases().length;
+    }
+
+    // Base qualities, encoded as a FASTQ string
+    public String getBaseQualityString() {
+        return SAMUtils.phredToFastq(getBaseQualities());
+    }
+
+    public void setBaseQualityString(final String value) {
+        setBaseQualities(SAMUtils.fastqToPhred(value));
+    }
+
+    public byte[] getBaseQualities() {
+        return mBaseQualities;
+    }
+
+    public void setBaseQualities(final byte[] value) {
+        mBaseQualities = value;
+    }
+
+    public String getReferenceName() {
+        return mReferenceName;
+    }
+
+    public void setReferenceName(final String value) {
+        mReferenceName = value;
+        // Invalidate the cached index so it is recomputed lazily.
+        mReferenceIndex = null;
+    }
+
+    public Integer getReferenceIndex(final SAMFileHeader header) {
+        if (mReferenceIndex == null) {
+            if (mReferenceName == null) {
+                mReferenceIndex = -1;
+            } else if (NO_ALIGNMENT_REFERENCE_NAME.equals(mReferenceName)) {
+                mReferenceIndex = -1;
+            } else {
+                mReferenceIndex = header.getSequenceIndex(mReferenceName);
+            }
+        }
+        return mReferenceIndex;
+    }
+
+    public Integer getReferenceIndex() {
+        return getReferenceIndex(mHeader);
+    }
+
+
+    public void setReferenceIndex(final int referenceIndex, final SAMFileHeader header) {
+        mReferenceIndex = referenceIndex;
+        if (mReferenceIndex == -1) {
+            mReferenceName = NO_ALIGNMENT_REFERENCE_NAME;
+        } else {
+            mReferenceName = header.getSequence(referenceIndex).getSequenceName();
+        }
+    }
+
+    public void setReferenceIndex(final int referenceIndex) {
+        setReferenceIndex(referenceIndex, mHeader);
+    }
+
+
+    public String getMateReferenceName() {
+        return mMateReferenceName;
+    }
+
+    public void setMateReferenceName(final String mateReferenceName) {
+        this.mMateReferenceName = mateReferenceName;
+        mMateReferenceIndex = null;
+    }
+
+    public Integer getMateReferenceIndex(final SAMFileHeader header) {
+        if (mMateReferenceIndex == null) {
+            if (mMateReferenceName == null) {
+                mMateReferenceIndex = -1;
+            } else if (NO_ALIGNMENT_REFERENCE_NAME.equals(mMateReferenceName)){
+                mMateReferenceIndex = -1;
+            } else {
+                mMateReferenceIndex = header.getSequenceIndex(mMateReferenceName);
+            }
+        }
+        return mMateReferenceIndex;
+    }
+
+    public Integer getMateReferenceIndex() {
+        return getMateReferenceIndex(mHeader);
+    }
+
+    public void setMateReferenceIndex(final int referenceIndex, final SAMFileHeader header) {
+        mMateReferenceIndex = referenceIndex;
+        if (mMateReferenceIndex == -1) {
+            mMateReferenceName = NO_ALIGNMENT_REFERENCE_NAME;
+        } else {
+            mMateReferenceName = header.getSequence(referenceIndex).getSequenceName();
+        }
+    }
+
+    public void setMateReferenceIndex(final int referenceIndex) {
+        setMateReferenceIndex(referenceIndex, mHeader);
+    }
+
+
+    public int getAlignmentStart() {
+        return mAlignmentStart;
+    }
+
+    public void setAlignmentStart(final int value) {
+        mAlignmentStart = value;
+    }
+
+    public int getAlignmentEnd() {
+        final byte[] readBases = getReadBases();
+        // The null check after Arrays.equals is safe: Arrays.equals tolerates null.
+        if (mAlignmentStart == NO_ALIGNMENT_START || Arrays.equals(NULL_SEQUENCE, readBases) || readBases == null) {
+            return -1;
+        }
+        return mAlignmentStart + getCigar().getReferenceLength() - 1;
+    }
+
+    /**
+     * Returns the alignment start adjusted for clipped bases.  For example if the read
+     * has an alignment start of 100 but the first 4 bases were clipped (hard or soft clipped)
+     * then this method will return 96.
+     */
+    public int getUnclippedStart() {
+        int pos = getAlignmentStart();
+
+        for (final CigarElement cig : getCigar().getCigarElements()) {
+            final CigarOperator op = cig.getOperator();
+            if (op == CigarOperator.SOFT_CLIP || op == CigarOperator.HARD_CLIP) {
+                pos -= cig.getLength();
+            }
+            else {
+                break;
+            }
+        }
+
+        return pos;
+    }
+
+    /**
+     * Returns the alignment end adjusted for clipped bases.  For example if the read
+     * has an alignment end of 100 but the last 7 bases were clipped (hard or soft clipped)
+     * then this method will return 107.
+     */
+    public int getUnclippedEnd() {
+        int pos = getAlignmentEnd();
+        List<CigarElement> cigs = getCigar().getCigarElements();
+        for (int i=cigs.size() - 1; i>=0; --i) {
+            final CigarElement cig = cigs.get(i);
+            final CigarOperator op = cig.getOperator();
+
+            if (op == CigarOperator.SOFT_CLIP || op == CigarOperator.HARD_CLIP) {
+                pos += cig.getLength();
+            }
+            else {
+                break;
+            }
+        }
+
+        return pos;
+    }
+
+    public void setAlignmentEnd(final int value) {
+        throw new UnsupportedOperationException("Not supported: setAlignmentEnd");
+    }
+
+    public int getMateAlignmentStart() {
+        return mMateAlignmentStart;
+    }
+
+    public void setMateAlignmentStart(final int mateAlignmentStart) {
+        this.mMateAlignmentStart = mateAlignmentStart;
+    }
+
+    public int getInferredInsertSize() {
+        return mInferredInsertSize;
+    }
+
+    public void setInferredInsertSize(final int inferredInsertSize) {
+        this.mInferredInsertSize = inferredInsertSize;
+    }
+
+    public int getMappingQuality() {
+        return mMappingQuality;
+    }
+
+    public void setMappingQuality(final int value) {
+        mMappingQuality = value;
+    }
+
+    public String getCigarString() {
+        if (mCigarString == null && getCigar() != null) {
+            mCigarString = TextCigarCodec.getSingleton().encode(getCigar());
+        }
+        return mCigarString;
+    }
+
+    public void setCigarString(final String value) {
+        mCigarString = value;
+        mCigar = null;
+    }
+
+    public Cigar getCigar() {
+        if (mCigar == null && mCigarString != null) {
+            mCigar = TextCigarCodec.getSingleton().decode(mCigarString);
+        }
+        return mCigar;
+    }
+
+    /**
+     * This method is preferred over getCigar().getNumElements(), because for BAMRecord it may be faster.
+     * @return number of cigar elements (number + operator) in the cigar string
+     */
+    public int getCigarLength() {
+        return getCigar().numCigarElements();
+    }
+
+    public void setCigar(final Cigar cigar) {
+        this.mCigar = cigar;
+        mCigarString = null;
+    }
+
+    public int getFlags() {
+        return mFlags;
+    }
+
+    public void setFlags(final int value) {
+        mFlags = value;
+    }
+
+    /**
+     * the read is paired in sequencing, no matter whether it is mapped in a pair
+     */
+    public boolean getReadPairedFlag() {
+        return (mFlags & READ_PAIRED_FLAG) != 0;
+    }
+
+    private void requireReadPaired() {
+        if (!getReadPairedFlag()) {
+            throw new IllegalStateException("Inappropriate call if not paired read");
+        }
+    }
+
+    /**
+     * the read is mapped in a proper pair (depends on the protocol, normally inferred during alignment)
+     */
+    public boolean getProperPairFlag() {
+        requireReadPaired();
+        return (mFlags & PROPER_PAIR_FLAG) != 0;
+    }
+
+    /**
+     * the query sequence itself is unmapped
+     */
+    public boolean getReadUnmappedFlag() {
+        return (mFlags & READ_UNMAPPED_FLAG) != 0;
+    }
+
+    /**
+     * the mate is unmapped
+     */
+    public boolean getMateUnmappedFlag() {
+        requireReadPaired();
+        return (mFlags & MATE_UNMAPPED_FLAG) != 0;
+    }
+
+    /**
+     * strand of the query (false for forward; true for reverse strand)
+     */
+    public boolean getReadNegativeStrandFlag() {
+        return (mFlags & READ_STRAND_FLAG) != 0;
+    }
+
+    /**
+     * strand of the mate (false for forward; true for reverse strand)
+     */
+    public boolean getMateNegativeStrandFlag() {
+        requireReadPaired();
+        return (mFlags & MATE_STRAND_FLAG) != 0;
+    }
+
+    /**
+     * the read is the first read in a pair
+     */
+    public boolean getFirstOfPairFlag() {
+        requireReadPaired();
+        return (mFlags & FIRST_OF_PAIR_FLAG) != 0;
+    }
+
+    /**
+     * the read is the second read in a pair
+     */
+    public boolean getSecondOfPairFlag() {
+        requireReadPaired();
+        return (mFlags & SECOND_OF_PAIR_FLAG) != 0;
+    }
+
+    /**
+     * the alignment is not primary (a read having split hits may have multiple primary alignment records)
+     */
+    public boolean getNotPrimaryAlignmentFlag() {
+        return (mFlags & NOT_PRIMARY_ALIGNMENT_FLAG) != 0;
+    }
+
+    /**
+     * the read fails platform/vendor quality checks
+     */
+    public boolean getReadFailsVendorQualityCheckFlag() {
+        return (mFlags & READ_FAILS_VENDOR_QUALITY_CHECK_FLAG) != 0;
+    }
+
+    /**
+     * the read is either a PCR duplicate or an optical duplicate
+     */
+    public boolean getDuplicateReadFlag() {
+        return (mFlags & DUPLICATE_READ_FLAG) != 0;
+    }
+
+    /**
+     * the read is paired in sequencing, no matter whether it is mapped in a pair
+     */
+    public void setReadPairedFlag(final boolean flag) {
+        setFlag(flag, READ_PAIRED_FLAG);
+    }
+
+    /**
+     * the read is mapped in a proper pair (depends on the protocol, normally inferred during alignment)
+     */
+    public void setProperPairFlag(final boolean flag) {
+        setFlag(flag, PROPER_PAIR_FLAG);
+    }
+
+    /**
+     * the query sequence itself is unmapped
+     */
+    public void setReadUmappedFlag(final boolean flag) {
+        setFlag(flag, READ_UNMAPPED_FLAG);
+    }
+
+    /**
+     * the mate is unmapped
+     */
+    public void setMateUnmappedFlag(final boolean flag) {
+        setFlag(flag, MATE_UNMAPPED_FLAG);
+    }
+
+    /**
+     * strand of the query (false for forward; true for reverse strand)
+     */
+    public void setReadNegativeStrandFlag(final boolean flag) {
+        setFlag(flag, READ_STRAND_FLAG);
+    }
+
+    /**
+     * strand of the mate (false for forward; true for reverse strand)
+     */
+    public void setMateNegativeStrandFlag(final boolean flag) {
+        setFlag(flag, MATE_STRAND_FLAG);
+    }
+
+    /**
+     * the read is the first read in a pair
+     */
+    public void setFirstOfPairFlag(final boolean flag) {
+        setFlag(flag, FIRST_OF_PAIR_FLAG);
+    }
+
+    /**
+     * the read is the second read in a pair
+     */
+    public void setSecondOfPairFlag(final boolean flag) {
+        setFlag(flag, SECOND_OF_PAIR_FLAG);
+    }
+
+    /**
+     * the alignment is not primary (a read having split hits may have multiple primary alignment records)
+     */
+    public void setNotPrimaryAlignmentFlag(final boolean flag) {
+        setFlag(flag, NOT_PRIMARY_ALIGNMENT_FLAG);
+    }
+
+    /**
+     * the read fails platform/vendor quality checks
+     */
+    public void setReadFailsVendorQualityCheckFlag(final boolean flag) {
+        setFlag(flag, READ_FAILS_VENDOR_QUALITY_CHECK_FLAG);
+    }
+
+    /**
+     * the read is either a PCR duplicate or an optical duplicate
+     */
+    public void setDuplicateReadFlag(final boolean flag) {
+        setFlag(flag, DUPLICATE_READ_FLAG);
+    }
+
+    private void setFlag(final boolean flag, final int bit) {
+        if (flag) {
+            mFlags |= bit;
+        } else {
+            mFlags &= ~bit;
+        }
+    }
+
+    public Object getAttribute(final String key) {
+        if (mAttributes == null) {
+            return null;
+        }
+        return mAttributes.get(key);
+    }
+
+    public void setAttribute(final String key, final Object value) {
+        if (mAttributes == null) {
+            // LinkedHashMap preserves insertion order for text output.
+            mAttributes = new LinkedHashMap<String, Object>();
+        }
+        mAttributes.put(key, value);
+    }
+
+    public Set<Map.Entry<String, Object>> getAttributes() {
+        if (mAttributes == null) {
+            return null;
+        }
+        return mAttributes.entrySet();
+    }
+
+    public Integer getIndexingBin() {
+        return mIndexingBin;
+    }
+
+    public void setIndexingBin(final Integer mIndexingBin) {
+        this.mIndexingBin = mIndexingBin;
+    }
+
+    public SAMFileHeader getHeader() {
+        return mHeader;
+    }
+
+    public void setHeader(final SAMFileHeader mHeader) {
+        this.mHeader = mHeader;
+    }
+
+    /**
+     * If this record has a valid binary representation of the variable-length portion of a binary record stored,
+     * return that byte array, otherwise return null.  This will never be true for SAMRecords.  It will be true
+     * for BAMRecords that have not been eagerDecoded(), and for which none of the data in the variable-length
+     * portion has been changed.
+     */
+    public byte[] getVariableBinaryRepresentation() {
+        return null;
+    }
+
+    /**
+     * Depending on the concrete implementation, the binary file size of attributes may be known without
+     * computing them all.
+     * @return binary file size of attribute, if known, else -1
+     */
+    public int getAttributesBinarySize() {
+        return -1;
+    }
+
+    public String format() {
+        final StringBuilder buffer = new StringBuilder();
+        addField(buffer, getReadName(), null, null);
+        addField(buffer, getFlags(), null, null);
+        addField(buffer, getReferenceName(), null, "*");
+        addField(buffer, getAlignmentStart(), 0, "*");
+        addField(buffer, getMappingQuality(), 0, "0");
+        addField(buffer, getCigarString(), null, "*");
+        addField(buffer, getMateReferenceName(), null, "*");
+        addField(buffer, getMateAlignmentStart(), 0, "*");
+        addField(buffer, getInferredInsertSize(), 0, "*");
+        addField(buffer, getReadString(), null, "*");
+        addField(buffer, getBaseQualityString(), null, "*");
+        if (mAttributes != null) {
+            for (final Map.Entry<String, Object> entry : getAttributes()) {
+                addField(buffer, formatTagValue(entry.getKey(), entry.getValue()));
+            }
+        }
+        return buffer.toString();
+    }
+
+    private void addField(final StringBuilder buffer, final Object value, final Object defaultValue, final String defaultString) {
+        if (safeEquals(value, defaultValue)) {
+            addField(buffer, defaultString);
+        } else if (value == null) {
+            addField(buffer, "");
+        } else {
+            addField(buffer, value.toString());
+        }
+    }
+
+    private void addField(final StringBuilder buffer, final String field) {
+        if (buffer.length() > 0) {
+            buffer.append('\t');
+        }
+        buffer.append(field);
+    }
+
+    private String formatTagValue(final String key, final Object value) {
+        if (value == null || value instanceof String) {
+            return key + ":Z:" + value;
+        } else if (value instanceof Integer) {
+            return key + ":i:" + value;
+        } else if (value instanceof Character) {
+            return key + ":A:" + value;
+        } else if (value instanceof Float) {
+            return key + ":f:" + value;
+        } else if (value instanceof byte[]) {
+            return key + ":H:" + SAMUtils.bytesToHexString((byte[]) value);
+        } else {
+            throw new RuntimeException("Unexpected value type for key " + key +
+                                       ": " + value);
+        }
+    }
+
+    private boolean safeEquals(final Object o1, final Object o2) {
+        if (o1 == o2) {
+            return true;
+        } else if (o1 == null || o2 == null) {
+            return false;
+        } else {
+            return o1.equals(o2);
+        }
+    }
+
+    /**
+     * Force all lazily-initialized data members to be initialized.  If a subclass overrides this method,
+     * typically it should also call  super method.
+     */
+    protected void eagerDecode() {
+        getCigar();
+        getCigarString();
+    }
+
+    /**
+     * Returns blocks of the read sequence that have been aligned directly to the
+     * reference sequence. Note that clipped portions of the read and inserted and
+     * deleted bases (vs. the reference) are not represented in the alignment blocks.
+     */
+    public List<AlignmentBlock> getAlignmentBlocks() {
+        if (this.mAlignmentBlocks != null) return this.mAlignmentBlocks;
+
+        final Cigar cigar = getCigar();
+        if (cigar == null) return Collections.emptyList();
+
+
+        this.mAlignmentBlocks = new ArrayList<AlignmentBlock>();
+        int readBase = 1;
+        int refBase  = getAlignmentStart();
+
+        for (final CigarElement e : cigar.getCigarElements()) {
+            switch (e.getOperator()) {
+                case H : break; // ignore hard clips
+                case P : break; // ignore pads
+                case S : readBase += e.getLength(); break; // soft clip read bases
+                case N : refBase += e.getLength(); break;  // reference skip
+                case D : refBase += e.getLength(); break;
+                case I : readBase += e.getLength(); break;
+                case M :
+                    final int length = e.getLength();
+                    this.mAlignmentBlocks.add(new AlignmentBlock(readBase, refBase, length));
+                    readBase += length;
+                    refBase  += length;
+                    break;
+                default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + e.getOperator());
+            }
+        }
+
+        return this.mAlignmentBlocks;
+    }
+
+    @Override
+    public boolean equals(final Object o) {
+        if (this == o) return true;
+        if (!(o instanceof SAMRecord)) return false;
+
+        final SAMRecord samRecord = (SAMRecord) o;
+        eagerDecode();
+        samRecord.eagerDecode();
+
+        if (mAlignmentStart != samRecord.mAlignmentStart) return false;
+        if (mFlags != samRecord.mFlags) return false;
+        if (mInferredInsertSize != samRecord.mInferredInsertSize) return false;
+        if (mMappingQuality != samRecord.mMappingQuality) return false;
+        if (mMateAlignmentStart != samRecord.mMateAlignmentStart) return false;
+        if (mAttributes != null ? !mAttributes.equals(samRecord.mAttributes) : samRecord.mAttributes != null)
+            return false;
+        if (!Arrays.equals(mBaseQualities, samRecord.mBaseQualities)) return false;
+        if (mCigar != null ? !mCigar.equals(samRecord.mCigar) : samRecord.mCigar != null)
+            return false;
+        if (mIndexingBin != null ? !mIndexingBin.equals(samRecord.mIndexingBin) : samRecord.mIndexingBin != null)
+            return false;
+        if (mMateReferenceIndex != null ? !mMateReferenceIndex.equals(samRecord.mMateReferenceIndex) : samRecord.mMateReferenceIndex != null)
+            return false;
+        if (mMateReferenceName != null ? !mMateReferenceName.equals(samRecord.mMateReferenceName) : samRecord.mMateReferenceName != null)
+            return false;
+        if (!Arrays.equals(mReadBases, samRecord.mReadBases)) return false;
+        if (mReadName != null ? !mReadName.equals(samRecord.mReadName) : samRecord.mReadName != null) return false;
+        if (mReferenceIndex != null ? !mReferenceIndex.equals(samRecord.mReferenceIndex) : samRecord.mReferenceIndex != null)
+            return false;
+        if (mReferenceName != null ? !mReferenceName.equals(samRecord.mReferenceName) : samRecord.mReferenceName != null)
+            return false;
+
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        eagerDecode();
+        int result = mReadName != null ? mReadName.hashCode() : 0;
+        result = 31 * result + (mReadBases != null ? Arrays.hashCode(mReadBases) : 0);
+        result = 31 * result + (mBaseQualities != null ? Arrays.hashCode(mBaseQualities) : 0);
+        result = 31 * result + (mReferenceName != null ? mReferenceName.hashCode() : 0);
+        result = 31 * result + mAlignmentStart;
+        result = 31 * result + mMappingQuality;
+        result = 31 * result + (mCigarString != null ? mCigarString.hashCode() : 0);
+        result = 31 * result + mFlags;
+        result = 31 * result + (mMateReferenceName != null ? mMateReferenceName.hashCode() : 0);
+        result = 31 * result + mMateAlignmentStart;
+        result = 31 * result + mInferredInsertSize;
+        result = 31 * result + (mAttributes != null ? mAttributes.hashCode() : 0);
+        result = 31 * result + (mReferenceIndex != null ? mReferenceIndex.hashCode() : 0);
+        result = 31 * result + (mMateReferenceIndex != null ? mMateReferenceIndex.hashCode() : 0);
+        result = 31 * result + (mIndexingBin != null ? mIndexingBin.hashCode() : 0);
+        return result;
+    }
+}
+
diff --git a/lib/edu/mit/broad/sam/SAMRecordComparator.java b/lib/edu/mit/broad/sam/SAMRecordComparator.java
new file mode 100644
index 0000000000..0a2afd8389
--- /dev/null
+++ b/lib/edu/mit/broad/sam/SAMRecordComparator.java
@@ -0,0 +1,23 @@
+/*
+* The Broad Institute
+* SOFTWARE COPYRIGHT NOTICE AGREEMENT
+* This software and its documentation are copyright 2009 by the
+* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+*
+* This software is supplied without any warranty or guaranteed support whatsoever. Neither
+* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+*/
+package edu.mit.broad.sam;
+
+import java.util.Comparator;
+
+public interface SAMRecordComparator extends Comparator<SAMRecord> {
+
+    /**
+     * Less stringent compare method than the regular compare.  If the two records
+     * are equal enough that their ordering in a sorted SAM file would be arbitrary,
+     * this method returns 0.
+     * @return
+     */
+    public int fileOrderCompare(SAMRecord samRecord1, SAMRecord samRecord2);
+}
diff --git a/lib/edu/mit/broad/sam/SAMRecordCoordinateComparator.java b/lib/edu/mit/broad/sam/SAMRecordCoordinateComparator.java
new file mode 100644
index 0000000000..e195d97088
--- /dev/null
+++ b/lib/edu/mit/broad/sam/SAMRecordCoordinateComparator.java
@@ -0,0 +1,58 @@
+/*
+* The Broad Institute
+* SOFTWARE COPYRIGHT NOTICE AGREEMENT
+* This software and its documentation are copyright 2009 by the
+* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+*
+* This software is supplied without any warranty or guaranteed support whatsoever. Neither
+* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+*/
+package edu.mit.broad.sam;
+
+/**
+ * Comparator for sorting SAMRecords by coordinate.  Note that the header is required because
+ * the order of sequences in the header defines the major sort order.
+ */
+public class SAMRecordCoordinateComparator implements SAMRecordComparator {
+    private final SAMFileHeader header;
+
+    public SAMRecordCoordinateComparator(final SAMFileHeader header) {
+        this.header = header;
+    }
+
+    public int compare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
+        final int cmp = fileOrderCompare(samRecord1, samRecord2);
+        if (cmp != 0) {
+            return cmp;
+        }
+        // Tie-break ties at the same coordinate: by read name when strands match,
+        // otherwise forward-strand records sort first.
+        if (samRecord1.getReadNegativeStrandFlag() == samRecord2.getReadNegativeStrandFlag()) {
+            return samRecord1.getReadName().compareTo(samRecord2.getReadName());
+        }
+        else {
+            return (samRecord1.getReadNegativeStrandFlag()? 1: -1);
+        }
+    }
+
+    /**
+     * Less stringent compare method than the regular compare.  If the two records
+     * are equal enough that their ordering in a sorted SAM file would be arbitrary,
+     * this method returns 0.  If read is paired and unmapped, use the mate mapping to sort.
+     *
+     * @return
+     */
+    public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
+        int refIndex1 = samRecord1.getReferenceIndex(header);
+        int refIndex2 = samRecord2.getReferenceIndex(header);
+        // Unmapped (-1) records sort after all mapped records.
+        if (refIndex1 == -1) {
+            return (refIndex2 == -1? 0: 1);
+        } else if (refIndex2 == -1) {
+            return -1;
+        }
+        int cmp = refIndex1 - refIndex2;
+        if (cmp != 0) {
+            return cmp;
+        }
+        return samRecord1.getAlignmentStart() - samRecord2.getAlignmentStart();
+    }
+}
diff --git a/lib/edu/mit/broad/sam/SAMRecordQueryNameComparator.java b/lib/edu/mit/broad/sam/SAMRecordQueryNameComparator.java
new file mode 100644
index 0000000000..3318488b13
--- /dev/null
+++ b/lib/edu/mit/broad/sam/SAMRecordQueryNameComparator.java
@@ -0,0 +1,38 @@
+/*
+* The Broad Institute
+* SOFTWARE COPYRIGHT NOTICE AGREEMENT
+* This software and its documentation are copyright 2009 by the
+* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+*
+* This software is supplied without any warranty or guaranteed support whatsoever. Neither
+* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+*/
+package edu.mit.broad.sam;
+
+/**
+ * For "queryname" ordering of SAMRecords
+ */
+public class SAMRecordQueryNameComparator implements SAMRecordComparator {
+
+    public int compare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
+        final int cmp = fileOrderCompare(samRecord1, samRecord2);
+        if (cmp != 0) {
+            return cmp;
+        }
+        if (samRecord1.getReadNegativeStrandFlag() == samRecord2.getReadNegativeStrandFlag()) {
+            return 0;
+        }
+        return (samRecord1.getReadNegativeStrandFlag()? 1: -1);
+    }
+
+    /**
+     * Less stringent compare method than the regular compare.  If the two records
+     * are equal enough that their ordering in a sorted SAM file would be arbitrary,
+     * this method returns 0.
+     *
+     * @return
+     */
+    public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
+        return samRecord1.getReadName().compareTo(samRecord2.getReadName());
+    }
+}
diff --git a/lib/edu/mit/broad/sam/SAMRecordSetBuilder.java b/lib/edu/mit/broad/sam/SAMRecordSetBuilder.java
new file mode 100644
index 0000000000..6e6e2714fd
--- /dev/null
+++ b/lib/edu/mit/broad/sam/SAMRecordSetBuilder.java
@@ -0,0 +1,274 @@
+package edu.mit.broad.sam;
+
+import edu.mit.broad.sam.util.CloseableIterator;
+import edu.mit.broad.sam.util.CoordMath;
+import edu.mit.broad.sam.util.RuntimeIOException;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * Factory class for creating SAMRecords for testing purposes. Various methods can be called
+ * to add new SAM records (or pairs of records) to a list which can then be returned at
+ * any point. The records must reference human chromosomes (excluding randoms etc.).
+ *
+ * Although this is a class for testing, it is in the src tree because it is included in the sam jarfile.
+ * + * @author Tim Fennell + */ +public class SAMRecordSetBuilder implements Iterable { + private static final String[] chroms = { + "chrM", "chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", + "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", + "chr21", "chr22", "chrX", "chrY" + }; + private static final byte[] BASES = {'A','C','G','T'}; + private static final String READ_GROUP_ID = "1"; + private static final String SAMPLE = "FREE_SAMPLE"; + private final Random random = new Random(); + + private SAMFileHeader header; + private Collection records; + + private final int readLength = 36 ; + + private SAMProgramRecord programRecord = null; + + + /** + * Constructs a new SAMRecordSetBuilder with all the data needed to keep the records + * sorted in coordinate order. + */ + public SAMRecordSetBuilder() { + this(true, SAMFileHeader.SortOrder.coordinate); + } + + public SAMRecordSetBuilder(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) { + final List sequences = new ArrayList(); + for (final String chrom : chroms) { + sequences.add(new SAMSequenceRecord(chrom)); + } + + this.header = new SAMFileHeader(); + this.header.setSequences(sequences); + this.header.setSortOrder(sortOrder); + if (sortForMe) { + final SAMRecordComparator comparator; + if (sortOrder == SAMFileHeader.SortOrder.queryname) { + comparator = new SAMRecordQueryNameComparator(); + } else { + comparator = new SAMRecordCoordinateComparator(header); + } + this.records = new TreeSet(comparator); + } else { + this.records = new ArrayList(); + } + final SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(READ_GROUP_ID); + readGroupRecord.setSample(SAMPLE); + final List readGroups = new ArrayList(); + readGroups.add(readGroupRecord); + this.header.setReadGroups(readGroups); + } + + /** + * Adds the given program record to the header, and assigns the PG tag to any SAMRecords + * created after it has been added. 
May be called multiple times in order to assign different + * PG IDs to different SAMRecords. programRecord may be null to stop assignment of PG tag. + * It is up to the caller to ensure that program record IDs do not collide. + */ + public void setProgramRecord(SAMProgramRecord programRecord) { + this.programRecord = programRecord; + if (programRecord != null) { + this.header.addProgramRecord(programRecord); + } + } + + /** Returns the accumulated list of sam records. */ + public Collection getRecords() { return this.records; } + + /** Returns a CloseableIterator over the collection of SAMRecords. */ + public CloseableIterator iterator() { + return new CloseableIterator() { + private final Iterator iterator = records.iterator(); + public void close() { /** Do nothing. */ } + public boolean hasNext() { return this.iterator.hasNext(); } + public SAMRecord next() { return this.iterator.next(); } + public void remove() { this.iterator.remove(); } + }; + } + + /** + * Adds a skeletal fragment (non-PE) record to the set using the provided + * contig start and strand information. + */ + public void addFrag(final String name, final int contig, final int start, final boolean negativeStrand) { + final SAMRecord rec = new SAMRecord(); + rec.setReadName(name); + rec.setReferenceIndex(contig, this.header); + rec.setReferenceName(chroms[contig]); + rec.setAlignmentStart(start); + rec.setReadNegativeStrandFlag(negativeStrand); + rec.setCigarString(readLength + "M"); + rec.setMappingQuality(255); + rec.setAttribute(SAMTag.RG.name(), READ_GROUP_ID); + if (programRecord != null) { + rec.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId()); + } + + fillInBasesAndQualities(rec); + this.records.add(rec); + } + + /** Adds an unmapped fragment read to the builder. 
*/ + public void addUnmappedFragment(final String name) { + final SAMRecord rec = new SAMRecord(); + rec.setReadName(name); + rec.setReadUmappedFlag(true); + rec.setAttribute(SAMTag.RG.name(), READ_GROUP_ID); + if (programRecord != null) { + rec.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId()); + } + fillInBasesAndQualities(rec); + this.records.add(rec); + } + + /** + * Adds a skeletal fragment (non-PE) record to the set using the provided + * contig start and strand information. The pair is assumed to be a well + * formed pair sitting on a single contig. + */ + public void addPair(final String name, final int contig, final int start1, final int start2) { + final SAMRecord end1 = new SAMRecord(); + final SAMRecord end2 = new SAMRecord(); + final boolean end1IsFirstOfPair = this.random.nextBoolean(); + + end1.setReadName(name); + end1.setReferenceIndex(contig, this.header); + end1.setAlignmentStart(start1); + end1.setReadNegativeStrandFlag(false); + end1.setCigarString(readLength + "M"); + end1.setMappingQuality(255); + end1.setReadPairedFlag(true); + end1.setProperPairFlag(true); + end1.setMateReferenceIndex(contig, this.header); + end1.setMateAlignmentStart(start2); + end1.setMateNegativeStrandFlag(true); + end1.setFirstOfPairFlag(end1IsFirstOfPair); + end1.setSecondOfPairFlag(!end1IsFirstOfPair); + end1.setInferredInsertSize((int) CoordMath.getLength(start1, CoordMath.getEnd(start2, this.readLength))); + end1.setAttribute(SAMTag.RG.name(), READ_GROUP_ID); + if (programRecord != null) { + end1.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId()); + } + fillInBasesAndQualities(end1); + + end2.setReadName(name); + end2.setReferenceIndex(contig, this.header); + end2.setAlignmentStart(start2); + end2.setReadNegativeStrandFlag(true); + end2.setCigarString(readLength + "M"); + end2.setMappingQuality(255); + end2.setReadPairedFlag(true); + end2.setProperPairFlag(true); + end2.setMateReferenceIndex(contig, this.header); + 
end2.setMateAlignmentStart(start1); + end2.setMateNegativeStrandFlag(false); + end2.setFirstOfPairFlag(!end1IsFirstOfPair); + end2.setSecondOfPairFlag(end1IsFirstOfPair); + end2.setInferredInsertSize(end1.getInferredInsertSize()); + end2.setAttribute(SAMTag.RG.name(), READ_GROUP_ID); + if (programRecord != null) { + end2.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId()); + } + fillInBasesAndQualities(end2); + + this.records.add(end1); + this.records.add(end2); + } + + /** Adds a pair with both ends unmapped to the builder. */ + public void addUnmappedPair(final String name) { + final SAMRecord end1 = new SAMRecord(); + final SAMRecord end2 = new SAMRecord(); + final boolean end1IsFirstOfPair = this.random.nextBoolean(); + + end1.setReadName(name); + end1.setReadPairedFlag(false); + end1.setReadUmappedFlag(true); + end1.setProperPairFlag(false); + end1.setFirstOfPairFlag(end1IsFirstOfPair); + end1.setSecondOfPairFlag(!end1IsFirstOfPair); + end1.setAttribute(SAMTag.RG.name(), READ_GROUP_ID); + if (programRecord != null) { + end1.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId()); + } + fillInBasesAndQualities(end1); + + end2.setReadName(name); + end2.setReadPairedFlag(false); + end2.setReadUmappedFlag(true); + end2.setProperPairFlag(false); + end2.setFirstOfPairFlag(!end1IsFirstOfPair); + end2.setSecondOfPairFlag(end1IsFirstOfPair); + end2.setAttribute(SAMTag.RG.name(), READ_GROUP_ID); + if (programRecord != null) { + end2.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId()); + } + fillInBasesAndQualities(end2); + + this.records.add(end1); + this.records.add(end2); + } + + /** + * Fills in bases and qualities with randomly generated data. + * Relies on the alignment start and end having been set to get read length. 
+ */ + private void fillInBasesAndQualities(final SAMRecord rec) { + final int length = this.readLength; + final byte[] bases = new byte[length]; + final byte[] quals = new byte[length]; + + for (int i=0; i mAttributes = null; + public static final String SEQUENCE_NAME_TAG = "SN"; + public static final String SEQUENCE_LENGTH_TAG = "LN"; + public static final String MD5_TAG = "M5"; + public static final String ASSEMBLY_TAG = "AS"; + public static final String URI_TAG = "UR"; + public static final String SPECIES_TAG = "SP"; + + public SAMSequenceRecord(final String name) { + mSequenceName = name; + } + + public String getSequenceName() { + return mSequenceName; + } + + public int getSequenceLength() { + return mSequenceLength; + } + + public void setSequenceLength(final int value) { + mSequenceLength = value; + } + + public String getAssembly() { + return (String) getAttribute("AS"); + } + + public void setAssembly(final String value) { + setAttribute("AS", value); + } + + public String getSpecies() { + return (String) getAttribute("SP"); + } + + public void setSpecies(final String value) { + setAttribute("SP", value); + } + + public Object getAttribute(final String key) { + if (mAttributes == null) { + return null; + } + return mAttributes.get(key); + } + + public void setAttribute(final String key, final Object value) { + if (mAttributes == null) { + mAttributes = new HashMap(); + } + mAttributes.put(key, value); + } + + public Set> getAttributes() { + if (mAttributes == null) { + return null; + } + return mAttributes.entrySet(); + } + + // Private state used only by SAM implementation. + int getSequenceIndex() { + return mSequenceIndex; + } + + // Private state used only by SAM implementation. + void setSequenceIndex(final int value) { + mSequenceIndex = value; + } + + /** + * Looser comparison than equals(). If one SAMSequenceRecord has an attribute that the other does not + * have, that is not considered inequality. 
However, if they both have an attribute, but have different + * values for that atttribute, then they are considered unequal. This results in an intransitive equality test, + * i.e. a.isSameSequence(b) && b.isSameSequence(c) does not necessarily imply a.isSameSequence(c) + */ + public boolean isSameSequence(final SAMSequenceRecord that) { + if (this == that) return true; + if (that == null) return false; + + if (mSequenceIndex != that.mSequenceIndex) return false; + if (mSequenceLength != that.mSequenceLength) return false; + if (mSequenceName != null ? !mSequenceName.equals(that.mSequenceName) : that.mSequenceName != null) + return false; + // If one record has an optional attribute and the other does not, that is not considered inequality. + + if (mAttributes != null) { + for (final Map.Entry entry: getAttributes()) { + final Object thatAttribute = that.getAttribute(entry.getKey()); + if (thatAttribute != null && !entry.getValue().equals(thatAttribute)) { + return false; + } + } + } + + return true; + } + + @Override + public boolean equals(final Object o) { + if (this == o) return true; + if (!(o instanceof SAMSequenceRecord)) return false; + + final SAMSequenceRecord that = (SAMSequenceRecord) o; + + if (mSequenceIndex != that.mSequenceIndex) return false; + if (mSequenceLength != that.mSequenceLength) return false; + if (mAttributes != null ? !mAttributes.equals(that.mAttributes) : that.mAttributes != null) return false; + if (mSequenceName != null ? !mSequenceName.equals(that.mSequenceName) : that.mSequenceName != null) + return false; + + return true; + } + + @Override + public int hashCode() { + int result = mSequenceName != null ? mSequenceName.hashCode() : 0; + result = 31 * result + mSequenceIndex; + result = 31 * result + mSequenceLength; + result = 31 * result + (mAttributes != null ? 
mAttributes.hashCode() : 0); + return result; + } +} + diff --git a/lib/edu/mit/broad/sam/SAMTag.java b/lib/edu/mit/broad/sam/SAMTag.java new file mode 100644 index 0000000000..5189782cc0 --- /dev/null +++ b/lib/edu/mit/broad/sam/SAMTag.java @@ -0,0 +1,16 @@ +package edu.mit.broad.sam;/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ + +/** + * The standard tags defined in the SAM spec + */ +public enum SAMTag { + RG, LB, PU, PG, AS, SQ, MQ, NM, H0, H1, H2, UQ, PQ, NH, IH, HI, MD, CS, CQ, CM, R2, Q2, S2, CC, CP, SM, AM, MF +} diff --git a/lib/edu/mit/broad/sam/SAMTextHeaderCodec.java b/lib/edu/mit/broad/sam/SAMTextHeaderCodec.java new file mode 100644 index 0000000000..202f5f5bf5 --- /dev/null +++ b/lib/edu/mit/broad/sam/SAMTextHeaderCodec.java @@ -0,0 +1,323 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.sam; + +import edu.mit.broad.sam.util.LineReader; +import edu.mit.broad.sam.util.RuntimeIOException; +import edu.mit.broad.sam.util.StringUtil; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.IOException; +import java.io.Writer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * This is actually two classes in one (not sure if that is a good idea) -- a parser + * for a SAM text header, and a generator of SAM text header. + */ +public class SAMTextHeaderCodec { + private static final String HEADER_LINE_START = "@"; + + // These attributes are populated when parsing or generating + private SAMFileHeader mFileHeader; + + // These attributes are populated when parsing text + private String mCurrentLine; + private LineReader mReader; + private File mFile; + private List sequences; + private List readGroups; + + // These attributes are populated when generating text + private BufferedWriter writer; + + private static final String TAG_KEY_VALUE_SEPARATOR = ":"; + private static final String FIELD_SEPARATOR = "\t"; + + public SAMTextHeaderCodec() { + } + + /** + * Reads text and converts to a SAMFileHeader object. Note that one line past + * the header must be read in order to determine the end of the header. This line can be + * obtained after parseTextHeader() has returned by calling getCurrentLine() + * @param reader Where to get header text from. + * @param file Name of the input file, for error messages. May be null. + * @return complete header object. 
+ */ + public SAMFileHeader decode(final LineReader reader, final File file) { + mFileHeader = new SAMFileHeader(); + mReader = reader; + mFile = file; + sequences = new ArrayList(); + readGroups = new ArrayList(); + + while (advanceLine() != null) { + if (!mCurrentLine.startsWith(HEADER_LINE_START)) { + break; + } + final ParsedHeaderLine parsedHeaderLine = new ParsedHeaderLine(mCurrentLine); + switch (parsedHeaderLine.getHeaderRecordType()) { + + case HD: + parseHDLine(parsedHeaderLine); + break; + case PG: + parsePGLine(parsedHeaderLine); + break; + case RG: + parseRGLine(parsedHeaderLine); + break; + case SQ: + parseSQLine(parsedHeaderLine); + break; + default: + throw new IllegalStateException("Unrecognized header record type: " + + parsedHeaderLine.getHeaderRecordType()); + } + } + mFileHeader.setSequences(sequences); + mFileHeader.setReadGroups(readGroups); + return mFileHeader; + } + + private String advanceLine() { + mCurrentLine = mReader.readLine(); + return mCurrentLine; + } + + private void parsePGLine(final ParsedHeaderLine parsedHeaderLine) { + assert(HeaderRecordType.PG.equals(parsedHeaderLine.getHeaderRecordType())); + parsedHeaderLine.requireTag(SAMProgramRecord.PROGRAM_GROUP_ID_TAG); + final SAMProgramRecord programRecord = new SAMProgramRecord(parsedHeaderLine.removeValue(SAMProgramRecord.PROGRAM_GROUP_ID_TAG)); + for (final Map.Entry entry : parsedHeaderLine.mKeyValuePairs.entrySet()) { + programRecord.setAttribute(entry.getKey(), entry.getValue()); + } + mFileHeader.addProgramRecord(programRecord); + } + + private void parseRGLine(final ParsedHeaderLine parsedHeaderLine) { + assert(HeaderRecordType.RG.equals(parsedHeaderLine.getHeaderRecordType())); + parsedHeaderLine.requireTag(SAMReadGroupRecord.READ_GROUP_ID_TAG); + parsedHeaderLine.requireTag(SAMReadGroupRecord.READ_GROUP_SAMPLE_TAG); + final SAMReadGroupRecord samReadGroupRecord = new SAMReadGroupRecord(parsedHeaderLine.removeValue(SAMReadGroupRecord.READ_GROUP_ID_TAG)); + for (final 
Map.Entry entry : parsedHeaderLine.mKeyValuePairs.entrySet()) { + samReadGroupRecord.setAttribute(entry.getKey(), entry.getValue()); + } + + // Convert non-String attributes to the appropriate types + final String predictedMedianInsertSize = + (String)samReadGroupRecord.getAttribute(SAMReadGroupRecord.PREDICTED_MEDIAN_INSERT_SIZE_TAG); + if (predictedMedianInsertSize != null) { + try { + samReadGroupRecord.setAttribute(SAMReadGroupRecord.PREDICTED_MEDIAN_INSERT_SIZE_TAG, + Integer.parseInt(predictedMedianInsertSize)); + } catch (NumberFormatException e) { + throw new SAMFormatException(SAMReadGroupRecord.PREDICTED_MEDIAN_INSERT_SIZE_TAG + + " is not numeric: " + predictedMedianInsertSize, e); + } + } + +/* +TODO: Need an ISO 6801 date parser + String dateRunProduced = (String)samReadGroupRecord.getAttribute(SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG); + if (dateRunProduced != null) { + try { + Date date = dateParser.parse(dateRunProduced); + samReadGroupRecord.setAttribute(SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG, date); + } catch (ParseException e) { + throw new SAMFormatException(SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG + " cannot be parsed as a date: " + + dateRunProduced, e); + } + } +*/ + + readGroups.add(samReadGroupRecord); + } + + private void parseSQLine(final ParsedHeaderLine parsedHeaderLine) { + assert(HeaderRecordType.SQ.equals(parsedHeaderLine.getHeaderRecordType())); + parsedHeaderLine.requireTag(SAMSequenceRecord.SEQUENCE_NAME_TAG); + parsedHeaderLine.requireTag(SAMSequenceRecord.SEQUENCE_LENGTH_TAG); + final SAMSequenceRecord samSequenceRecord = new SAMSequenceRecord(parsedHeaderLine.removeValue(SAMSequenceRecord.SEQUENCE_NAME_TAG)); + samSequenceRecord.setSequenceLength(Integer.parseInt(parsedHeaderLine.removeValue(SAMSequenceRecord.SEQUENCE_LENGTH_TAG))); + for (final Map.Entry entry : parsedHeaderLine.mKeyValuePairs.entrySet()) { + samSequenceRecord.setAttribute(entry.getKey(), entry.getValue()); + } + sequences.add(samSequenceRecord); + } + + 
private void parseHDLine(final ParsedHeaderLine parsedHeaderLine) { + assert(HeaderRecordType.HD.equals(parsedHeaderLine.getHeaderRecordType())); + parsedHeaderLine.requireTag(SAMFileHeader.VERSION_TAG); + for (final Map.Entry entry : parsedHeaderLine.mKeyValuePairs.entrySet()) { + mFileHeader.setAttribute(entry.getKey(), entry.getValue()); + } + } + + private RuntimeException reportErrorParsingLine(final String reason) { + String fileMessage = ""; + if (mFile != null) { + fileMessage = "File " + mFile + "; "; + } + return new SAMFormatException("Error parsing text SAM file. " + reason + "; " + fileMessage + + "Line " + mReader.getLineNumber() + "\nLine: " + mCurrentLine); + } + + private enum HeaderRecordType { + HD, SQ, RG, PG + } + + private class ParsedHeaderLine { + private final HeaderRecordType mHeaderRecordType; + private final Map mKeyValuePairs = new HashMap(); + + ParsedHeaderLine(final String line) { + assert(line.startsWith(HEADER_LINE_START)); + final String[] fields = line.split(FIELD_SEPARATOR); + try { + mHeaderRecordType = HeaderRecordType.valueOf(fields[0].substring(1)); + } catch (IllegalArgumentException e) { + throw reportErrorParsingLine("Unrecognized header record type"); + } + for (int i = 1; i < fields.length; ++i) { + final String[] keyAndValue = fields[i].split(TAG_KEY_VALUE_SEPARATOR, 2); + if (keyAndValue.length != 2) { + throw reportErrorParsingLine("Problem parsing " + HEADER_LINE_START + mHeaderRecordType + + " key:value pair"); + } + mKeyValuePairs.put(keyAndValue[0], keyAndValue[1]); + } + } + + void requireTag(final String tag) { + if (!mKeyValuePairs.containsKey(tag)) { + throw reportErrorParsingLine(HEADER_LINE_START + mHeaderRecordType + " line missing " + tag + " tag"); + } + } + + public HeaderRecordType getHeaderRecordType() { + return mHeaderRecordType; + } + + boolean containsKey(final String key) { + return mKeyValuePairs.containsKey(key); + } + + String getValue(final String key) { + return mKeyValuePairs.get(key); + } 
+ + String removeValue(final String key) { + final String ret = mKeyValuePairs.get(key); + mKeyValuePairs.remove(key); + return ret; + } + + } + + /** + * After parsing the text header, this object has gobbled one line too many. Call this to get that line. + * @return the first non-header line, or null if there isn't one. + */ + public String getCurrentLine() { + return mCurrentLine; + } + + /** + * + * @param writer where to write the header text + * @param header object to be converted to text. + */ + public void encode(final Writer writer, final SAMFileHeader header) { + mFileHeader = header; + this.writer = new BufferedWriter(writer); + writeHDLine(); + for (final SAMSequenceRecord sequenceRecord: header.getSequences()) { + writeSQLine(sequenceRecord); + } + + for (final SAMReadGroupRecord readGroup : header.getReadGroups()) { + writeRGLine(readGroup); + } + for (final SAMProgramRecord programRecord : header.getProgramRecords()) { + writePGLine(programRecord); + } + try { + this.writer.flush(); + } catch (IOException e) { + throw new RuntimeIOException(e); + } + } + + private void println(final String s) { + try { + writer.append(s); + writer.append("\n"); + } catch (IOException e) { + throw new RuntimeIOException(e); + } + } + + private void writePGLine(SAMProgramRecord programRecord) { + if (programRecord == null) { + return; + } + final String[] fields = new String[2 + programRecord.getAttributes().size()]; + fields[0] = HEADER_LINE_START + HeaderRecordType.PG; + fields[1] = SAMProgramRecord.PROGRAM_GROUP_ID_TAG + TAG_KEY_VALUE_SEPARATOR + programRecord.getProgramGroupId(); + int i = 2; + for (final Map.Entry entry: programRecord.getAttributes()) { + fields[i++] = entry.getKey() + TAG_KEY_VALUE_SEPARATOR + entry.getValue(); + } + println(StringUtil.join(FIELD_SEPARATOR, fields)); + } + + private void writeRGLine(final SAMReadGroupRecord readGroup) { + final String[] fields = new String[2 + readGroup.getAttributes().size()]; + fields[0] = HEADER_LINE_START + 
HeaderRecordType.RG; + fields[1] = SAMReadGroupRecord.READ_GROUP_ID_TAG + TAG_KEY_VALUE_SEPARATOR + readGroup.getReadGroupId(); + int i = 2; + for (final Map.Entry entry: readGroup.getAttributes()) { + fields[i++] = entry.getKey() + TAG_KEY_VALUE_SEPARATOR + entry.getValue().toString(); + } + println(StringUtil.join(FIELD_SEPARATOR, fields)); + } + + private void writeHDLine() { + final String[] fields = new String[1 + mFileHeader.getAttributes().size()]; + fields[0] = HEADER_LINE_START + HeaderRecordType.HD; + int i = 1; + for (final Map.Entry entry: mFileHeader.getAttributes()) { + fields[i++] = entry.getKey() + TAG_KEY_VALUE_SEPARATOR + entry.getValue().toString(); + } + println(StringUtil.join(FIELD_SEPARATOR, fields)); + } + + private void writeSQLine(final SAMSequenceRecord sequenceRecord) { + final int numAttributes =sequenceRecord.getAttributes() != null ? sequenceRecord.getAttributes().size() : 0; + final String[] fields = new String[3 + numAttributes]; + fields[0] = HEADER_LINE_START + HeaderRecordType.SQ; + fields[1] = SAMSequenceRecord.SEQUENCE_NAME_TAG + TAG_KEY_VALUE_SEPARATOR + sequenceRecord.getSequenceName(); + fields[2] = SAMSequenceRecord.SEQUENCE_LENGTH_TAG + TAG_KEY_VALUE_SEPARATOR + Integer.toString(sequenceRecord.getSequenceLength()); + int i = 3; + if (sequenceRecord.getAttributes() != null) { + for (final Map.Entry entry: sequenceRecord.getAttributes()) { + fields[i++] = entry.getKey() + TAG_KEY_VALUE_SEPARATOR + entry.getValue().toString(); + } + } + println(StringUtil.join(FIELD_SEPARATOR, fields)); + } + +} diff --git a/lib/edu/mit/broad/sam/SAMTextReader.java b/lib/edu/mit/broad/sam/SAMTextReader.java new file mode 100644 index 0000000000..267f704616 --- /dev/null +++ b/lib/edu/mit/broad/sam/SAMTextReader.java @@ -0,0 +1,336 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. 
All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.sam; + + +import edu.mit.broad.sam.util.AsciiLineReader; +import edu.mit.broad.sam.util.CloseableIterator; +import edu.mit.broad.sam.util.StringUtil; + +import java.io.File; +import java.io.InputStream; +import java.util.Map; + +/** + * Internal class for reading SAM text files. + */ +class SAMTextReader + extends SAMFileReader.ReaderImplementation +{ + private static final int QNAME_COL = 0; + private static final int FLAG_COL = 1; + private static final int RNAME_COL = 2; + private static final int POS_COL = 3; + private static final int MAPQ_COL = 4; + private static final int CIGAR_COL = 5; + private static final int MRNM_COL = 6; + private static final int MPOS_COL = 7; + private static final int ISIZE_COL = 8; + private static final int SEQ_COL = 9; + private static final int QUAL_COL = 10; + + private static final int NUM_REQUIRED_FIELDS = 11; + + private AsciiLineReader mReader; + private SAMFileHeader mFileHeader = null; + private String mCurrentLine = null; + private RecordIterator mIterator = null; + private File mFile = null; + private final TextTagCodec tagCodec = new TextTagCodec(); + private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY; + + SAMTextReader(final InputStream stream) { + mReader = new AsciiLineReader(stream); + readHeader(); + } + + SAMTextReader(final InputStream stream, final File file) { + this(stream); + mFile = file; + } + + void close() { + if (mReader != null) { + try { + mReader.close(); + } finally { + mReader = null; + } + } + } + + SAMFileHeader getFileHeader() { + return mFileHeader; + } + + public SAMFileReader.ValidationStringency getValidationStringency() { + return validationStringency; + } + + public void 
setValidationStringency(final SAMFileReader.ValidationStringency lenientValidation) { + this.validationStringency = lenientValidation; + } + + CloseableIterator getIterator() { + if (mReader == null) { + throw new IllegalStateException("File reader is closed"); + } + if (mIterator != null) { + throw new IllegalStateException("Iteration in progress"); + } + mIterator = new RecordIterator(); + return mIterator; + } + + CloseableIterator query(final String sequence, final int start, final int end, final boolean contained) { + throw new UnsupportedOperationException("Cannot query SAM text files"); + } + + private void readHeader() { + final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec(); + mFileHeader = headerCodec.decode(mReader, mFile); + mCurrentLine = headerCodec.getCurrentLine(); + } + + private String advanceLine() { + mCurrentLine = mReader.readLine(); + return mCurrentLine; + } + + private String makeErrorString(final String reason) { + String fileMessage = ""; + if (mFile != null) { + fileMessage = "File " + mFile + "; "; + } + return "Error parsing text SAM file. 
" + reason + "; " + fileMessage + + "Line " + mReader.getLineNumber() + "\nLine: " + mCurrentLine; + } + + private RuntimeException reportFatalErrorParsingLine(final String reason) { + return new SAMFormatException(makeErrorString(reason)); + } + + private void reportErrorParsingLine(final String reason) { + final String errorMessage = makeErrorString(reason); + + if (validationStringency == SAMFileReader.ValidationStringency.STRICT) { + throw new SAMFormatException(errorMessage); + } else if (validationStringency == SAMFileReader.ValidationStringency.LENIENT) { + System.err.println("Ignoring SAM validation error due to lenient parsing:"); + System.err.println(errorMessage); + } + } + + private void reportErrorParsingLine(final Exception e) { + final String errorMessage = makeErrorString(e.getMessage()); + if (validationStringency == SAMFileReader.ValidationStringency.STRICT) { + throw new SAMFormatException(errorMessage); + } else if (validationStringency == SAMFileReader.ValidationStringency.LENIENT) { + System.err.println("Ignoring SAM validation error due to lenient parsing:"); + System.err.println(errorMessage); + } + } + + private class RecordIterator implements CloseableIterator { + + /** + * Allocate this once rather than for every line as a performance optimization. + * The size is arbitrary -- merely large enough to handle the maximum number + * of fields we might expect from a reasonable SAM file. 
+ */ + private final String[] mFields = new String[10000]; + + private SAMRecord mCurrentRecord; + + private RecordIterator() { + assert(mReader != null); + if (mCurrentLine != null) { + parseLine(); + } + + } + + public void close() { + mCurrentRecord = null; + SAMTextReader.this.close(); + } + + public boolean hasNext() { + return mCurrentRecord != null; + } + + public SAMRecord next() { + if (!hasNext()) { + throw new IllegalStateException("Cannot call next() on exhausted iterator"); + } + final SAMRecord ret = mCurrentRecord; + mCurrentRecord = null; + advanceLine(); + if (mCurrentLine != null) { + parseLine(); + } + return ret; + } + + public void remove() { + throw new UnsupportedOperationException("Not supported: remove"); + } + + int parseInt(final String s, final String fieldName) { + final int ret; + try { + ret = Integer.parseInt(s); + } catch (NumberFormatException e) { + throw reportFatalErrorParsingLine("Non-numeric value in " + fieldName + " column"); + } + return ret; + } + + void validateReferenceName(final String rname, final String fieldName) { + if (fieldName.equals("MRNM") && rname.equals("=")) { + return; + } + if (getFileHeader().getSequences().size() != 0) { + if (getFileHeader().getSequence(rname) == null) { + reportErrorParsingLine(fieldName + " '" + rname + "' not found in any SQ record"); + } + } + } + + private void parseLine() { + final int numFields = StringUtil.split(mCurrentLine, mFields, '\t'); + if (numFields < NUM_REQUIRED_FIELDS) { + reportErrorParsingLine("Not enough fields"); + } + if (numFields == mFields.length) { + reportErrorParsingLine("Too many fields in SAM text record."); + } + for (int i = 0; i < numFields; ++i) { + if (mFields[i].length() == 0) { + reportErrorParsingLine("Empty field at position " + i + " (zero-based)"); + } + } + mCurrentRecord = new SAMRecord(); + mCurrentRecord.setReadName(mFields[QNAME_COL]); + + final int flags = parseInt(mFields[FLAG_COL], "FLAG"); + mCurrentRecord.setFlags(flags); + + final 
String rname = mFields[RNAME_COL]; + if (!rname.equals("*")) { + validateReferenceName(rname, "RNAME"); + mCurrentRecord.setReferenceName(rname); + } else if (!mCurrentRecord.getReadUnmappedFlag()) { + reportErrorParsingLine("RNAME is not specified but flags indicate mapped"); + } + + final int pos = parseInt(mFields[POS_COL], "POS"); + final int mapq = parseInt(mFields[MAPQ_COL], "MAPQ"); + final String cigar = mFields[CIGAR_COL]; + if (!SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(mCurrentRecord.getReferenceName())) { + if (pos == 0) { + reportErrorParsingLine("POS must be non-zero if RNAME is specified"); + } + if (!mCurrentRecord.getReadUnmappedFlag() && cigar.equals("*")) { + reportErrorParsingLine("CIGAR must not be '*' if RNAME is specified"); + } + } else { + if (pos != 0) { + reportErrorParsingLine("POS must be zero if RNAME is not specified"); + } + if (mapq != 0) { + reportErrorParsingLine("MAPQ must be zero if RNAME is not specified"); + } + if (!cigar.equals("*")) { + reportErrorParsingLine("CIGAR must be '*' if RNAME is not specified"); + } + } + mCurrentRecord.setAlignmentStart(pos); + mCurrentRecord.setMappingQuality(mapq); + mCurrentRecord.setCigarString(cigar); + + final String mateRName = mFields[MRNM_COL]; + if (mateRName.equals("*")) { + if (mCurrentRecord.getReadPairedFlag() && !mCurrentRecord.getMateUnmappedFlag()) { + reportErrorParsingLine("MRNM not specified but flags indicate mate mapped"); + } + } + else { + if (!mCurrentRecord.getReadPairedFlag()) { + reportErrorParsingLine("MRNM specified but flags indicate unpaired"); + } + if (mCurrentRecord.getMateUnmappedFlag()) { + reportErrorParsingLine("MRNM specified but flags indicate mate unmapped"); + } + + validateReferenceName(mateRName, "MRNM"); + if (mateRName.equals("=")) { + if (mCurrentRecord.getReferenceName() == null) { + reportErrorParsingLine("MRNM is '=', but RNAME is not set"); + } + mCurrentRecord.setMateReferenceName(mCurrentRecord.getReferenceName()); + } else { + 
mCurrentRecord.setMateReferenceName(mateRName); + } + } + + final int matePos = parseInt(mFields[MPOS_COL], "MPOS"); + final int isize = parseInt(mFields[ISIZE_COL], "ISIZE"); + if (!mCurrentRecord.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) { + if (matePos == 0) { + reportErrorParsingLine("MPOS must be non-zero if MRNM is specified"); + } + if (isize == 0 && mCurrentRecord.getReferenceName().equals(mCurrentRecord.getMateReferenceName())) { + reportErrorParsingLine("ISIZE must be non-zero if RNAME == MRNM"); + } + } else { + if (matePos != 0) { + reportErrorParsingLine("MPOS must be zero if MRNM is not specified"); + } + if (isize != 0) { + reportErrorParsingLine("ISIZE must be zero if MRNM is not specified"); + } + } + mCurrentRecord.setMateAlignmentStart(matePos); + mCurrentRecord.setInferredInsertSize(isize); + if (!mFields[SEQ_COL].equals("*")) { + mCurrentRecord.setReadString(mFields[SEQ_COL]); + } + if (!mFields[QUAL_COL].equals("*")) { + if (mCurrentRecord.getReadString() == null) { + reportErrorParsingLine("QUAL should not be specified if SEQ is not specified"); + } + if (mCurrentRecord.getReadString().length() != mFields[QUAL_COL].length()) { + reportErrorParsingLine("length(QUAL) != length(SEQ)"); + } + mCurrentRecord.setBaseQualityString(mFields[QUAL_COL]); + } + + for (int i = NUM_REQUIRED_FIELDS; i < numFields; ++i) { + parseTag(mFields[i]); + } + + } + + private void parseTag(final String tag) { + Map.Entry entry = null; + try { + entry = tagCodec.decode(tag); + } catch (SAMFormatException e) { + reportErrorParsingLine(e); + } + if (entry != null) { + mCurrentRecord.setAttribute(entry.getKey(), entry.getValue()); + } + } + } +} + diff --git a/lib/edu/mit/broad/sam/SAMTextWriter.java b/lib/edu/mit/broad/sam/SAMTextWriter.java new file mode 100644 index 0000000000..e3e8e65727 --- /dev/null +++ b/lib/edu/mit/broad/sam/SAMTextWriter.java @@ -0,0 +1,121 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This 
software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam; + +import edu.mit.broad.sam.util.AsciiWriter; +import edu.mit.broad.sam.util.RuntimeIOException; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.Writer; +import java.util.Map; + +class SAMTextWriter extends SAMFileWriterImpl { + private static final String FIELD_SEPARATOR = "\t"; + + private final Writer out; + private final File file; + private final TextTagCodec tagCodec = new TextTagCodec(); + + SAMTextWriter(final File file) { + try { + this.file = file; + this.out = new AsciiWriter(new FileOutputStream(file)); + } catch (IOException e) { + throw new RuntimeIOException(e); + } + } + + /** + * Writes the record to disk. Sort order has been taken care of by the time + * this method is called. + * + * @param alignment + */ + protected void writeAlignment(final SAMRecord alignment) { + try { + out.write(alignment.getReadName()); + out.write(FIELD_SEPARATOR); + out.write(Integer.toString(alignment.getFlags())); + out.write(FIELD_SEPARATOR); + out.write(alignment.getReferenceName()); + out.write(FIELD_SEPARATOR); + out.write(Integer.toString(alignment.getAlignmentStart())); + out.write(FIELD_SEPARATOR); + out.write(Integer.toString(alignment.getMappingQuality())); + out.write(FIELD_SEPARATOR); + out.write(alignment.getCigarString()); + out.write(FIELD_SEPARATOR); + + // I think == is OK here. 
If not, it isn't an error, just less efficient storage + if (alignment.getReferenceName() == alignment.getMateReferenceName() && + SAMRecord.NO_ALIGNMENT_REFERENCE_NAME != alignment.getReferenceName()) { + out.write("="); + } else { + out.write(alignment.getMateReferenceName()); + } + out.write(FIELD_SEPARATOR); + out.write(Integer.toString(alignment.getMateAlignmentStart())); + out.write(FIELD_SEPARATOR); + out.write(Integer.toString(alignment.getInferredInsertSize())); + out.write(FIELD_SEPARATOR); + out.write(alignment.getReadString()); + out.write(FIELD_SEPARATOR); + out.write(alignment.getBaseQualityString()); + if (alignment.getAttributes() != null) { + for (final Map.Entry attribute : alignment.getAttributes()) { + out.write(FIELD_SEPARATOR); + out.write(tagCodec.encode(attribute.getKey(), attribute.getValue())); + } + } + out.write("\n"); + + } catch (IOException e) { + throw new RuntimeIOException(e); + } + } + + /** + * Write the header to disk. Header object is available via getHeader(). + * + * @param textHeader for convenience if the implementation needs it. + */ + protected void writeHeader(final String textHeader) { + try { + out.write(textHeader); + } catch (IOException e) { + throw new RuntimeIOException(e); + } + } + + /** + * Do any required flushing here. + */ + protected void finish() { + try { + out.close(); + } catch (IOException e) { + throw new RuntimeIOException(e); + } + } + + /** + * For producing error messages. + * + * @return Output filename, or null if there isn't one. 
+ */ + protected String getFilename() { + if (file == null) { + return null; + } + return file.getAbsolutePath(); + } +} diff --git a/lib/edu/mit/broad/sam/SAMTools.java b/lib/edu/mit/broad/sam/SAMTools.java new file mode 100644 index 0000000000..0a320ba84a --- /dev/null +++ b/lib/edu/mit/broad/sam/SAMTools.java @@ -0,0 +1,106 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.sam; + + +import edu.mit.broad.sam.util.CloseableIterator; +import java.io.*; + + +/** + * Command line utility for manipulating SAM/BAM files. + */ +public class SAMTools +{ + private String mCommand = null; + private File mInputFile = null; + + + public static void main(final String[] args) + throws Exception { + final int status = new SAMTools().run(args); + if (status != 0) { + System.exit(status); + } + } + + private SAMTools() { + } + + private void usage() { + System.out.println(); + System.out.println("SAMTools version 0.1.0"); + System.out.println("Tools for manipulating SAM/BAM files"); + System.out.println(); + System.out.println("Usage: SAMTools "); + System.out.println(); + System.out.println("Commands:"); + System.out.println(" help"); + System.out.println(" view "); + System.out.println(); + } + + private boolean parseArguments(final String[] args) { + if (args.length == 0) { + usage(); + return true; + } + final String command = args[0]; + final int argpos = 1; + final int argcount = args.length - argpos; + if (command.equals("help")) { + usage(); + return true; + } else if (command.equals("view")) { + if (argcount != 1) { + usage(); + return false; + } + mInputFile = new File(args[1]); + if 
(!mInputFile.exists()) { + System.out.println("Input file not found: " + mInputFile); + return false; + } + } else { + System.out.println("Unrecognized command: " + command); + System.out.println(); + usage(); + return false; + } + mCommand = command; + return true; + } + + private int run(final String[] args) + throws Exception { + if (!parseArguments(args)) { + return 1; + } + if (mCommand == null) { + return 0; + } + if (mCommand.equals("view")) { + return runView(); + } + return 1; + } + + private int runView() { + final SAMFileReader reader = new SAMFileReader(mInputFile); + final CloseableIterator iterator = reader.iterator(); + while (iterator.hasNext()) { + final SAMRecord record = iterator.next(); + System.out.println(record.format()); + } + iterator.close(); + return 0; + } +} diff --git a/lib/edu/mit/broad/sam/SAMUtils.java b/lib/edu/mit/broad/sam/SAMUtils.java new file mode 100644 index 0000000000..c17ca773cd --- /dev/null +++ b/lib/edu/mit/broad/sam/SAMUtils.java @@ -0,0 +1,269 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.sam; + + +/** + * Utilty methods. 
+ */ +final class SAMUtils +{ + private static final byte COMPRESSED_EQUAL_LOW = 0; + private static final byte COMPRESSED_A_LOW = 1; + private static final byte COMPRESSED_C_LOW = 2; + private static final byte COMPRESSED_G_LOW = 4; + private static final byte COMPRESSED_T_LOW = 8; + private static final byte COMPRESSED_N_LOW = 15; + private static final byte COMPRESSED_EQUAL_HIGH = COMPRESSED_EQUAL_LOW << 4; + private static final byte COMPRESSED_A_HIGH = COMPRESSED_A_LOW << 4; + private static final byte COMPRESSED_C_HIGH = COMPRESSED_C_LOW << 4; + private static final byte COMPRESSED_G_HIGH = COMPRESSED_G_LOW << 4; + private static final byte COMPRESSED_T_HIGH = (byte)(COMPRESSED_T_LOW << 4); + private static final byte COMPRESSED_N_HIGH = (byte)(COMPRESSED_N_LOW << 4); + + private SAMUtils() { + } + + static int unpackInt16(final byte[] buffer, final int offset) { + return ((buffer[offset] & 0xFF) | + ((buffer[offset+1] & 0xFF) << 8)); + } + + static int unpackInt32(final byte[] buffer, final int offset) { + return ((buffer[offset] & 0xFF) | + ((buffer[offset+1] & 0xFF) << 8) | + ((buffer[offset+2] & 0xFF) << 16) | + ((buffer[offset+3] & 0xFF) << 24)); + } + + /** + * Convert from a byte array containing =AaCcGgTtNn, to a byte array half as long, + * with =, A, C, G, T converted to 0, 1, 2, 4, 8, 15 + * @param readBases + * @return + */ + static byte[] bytesToCompressedBases(final byte[] readBases) { + final byte[] compressedBases = new byte[(readBases.length + 1)/2]; + int i; + for (i = 1; i < readBases.length; i+=2) { + compressedBases[i/2] = (byte)(charToCompressedBaseHigh(readBases[i-1]) | + charToCompressedBaseLow(readBases[i])); + } + // Last nybble + if (i == readBases.length) { + compressedBases[i/2] = charToCompressedBaseHigh((char)readBases[i-1]); + } + return compressedBases; + } + + static byte[] compressedBasesToBytes(final int length, final byte[] compressedBases, final int compressedOffset) { + final byte[] ret = new byte[length]; + int i; + for 
(i = 1; i < length; i+=2) { + ret[i-1] = compressedBaseToByteHigh(compressedBases[i/2 + compressedOffset]); + ret[i] = compressedBaseToByteLow(compressedBases[i/2 + compressedOffset]); + } + // Last nybble + if (i == length) { + ret[i-1] = compressedBaseToByteHigh(compressedBases[i/2 + compressedOffset]); + } + return ret; + } + + /** + * + * @param base One of =AaCcGgTtNn + * @return nybble-encoded equivalent + */ + private static byte charToCompressedBaseLow(final int base) { + switch (base) { + case '=': + return COMPRESSED_EQUAL_LOW; + case 'a': + case 'A': + return COMPRESSED_A_LOW; + case 'c': + case 'C': + return COMPRESSED_C_LOW; + case 'g': + case 'G': + return COMPRESSED_G_LOW; + case 't': + case 'T': + return COMPRESSED_T_LOW; + case 'n': + case 'N': + case '.': + return COMPRESSED_N_LOW; + default: + throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base); + } + } + + private static byte charToCompressedBaseHigh(final int base) { + switch (base) { + case '=': + return COMPRESSED_EQUAL_HIGH; + case 'a': + case 'A': + return COMPRESSED_A_HIGH; + case 'c': + case 'C': + return COMPRESSED_C_HIGH; + case 'g': + case 'G': + return COMPRESSED_G_HIGH; + case 't': + case 'T': + return COMPRESSED_T_HIGH; + case 'n': + case 'N': + case '.': + return COMPRESSED_N_HIGH; + default: + throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base); + } + } + + /** + * + * @param base One of COMPRESSED_* + * @return one of ACGTN= + */ + private static byte compressedBaseToByteLow(final int base) { + switch (base & 0xf) { + case COMPRESSED_EQUAL_LOW: + return '='; + case COMPRESSED_A_LOW: + return 'A'; + case COMPRESSED_C_LOW: + return 'C'; + case COMPRESSED_G_LOW: + return 'G'; + case COMPRESSED_T_LOW: + return 'T'; + case COMPRESSED_N_LOW: + return 'N'; + default: + throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base); + } + } + + private static byte compressedBaseToByteHigh(final 
int base) { + switch ((byte)(base & 0xf0)) { + case COMPRESSED_EQUAL_HIGH: + return '='; + case COMPRESSED_A_HIGH: + return 'A'; + case COMPRESSED_C_HIGH: + return 'C'; + case COMPRESSED_G_HIGH: + return 'G'; + case COMPRESSED_T_HIGH: + return 'T'; + case COMPRESSED_N_HIGH: + return 'N'; + default: + throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base); + } + } + + static String bytesToHexString(final byte[] data) { + final char[] chars = new char[2 * data.length]; + for (int i = 0; i < data.length; i++) { + final byte b = data[i]; + chars[2*i] = toHexDigit((b >> 4) & 0xF); + chars[2*i+1] = toHexDigit(b & 0xF); + } + return new String(chars); + } + + static byte[] hexStringToBytes(final String s) throws NumberFormatException { + if (s.length() % 2 != 0) { + throw new NumberFormatException("Hex representation of byte string does not have even number of hex chars: " + s); + } + final byte[] ret = new byte[s.length() / 2]; + for (int i = 0; i < ret.length; ++i) { + ret[i] = (byte) (fromHexDigit(s.charAt(i * 2)) << 4 + fromHexDigit(s.charAt(i * 2 + 1))); + } + return ret; + } + + static String phredToFastq(final byte[] data) { + if (data == null) { + return null; + } + return phredToFastq(data, 0, data.length); + } + + static String phredToFastq(final byte[] buffer, final int offset, final int length) { + final char[] chars = new char[length]; + for (int i = 0; i < length; i++) { + chars[i] = phredToFastq(buffer[offset+i] & 0xFF); + } + return new String(chars); + } + + static char phredToFastq(final int phredScore) { + if (phredScore < 0 || phredScore > 63) { + throw new IllegalArgumentException("Cannot encode phred score: " + phredScore); + } + return (char) (33 + phredScore); + } + + static byte[] fastqToPhred(final String fastq) { + if (fastq == null) { + return null; + } + final int length = fastq.length(); + final byte[] scores = new byte[length]; + for (int i = 0; i < length; i++) { + scores[i] = (byte) 
fastqToPhred(fastq.charAt(i)); + } + return scores; + } + + static int fastqToPhred(final char ch) { + if (ch < 33 || ch > 126) { + throw new IllegalArgumentException("Invalid fastq character: " + ch); + } + return (ch - 33); + } + + private static char toHexDigit(final int value) { + return (char) ((value < 10) ? ('0' + value) : ('A' + value - 10)); + } + + private static int fromHexDigit(final char c) throws NumberFormatException { + final int ret = Character.digit(c, 16); + if (ret == -1) { + throw new NumberFormatException("Not a valid hex digit: " + c); + } + return ret; + } + + /** + * calculate the bin given an alignment in [beg,end) + * Copied from SAM spec. + */ + static int reg2bin(final int beg, int end) + { + + --end; + + if (beg>>14 == end>>14) return ((1<<15)-1)/7 + (beg>>14); + if (beg>>17 == end>>17) return ((1<<12)-1)/7 + (beg>>17); + if (beg>>20 == end>>20) return ((1<<9)-1)/7 + (beg>>20); + if (beg>>23 == end>>23) return ((1<<6)-1)/7 + (beg>>23); + if (beg>>26 == end>>26) return ((1<<3)-1)/7 + (beg>>26); + return 0; + } +} diff --git a/lib/edu/mit/broad/sam/TextCigarCodec.java b/lib/edu/mit/broad/sam/TextCigarCodec.java new file mode 100755 index 0000000000..a1abc2620b --- /dev/null +++ b/lib/edu/mit/broad/sam/TextCigarCodec.java @@ -0,0 +1,78 @@ +/* + The Broad Institute + SOFTWARE COPYRIGHT NOTICE AGREEMENT + This software and its documentation are copyright 2009 by the + Broad Institute/Massachusetts Institute of Technology. All rights are + reserved. + + This software is supplied without any warranty or guaranteed support + whatsoever. Neither the Broad Institute nor MIT can be responsible for its + use, misuse, or functionality. 
+*/ +package edu.mit.broad.sam; + +/** + * Convert between string and internal CIGAR representations + */ +public class TextCigarCodec +{ + private static final byte ZERO_BYTE = "0".getBytes()[0]; + private static final byte NINE_BYTE = "9".getBytes()[0]; + + private static final TextCigarCodec singleton = new TextCigarCodec(); + + /** + * It is not necssary to get the singleton but it is preferrable to use the same one + * over and over vs. creating a new object for each BAMRecord. + */ + static TextCigarCodec getSingleton() { + return singleton; + } + + + /** + * Convert from interal CIGAR representation to String + */ + String encode(final Cigar cigar) { + if (cigar.numCigarElements() == 0) { + return SAMRecord.NO_ALIGNMENT_CIGAR; + } + final StringBuilder ret = new StringBuilder(); + for (final CigarElement cigarElement : cigar.getCigarElements()) { + ret.append(cigarElement.getLength()); + ret.append(cigarElement.getOperator()); + } + return ret.toString(); + } + + Cigar decode(final String textCigar) { + if (SAMRecord.NO_ALIGNMENT_CIGAR.equals(textCigar)) { + return new Cigar(); + } + final Cigar ret = new Cigar(); + final byte[] cigarBytes = textCigar.getBytes(); + for (int i = 0; i < cigarBytes.length; ++i) { + if (!isDigit(cigarBytes[i])) { + throw new IllegalArgumentException("Malformed CIGAR string: " + textCigar); + } + int length = (cigarBytes[i] - ZERO_BYTE); + for (++i; isDigit(cigarBytes[i]); ++i) { + length = (length * 10) + cigarBytes[i] - ZERO_BYTE; + } + final CigarOperator operator = CigarOperator.characterToEnum(cigarBytes[i]); + ret.add(new CigarElement(length, operator)); + } + return ret; + } + + private boolean isDigit(final byte c) { + return c >= ZERO_BYTE && c <= NINE_BYTE; + } + + + +} + +/******************************************************************/ +/**************************[END OF TextCigarCodec.java]*************************/ +/******************************************************************/ diff --git 
a/lib/edu/mit/broad/sam/TextTagCodec.java b/lib/edu/mit/broad/sam/TextTagCodec.java new file mode 100644 index 0000000000..69fd53b1f7 --- /dev/null +++ b/lib/edu/mit/broad/sam/TextTagCodec.java @@ -0,0 +1,96 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam; + +import edu.mit.broad.sam.util.StringUtil; + +import java.util.Map; + +class TextTagCodec { + private static final int NUM_TAG_FIELDS = 3; + + /** + * This is really a local variable of decode(), but allocated here to reduce allocations. + */ + private final String[] fields = new String[NUM_TAG_FIELDS]; + + String encode(final String key, Object value) { + final StringBuilder sb = new StringBuilder(key); + sb.append(':'); + char tagType = BinaryTagCodec.getTagValueType(value); + switch (tagType) { + case 'c': + case 'C': + case 's': + case 'S': + case 'I': + tagType = 'i'; + } + if (tagType == 'H') { + value = SAMUtils.bytesToHexString((byte[])value); + } + sb.append(tagType); + sb.append(':'); + sb.append(value.toString()); + return sb.toString(); + } + + Map.Entry decode(final String tag) { + final int numFields = StringUtil.split(tag, fields, ':'); + if (numFields != TextTagCodec.NUM_TAG_FIELDS) { + throw new SAMFormatException("Not enough fields in tag '" + tag + "'"); + } + final String key = fields[0]; + final String type = fields[1]; + final String stringVal = fields[2]; + final Object val; + if (type.equals("Z")) { + val = stringVal; + } else if (type.equals("A")) { + if (stringVal.length() != 1) { + throw new SAMFormatException("Tag of type A should have a single-character value"); + } + val = stringVal.charAt(0); + } else if 
(type.equals("i")) { + try { + val = new Integer(stringVal); + } catch (NumberFormatException e) { + throw new SAMFormatException("Tag of type i should have signed decimal value"); + } + } else if (type.equals("f")) { + try { + val = new Float(stringVal); + } catch (NumberFormatException e) { + throw new SAMFormatException("Tag of type f should have single-precision floating point value"); + } + } else if (type.equals("H")) { + try { + val = SAMUtils.hexStringToBytes(stringVal); + } catch (NumberFormatException e) { + throw new SAMFormatException("Tag of type H should have valid hex string with even number of digits"); + } + } else { + throw new SAMFormatException("Unrecognized tag type: " + type); + } + return new Map.Entry() { + public String getKey() { + return key; + } + + public Object getValue() { + return val; + } + + public Object setValue(final Object o) { + throw new UnsupportedOperationException(); + } + }; + } +} diff --git a/lib/edu/mit/broad/sam/apps/AccumulateCoverage.java b/lib/edu/mit/broad/sam/apps/AccumulateCoverage.java new file mode 100644 index 0000000000..99a3917fff --- /dev/null +++ b/lib/edu/mit/broad/sam/apps/AccumulateCoverage.java @@ -0,0 +1,132 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.sam.apps; + +import edu.mit.broad.sam.SAMFileReader; +import edu.mit.broad.sam.SAMLocusIterator; +import edu.mit.broad.sam.SAMFileHeader; + +import java.io.File; +import java.io.IOException; +import java.io.Writer; +import java.io.FileWriter; +import java.util.List; + +public class AccumulateCoverage { + + public static void main(final String[] argv) throws Exception { + if (argv.length != 1) { + System.err.println("ERROR: Incorrect number of arguments"); + usage(); + System.exit(1); + } + final AccumulateCoverage ac = new AccumulateCoverage(argv[0]); + } + + private static void usage() { + System.err.println("USAGE: AccumulateCoverage "); + } + + + + public AccumulateCoverage(final String samFile) throws IOException { + final long startTime = System.currentTimeMillis(); + final Writer writer = new FileWriter("/Users/kcibul/projects/sam/acccov.out"); + + final SAMFileReader samReader = new SAMFileReader(new File(samFile)); + + // ensure the file is sorted +//TODO: is the SAM reader implementation broken? + if (samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) { + System.out.println("SAM Files must be coordinate-sorted, this is " + samReader.getFileHeader().getSortOrder()); + System.exit(1); + } + + final SAMLocusIterator sli = new SAMLocusIterator(samReader.iterator()); + + for (final SAMLocusIterator.LocusInfo li : sli) { + + String chrom = li.getChrom().substring(3); + if (chrom.equals("M")) { chrom = "0"; } + if (chrom.equals("X")) { chrom = "23"; } + if (chrom.equals("Y")) { chrom = "24"; } + + final StringBuilder sb = new StringBuilder(); + sb.append(chrom) + .append(":") + .append(li.getPosition()-1) + .append(" ") + .append(li.getBases().size()) + .append("\n"); + + writer.write(sb.toString()); + //System.out.print(sb); + +// // TODO: zero based or 1 based? 
+// System.out.print(li.chrom + "\t" + (li.position-1) + "\t" + li.bases.size() + "\t"); +// +// // TODO: print and capitalize by strand (like pileup) +// System.out.print(bytesToString(li.bases)); +// System.out.print("\t"); +// System.out.print(phredToFastq(li.qualities)); +// System.out.print("\n"); + } + + + writer.flush(); + writer.close(); + final long elapsed = System.currentTimeMillis() - startTime; + + System.out.println("Completed in " + elapsed + "ms"); + } + + + static String bytesToString(final List data) { + if (data == null || data.size() == 0) { + return null; + } + + final char[] chars = new char[data.size()]; + for (int i = 0; i < data.size(); i++) { + chars[i] = (char) (data.get(i) & 0xFF); + } + return new String(chars); + } + + + static String phredToFastq(final List data) { + final byte[] arrData = new byte[data.size()]; + for(int i=0; i< data.size(); i++) { arrData[i] = data.get(i); } + return phredToFastq(arrData); + } + + static String phredToFastq(final byte[] data) { + if (data == null) { + return null; + } + return phredToFastq(data, 0, data.length); + } + + static String phredToFastq(final byte[] buffer, final int offset, final int length) { + final char[] chars = new char[length]; + for (int i = 0; i < length; i++) { + chars[i] = phredToFastq(buffer[offset+i] & 0xFF); + } + return new String(chars); + } + + static char phredToFastq(final int phredScore) { + if (phredScore < 0 || phredScore > 63) { + throw new IllegalArgumentException("Cannot encode phred score: " + phredScore); + } + return (char) (33 + phredScore); + } + +} \ No newline at end of file diff --git a/lib/edu/mit/broad/sam/apps/CompareSAMs.java b/lib/edu/mit/broad/sam/apps/CompareSAMs.java new file mode 100644 index 0000000000..8b0ca1b572 --- /dev/null +++ b/lib/edu/mit/broad/sam/apps/CompareSAMs.java @@ -0,0 +1,486 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad 
Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam.apps; + +import edu.mit.broad.sam.*; + +import java.io.File; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class CompareSAMs { + public static void main(final String[] argv) { + if (argv.length != 2) { + System.err.println("ERROR: Incorrect number of arguments"); + usage(); + System.exit(1); + } + final CompareSAMs compareSAMs = new CompareSAMs(argv); + if (!compareSAMs.areEqual()) { + System.exit(1); + } + } + + private static void usage() { + System.err.println("USAGE: CompareSAMS "); + } + + private final String[] samFiles; + private final SAMFileReader[] samReaders = new SAMFileReader[2]; + private boolean sequenceDictionariesDiffer; + private int mappingsMatch = 0; + private int unmappedBoth = 0; + private int unmappedLeft = 0; + private int unmappedRight = 0; + private int mappingsDiffer = 0; + private int missingLeft = 0; + private int missingRight = 0; + private boolean areEqual; + + public CompareSAMs(final String[] samFiles) { + this.samFiles = samFiles; + for (int i = 0; i < samFiles.length; ++i) { + samReaders[i] = new SAMFileReader(new File(samFiles[i])); + } + areEqual = compareHeaders(); + areEqual = compareAlignments() && areEqual; + printReport(); + if (!areEqual) { + System.out.println("SAM files differ."); + } else { + System.out.println("SAM files match."); + } + } + + private void printReport() { + System.out.println("Match\t" + mappingsMatch); + System.out.println("Differ\t" + mappingsDiffer); + System.out.println("Unmapped_both\t" + unmappedBoth); + System.out.println("Unmapped_left\t" + unmappedLeft); + System.out.println("Unmapped_right\t" + unmappedRight); + System.out.println("Missing_left\t" + missingLeft); + 
System.out.println("Missing_right\t" + missingRight); + } + + private boolean compareAlignments() { + if (!compareValues(samReaders[0].getFileHeader().getSortOrder(), samReaders[1].getFileHeader().getSortOrder(), + "Sort Order")) { + System.out.println("Cannot compare alignments if sort orders differ."); + return false; + } + switch (samReaders[0].getFileHeader().getSortOrder()) { + case coordinate: + if (sequenceDictionariesDiffer) { + System.out.println("Cannot compare coordinate-sorted SAM files because sequence dictionaries differ."); + return false; + } + return compareCoordinateSortedAlignments(); + case queryname: + return compareQueryNameSortedAlignments(); + case unsorted: + return compareUnsortedAlignments(); + default: + // unreachable + assert(false); + return false; + } + } + + + private boolean compareCoordinateSortedAlignments() { + final NotPrimarySkippingIterator itLeft = + new NotPrimarySkippingIterator(samReaders[0].iterator()); + final NotPrimarySkippingIterator itRight = + new NotPrimarySkippingIterator(samReaders[1].iterator()); + + // Save any reads which haven't been matched during in-order scan. + final Map leftUnmatched = new HashMap(); + final Map rightUnmatched = new HashMap(); + + boolean ret = true; + + while (itLeft.hasCurrent()) { + if (!itRight.hasCurrent()) { + // Exhausted right side. See if any of the remaining left reads match + // any of the saved right reads. + for( ; itLeft.hasCurrent(); itLeft.advance()) { + final SAMRecord left = itLeft.getCurrent(); + final SAMRecord right = rightUnmatched.remove(left.getReadName()); + if (right == null) { + ++missingRight; + } else { + tallyAlignmentRecords(left, right); + } + } + break; + } + // Don't assume stability of order beyond the coordinate. Therefore grab all the + // reads from the left that has the same coordinate. 
+ final SAMRecord left = itLeft.getCurrent(); + final Map leftCurrentCoordinate = new HashMap(); + leftCurrentCoordinate.put(left.getReadName(), left); + while (itLeft.advance()) { + final SAMRecord nextLeft = itLeft.getCurrent(); + if (compareAlignmentCoordinates(left, nextLeft) == 0) { + leftCurrentCoordinate.put(nextLeft.getReadName(), nextLeft); + } else { + break; + } + } + // Advance the right iterator until it is >= the left reads that have just been grabbed + while (itRight.hasCurrent() && compareAlignmentCoordinates(left, itRight.getCurrent()) > 0) { + final SAMRecord right = itRight.getCurrent(); + rightUnmatched.put(right.getReadName(), right); + itRight.advance(); + } + // For each right read that has the same coordinate as the current left reads, + // see if there is a matching left read. If so, process and discard. If not, + // save the right read for later. + for (;itRight.hasCurrent() && compareAlignmentCoordinates(left, itRight.getCurrent()) == 0; itRight.advance()) { + final SAMRecord right = itRight.getCurrent(); + final SAMRecord matchingLeft = leftCurrentCoordinate.remove(right.getReadName()); + if (matchingLeft != null) { + ret = tallyAlignmentRecords(matchingLeft, right) && ret; + } else { + rightUnmatched.put(right.getReadName(), right); + } + } + + // Anything left in leftCurrentCoordinate has not been matched + for (final SAMRecord samRecord : leftCurrentCoordinate.values()) { + leftUnmatched.put(samRecord.getReadName(), samRecord); + } + } + // The left iterator has been exhausted. See if any of the remaining right reads + // match any of the saved left reads. + for( ; itRight.hasCurrent(); itRight.advance()) { + final SAMRecord right = itRight.getCurrent(); + final SAMRecord left = leftUnmatched.remove(right.getReadName()); + if (left != null) { + tallyAlignmentRecords(left, right); + } else { + ++missingLeft; + } + } + + // Look up reads that were unmatched from left, and see if they are in rightUnmatched. 
+ // If found, remove from rightUnmatched and tally. + for (final Map.Entry leftEntry : leftUnmatched.entrySet()) { + final String readName = leftEntry.getKey(); + final SAMRecord left = leftEntry.getValue(); + final SAMRecord right = rightUnmatched.remove(readName); + if (right == null) { + ++missingRight; + continue; + } + tallyAlignmentRecords(left, right); + } + + // Any elements remaining in rightUnmatched are guaranteed not to be in leftUnmatched. + missingLeft += rightUnmatched.size(); + + if (ret) { + if (missingLeft > 0 || missingRight > 0 || mappingsDiffer > 0 || unmappedLeft > 0 || unmappedRight > 0) { + ret = false; + } + } + return ret; + } + + private int compareAlignmentCoordinates(final SAMRecord left, final SAMRecord right) { + final String leftReferenceName = left.getReferenceName(); + final String rightReferenceName = right.getReferenceName(); + if (leftReferenceName == null && rightReferenceName == null) { + return 0; + } else if (leftReferenceName == null) { + return 1; + } else if (rightReferenceName == null) { + return -1; + } + final int leftReferenceIndex = samReaders[0].getFileHeader().getSequenceIndex(leftReferenceName); + final int rightReferenceIndex = samReaders[0].getFileHeader().getSequenceIndex(rightReferenceName); + assert(leftReferenceIndex >= 0); + assert(rightReferenceIndex >= 0); + if (leftReferenceIndex != rightReferenceIndex) { + return leftReferenceIndex - rightReferenceIndex; + } + return left.getAlignmentStart() - right.getAlignmentStart(); + } + + private boolean compareQueryNameSortedAlignments() { + final NotPrimarySkippingIterator it1 = new NotPrimarySkippingIterator(samReaders[0].iterator()); + final NotPrimarySkippingIterator it2 = new NotPrimarySkippingIterator(samReaders[1].iterator()); + + boolean ret = true; + while (it1.hasCurrent()) { + if (!it2.hasCurrent()) { + missingRight += countRemaining(it1); + return false; + } + final int cmp = it1.getCurrent().getReadName().compareTo(it2.getCurrent().getReadName()); + 
if (cmp < 0) { + ++missingRight; + it1.advance(); + ret = false; + } else if (cmp > 0) { + ++missingLeft; + it2.advance(); + ret = false; + } else { + if (!tallyAlignmentRecords(it1.getCurrent(), it2.getCurrent())) { + ret = false; + } + it1.advance(); + it2.advance(); + } + } + if (it2.hasCurrent()) { + missingLeft += countRemaining(it2); + return false; + } + return ret; + } + + private boolean compareUnsortedAlignments() { + final NotPrimarySkippingIterator it1 = new NotPrimarySkippingIterator(samReaders[0].iterator()); + final NotPrimarySkippingIterator it2 = new NotPrimarySkippingIterator(samReaders[1].iterator()); + boolean ret = true; + for (; it1.hasCurrent(); it1.advance(), it2.advance()) { + if (!it2.hasCurrent()) { + missingRight += countRemaining(it1); + return false; + } + final SAMRecord s1 = it1.getCurrent(); + final SAMRecord s2 = it2.getCurrent(); + if (!compareValues(s1.getReadName(), s2.getReadName(), "Read names")) { + System.out.println("Read names cease agreeing in unsorted SAM files . 
Comparison aborting."); + } + ret = tallyAlignmentRecords(s1, s2) && ret; + } + + if (it2.hasCurrent()) { + missingLeft += countRemaining(it2); + return false; + } + return ret; + } + + private int countRemaining(final NotPrimarySkippingIterator it) { + int i; + for (i = 0; it.hasCurrent(); ++i) { + it.advance(); + } + return i; + } + + private boolean tallyAlignmentRecords(final SAMRecord s1, final SAMRecord s2) { + assert (s1.getReadName().equals(s2.getReadName())); + if (s1.getReadUnmappedFlag() && s2.getReadUnmappedFlag()) { + ++unmappedBoth; + return true; + } + if (s1.getReadUnmappedFlag()) { + ++unmappedLeft; + return false; + } + if (s2.getReadUnmappedFlag()) { + ++unmappedRight; + return false; + } + final boolean ret = (s1.getReferenceName().equals(s2.getReferenceName()) && + s1.getAlignmentStart() == s2.getAlignmentStart() && + s1.getReadNegativeStrandFlag() == s1.getReadNegativeStrandFlag()); + if (!ret) { + ++mappingsDiffer; + } else { + ++mappingsMatch; + } + return ret; + } + + + private boolean compareHeaders() { + final SAMFileHeader h1 = samReaders[0].getFileHeader(); + final SAMFileHeader h2 = samReaders[1].getFileHeader(); + boolean ret = compareValues(h1.getVersion(), h2.getVersion(), "File format version"); + ret = compareValues(h1.getCreator(), h2.getCreator(), "File creator") && ret; + ret = compareValues(h1.getAttribute("SO"), h2.getAttribute("SO"), "Sort order") && ret; + if (!compareSequenceDictionaries(h1, h2)) { + ret = false; + sequenceDictionariesDiffer = true; + } + ret = compareReadGroups(h1, h2) && ret; + ret = compareProgramRecords(h1, h2) && ret; + return ret; + } + + private boolean compareProgramRecords(final SAMFileHeader h1, final SAMFileHeader h2) { + final List l1 = h1.getProgramRecords(); + final List l2 = h2.getProgramRecords(); + if (!compareValues(l1.size(), l2.size(), "Number of read groups")) { + return false; + } + boolean ret = true; + for (int i = 0; i < l1.size(); ++i) { + ret = compareProgramRecord(l1.get(i), 
l2.get(i)) && ret; + } + return ret; + } + + private boolean compareProgramRecord(final SAMProgramRecord programRecord1, final SAMProgramRecord programRecord2) { + if (programRecord1 == null && programRecord2 == null) { + return true; + } + if (programRecord1 == null) { + reportDifference("null", programRecord2.getProgramGroupId(), "Program Record"); + return false; + } + if (programRecord2 == null) { + reportDifference(programRecord1.getProgramGroupId(), "null", "Program Record"); + return false; + } + boolean ret = compareValues(programRecord1.getProgramGroupId(), programRecord2.getProgramGroupId(), + "Program Name"); + final String[] attributes = {"VN", "CL"}; + for (final String attribute: attributes) { + ret = compareValues(programRecord1.getAttribute(attribute), programRecord2.getAttribute(attribute), + attribute + " Program Record attribute") && ret; + } + return ret; + } + + private boolean compareReadGroups(final SAMFileHeader h1, final SAMFileHeader h2) { + final List l1 = h1.getReadGroups(); + final List l2 = h2.getReadGroups(); + if (!compareValues(l1.size(), l2.size(), "Number of read groups")) { + return false; + } + boolean ret = true; + for (int i = 0; i < l1.size(); ++i) { + ret = compareReadGroup(l1.get(i), l2.get(i)) && ret; + } + return ret; + } + + private boolean compareReadGroup(final SAMReadGroupRecord samReadGroupRecord1, final SAMReadGroupRecord samReadGroupRecord2) { + boolean ret = compareValues(samReadGroupRecord1.getReadGroupId(), samReadGroupRecord2.getReadGroupId(), + "Read Group ID"); + ret = compareValues(samReadGroupRecord1.getSample(), samReadGroupRecord2.getSample(), + "Sample for read group " + samReadGroupRecord1.getReadGroupId()) && ret; + ret = compareValues(samReadGroupRecord1.getLibrary(), samReadGroupRecord2.getLibrary(), + "Library for read group " + samReadGroupRecord1.getReadGroupId()) && ret; + final String[] attributes = {"DS", "PU", "PI", "CN", "DT", "PL"}; + for (final String attribute : attributes) { + ret = 
compareValues(samReadGroupRecord1.getAttribute(attribute), samReadGroupRecord2.getAttribute(attribute), + attribute + " for read group " + samReadGroupRecord1.getReadGroupId()) && ret; + } + return ret; + } + + private boolean compareSequenceDictionaries(final SAMFileHeader h1, final SAMFileHeader h2) { + final List s1 = h1.getSequences(); + final List s2 = h2.getSequences(); + if (s1.size() != s2.size()) { + reportDifference(s1.size(), s2.size(), "Length of sequence dictionaries"); + return false; + } + boolean ret = true; + for (int i = 0; i < s1.size(); ++i) { + ret = compareSequenceRecord(s1.get(i), s2.get(i), i+1) && ret; + } + return ret; + } + + private boolean compareSequenceRecord(final SAMSequenceRecord sequenceRecord1, final SAMSequenceRecord sequenceRecord2, final int which) { + if (!sequenceRecord1.getSequenceName().equals(sequenceRecord2.getSequenceName())) { + reportDifference(sequenceRecord1.getSequenceName(), sequenceRecord2.getSequenceName(), + "Name of sequence record " + which); + return false; + } + boolean ret = compareValues(sequenceRecord1.getSequenceLength(), sequenceRecord2.getSequenceLength(), "Length of sequence " + + sequenceRecord1.getSequenceName()); + ret = compareValues(sequenceRecord1.getSpecies(), sequenceRecord2.getSpecies(), "Species of sequence " + + sequenceRecord1.getSequenceName()) && ret; + ret = compareValues(sequenceRecord1.getAssembly(), sequenceRecord2.getAssembly(), "Assembly of sequence " + + sequenceRecord1.getSequenceName()) && ret; + ret = compareValues(sequenceRecord1.getAttribute("M5"), sequenceRecord2.getAttribute("M5"), "MD5 of sequence " + + sequenceRecord1.getSequenceName()) && ret; + ret = compareValues(sequenceRecord1.getAttribute("UR"), sequenceRecord2.getAttribute("UR"), "URI of sequence " + + sequenceRecord1.getSequenceName()) && ret; + return ret; + } + + private boolean compareValues(final T v1, final T v2, final String label) { + if (v1 == null) { + if (v2 == null) { + return true; + } + 
reportDifference(v1, v2, label); + return false; + } + if (v2 == null) { + reportDifference(v1, v2, label); + return false; + } + if (!v1.equals(v2)) { + reportDifference(v1, v2, label); + return false; + } + return true; + } + + private void reportDifference(final String s1, final String s2, final String label) { + System.out.println(label + " differs."); + System.out.println(samFiles[0] + ": " + s1); + System.out.println(samFiles[1] + ": " + s2); + } + private void reportDifference(Object o1, Object o2, final String label) { + if (o1 == null) { + o1 = "null"; + } + if (o2 == null) { + o2 = "null"; + } + reportDifference(o1.toString(), o2.toString(), label); + } + + public int getMappingsMatch() { + return mappingsMatch; + } + + public int getUnmappedBoth() { + return unmappedBoth; + } + + public int getUnmappedLeft() { + return unmappedLeft; + } + + public int getUnmappedRight() { + return unmappedRight; + } + + public int getMappingsDiffer() { + return mappingsDiffer; + } + + public int getMissingLeft() { + return missingLeft; + } + + public int getMissingRight() { + return missingRight; + } + + public boolean areEqual() { + return areEqual; + } +} diff --git a/lib/edu/mit/broad/sam/apps/allelecaller/AbstractAlleleCaller.java b/lib/edu/mit/broad/sam/apps/allelecaller/AbstractAlleleCaller.java new file mode 100644 index 0000000000..9265d539c4 --- /dev/null +++ b/lib/edu/mit/broad/sam/apps/allelecaller/AbstractAlleleCaller.java @@ -0,0 +1,166 @@ +package edu.mit.broad.sam.apps.allelecaller; + +import edu.mit.broad.sam.SAMLocusIterator; +import edu.mit.broad.arachne.FastbReader; + +import java.io.IOException; +import java.io.BufferedWriter; +import java.io.File; +import java.util.SortedSet; +import java.util.List; + +/** + * Base class for AlleleCallers. 
Handles efficient access to the reference, output of data to a + * standard file format, and application of priors + */ +public abstract class AbstractAlleleCaller { + // writer for output + private final BufferedWriter writer; + + // for providing access to reference data + // TODO: replace with standard mechanism when defined/implemented + private final FastbReader fastbReader; + private String cachedChromName; + private String cachedChrom; + + public AbstractAlleleCaller(final File fastbReference, final BufferedWriter writer) throws IOException { + this.writer = writer; + this.fastbReader = new FastbReader(fastbReference); + } + + + /** + * emit allele calls to the writer specified in the constructor + * + * @param li Locus to call + */ + public void callAlleles(final SAMLocusIterator.LocusInfo li) throws IOException { + + // TODO: replace with standard mechanism when defined/implemented (making use of SAM Header) + // make sure we have access to reference chrom information + if (!li.getChrom().equals(cachedChromName)) { + final int contig = translateChromToContig(li.getChrom()); + cachedChrom = null; // CRITICAL -- to allow for GC + cachedChrom = fastbReader.readSequence(contig); + cachedChromName = li.getChrom(); + } + + final char ref = cachedChrom.charAt(li.getPosition() - 1); + + + // delegate to the specific implementation + final SortedSet likelihoods = call(ref, li.getBasesAsString(), li.getQualities()); + + + final GenotypeTheory bestTheory = likelihoods.first(); + GenotypeTheory nextBestTheory = null; + GenotypeTheory refTheory = null; + final String refString = new String(new char[]{ref,ref}); + final DiploidGenotype refGenotype = DiploidGenotype.valueOf(refString); + + + final StringBuilder theoryString = new StringBuilder(); + int k=0; + for(final GenotypeTheory t : likelihoods) { + if (k == 1) { nextBestTheory = t; } + if (t.getGenotype() == refGenotype) { refTheory = t; } + + theoryString.append(t.getGenotype()) + .append(":") + 
.append(String.format("%.2f",t.getLikelihood())) + .append(" "); + k++; + } + + final double btnb = bestTheory.getLikelihood() - nextBestTheory.getLikelihood(); + final double btr = bestTheory.getLikelihood() - refTheory.getLikelihood(); + + final DiploidGenotype gt = likelihoods.first().getGenotype(); + + final String type; + if (!gt.isHet() && gt.getAllele1() == ref) { + type = "homozygous"; + } else if (!gt.isHet() && gt.getAllele1() != ref) { + type = "homozygous-SNP"; + } else { + type = "heterozygous-SNP"; + } + + final String bases = li.getBasesAsString(); + int a = 0,c = 0,g = 0,t = 0; + for(int i=0; i call(char ref, String bases, List quals); + + + /** + * Apply a general population-based prior to the likelihood: + *
      + *
    • ref is .999
    • + *
    • het is 10^-3
    • + *
    • homozygous, non-reference is 10^-5
    • + * + * @param ref reference allele + * @param allele1 first allele of the genotype + * @param allele2 second allele of the genotype + * @return prior, given the reference and genotype alleles + */ + protected double getPrior(final char ref, final DiploidGenotype gt) { + final double prior; + if (gt.isHom() && gt.getAllele1() == ref) { + prior = 0.999; // reference + } else { + if (gt.getAllele1() != ref && gt.getAllele2() != ref) { + prior = 0.00001; // neither base is reference + } else { + prior = 0.001; // het, one base is reference + } + } + return prior; + } + + // -------------------------------------------------------------------------------------------- + // Helper methods below this point... + // -------------------------------------------------------------------------------------------- + + + private final String[] chroms = new String[]{"chrM","chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22","chrX","chrY","chr1_random","chr2_random","chr3_random","chr4_random","chr5_random","chr6_random","chr7_random","chr8_random","chr9_random","chr10_random","chr11_random","chr13_random","chr15_random","chr16_random","chr17_random","chr18_random","chr19_random","chr21_random","chr22_random","chrX_random"}; + private int translateChromToContig(final String chrom) { + for(int i=0; i "); + } + + + private SAMFileReader getSamReader(final File samFile) { + final SAMFileReader samReader = new SAMFileReader(samFile); + + // ensure the file is sorted + if (samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) { + System.out.println("SAM Files must be coordinate-sorted, this is " + samReader.getFileHeader().getSortOrder()); + System.exit(1); + } + + return samReader; + } + +} \ No newline at end of file diff --git a/lib/edu/mit/broad/sam/apps/allelecaller/DiploidGenotype.java 
b/lib/edu/mit/broad/sam/apps/allelecaller/DiploidGenotype.java new file mode 100644 index 0000000000..d259a60752 --- /dev/null +++ b/lib/edu/mit/broad/sam/apps/allelecaller/DiploidGenotype.java @@ -0,0 +1,27 @@ +package edu.mit.broad.sam.apps.allelecaller; + +public enum DiploidGenotype { + AA('A','A'), + AC('A','C'), + AG('A','G'), + AT('A','T'), + CC('C','C'), + CG('C','G'), + CT('C','T'), + GG('G','G'), + GT('G','T'), + TT('T','T'); + + private final char allele1; + private final char allele2; + + private DiploidGenotype(final char allele1, final char allele2) { + this.allele1 = allele1; + this.allele2 = allele2; + } + + public char getAllele1() { return allele1; } + public char getAllele2() { return allele2; } + public boolean isHet() { return this.allele1 != this.allele2; } + public boolean isHom() { return this.allele1 == this.allele2; } +} diff --git a/lib/edu/mit/broad/sam/apps/allelecaller/FlatQualityAlleleCaller.java b/lib/edu/mit/broad/sam/apps/allelecaller/FlatQualityAlleleCaller.java new file mode 100644 index 0000000000..7a77d4524d --- /dev/null +++ b/lib/edu/mit/broad/sam/apps/allelecaller/FlatQualityAlleleCaller.java @@ -0,0 +1,74 @@ +package edu.mit.broad.sam.apps.allelecaller; + +import java.io.IOException; +import java.io.BufferedWriter; +import java.io.File; +import java.util.*; +import static java.lang.Math.*; + + +/** + * Bayesian-based allele caller using flat qualities and a 1e-3 error rate, based on CRD algorithm + */ +public class FlatQualityAlleleCaller extends AbstractAlleleCaller { + + public FlatQualityAlleleCaller(final File fastbReference, final BufferedWriter writer) throws IOException { + super(fastbReference, writer); + } + + + protected SortedSet call(final char ref, final String bases, final List quals) { + final float eps = 1e-3f; + + // count up the base by nucleotide and put them into a map + final int depth = bases.length(); + int a = 0,c = 0,g = 0,t = 0; + for(int i=0; i< bases.length(); i++) { + if (bases.charAt(i) == 'A') 
{ a++; } + else if (bases.charAt(i) == 'C') { c++; } + else if (bases.charAt(i) == 'G') { g++; } + else if (bases.charAt(i) == 'T') { t++; } + else { throw new RuntimeException("Unknown Base " + bases.charAt(i)); } + } + + final Map counts = new HashMap(); + counts.put('A', a); + counts.put('C', c); + counts.put('G', g); + counts.put('T', t); + + + // for each of the 10 theories, calculate the likelihood + final SortedSet results = new TreeSet(); + for(final DiploidGenotype theory : DiploidGenotype.values()) { + final double likelihood; + final char allele1 = theory.getAllele1(); + final char allele2 = theory.getAllele2(); + + if (!theory.isHet()) { + likelihood = log10(1-eps)*counts.get(allele1) + log10(eps)*(depth - counts.get(allele1)); + } else { + final int major_allele_counts; + final int minor_allele_counts; + if (counts.get(allele1) > counts.get(allele2)) { + major_allele_counts = counts.get(allele1); + minor_allele_counts = counts.get(allele2); + } else { + major_allele_counts = counts.get(allele2); + minor_allele_counts = counts.get(allele1); + } + + likelihood = log10(0.5 - (eps/2.0) )*major_allele_counts + + log10(0.5 - (eps/2.0) )*minor_allele_counts + + log10(eps)*(depth - major_allele_counts - minor_allele_counts); + } + + final double prior = getPrior(ref, theory); + results.add(new GenotypeTheory(theory, likelihood + log10(prior))); + } + + + return results; + + } +} diff --git a/lib/edu/mit/broad/sam/apps/allelecaller/GenotypeTheory.java b/lib/edu/mit/broad/sam/apps/allelecaller/GenotypeTheory.java new file mode 100644 index 0000000000..709e1c4397 --- /dev/null +++ b/lib/edu/mit/broad/sam/apps/allelecaller/GenotypeTheory.java @@ -0,0 +1,46 @@ +package edu.mit.broad.sam.apps.allelecaller; + +/** + * Datastructure to hold a single genotype along with a likelihood. 
+ */ +public class GenotypeTheory implements Comparable { + private DiploidGenotype genotype; + private double likelihood; + + public GenotypeTheory(final DiploidGenotype genotype, final double likelihood) { + this.genotype = genotype; + this.likelihood = likelihood; + } + + public DiploidGenotype getGenotype() { + return genotype; + } + + public void setGenotype(final DiploidGenotype genotype) { + this.genotype = genotype; + } + + public double getLikelihood() { + return likelihood; + } + + public void setLikelihood(final double likelihood) { + this.likelihood = likelihood; + } + + /** + * Genotype Theories are sorted first by descending likelihood (ie + * the GenotypeTheory with biggest likelihood comes first). Ties are + * broken by lexical sorting of the genotypes themselves + * + */ + public int compareTo(final GenotypeTheory other) { + if (this.getLikelihood() == other.getLikelihood()) { + return this.getGenotype().compareTo(other.getGenotype()); + } else if (this.getLikelihood() > other.getLikelihood()) { + return -1; + } else { + return 1; + } + } +} diff --git a/lib/edu/mit/broad/sam/apps/allelecaller/QualityScoreAlleleCaller.java b/lib/edu/mit/broad/sam/apps/allelecaller/QualityScoreAlleleCaller.java new file mode 100644 index 0000000000..23b310bd2a --- /dev/null +++ b/lib/edu/mit/broad/sam/apps/allelecaller/QualityScoreAlleleCaller.java @@ -0,0 +1,80 @@ +package edu.mit.broad.sam.apps.allelecaller; + +import java.util.*; +import static java.lang.Math.log10; +import static java.lang.Math.pow; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.File; + +/** + * Bayesian-based allele caller using quality scores, based on CRD algorithm + */ +public class QualityScoreAlleleCaller extends AbstractAlleleCaller { + + public QualityScoreAlleleCaller(final File fastbReference, final BufferedWriter writer) throws IOException { + super(fastbReference, writer); + } + + protected SortedSet call(final char ref, final String bases, final List 
quals) { + + // for each of the 10 theories, calculate the likelihood using quality scores + final SortedSet results = new TreeSet(); + for(final DiploidGenotype theory : DiploidGenotype.values()) { + double likelihood = 0; + + for(int i=0; i 0) + { + ++lineNumber; + return StringUtil.bytesToString(lineBuffer, 0, linePosition); + } else + { + return null; + } + } + } + + + final byte b = buffer[nextChar++]; + if (b == LINEFEED || b == CARRIAGE_RETURN) + { + + if (includeTerminators) + { + lineBuffer[linePosition++] = b; + if (b == CARRIAGE_RETURN && peek() == LINEFEED) + { + lineBuffer[linePosition++] = b; + nextChar++; // <= to account for the '\n' we just ate + } + } + else { + if (b == CARRIAGE_RETURN && peek() == LINEFEED) + { + nextChar++; // <= skip the trailing \n in case of \r\n termination + } + + } + ++lineNumber; + return StringUtil.bytesToString(lineBuffer, 0, linePosition); + } else + { + // Expand line buffer size if neccessary. Reservce at least 2 characters + // for potential line-terminators in return string + + if (linePosition > (lineBuffer.length - 3)) + { + final byte[] temp = new byte[lineBuffer.length + 100]; + System.arraycopy(lineBuffer, 0, temp, 0, lineBuffer.length); + lineBuffer = temp; + } + + lineBuffer[linePosition++] = b; + } + } + } + + public int getLineNumber() { + return lineNumber; + } + + /** + * Peek ahead one character, filling from the underlying stream if neccessary. + * + * @return + * @throws java.io.IOException + */ + private byte peek(){ + // Refill buffer if neccessary + if (nextChar == nChars) + { + fill(); + if (nextChar == nChars) + { + // eof reached. 
+ return 0; + } + } + return buffer[nextChar]; + + } + + private void fill() { + try { + nChars = is.read(buffer); + nextChar = 0; + } catch (IOException e) { + throw new RuntimeIOException(e); + } + } + + public void close() { + try { + is.close(); + } catch (IOException e) { + // Ignore exception + } + } +} + diff --git a/lib/edu/mit/broad/sam/util/AsciiWriter.java b/lib/edu/mit/broad/sam/util/AsciiWriter.java new file mode 100644 index 0000000000..8395cf84d2 --- /dev/null +++ b/lib/edu/mit/broad/sam/util/AsciiWriter.java @@ -0,0 +1,55 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam.util; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.Writer; + +/** + * Fast (I hope) Writer that converts char to byte merely by casting, rather than charset conversion. + */ +public class AsciiWriter extends Writer { + + private final OutputStream os; + // Buffer size has not been tuned. 
+ private final byte[] buffer = new byte[10000]; + private int numBytes; + + public AsciiWriter(final OutputStream os) { + this.os = os; + numBytes = 0; + } + + public void close() throws IOException { + flush(); + os.close(); + } + + public void flush() throws IOException { + os.write(buffer, 0, numBytes); + numBytes = 0; + os.flush(); + } + + public void write(final char[] chars, int offset, int length) throws IOException { + while (length > 0) { + final int charsToConvert = Math.min(length, buffer.length - numBytes); + StringUtil.charsToBytes(chars, offset, charsToConvert, buffer, numBytes); + numBytes += charsToConvert; + offset += charsToConvert; + length -= charsToConvert; + if (numBytes == buffer.length) { + os.write(buffer, 0, numBytes); + numBytes = 0; + } + } + } +} diff --git a/lib/edu/mit/broad/sam/util/BinaryCodec.java b/lib/edu/mit/broad/sam/util/BinaryCodec.java new file mode 100644 index 0000000000..18191a257d --- /dev/null +++ b/lib/edu/mit/broad/sam/util/BinaryCodec.java @@ -0,0 +1,478 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam.util; + +import java.io.*; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/** + * @author Dave Tefft + */ +public class BinaryCodec { + + //Outstream to write to + private OutputStream outputStream; + //If a file or filename was given it will be stored here + private String outputFileName; + + //Input stream to read from + private InputStream inputStream; + //If a file or filename was give to read from it will be stored here + private String inputFileName; + + /* + Mode that the BinaryCodec is in. 
It is either writing to a binary file or reading from. + This is set to true if it is writing to a binary file + Right now we don't support reading and writing to the same file with the same BinaryCodec instance + */ + private boolean isWriting; + + private ByteBuffer byteBuffer; + + //Byte order used for the Picard project + private static final ByteOrder LITTLE_ENDIAN = ByteOrder.LITTLE_ENDIAN; + private static final byte NULL_BYTE[] = {0}; + + private static final long MAX_UBYTE = (Byte.MAX_VALUE + 1) * 2; + private static final long MAX_USHORT = (Short.MAX_VALUE + 1) * 2; + private static final long MAX_UINT = ((long)Integer.MAX_VALUE + 1) * 2; + + // We never serialize more than this much at a time. + private static final int MAX_BYTE_BUFFER = 8; + + ////////////////////////////////////////////////// + // Constructors // + ////////////////////////////////////////////////// + + /** + * Constructs BinaryCodec from a file and set it's mode to writing or not + * + * @param file file to be written to or read from + * @param writing whether the file is being written to + */ + public BinaryCodec(final File file, final boolean writing) { + try { + this.isWriting = writing; + if (this.isWriting) { + this.outputStream = new FileOutputStream(file); + this.outputFileName = file.getName(); + } else { + this.inputStream = new FileInputStream(file); + this.inputFileName = file.getName(); + } + } catch (FileNotFoundException e) { + throw new RuntimeIOException("File not found: " + file, e); + } + initByteBuffer(); + } + + /** + * Constructs BinaryCodec from a file name and set it's mode to writing or not + * + * @param fileName name of the file to be written to or read from + * @param writing writing whether the file is being written to + */ + public BinaryCodec(final String fileName, final boolean writing) { + this(new File(fileName), writing); + } + + /** + * Constructs BinaryCodec from an output stream + * + * @param outputStream Stream to write to, since it's an output 
stream we know that isWriting + * should be set to true + */ + public BinaryCodec(final OutputStream outputStream) { + isWriting = true; + this.outputStream = outputStream; + initByteBuffer(); + } + + /** + * Constructs BinaryCodec from an input stream + * + * @param inputStream Stream to read from, since we are reading isWriting is set to false + */ + public BinaryCodec(final InputStream inputStream) { + isWriting = false; + this.inputStream = inputStream; + initByteBuffer(); + } + + /** + * Shared among ctors + */ + private void initByteBuffer() { + byteBuffer = ByteBuffer.allocate(MAX_BYTE_BUFFER); + byteBuffer.order(LITTLE_ENDIAN); + } + + ////////////////////////////////////////////////// + // Writing methods // + ////////////////////////////////////////////////// + + + /** + * Write whatever has been put into the byte buffer + * @param numBytes -- how much to write. Note that in case of writing an unsigned value, + * more bytes were put into the ByteBuffer than will get written out. 
+ */ + private void writeByteBuffer(final int numBytes) { + assert(numBytes <= byteBuffer.limit()); + writeBytes(byteBuffer.array(), 0, numBytes); + } + + /** + * Writes a byte to the output buffer + * + * @param bite byte array to write + */ + public void writeByte(final byte bite) { + byteBuffer.clear(); + byteBuffer.put(bite); + writeByteBuffer(1); + } + + public void writeByte(final int b) { + writeByte((byte)b); + } + + /** + * Writes a byte array to the output buffer + * + * @param bytes byte array to write + */ + public void writeBytes(final byte[] bytes) { + writeBytes(bytes, 0, bytes.length); + } + + public void writeBytes(final byte[] bytes, final int startOffset, final int numBytes) { + if (!isWriting) { + throw new IllegalStateException("Calling write method on BinaryCodec open for read."); + } + try { + outputStream.write(bytes, startOffset, numBytes); + } catch (IOException e) { + throw new RuntimeIOException(constructErrorMessage("Write error"), e); + } + } + + /** + * Write an int to the output stream + * + * @param value int to write + */ + public void writeInt(final int value) { + byteBuffer.clear(); + byteBuffer.putInt(value); + writeByteBuffer(4); + } + + /** + * Write a double to the output stream + * + * @param value double to write + */ + public void writeDouble(final double value) { + byteBuffer.clear(); + byteBuffer.putDouble(value); + writeByteBuffer(8); + } + + /** + * Write a long to the output stream + * + * @param value long to write + */ + public void writeLong(final long value) { + byteBuffer.clear(); + byteBuffer.putLong(value); + writeByteBuffer(8); + } + + + public void writeShort(final short value) { + byteBuffer.clear(); + byteBuffer.putShort(value); + writeByteBuffer(2); + } + + /** + * Write a float to the output stream + * + * @param value float to write + */ + public void writeFloat(final float value) { + byteBuffer.clear(); + byteBuffer.putFloat(value); + writeByteBuffer(4); + } + + /** + * Writes a string to the buffer + * 
+ * @param value string to write to buffer + * @param writeLength prefix the string with the length as an int + * @param appendNull add a null byte to the end of the string + */ + public void writeString(final String value, final boolean writeLength, final boolean appendNull) { + if (writeLength) { + int lengthToWrite = value.length(); + if (appendNull) lengthToWrite++; + writeInt(lengthToWrite); + } + + //Actually writes the string to a buffer + writeString(value); + + if (appendNull) writeBytes(NULL_BYTE); + + } + + + /** + * Write a string to the buffer + * + * @param value string to write + */ + private void writeString(final String value) { + writeBytes(StringUtil.stringToBytes(value)); + } + + // NOTE: The unsigned methods all have little-endianness built into them. + public void writeUByte(final short val) { + if (val < 0) { + throw new IllegalArgumentException("Negative value (" + val + ") passed to unsigned writing method."); + } + if (val > MAX_UBYTE) { + throw new IllegalArgumentException("Value (" + val + ") to large to be written as ubyte."); + } + byteBuffer.clear(); + byteBuffer.putShort(val); + writeByteBuffer(1); + } + + public void writeUShort(final int val) { + if (val < 0) { + throw new IllegalArgumentException("Negative value (" + val + ") passed to unsigned writing method."); + } + if (val > MAX_USHORT) { + throw new IllegalArgumentException("Value (" + val + ") to large to be written as ushort."); + } + byteBuffer.clear(); + byteBuffer.putInt(val); + writeByteBuffer(2); + } + + public void writeUInt(final long val) { + if (val < 0) { + throw new IllegalArgumentException("Negative value (" + val + ") passed to unsigned writing method."); + } + if (val > MAX_UINT) { + throw new IllegalArgumentException("Value (" + val + ") to large to be written as uint."); + } + byteBuffer.clear(); + byteBuffer.putLong(val); + writeByteBuffer(4); + } + + ////////////////////////////////////////////////// + // Reading methods // + 
////////////////////////////////////////////////// + + /** + * Read a byte array off the input stream + * + * @return number of bytes read + */ + public void readBytes(final byte[] buffer) { + readBytes(buffer, 0, buffer.length); + } + + public void readBytes(final byte[] buffer, final int offset, final int length) { + final int numRead = readBytesOrFewer(buffer, offset, length); + if (numRead < length) { + throw new RuntimeEOFException(constructErrorMessage("Premature EOF")); + } + } + + public int readBytesOrFewer(final byte[] buffer, final int offset, final int length) { + if (isWriting) { + throw new IllegalStateException("Calling read method on BinaryCodec open for write."); + } + try { + return inputStream.read(buffer, offset, length); + } catch (IOException e) { + throw new RuntimeIOException(constructErrorMessage("Read error"), e); + } + } + + public byte readByte() { + readByteBuffer(1); + byteBuffer.flip(); + return byteBuffer.get(); + } + + /** + * Read a string off the input stream + * + * @param length length of string to read + * @return String read from stream + */ + public String readString(final int length) { + final byte[] buffer = new byte[length]; + readBytes(buffer); + + return StringUtil.bytesToString(buffer); + } + + public String readNullTerminatedString() { + return StringUtil.readNullTerminatedString(this); + } + + private void readByteBuffer(final int numBytes) { + assert(numBytes <= byteBuffer.capacity()); + readBytes(byteBuffer.array(), 0, numBytes); + byteBuffer.limit(byteBuffer.capacity()); + byteBuffer.position(numBytes); + } + + /** + * Read an int off the input stream + * + * @return int from input stream + */ + public int readInt() { + readByteBuffer(4); + byteBuffer.flip(); + return byteBuffer.getInt(); + } + + /** + * Reads a double off the input stream + * + * @return double + */ + public double readDouble() { + readByteBuffer(8); + byteBuffer.flip(); + return byteBuffer.getDouble(); + } + + /** + * Reads a long off the input 
stream + * + * @return long + */ + public long readLong() { + readByteBuffer(8); + byteBuffer.flip(); + return byteBuffer.getLong(); + } + + public short readShort() { + readByteBuffer(2); + byteBuffer.flip(); + return byteBuffer.getShort(); + } + + /** + * Reads a float off the input stream + * + * @return float + */ + public float readFloat() { + readByteBuffer(4); + byteBuffer.flip(); + return byteBuffer.getFloat(); + } + + public short readUByte() { + readByteBuffer(1); + byteBuffer.put((byte)0); + byteBuffer.flip(); + return byteBuffer.getShort(); + } + + public int readUShort() { + readByteBuffer(2); + byteBuffer.putShort((short)0); + byteBuffer.flip(); + return byteBuffer.getInt(); + } + + public long readUInt() { + readByteBuffer(4); + byteBuffer.putInt(0); + byteBuffer.flip(); + return byteBuffer.getLong(); + } + + /** + * Close the appropriate stream + */ + public void close() { + try { + if (this.isWriting) this.outputStream.close(); + else this.inputStream.close(); + } catch (IOException e) { + throw new RuntimeIOException(e.getMessage(), e); + } + } + + private String constructErrorMessage(final String msg) { + final StringBuilder sb = new StringBuilder(msg); + sb.append("; BinaryCodec in "); + sb.append(isWriting? "write": "read"); + sb.append("mode; "); + final String filename = isWriting? 
outputFileName: inputFileName; + if (filename != null) { + sb.append("file: "); + sb.append(filename); + } else { + sb.append("streamed file (filename not available)"); + } + return sb.toString(); + } + + ////////////////////////////////////////////////// + // Some getters // + ////////////////////////////////////////////////// + + + public String getInputFileName() { + return inputFileName; + } + + public String getOutputFileName() { + return outputFileName; + } + + public void setOutputFileName(final String outputFileName) { + this.outputFileName = outputFileName; + } + + public void setInputFileName(final String inputFileName) { + this.inputFileName = inputFileName; + } + + public boolean isWriting() { + return isWriting; + } + + public OutputStream getOutputStream() { + return outputStream; + } + + public InputStream getInputStream() { + return inputStream; + } +} diff --git a/lib/edu/mit/broad/sam/util/BlockCompressedInputStream.java b/lib/edu/mit/broad/sam/util/BlockCompressedInputStream.java new file mode 100755 index 0000000000..626e5c17c0 --- /dev/null +++ b/lib/edu/mit/broad/sam/util/BlockCompressedInputStream.java @@ -0,0 +1,258 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ +package edu.mit.broad.sam.util; + + +import java.io.*; +import java.util.zip.GZIPInputStream; + +/* + * Utility class for reading BGZF block compressed files. 
+ */ +public class BlockCompressedInputStream + extends InputStream +{ + + private InputStream mStream = null; + private RandomAccessFile mFile = null; + private byte[] mFileBuffer = null; + private byte[] mCurrentBlock = null; + private int mCurrentOffset = 0; + private long mBlockAddress = 0; + private int mLastBlockLength = 0; + + + public BlockCompressedInputStream(final InputStream stream) { + mStream = toBufferedStream(stream); + mFile = null; + } + + public BlockCompressedInputStream(final File file) + throws IOException { + mFile = new RandomAccessFile(file, "r"); + mStream = null; + } + + public int available() + throws IOException { + if (mCurrentBlock == null || mCurrentOffset == mCurrentBlock.length) { + readBlock(); + } + if (mCurrentBlock == null) { + return 0; + } + return mCurrentBlock.length - mCurrentOffset; + } + + public void close() + throws IOException { + if (mFile != null) { + mFile.close(); + mFile = null; + } else if (mStream != null) { + mStream.close(); + mStream = null; + } + // Encourage garbage collection + mFileBuffer = null; + mCurrentBlock = null; + } + + public int read() + throws IOException { + return (available() > 0) ? 
mCurrentBlock[mCurrentOffset++] : -1; + } + + public int read(final byte[] buffer) + throws IOException { + return read(buffer, 0, buffer.length); + } + + public int read(final byte[] buffer, int offset, int length) + throws IOException { + int bytesRead = 0; + while (length > 0) { + final int available = available(); + if (available == 0) { + break; + } + final int copyLength = Math.min(length, available); + System.arraycopy(mCurrentBlock, mCurrentOffset, buffer, offset, copyLength); + mCurrentOffset += copyLength; + offset += copyLength; + length -= copyLength; + bytesRead += copyLength; + } + return bytesRead; + } + + public void seek(final long pos) + throws IOException { + // Note: pos is a special virtual file pointer, not an actual byte offset + if (mFile == null) { + throw new IOException("Cannot seek on stream based file"); + } + // Decode virtual file pointer + // Upper 48 bits is the byte offset into the compressed stream of a block. + // Lower 16 bits is the byte offset into the uncompressed stream inside the block. 
+ final long compressedOffset = pos >> 16; + final int uncompressedOffset = (int) (pos & 0xFFFF); + mFile.seek(compressedOffset); + mBlockAddress = compressedOffset; + mLastBlockLength = 0; + readBlock(); + if (uncompressedOffset >= available()) { + throw new IOException("Invalid file pointer: " + pos); + } + mCurrentOffset = uncompressedOffset; + } + + public long getFilePointer() { + return ((mBlockAddress << 16) | mCurrentOffset); + } + + public static boolean isValidFile(final InputStream stream) + throws IOException { + if (!stream.markSupported()) { + throw new RuntimeException("Cannot test non-buffered stream"); + } + stream.mark(BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH); + final byte[] buffer = new byte[BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH]; + final int count = readBytes(stream, buffer, 0, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH); + stream.reset(); + if (count != BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH) { + return false; + } + return isValidBlockHeader(buffer); + } + + private static boolean isValidBlockHeader(final byte[] buffer) { + return (buffer[0] == BlockCompressedStreamConstants.GZIP_ID1 && + (buffer[1] & 0xFF) == BlockCompressedStreamConstants.GZIP_ID2 && + (buffer[3] & BlockCompressedStreamConstants.GZIP_FLG) != 0 && + buffer[10] == BlockCompressedStreamConstants.GZIP_XLEN && + buffer[12] == BlockCompressedStreamConstants.BGZF_ID1 && + buffer[13] == BlockCompressedStreamConstants.BGZF_ID2); + } + + private void readBlock() + throws IOException { + + if (mFileBuffer == null) { + mFileBuffer = new byte[BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE]; + } + int count = readBytes(mFileBuffer, 0, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH); + if (count == 0) { + return; + } + if (count != BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH) { + throw new IOException("Premature end of file"); + } + final int blockLength = unpackInt16(mFileBuffer, 
BlockCompressedStreamConstants.BLOCK_LENGTH_OFFSET) + 1; + if (blockLength < BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH || blockLength > mFileBuffer.length) { + throw new IOException("Unexpected compressed block length: " + blockLength); + } + final int remaining = blockLength - BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH; + count = readBytes(mFileBuffer, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH, remaining); + if (count != remaining) { + throw new IOException("Premature end of file"); + } + inflateBlock(mFileBuffer, blockLength); + mCurrentOffset = 0; + mBlockAddress += mLastBlockLength; + mLastBlockLength = blockLength; + } + + private void inflateBlock(final byte[] compressedBlock, final int compressedLength) + throws IOException { + final int uncompressedLength = unpackInt32(compressedBlock, compressedLength-4); + byte[] buffer = mCurrentBlock; + mCurrentBlock = null; + if (buffer == null || buffer.length != uncompressedLength) { + buffer = new byte[uncompressedLength]; + } + final GZIPInputStream gzipStream = + new GZIPInputStream(new ByteArrayInputStream(compressedBlock, 0, compressedLength)); + try { + final int count = readBytes(gzipStream, buffer, 0, buffer.length); + if (count != buffer.length) { + throw new IOException("Block inflate failed"); + } + // Note: available() does not return zero here. + // The only safe way to test is to try to read a byte. 
+ if (gzipStream.read() != -1) { + throw new IOException("Block inflate failed"); + } + } finally { + gzipStream.close(); + } + mCurrentBlock = buffer; + } + + private int readBytes(final byte[] buffer, final int offset, final int length) + throws IOException { + if (mFile != null) { + return readBytes(mFile, buffer, offset, length); + } else if (mStream != null) { + return readBytes(mStream, buffer, offset, length); + } else { + return 0; + } + } + + private static int readBytes(final RandomAccessFile file, final byte[] buffer, final int offset, final int length) + throws IOException { + int bytesRead = 0; + while (bytesRead < length) { + final int count = file.read(buffer, offset + bytesRead, length - bytesRead); + if (count <= 0) { + break; + } + bytesRead += count; + } + return bytesRead; + } + + private static int readBytes(final InputStream stream, final byte[] buffer, final int offset, final int length) + throws IOException { + int bytesRead = 0; + while (bytesRead < length) { + final int count = stream.read(buffer, offset + bytesRead, length - bytesRead); + if (count <= 0) { + break; + } + bytesRead += count; + } + return bytesRead; + } + + private BufferedInputStream toBufferedStream(final InputStream stream) { + if (stream instanceof BufferedInputStream) { + return (BufferedInputStream) stream; + } else { + return new BufferedInputStream(stream); + } + } + + private int unpackInt16(final byte[] buffer, final int offset) { + return ((buffer[offset] & 0xFF) | + ((buffer[offset+1] & 0xFF) << 8)); + } + + private int unpackInt32(final byte[] buffer, final int offset) { + return ((buffer[offset] & 0xFF) | + ((buffer[offset+1] & 0xFF) << 8) | + ((buffer[offset+2] & 0xFF) << 16) | + ((buffer[offset+3] & 0xFF) << 24)); + } +} + + diff --git a/lib/edu/mit/broad/sam/util/BlockCompressedOutputStream.java b/lib/edu/mit/broad/sam/util/BlockCompressedOutputStream.java new file mode 100644 index 0000000000..11b775b88e --- /dev/null +++ 
b/lib/edu/mit/broad/sam/util/BlockCompressedOutputStream.java @@ -0,0 +1,177 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam.util; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.util.zip.CRC32; +import java.util.zip.Deflater; + +/** + * Writer for a file that is a series of gzip blocks. The caller just treats it as an + * OutputStream, and under the covers a gzip block is written when the amount of uncompressed as-yet-unwritten + * bytes reaches a threshold. Note that the flush() method should not be called by client + * unless you know what you're doing, because it forces a gzip block to be written even if the + * number of buffered bytes has not reached threshold. close(), on the other hand, must be called + * when done writing in order to force the last gzip block to be written. 
+ */ +public class BlockCompressedOutputStream + extends OutputStream +{ + private final BinaryCodec codec; + private final byte[] uncompressedBuffer = new byte[BlockCompressedStreamConstants.DEFAULT_UNCOMPRESSED_BLOCK_SIZE]; + private int numUncompressedBytes = 0; + private final byte[] compressedBuffer = + new byte[BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE - + BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH]; + private final Deflater deflater = new Deflater(BlockCompressedStreamConstants.GZIP_CM_DEFLATE, true); + private final CRC32 crc32 = new CRC32(); + private final byte[] singleByteArray = new byte[1]; + + private int numberOfThrottleBacks = 0; + + public BlockCompressedOutputStream(final String filename) { + codec = new BinaryCodec(filename, true); + } + + public BlockCompressedOutputStream(final File file) { + codec = new BinaryCodec(file, true); + } + + @Override + public void write(final byte[] bytes) throws IOException { + write(bytes, 0, bytes.length); + } + + @Override + public void write(final byte[] bytes, int startIndex, int numBytes) throws IOException { + assert(numUncompressedBytes < uncompressedBuffer.length); + while (numBytes > 0) { + final int bytesToWrite = Math.min(uncompressedBuffer.length - numUncompressedBytes, numBytes); + System.arraycopy(bytes, startIndex, uncompressedBuffer, numUncompressedBytes, bytesToWrite); + numUncompressedBytes += bytesToWrite; + startIndex += bytesToWrite; + numBytes -= bytesToWrite; + assert(numBytes >= 0); + if (numUncompressedBytes == uncompressedBuffer.length) { + deflateBlock(); + } + } + } + + /** + * WARNING: flush() affects the output format, because it causes the current contents of uncompressedBuffer + * to be compressed and written, even if it isn't full. Unless you know what you're doing, don't call flush(). + * Instead, call close(), which will flush any unwritten data before closing the underlying stream. 
+ * + */ + @Override + public void flush() throws IOException { + while (numUncompressedBytes > 0) { + deflateBlock(); + } + codec.getOutputStream().flush(); + } + + /** + * close() must be called in order to flush any remaining buffered bytes. + * + */ + @Override + public void close() throws IOException { + flush(); + if (numberOfThrottleBacks > 0) { + System.err.println("In BlockCompressedOutputStream, had to throttle back " + numberOfThrottleBacks + + " times for file " + codec.getOutputFileName()); + } + codec.close(); + } + + public void write(final int i) throws IOException { + singleByteArray[0] = (byte)i; + write(singleByteArray); + } + + /** + * Attempt to write the data in uncompressedBuffer to the underlying file in a gzip block. + * If the entire uncompressedBuffer does not fit in the maximum allowed size, reduce the amount + * of data to be compressed, and slide the excess down in uncompressedBuffer so it can be picked + * up in the next deflate event. + * @return size of gzip block that was written. + */ + private int deflateBlock() { + if (numUncompressedBytes == 0) { + return 0; + } + int bytesToCompress = numUncompressedBytes; + while (true) { + // Compress the input + deflater.reset(); + deflater.setInput(uncompressedBuffer, 0, bytesToCompress); + deflater.finish(); + final int compressedSize = deflater.deflate(compressedBuffer, 0, compressedBuffer.length); + + // If it didn't all fit in compressedBuffer.length, reduce the amount to + // be compressed and try again. + if (deflater.getBytesRead() < bytesToCompress) { + bytesToCompress -= BlockCompressedStreamConstants.UNCOMPRESSED_THROTTLE_AMOUNT; + ++numberOfThrottleBacks; + assert(bytesToCompress > 0); + continue; + } + // Data compressed small enough, so write it out. 
+ crc32.reset(); + crc32.update(uncompressedBuffer, 0, bytesToCompress); + + final int totalBlockSize = writeGzipBlock(compressedSize, bytesToCompress, crc32.getValue()); + assert(bytesToCompress <= numUncompressedBytes); + + // Clear out from uncompressedBuffer the data that was written + if (bytesToCompress == numUncompressedBytes) { + numUncompressedBytes = 0; + } else { + System.arraycopy(uncompressedBuffer, bytesToCompress, uncompressedBuffer, 0, + numUncompressedBytes - bytesToCompress); + numUncompressedBytes -= bytesToCompress; + } + return totalBlockSize; + } + // unreachable + } + + /** + * Writes the entire gzip block, assuming the compressed data is stored in compressedBuffer + * @return size of gzip block that was written. + */ + private int writeGzipBlock(final int compressedSize, final int uncompressedSize, final long crc) { + // Init gzip header + codec.writeByte(BlockCompressedStreamConstants.GZIP_ID1); + codec.writeByte(BlockCompressedStreamConstants.GZIP_ID2); + codec.writeByte(BlockCompressedStreamConstants.GZIP_CM_DEFLATE); + codec.writeByte(BlockCompressedStreamConstants.GZIP_FLG); + codec.writeInt(0); // Modification time + codec.writeByte(BlockCompressedStreamConstants.GZIP_XFL); + codec.writeByte(BlockCompressedStreamConstants.GZIP_OS_UNKNOWN); + codec.writeShort(BlockCompressedStreamConstants.GZIP_XLEN); + codec.writeByte(BlockCompressedStreamConstants.BGZF_ID1); + codec.writeByte(BlockCompressedStreamConstants.BGZF_ID2); + codec.writeShort(BlockCompressedStreamConstants.BGZF_LEN); + final int totalBlockSize = compressedSize + BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH + + BlockCompressedStreamConstants.BLOCK_FOOTER_LENGTH; + + // I don't know why we store block size - 1, but that is what the spec says + codec.writeShort((short)(totalBlockSize - 1)); + codec.writeBytes(compressedBuffer, 0, compressedSize); + codec.writeInt((int)crc); + codec.writeInt(uncompressedSize); + return totalBlockSize; + } +} diff --git 
// ==== lib/edu/mit/broad/sam/util/BlockCompressedStreamConstants.java (package edu.mit.broad.sam.util) ====

/*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/

/**
 * Constants shared by the BlockCompressedInputStream and BlockCompressedOutputStream classes.
 */
public class BlockCompressedStreamConstants {

    // -- Block layout --

    /**
     * Byte count of the gzip header preceding the deflated data.  Larger than the
     * bare gzip header because one optional subfield (BGZF) is always present here.
     */
    public static final int BLOCK_HEADER_LENGTH = 18;

    /** Offset within the block of the total-block-size field (stored as size - 1). */
    public static final int BLOCK_LENGTH_OFFSET = 16;

    /** Byte count of the footer (CRC32 + ISIZE) following the deflated data. */
    public static final int BLOCK_FOOTER_LENGTH = 8;

    /** Hard cap on a whole compressed block, header and footer included. */
    public static final int MAX_COMPRESSED_BLOCK_SIZE = 64 * 1024;

    /** A gzip block is emitted once this many uncompressed bytes have accumulated. */
    public static final int DEFAULT_UNCOMPRESSED_BLOCK_SIZE = 64 * 1024;

    /**
     * When a compressed block (with overhead) exceeds MAX_COMPRESSED_BLOCK_SIZE,
     * the input is shrunk by this many bytes and compression is retried.
     */
    public static final int UNCOMPRESSED_THROTTLE_AMOUNT = 1024;

    // -- gzip magic numbers --

    public static final byte GZIP_ID1 = 31;
    public static final int GZIP_ID2 = 139;

    /** FEXTRA flag: optional fields are present. */
    public static final int GZIP_FLG = 4;

    /** Extra flags (none). */
    public static final int GZIP_XFL = 0;

    /** Length of the extra subfield. */
    public static final short GZIP_XLEN = 6;

    /** The deflate compression method, customarily used by gzip. */
    public static final byte GZIP_CM_DEFLATE = 8;

    /** OS byte is irrelevant here (no line-terminator translation). */
    public static final int GZIP_OS_UNKNOWN = 255;

    // -- BGZF subfield --

    public static final byte BGZF_ID1 = 66;
    public static final byte BGZF_ID2 = 67;

    /** Subfield payload length in bytes. */
    public static final byte BGZF_LEN = 2;
}

// ==== new file: lib/edu/mit/broad/sam/util/CloseableIterator.java (mode 100755) ====
// /*
//  * The Broad Institute
//  * SOFTWARE COPYRIGHT NOTICE AGREEMENT
//  * This software and its documentation are copyright 2008 by the
//  * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
//  *
//  * This software is supplied without any warranty or guaranteed support whatsoever.
//  * Neither the Broad Institute nor MIT can be responsible for its use, misuse,
//  * or functionality.
//  */
// package edu.mit.broad.sam.util;
//
// import java.util.Iterator;
//
// (interface javadoc continues on the next line: iterators with releasable resources;
//  close() must always be called, e.g. in a finally block; close() is idempotent...)
/**
 * This interface is used by iterators that use releasable resources during iteration.
 *
 * The consumer of a CloseableIterator should ensure that the close() method is always called,
 * for example by putting such a call in a finally block.  Two conventions should be followed
 * by all implementors of CloseableIterator:
 * 1) The close() method should be idempotent: calling close() twice should have no effect.
 * 2) When hasNext() returns false, the iterator implementation should automatically close itself.
 *    The latter makes it somewhat safer for consumers to use the for-loop syntax for iteration:
 *    for (Type obj : getCloseableIterator()) { ... }
 *
 * We do not inherit from java.io.Closeable because IOExceptions are a pain to deal with.
 *
 * BUGFIX(extraction): the generic type parameter was stripped by text mangling
 * ("CloseableIterator extends Iterator"); restored as CloseableIterator&lt;T&gt;.
 */
public interface CloseableIterator<T>
    extends Iterator<T> {

    /** Releases any resources held by this iterator.  Must be idempotent. */
    public void close();
}

// ==== new file: lib/edu/mit/broad/sam/util/CoordMath.java ====
// /*
// * The Broad Institute
// * SOFTWARE COPYRIGHT NOTICE AGREEMENT
// * This software and its documentation are copyright 2009 by the
// * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
// *
// * This software is supplied without any warranty or guaranteed support whatsoever. Neither
// * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
// ==== lib/edu/mit/broad/sam/util/CoordMath.java (package edu.mit.broad.sam.util) ====

/**
 * Static helpers for arithmetic on 1-based, fully-closed coordinate intervals:
 * an interval covering positions 5..7 has start 5, end 7 and length 3.
 */
public class CoordMath {

    /** Length of the closed interval [start, end]. */
    public static long getLength(final long start, final long end) {
        return (end - start) + 1;
    }

    /** Start coordinate of an interval of the given length ending at {@code end}. */
    public static long getStart(final long end, final long length) {
        return end - length + 1;
    }

    /** End coordinate of an interval of the given length starting at {@code start}. */
    public static long getEnd(final long start, final long length) {
        return start + length - 1;
    }

    /**
     * Offsets are meant to exclude the 'offset' number of bases.
     * ({@code length} is unused here, kept for signature symmetry with getEndFromOffset.)
     */
    public static long getStartFromOffset(final long offset, final long length) {
        return offset + 1;
    }

    /** End coordinate after excluding 'offset' bases from the end of a sequence of the given length. */
    public static long getEndFromOffset(final long offset, final long length) {
        return length - offset;
    }

    /** Length remaining after trimming startOffset bases from the front and endOffset from the back. */
    public static long getLengthFromOffsets(final long startOffset, final long endOffset, final long length) {
        return getLength(getStartFromOffset(startOffset, length),
                         getEndFromOffset(endOffset, length));
    }

    /**
     * Gets a sub-sequence from a java.lang.String (which is zero based) using one-based
     * sequence coordinates.  The base at the end coordinate will be included.
     *
     * @param sequence The String of base pairs
     * @param begin    The one-based start coordinate
     * @param end      The one-based end coordinate
     * @return The subsequence specified
     */
    public static String getSubsequence(final String sequence, final int begin, final int end) {
        return sequence.substring(begin - 1, end);
    }

    /**
     * Checks to see if the two sets of coordinates have any overlap.
     */
    public static boolean overlaps(final long start, final long end, final long start2, final long end2) {
        final boolean secondStartInsideFirst = start2 >= start && start2 <= end;
        final boolean secondEndInsideFirst = end2 >= start && end2 <= end;
        return secondStartInsideFirst
            || secondEndInsideFirst
            || encloses(start2, end2, start, end);
    }

    /** Returns true if the "inner" coords are totally enclosed by the "outer" coords. */
    public static boolean encloses(final long outerStart, final long outerEnd, final long innerStart, final long innerEnd) {
        return innerStart >= outerStart && innerEnd <= outerEnd;
    }

    /**
     * Determines the amount of overlap between two coordinate ranges.  Assumes that the two ranges
     * actually do overlap and therefore may produce strange results when they do not!
     */
    public static long getOverlap(final long start, final long end, final long start2, final long end2) {
        return getLength(Math.max(start, start2), Math.min(end, end2));
    }
}

// ==== new file: lib/edu/mit/broad/sam/util/LineReader.java ====
// /*
// * The Broad Institute
// * SOFTWARE COPYRIGHT NOTICE AGREEMENT
// * This software and its documentation are copyright 2009 by the
// * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
// *
// * This software is supplied without any warranty or guaranteed support whatsoever. Neither
// * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
// */
// package edu.mit.broad.sam.util;
//
// (interface javadoc continues on the next line: implementations that read lines
//  from a String, an ASCII file, or somewhere else.)
/**
 * Interface allows for implementations that read lines from a String, an ASCII file,
 * or somewhere else.
 */
public interface LineReader {

    /**
     * Read a line and remove the line terminator.
     */
    String readLine();

    /**
     * Read a line and optionally include the line terminator.
     *
     * @param includeTerminators if true, the terminator is kept on the returned line
     * @return the next line (termination behavior depends on the flag)
     */
    String readLine(boolean includeTerminators);

    /**
     * @return 1-based number of line most recently read
     */
    int getLineNumber();
}

// ==== new file: lib/edu/mit/broad/sam/util/NonDestructiveIterator.java ====
// /*
// * The Broad Institute
// * SOFTWARE COPYRIGHT NOTICE AGREEMENT
// * This software and its documentation are copyright 2008 by the
// * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
// *
// * This software is supplied without any warranty or guaranteed support whatsoever. Neither
// * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
// */
// package edu.mit.broad.sam.util;
//
// import java.util.Iterator;
//
// (class javadoc continues on the next line: "PeekIterator is a better class to use than this.")
/**
 * Holds the "current" element of an underlying iterator so it can be examined
 * repeatedly before advancing.  PeekIterator is a better class to use than this.
 *
 * @param <T> element type produced by the underlying iterator
 * @param <ITERATOR> concrete iterator type, exposed unchanged via getUnderlyingIterator()
 *
 * BUGFIX(extraction): both generic type parameters were stripped by text mangling
 * ("class NonDestructiveIterator&gt;"); reconstructed here.
 */
public class NonDestructiveIterator<T, ITERATOR extends Iterator<T>> {
    // Current element, or null once the underlying iterator is exhausted.
    private T current = null;
    private final ITERATOR underlyingIterator;

    /** Wraps the iterator and advances once so getCurrent() is immediately valid. */
    public NonDestructiveIterator(final ITERATOR underlyingIterator) {
        this.underlyingIterator = underlyingIterator;
        advance();
    }

    /** @return the element most recently advanced to, or null if exhausted */
    public T getCurrent() {
        return current;
    }

    public ITERATOR getUnderlyingIterator() {
        return underlyingIterator;
    }

    /**
     * Moves to the next element (or to null at end of input).
     * @return true if a new current element is available
     */
    public boolean advance() {
        if (this.underlyingIterator.hasNext()) {
            current = this.underlyingIterator.next();
        } else {
            current = null;
        }
        return hasCurrent();
    }

    /** NOTE: null elements in the underlying iterator are indistinguishable from end-of-input. */
    public boolean hasCurrent() {
        return getCurrent() != null;
    }
}

// ==== new file: lib/edu/mit/broad/sam/util/PeekIterator.java ====
// /*
// * The Broad Institute
// * SOFTWARE COPYRIGHT NOTICE AGREEMENT
// * This software and its documentation are copyright 2009 by the
// * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
// *
// * This software is supplied without any warranty or guaranteed support whatsoever. Neither
// * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
// ==== lib/edu/mit/broad/sam/util/PeekIterator.java (package edu.mit.broad.sam.util) ====

/**
 * Iterator wrapper that lets the caller examine the next element (peek) without consuming it.
 *
 * NOTE: null elements from the underlying iterator are not supported by peek()
 * (a peeked null is indistinguishable from "nothing peeked").
 *
 * BUGFIX(extraction): the generic type parameter was stripped by text mangling
 * ("class PeekIterator implements Iterator"); restored as PeekIterator&lt;T&gt;.
 */
public class PeekIterator<T> implements Iterator<T> {
    Iterator<T> underlyingIterator;
    // Element consumed from the underlying iterator but not yet returned by next().
    T peekedElement = null;

    public PeekIterator(final Iterator<T> underlyingIterator) {
        this.underlyingIterator = underlyingIterator;
    }

    public boolean hasNext() {
        return peekedElement != null || underlyingIterator.hasNext();
    }

    public T next() {
        if (peekedElement != null) {
            final T ret = peekedElement;
            peekedElement = null;
            return ret;
        }
        return underlyingIterator.next();
    }

    /**
     * Returns the next element without consuming it.
     * @throws java.util.NoSuchElementException if the underlying iterator is exhausted
     */
    public T peek() {
        if (peekedElement == null) {
            peekedElement = underlyingIterator.next();
        }
        return peekedElement;
    }

    /** Removal is not supported by this wrapper. */
    public void remove() {
        throw new UnsupportedOperationException();
    }

    public Iterator<T> getUnderlyingIterator() {
        return underlyingIterator;
    }
}

// ==== new file: lib/edu/mit/broad/sam/util/RuntimeEOFException.java ====
// /*
// * The Broad Institute
// * SOFTWARE COPYRIGHT NOTICE AGREEMENT
// * This software and its documentation are copyright 2009 by the
// * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
// *
// * This software is supplied without any warranty or guaranteed support whatsoever. Neither
// * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+*/ +package edu.mit.broad.sam.util; + +public class RuntimeEOFException extends RuntimeException { + public RuntimeEOFException() { + } + + public RuntimeEOFException(final String s) { + super(s); + } + + public RuntimeEOFException(final String s, final Throwable throwable) { + super(s, throwable); + } + + public RuntimeEOFException(final Throwable throwable) { + super(throwable); + } +} diff --git a/lib/edu/mit/broad/sam/util/RuntimeIOException.java b/lib/edu/mit/broad/sam/util/RuntimeIOException.java new file mode 100644 index 0000000000..b6e51bcfb7 --- /dev/null +++ b/lib/edu/mit/broad/sam/util/RuntimeIOException.java @@ -0,0 +1,27 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2008 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam.util; + +public class RuntimeIOException extends RuntimeException { + public RuntimeIOException() { + } + + public RuntimeIOException(final String s) { + super(s); + } + + public RuntimeIOException(final String s, final Throwable throwable) { + super(s, throwable); + } + + public RuntimeIOException(final Throwable throwable) { + super(throwable); + } +} diff --git a/lib/edu/mit/broad/sam/util/SortingCollection.java b/lib/edu/mit/broad/sam/util/SortingCollection.java new file mode 100644 index 0000000000..b501a08b3c --- /dev/null +++ b/lib/edu/mit/broad/sam/util/SortingCollection.java @@ -0,0 +1,369 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. 
Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam.util; + +import java.io.*; +import java.lang.reflect.Array; +import java.util.*; + +/** + * Collection to which many records can be added. After all records are added, the collection can be + * iterated, and the records will be returned in order defined by the comparator. Records may be spilled + * to a temporary directory if there are more records added than will fit in memory. As a result of this, + * the objects returned may not be identical to the objects added to the collection, but they should be + * equal as determined by the codec used to write them to disk and read them back. + */ +public class SortingCollection + implements Iterable { + + /** + * Client must implement this class, which defines the way in which records are written to and + * read from file. + */ + public interface Codec { + /** + * Where to write encoded output + * @param os + */ + void setOutputStream(OutputStream os); + + /** + * Where to read encoded input from + * @param is + */ + void setInputStream(InputStream is); + /** + * Write object to output stream + * @param val what to write + */ + void encode(T val); + + /** + * Read the next record from the input stream and convert into a java object. + * @return null if no more records. Should throw exception if EOF is encountered in the middle of + * a record. + */ + T decode(); + + /** + * Must return a cloned copy of the codec that can be used independently of + * the original instance. + */ + Codec clone(); + } + + /** + * Where files of sorted records go. 
+ */ + private final File tmpDir; + private final SortingCollection.Codec codec; + private final Comparator comparator; + private final int maxRecordsInRam; + private int numRecordsInRam = 0; + private T[] ramRecords; + private boolean iterationStarted = false; + private boolean cleanedUp = false; + + /** + * List of files in tmpDir containing sorted records + */ + private final List files = new ArrayList(); + + /** + * Prepare to accumulate records to be sorted + * @param componentType Class of the record to be sorted. Necessary because of Java generic lameness. + * @param codec For writing records to file and reading them back into RAM + * @param comparator Defines output sort order + * @param maxRecordsInRam + * @param tmpDir Where to write files of records that will not fit in RAM + */ + private SortingCollection(final Class componentType, final SortingCollection.Codec codec, + final Comparator comparator, final int maxRecordsInRam, final File tmpDir) { + if (maxRecordsInRam <= 0) { + throw new IllegalArgumentException("maxRecordsInRam must be > 0"); + } + this.tmpDir = tmpDir; + this.codec = codec; + this.comparator = comparator; + this.maxRecordsInRam = maxRecordsInRam; + this.ramRecords = (T[])Array.newInstance(componentType, maxRecordsInRam); + } + + public void add(final T rec) { + if (iterationStarted) { + throw new IllegalStateException("Cannot add after calling iterator()"); + } + if (numRecordsInRam == maxRecordsInRam) { + spillToDisk(); + } + ramRecords[numRecordsInRam++] = rec; + } + + /** + * Sort the records in memory, write them to a file, and clear the buffer of records in memory. 
+ */ + private void spillToDisk() { + try { + Arrays.sort(this.ramRecords, 0, this.numRecordsInRam, this.comparator); + final File f = File.createTempFile("sortingcollection.", ".tmp", this.tmpDir); + OutputStream os = null; + try { + os = new BufferedOutputStream(new FileOutputStream(f)); + this.codec.setOutputStream(os); + f.deleteOnExit(); + for (int i = 0; i < this.numRecordsInRam; ++i) { + this.codec.encode(ramRecords[i]); + // Facilitate GC + this.ramRecords[i] = null; + } + + os.flush(); + } + finally { + if (os != null) { + os.close(); + } + } + + this.numRecordsInRam = 0; + this.files.add(f); + + } + catch (IOException e) { + throw new RuntimeIOException(e); + } + } + + /** + * Prepare to iterate through the records in order. This method may be called more than once, + * but add() may not be called after this method has been called. + */ + public CloseableIterator iterator() { + if (this.cleanedUp) { + throw new IllegalStateException("Cannot call iterator() after cleanup() was called."); + } + + this.iterationStarted = true; + if (this.files.isEmpty()) { + return new InMemoryIterator(); + } + + if (this.numRecordsInRam > 0) { + spillToDisk(); + } + + // Facilitate GC + this.ramRecords = null; + return new MergingIterator(); + } + + /** + * Delete any temporary files. After this method is called, iterator() may not be called. + */ + public void cleanup() { + this.iterationStarted = true; + this.cleanedUp = true; + + for (final File f : this.files) { + f.delete(); + } + } + + /** + * Syntactic sugar around the ctor, to save some typing of type parameters + * + * @param componentType Class of the record to be sorted. Necessary because of Java generic lameness. 
+ * @param codec For writing records to file and reading them back into RAM + * @param comparator Defines output sort order + * @param maxRecordsInRAM + * @param tmpDir Where to write files of records that will not fit in RAM + */ + public static SortingCollection newInstance(final Class componentType, + final SortingCollection.Codec codec, + final Comparator comparator, + final int maxRecordsInRAM, + final File tmpDir) { + return new SortingCollection(componentType, codec, comparator, maxRecordsInRAM, tmpDir); + + } + + public static SortingCollection newInstance(final Class componentType, + final SortingCollection.Codec codec, + final Comparator comparator, + final int maxRecordsInRAM) { + + final File tmpDir = new File(System.getProperty("java.io.tmpdir")); + return new SortingCollection(componentType, codec, comparator, maxRecordsInRAM, tmpDir); + } + + /** + * For iteration when number of records added is less than the threshold for spilling to disk. + */ + class InMemoryIterator implements CloseableIterator { + private int iterationIndex = 0; + + InMemoryIterator() { + Arrays.sort(SortingCollection.this.ramRecords, + 0, + SortingCollection.this.numRecordsInRam, + SortingCollection.this.comparator); + } + + public void close() { + // nothing to do + } + + public boolean hasNext() { + return this.iterationIndex < SortingCollection.this.numRecordsInRam; + } + + public T next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return SortingCollection.this.ramRecords[iterationIndex++]; + } + + public void remove() { + throw new UnsupportedOperationException(); + } + } + + /** + * For iteration when spilling to disk has occurred. + * Each file is has records in sort order within the file. + * This iterator automatically closes when it iterates to the end, but if not iterating + * to the end it is a good idea to call close(). + * + * Algorithm: MergingIterator maintains a PriorityQueue of PeekFileRecordIterators. 
+ * Each PeekFileRecordIterator iterates through a file in which the records are sorted. + * The comparator for PeekFileRecordIterator used by the PriorityQueue peeks at the next record from + * the file, so the first element in the PriorityQueue is the file that has the next record to be emitted. + * In order to get the next record, the first PeekFileRecordIterator in the PriorityQueue is popped, + * the record is obtained from that iterator, and then if that iterator is not empty, it is pushed back into + * the PriorityQueue. Because it now has a different record as its next element, it may go into another + * location in the PriorityQueue + */ + class MergingIterator implements CloseableIterator { + private final PriorityQueue priorityQueue; + + MergingIterator() { + this.priorityQueue = new PriorityQueue(SortingCollection.this.files.size(), + new PeekFileRecordIteratorComparator()); + for (final File f : SortingCollection.this.files) { + final FileRecordIterator it = new FileRecordIterator(f); + if (it.hasNext()) { + this.priorityQueue.offer(new PeekFileRecordIterator(it)); + } + else { + it.close(); + } + } + } + + public boolean hasNext() { + return !this.priorityQueue.isEmpty(); + } + + public T next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + + final PeekFileRecordIterator fileIterator = priorityQueue.poll(); + final T ret = fileIterator.next(); + if (fileIterator.hasNext()) { + this.priorityQueue.offer(fileIterator); + } + else { + ((CloseableIterator)fileIterator.getUnderlyingIterator()).close(); + } + + return ret; + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + public void close() { + while (!this.priorityQueue.isEmpty()) { + final PeekFileRecordIterator it = this.priorityQueue.poll(); + ((CloseableIterator)it.getUnderlyingIterator()).close(); + } + } + } + + /** + * Read a file of records in format defined by the codec + */ + class FileRecordIterator implements CloseableIterator { + private 
final File file; + private final FileInputStream is; + private final Codec codec; + private T currentRecord = null; + + FileRecordIterator(final File file) { + this.file = file; + try { + this.is = new FileInputStream(file); + this.codec = SortingCollection.this.codec.clone(); + this.codec.setInputStream(this.is); + advance(); + } + catch (FileNotFoundException e) { + throw new RuntimeIOException(e); + } + } + + public boolean hasNext() { + return this.currentRecord != null; + } + + public T next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + final T ret = this.currentRecord; + advance(); + return ret; + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + private void advance() { + this.currentRecord = this.codec.decode(); + } + + public void close() { + try { this.is.close(); } + catch (IOException e) { } + } + } + + + /** + * Just a typedef + */ + class PeekFileRecordIterator extends PeekIterator { + PeekFileRecordIterator(final Iterator underlyingIterator) { + super(underlyingIterator); + } + } + + class PeekFileRecordIteratorComparator implements Comparator { + + public int compare(final PeekFileRecordIterator peekFileRecordIterator, final PeekFileRecordIterator peekFileRecordIterator1) { + return comparator.compare(peekFileRecordIterator.peek(), peekFileRecordIterator1.peek()); + } + } +} diff --git a/lib/edu/mit/broad/sam/util/StringLineReader.java b/lib/edu/mit/broad/sam/util/StringLineReader.java new file mode 100644 index 0000000000..8bcaf54472 --- /dev/null +++ b/lib/edu/mit/broad/sam/util/StringLineReader.java @@ -0,0 +1,65 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. 
Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam.util; + +/** + * Implementation of LineReader that gets its input from a String. No charset conversion + * is necessary because the String is in unicode. Handles CR, LF or CRLF line termination, + * but if asked to return the line terminator, it always comes back as LF. + */ +public class StringLineReader implements LineReader { + + private final String theString; + private int curPos = 0; + private int lineNumber = 0; + + public StringLineReader(final String s) { + // Simplify later processing by replacing crlf with just lf, and replacing solo cr with lf + this.theString = s.replaceAll("\r\n", "\n").replaceAll("\r", "\n"); + } + + /** + * Read a line and remove the line terminator + */ + public String readLine() { + return readLine(false); + } + + /** + * Read a line and optionally include the line terminator + * + * @param includeTerminators + * @return + */ + public String readLine(final boolean includeTerminators) { + if (curPos == theString.length()) { + return null; + } + final int nextLfIndex = theString.indexOf('\n', curPos); + if (nextLfIndex == -1) { + final int startPos = curPos; + curPos = theString.length(); + ++lineNumber; + return theString.substring(startPos); + } + final int startPos = curPos; + final int endPos = nextLfIndex + (includeTerminators? 
1: 0); + curPos = nextLfIndex + 1; + ++lineNumber; + return theString.substring(startPos, endPos); + } + + /** + * @return 1-based number of line most recently read + */ + public int getLineNumber() { + return lineNumber; + } +} diff --git a/lib/edu/mit/broad/sam/util/StringUtil.java b/lib/edu/mit/broad/sam/util/StringUtil.java new file mode 100644 index 0000000000..b4ab475223 --- /dev/null +++ b/lib/edu/mit/broad/sam/util/StringUtil.java @@ -0,0 +1,136 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. +*/ +package edu.mit.broad.sam.util; + +public class StringUtil { + /** + * + * @param separator String to interject between each string in strings arg + * @param strings List of strings to be joined. + * @return String that concatenates each item of strings arg, with separator btw each of them. + */ + public static String join(final String separator, final String[] strings) { + if (strings.length == 0) { + return ""; + } + final StringBuilder ret = new StringBuilder(strings[0]); + for (int i = 1; i < strings.length; ++i) { + ret.append(separator); + ret.append(strings[i]); + } + return ret.toString(); + } + + /** + * Split the string into tokesn separated by the given delimiter. Profiling has + * revealed that the standard string.split() method typically takes > 1/2 + * the total time when used for parsing ascii files. 
+ * + * @param aString the string to split + * @param tokens an array to hold the parsed tokens + * @param delim character that delimits tokens + * @return the number of tokens parsed + */ + public static int split(final String aString, final String[] tokens, final char delim) { + + final int maxTokens = tokens.length; + int nTokens = 0; + int start = 0; + int end = aString.indexOf(delim); + if(end < 0) { + tokens[nTokens++] = aString; + return nTokens; + } + while ((end > 0) && (nTokens < maxTokens)) + { + tokens[nTokens++] = aString.substring(start, end); + start = end + 1; + end = aString.indexOf(delim, start); + + } + // Add the trailing string, if there is room and if it is not empty. + if (nTokens < maxTokens) + { + final String trailingString = aString.substring(start); + if (trailingString.length() > 0) + { + tokens[nTokens++] = trailingString; + } + } + return nTokens; + } + + //////////////////////////////////////////////////////////////////// + // The following methods all convert btw bytes and Strings, without + // using the Java character set mechanism. 
+ //////////////////////////////////////////////////////////////////// + + public static String bytesToString(final byte[] data) { + if (data == null) { + return null; + } + return bytesToString(data, 0, data.length); + } + + @SuppressWarnings("deprecation") + public static String bytesToString(final byte[] buffer, final int offset, final int length) { +/* + The non-deprecated way, that requires allocating char[] + final char[] charBuffer = new char[length]; + for (int i = 0; i < length; ++i) { + charBuffer[i] = (char)buffer[i+offset]; + } + return new String(charBuffer); +*/ + return new String(buffer, 0, offset, length); + } + + @SuppressWarnings("deprecation") + public static byte[] stringToBytes(final String s) { +/* + The non-deprecated way, that requires allocating char[] + final byte[] byteBuffer = new byte[s.length()]; + final char[] charBuffer = s.toCharArray(); + for (int i = 0; i < charBuffer.length; ++i) { + byteBuffer[i] = (byte)(charBuffer[i] & 0xff); + } + return byteBuffer; +*/ + final byte[] byteBuffer = new byte[s.length()]; + s.getBytes(0, byteBuffer.length, byteBuffer, 0); + return byteBuffer; + } + + // This method might more appropriately live in BinaryCodec, but all the byte <=> char conversion + // should be in the same place. + public static String readNullTerminatedString(final BinaryCodec binaryCodec) { + final StringBuilder ret = new StringBuilder(); + for (byte b = binaryCodec.readByte(); b != 0; b = binaryCodec.readByte()) { + ret.append((char)(b & 0xff)); + } + return ret.toString(); + } + + /** + * Convert chars to bytes merely by casting + * @param chars input chars + * @param charOffset where to start converting from chars array + * @param length how many chars to convert + * @param bytes where to put the converted output + * @param byteOffset where to start writing the converted output. 
+ */ + public static void charsToBytes(final char[] chars, final int charOffset, final int length, + final byte[] bytes, final int byteOffset) { + for (int i = 0; i < length; ++i) { + bytes[byteOffset + i] = (byte)chars[charOffset + i]; + } + } + +} diff --git a/src/edu/mit/broad/sting/ValidateSAM.java b/src/edu/mit/broad/sting/ValidateSAM.java new file mode 100755 index 0000000000..237b838a6d --- /dev/null +++ b/src/edu/mit/broad/sting/ValidateSAM.java @@ -0,0 +1,105 @@ +package edu.mit.broad.sting; + +import edu.mit.broad.sam.*; +import edu.mit.broad.sam.SAMFileReader.ValidationStringency; +import edu.mit.broad.sam.util.CloseableIterator; +import edu.mit.broad.picard.cmdline.CommandLineProgram; +import edu.mit.broad.picard.cmdline.Usage; +import edu.mit.broad.picard.cmdline.Option; + +import java.io.*; + +public class ValidateSAM extends CommandLineProgram { + // Usage and parameters + @Usage(programVersion="0.1") public String USAGE = "SAM Validator\n"; + @Option(shortName="I", doc="SAM or BAM file for validation") public File INPUT_FILE; + @Option(shortName="M", doc="Maximum number of errors to detect before exiting", optional=true) public String MAX_ERRORS_ARG = "-1"; + @Option(shortName="S", doc="How strict should we be with validation", optional=true) public String STRICTNESS_ARG = "strict"; + + private long startTime = -1; + + /** Required main method implementation. 
*/ + public static void main(String[] argv) { + System.exit(new ValidateSAM().instanceMain(argv)); + } + + public void printProgress( int nRecords, int nErrors ) { + final double elapsed = (System.currentTimeMillis() - startTime) / 1000.0; + final double secsPer1MReads = (elapsed * 1000000.0) / nRecords; + System.out.printf("Read %d records containing %d errors in %.2f secs (%.2f secs per 1M reads)%n", nRecords, nErrors, elapsed, secsPer1MReads); + } + + protected int doWork() { + int MAX_ERRORS = -1; // Don't bail ever + if ( MAX_ERRORS_ARG != null ) { + MAX_ERRORS = Integer.parseInt(MAX_ERRORS_ARG); + } + + // Start the timer + startTime = System.currentTimeMillis(); + + // Initialize the sam reader + CloseableIterator iter = null; + try { + final SAMFileReader samReader = getSamReader(INPUT_FILE); + iter = samReader.iterator(); + } catch (Exception ioe) { + System.out.println("[VALIDATION FAILURE IN HEADER]: " + ioe); + ioe.printStackTrace(); + return 1; + } + + int nRecords = 0; + int nErrors = 0; + while ( iter.hasNext() ) { + nRecords++; + try { + final SAMRecord ri = iter.next(); + } catch (Exception ioe) { + nErrors++; + System.out.println("[VALIDATION FAILURE IN RECORD]: " + ioe); + ioe.printStackTrace(); + } + + if ( MAX_ERRORS > -1 && nErrors >= MAX_ERRORS ) { + System.out.println("Maximum number of errors encountered " + nErrors); + break; + } + + if ( nRecords % 100000 == 0 ) { + printProgress( nRecords, nErrors ); + } + } + + printProgress( nRecords, nErrors ); + return 0; + } + + private static void usage() { + System.err.println("USAGE: edu.mit.broad.sting.ValidateSAM "); + } + + private SAMFileReader getSamReader(final File samFile) { + + ValidationStringency strictness = SAMFileReader.ValidationStringency.STRICT; + if ( STRICTNESS_ARG == null ) { + strictness = SAMFileReader.ValidationStringency.STRICT; + } + else if ( STRICTNESS_ARG.toLowerCase().equals("lenient") ) { + strictness = SAMFileReader.ValidationStringency.LENIENT; + } + else if ( 
STRICTNESS_ARG.toLowerCase().equals("silent") ) { + strictness = SAMFileReader.ValidationStringency.SILENT; + } + else { + strictness = SAMFileReader.ValidationStringency.STRICT; + } + + System.err.println("Strictness is " + strictness); + final SAMFileReader samReader = new SAMFileReader(samFile, true); + samReader.setValidationStringency(strictness); + + return samReader; + } + +} \ No newline at end of file diff --git a/src/edu/mit/broad/sting/atk/AnalysisTK.java b/src/edu/mit/broad/sting/atk/AnalysisTK.java new file mode 100644 index 0000000000..4456d8eb79 --- /dev/null +++ b/src/edu/mit/broad/sting/atk/AnalysisTK.java @@ -0,0 +1,74 @@ +package edu.mit.broad.sting.atk; + +import edu.mit.broad.sam.SAMFileReader.ValidationStringency; +import edu.mit.broad.picard.cmdline.CommandLineProgram; +import edu.mit.broad.picard.cmdline.Usage; +import edu.mit.broad.picard.cmdline.Option; + +import edu.mit.broad.sting.atk.modules.*; + +import java.io.*; +import java.util.HashMap; + +public class AnalysisTK extends CommandLineProgram { + // Usage and parameters + @Usage(programVersion="0.1") public String USAGE = "SAM Validator\n"; + @Option(shortName="I", doc="SAM or BAM file for validation") public File INPUT_FILE; + @Option(shortName="M", doc="Maximum number of reads to process before exiting", optional=true) public String MAX_READS_ARG = "-1"; + @Option(shortName="S", doc="How strict should we be with validation", optional=true) public String STRICTNESS_ARG = "strict"; + @Option(shortName="R", doc="Reference sequence file", optional=true) public File REF_FILE_ARG = null; + @Option(shortName="B", doc="Debugging output", optional=true) public String DEBUGGING_STR = null; + @Option(shortName="L", doc="Genome region to operation on: from chr:start-end", optional=true) public String REGION_STR = null; + @Option(shortName="T", doc="Type of analysis to run") public String AnalysisName = null; + + public static HashMap MODULES = new HashMap(); + public static void addModule(final 
String name, final Object walker) { + System.out.printf("* Adding module %s%n", name); + MODULES.put(name, walker); + } + + static { + addModule("EmptyLocusWalker", new EmptyLocusWalker()); + addModule("PileupWalker", new PileupWalker()); + } + + private TraversalEngine engine = null; + private int nSkippedIndels = 0; + + public boolean DEBUGGING = false; + + /** Required main method implementation. */ + public static void main(String[] argv) { + System.exit(new AnalysisTK().instanceMain(argv)); + } + + protected int doWork() { + this.engine = new TraversalEngine(INPUT_FILE, REF_FILE_ARG); + + ValidationStringency strictness = ValidationStringency.STRICT; + if ( STRICTNESS_ARG == null ) { + strictness = ValidationStringency.STRICT; + } + else if ( STRICTNESS_ARG.toLowerCase().equals("lenient") ) { + strictness = ValidationStringency.LENIENT; + } + else if ( STRICTNESS_ARG.toLowerCase().equals("silent") ) { + strictness = ValidationStringency.SILENT; + } + else { + strictness = ValidationStringency.STRICT; + } + System.err.println("Strictness is " + strictness); + engine.setStrictness(strictness); + + engine.setDebugging(! ( DEBUGGING_STR == null || DEBUGGING_STR.toLowerCase().equals("true"))); + engine.setMaxReads(Integer.parseInt(MAX_READS_ARG)); + + //LocusWalker walker = new EmptyLocusWalker(); + LocusWalker walker = (LocusWalker)MODULES.get(AnalysisName); + engine.initialize(); + engine.traverseByLoci(walker); + + return 0; + } +} \ No newline at end of file diff --git a/src/edu/mit/broad/sting/atk/LocusContext.java b/src/edu/mit/broad/sting/atk/LocusContext.java new file mode 100755 index 0000000000..12c246365b --- /dev/null +++ b/src/edu/mit/broad/sting/atk/LocusContext.java @@ -0,0 +1,28 @@ +package edu.mit.broad.sting.atk; + +import edu.mit.broad.sam.SAMRecord; + +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: mdepristo + * Date: Feb 22, 2009 + * Time: 3:01:34 PM + * To change this template use File | Settings | File Templates. 
import java.util.List;

/**
 * Placeholder describing the reference/read context at a single locus. All
 * accessors are stubs at this point; the class exists so walker interfaces can
 * be written against it.
 *
 * TODO(review): parameterize the List returns once the element types are
 * settled (reads are presumably SAMRecord, offsets presumably Integer, by
 * analogy with LocusIterator -- confirm before changing).
 */
public class LocusContext {
    public LocusContext() { }

    /** @return size of the current context window; stub, currently always 1. */
    public int getLength() { return 1; }

    /** @return reference base at the current (relative) position; stub, currently 0. */
    public byte getReferenceBase() { return 0; }

    /** @return all reads within this context; stub, currently null. */
    public List getReads() { return null; }

    /** @return equivalent positions within the reads at this locus; stub, currently null. */
    public List getOffsets() { return null; }
}
constructors and other basic operations + // + // ----------------------------------------------------------------------------------------------------------------- + public LocusIterator(final CloseableIterator samIterator) { + this.it = new PushbackIterator(samIterator); + } + + public Iterator iterator() { + return this; + } + + public void close() { + //this.it.close(); + } + + public boolean hasNext() { + return it.hasNext(); + } + + // ----------------------------------------------------------------------------------------------------------------- + // + // next() routine and associated collection operations + // + // ----------------------------------------------------------------------------------------------------------------- + public LocusIterator next() { + position += 1; + + if ( position != -1 ) { + cleanReads(); + expandReads(); + } + + if ( reads.isEmpty() ) { + // the window is empty, we need to jump to the first pos of the first read in the stream + SAMRecord read = it.next(); + pushRead(read); + contig = read.getReferenceName(); + position = read.getAlignmentStart() - 1; + return next(); + } + else { + // at this point, window contains all reads covering the pos, we need to return them + // and the offsets into each read for this loci + calcOffsetsOfWindow(position); + return this; + } + } + + private void pushRead(SAMRecord read) { + //System.out.printf(" -> Adding read %s %d-%d flags %s%n", read.getReadName(), read.getAlignmentStart(), read.getAlignmentEnd(), Utils.readFlagsAsString(read)); + reads.add(read); + } + + class KeepReadPFunc implements Predicate { + public boolean apply(SAMRecord read) { + return position >= read.getAlignmentStart() && + position < read.getAlignmentEnd() && + read.getReferenceName().equals(contig); // should be index for efficiency + } + } + Predicate KeepReadP = new LocusIterator.KeepReadPFunc(); + + private void calcOffsetsOfWindow(final int position) { + offsets.clear(); + for ( SAMRecord read : reads ) { +// def 
calcOffset( read ): +// offset = self.pos - read.start +// return offset +// +// offsets = map(calcOffset, self.window) + final int offset = position - read.getAlignmentStart(); + assert(offset < read.getReadLength() ); + offsets.add(offset); + //System.out.printf("offsets [%d] %s%n", read.getAlignmentStart(), offsets); + } + } + + private void cleanReads() { + // def keepReadP( read ): + // return read.chr == chr and pos >= read.start and pos <= read.end + // self.window = filter( keepReadP, self.window ) + reads = Utils.filter(KeepReadP, reads); + } + + private void expandReads() { +// for read in self.rs: +// #print 'read', read, pos +// if read.chr == chr and read.start <= pos and read.end >= pos: +// self.pushRead(read) +// else: +// self.rs.unget( read ) +// #self.rs = chain( [read], self.rs ) +// break + while ( it.hasNext() ) { + SAMRecord read = it.next(); + if ( KeepReadP.apply( read ) ) { + pushRead(read); + } + else { + it.pushback(read); + break; + } + } + } + + public void remove() { + throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); + } +} diff --git a/src/edu/mit/broad/sting/atk/LocusWalker.java b/src/edu/mit/broad/sting/atk/LocusWalker.java new file mode 100755 index 0000000000..e65891ba7b --- /dev/null +++ b/src/edu/mit/broad/sting/atk/LocusWalker.java @@ -0,0 +1,27 @@ +package edu.mit.broad.sting.atk; + +import edu.mit.broad.sting.atk.LocusIterator; + +/** + * Created by IntelliJ IDEA. + * User: mdepristo + * Date: Feb 22, 2009 + * Time: 2:52:28 PM + * To change this template use File | Settings | File Templates. + */ +public interface LocusWalker { + void initialize(); + public String walkerType(); + + // Do we actually want to operate on the context? 
+ boolean filter(char ref, LocusIterator context); + + // Map over the edu.mit.broad.sting.atk.LocusContext + MapType map(char ref, LocusIterator context); + + // Given result of map function + ReduceType reduceInit(); + ReduceType reduce(MapType value, ReduceType sum); + + void onTraveralDone(); +} \ No newline at end of file diff --git a/src/edu/mit/broad/sting/atk/ReadWalker.java b/src/edu/mit/broad/sting/atk/ReadWalker.java new file mode 100755 index 0000000000..6f6fa915b7 --- /dev/null +++ b/src/edu/mit/broad/sting/atk/ReadWalker.java @@ -0,0 +1,28 @@ +package edu.mit.broad.sting.atk; + +import edu.mit.broad.sam.SAMRecord; +import edu.mit.broad.sting.atk.LocusContext; + +/** + * Created by IntelliJ IDEA. + * User: mdepristo + * Date: Feb 22, 2009 + * Time: 2:52:28 PM + * To change this template use File | Settings | File Templates. + */ +public interface ReadWalker { + void initialize(); + public String walkerType(); + + // Do we actually want to operate on the context? + boolean filter(LocusContext context, SAMRecord read); + + // Map over the edu.mit.broad.sting.atk.LocusContext + MapType map(LocusContext context, SAMRecord read); + + // Given result of map function + ReduceType reduceInit(); + ReduceType reduce(MapType value, ReduceType sum); + + void onTraveralDone(); +} \ No newline at end of file diff --git a/src/edu/mit/broad/sting/atk/TraversalEngine.java b/src/edu/mit/broad/sting/atk/TraversalEngine.java new file mode 100755 index 0000000000..4c1d604102 --- /dev/null +++ b/src/edu/mit/broad/sting/atk/TraversalEngine.java @@ -0,0 +1,255 @@ +package edu.mit.broad.sting.atk; + +import edu.mit.broad.sam.*; +import edu.mit.broad.sam.SAMFileReader.ValidationStringency; +import edu.mit.broad.sam.util.CloseableIterator; +import edu.mit.broad.sam.util.RuntimeIOException; +import edu.mit.broad.picard.filter.SamRecordFilter; +import edu.mit.broad.picard.filter.FilteringIterator; +import edu.mit.broad.picard.reference.ReferenceSequenceFile; +import 
edu.mit.broad.picard.reference.ReferenceSequenceFileFactory; +import edu.mit.broad.sting.utils.ReferenceIterator; + +import java.io.*; + +public class TraversalEngine { + // Usage and parameters + private File readsFile = null; + private File refFileName = null; + private String regionStr = null; + private String traversalType = null; + private ValidationStringency strictness = ValidationStringency.STRICT; + + private long startTime = -1; + private long maxReads = -1; + private long nRecords = 0; + private SAMFileReader samReader = null; + private ReferenceSequenceFile refFile = null; + private ReferenceIterator refIter = null; + private SAMFileReader readStream; + + private int nReads = 0; + private int nSkippedReads = 0; + private int nUnmappedReads = 0; + private int nNotPrimary = 0; + private int nBadAlignments = 0; + private int nSkippedIndels = 0; + + public boolean DEBUGGING = false; + + // -------------------------------------------------------------------------------------------------------------- + // + // Setting up the engine + // + // -------------------------------------------------------------------------------------------------------------- + public TraversalEngine(File reads, File ref) { + readsFile = reads; + refFileName = ref; + } + + public void setRegion(final String reg) { regionStr = regionStr; } + public void setTraversalType(final String type) { traversalType = type; } + public void setStrictness( final ValidationStringency s ) { strictness = s; } + public void setMaxReads( final int maxReads ) { this.maxReads = maxReads; } + public void setDebugging( final boolean d ) { DEBUGGING = d; } + + // -------------------------------------------------------------------------------------------------------------- + // + // functions for dealing with the reference sequence + // + // -------------------------------------------------------------------------------------------------------------- + public void printProgress(final String type) { 
printProgress( false, type ); } + + public void printProgress( boolean mustPrint, final String type ) { + final long nRecords = this.nRecords; + + if ( mustPrint || nRecords % 100000 == 0 ) { + final double elapsed = (System.currentTimeMillis() - startTime) / 1000.0; + final double secsPer1MReads = (elapsed * 1000000.0) / nRecords; + System.out.printf("Traversed %d %s %.2f secs (%.2f secs per 1M %s)%n", nRecords, type, elapsed, secsPer1MReads, type); + } + } + + // -------------------------------------------------------------------------------------------------------------- + // + // functions for dealing with the reference sequence + // + // -------------------------------------------------------------------------------------------------------------- + + protected void loadReference() { + if ( refFileName!= null ) { + this.refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFileName); + this.refIter = new ReferenceIterator(this.refFile); + } + } + + protected void testReference() { + String line = ""; + refIter.seekForward("chr20", 79); + for ( int i = 0; i < this.maxReads && refIter.hasNext(); i++ ) { + final ReferenceIterator refSite = refIter.next(); + final char refBase = refSite.getBaseAsChar(); + line += refBase; + if ( (i + 1) % 80 == 0 ) { + System.out.println(line); + line = ""; + } + //System.out.printf(" Reference: %s:%d %c%n", refSite.getCurrentContig().getName(), refSite.getPosition(), refBase); + } + System.out.println(line); + System.exit(1); + } + + // -------------------------------------------------------------------------------------------------------------- + // + // traversal functions + // + // -------------------------------------------------------------------------------------------------------------- + protected int initialize() { + startTime = System.currentTimeMillis(); + loadReference(); + //testReference(); + //loadReference(); + readStream = initializeReadStreams(); + return 0; + } + + class locusStreamFilterFunc 
implements SamRecordFilter { + public boolean filterOut(SAMRecord rec) { + boolean result = false; + String why = ""; + if ( rec.getReadUnmappedFlag() ) { + nUnmappedReads++; + result = true; + why = "Unmapped"; + } + else if ( rec.getNotPrimaryAlignmentFlag() ) { + nNotPrimary++; + result = true; + why = "Not Primary"; + } + else if ( rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START ) { + nBadAlignments++; + result = true; + why = "No alignment start"; + } + else if ( rec.getCigar().numCigarElements() > 1 ) { + // FIXME -- deal with indels correctly! + nSkippedIndels++; + result = true; + why = "Skipping indel: " + rec.getCigarString(); + } + else { + result = false; + } + + if ( result ) { + nSkippedReads++; + //System.out.printf(" [filter] %s => %b %s%n", rec.getReadName(), result, why); + } + else { + nReads++; + } + return result; + } + } + + protected int traverseByLoci(LocusWalker walker) { + walker.initialize(); + FilteringIterator filterIter = new FilteringIterator(readStream.iterator(), new locusStreamFilterFunc()); + CloseableIterator iter = new LocusIterator(filterIter); + + T sum = walker.reduceInit(); + while ( iter.hasNext() ) { + this.nRecords++; + + // actually get the read and hand it to the walker + final LocusIterator locus = iter.next(); + final ReferenceIterator refSite = refIter.seekForward(locus.getContig(), locus.getPosition()); + final char refBase = refSite.getBaseAsChar(); + + if ( DEBUGGING ) + System.out.printf(" Reference: %s:%d %c%n", refSite.getCurrentContig().getName(), refSite.getPosition(), refBase); + + final boolean keepMeP = walker.filter(refBase, locus); + if ( keepMeP ) { + M x = walker.map(refBase, locus); + sum = walker.reduce(x, sum); + } + + if ( this.maxReads > 0 && this.nRecords > this.maxReads ) { + System.out.println("Maximum number of reads encountered, terminating traversal " + this.nRecords); + break; + } + + printProgress("loci"); + } + + printProgress( true, "loci" ); + System.out.println("Traversal 
reduce result is " + sum); + System.out.printf("Traversal skipped %d reads out of %d total (%.2f%%)%n", nSkippedReads, nReads, (nSkippedReads * 100.0) / nReads); + System.out.printf(" -> %d unmapped reads%n", nUnmappedReads ); + System.out.printf(" -> %d non-primary reads%n", nNotPrimary ); + System.out.printf(" -> %d reads with bad alignments%n", nBadAlignments ); + System.out.printf(" -> %d reads with indels%n", nSkippedIndels ); + walker.onTraveralDone(); + return 0; + } + + protected int traverseByRead(ReadWalker walker) { + walker.initialize(); + CloseableIterator iter = readStream.iterator(); + R sum = walker.reduceInit(); + while ( iter.hasNext() ) { + this.nRecords++; + + // actually get the read and hand it to the walker + final SAMRecord read = iter.next(); + final boolean keepMeP = walker.filter(null, read); + if ( keepMeP ) { + M x = walker.map(null, read); + sum = walker.reduce(x, sum); + } + + if ( this.maxReads > 0 && this.nRecords > this.maxReads ) { + System.out.println("Maximum number of reads encountered, terminating traversal " + this.nRecords); + break; + } + + printProgress("reads"); + } + + printProgress( true, "reads" ); + System.out.println("Traversal reduce result is " + sum); + walker.onTraveralDone(); + return 0; + } + + // + // + // Prepare the input streams + // + // + private SAMFileReader initializeReadStreams() { + SAMFileReader reader = getSamReader(readsFile); + return reader; + } + + private SAMFileReader getSamReader(final File samFile) { + try { + final InputStream samInputStream = new FileInputStream(samFile); + final InputStream bufferedStream= new BufferedInputStream(samInputStream); + //final InputStream bufferedStream= new BufferedInputStream(samInputStream, 10000000); + final SAMFileReader samReader = new SAMFileReader(bufferedStream, true); + samReader.setValidationStringency(strictness); + + final SAMFileHeader header = samReader.getFileHeader(); + System.err.println("Sort order is: " + header.getSortOrder()); + + 
return samReader; + } + catch (IOException e) { + throw new RuntimeIOException(e); + } + } +} \ No newline at end of file diff --git a/src/edu/mit/broad/sting/atk/modules/EmptyLocusWalker.java b/src/edu/mit/broad/sting/atk/modules/EmptyLocusWalker.java new file mode 100755 index 0000000000..6f7c248361 --- /dev/null +++ b/src/edu/mit/broad/sting/atk/modules/EmptyLocusWalker.java @@ -0,0 +1,38 @@ +package edu.mit.broad.sting.atk.modules; + +import edu.mit.broad.sting.atk.LocusWalker; +import edu.mit.broad.sting.atk.LocusIterator; +import edu.mit.broad.sam.SAMRecord; + +/** + * Created by IntelliJ IDEA. + * User: mdepristo + * Date: Feb 22, 2009 + * Time: 3:22:14 PM + * To change this template use File | Settings | File Templates. + */ +public class EmptyLocusWalker implements LocusWalker { + public void initialize() { + } + + public String walkerType() { return "ByLocus"; } + + // Do we actually want to operate on the context? + public boolean filter(char ref, LocusIterator context) { + return true; // We are keeping all the reads + } + + // Map over the edu.mit.broad.sting.atk.LocusContext + public Integer map(char ref, LocusIterator context) { + return 1; + } + + // Given result of map function + public Integer reduceInit() { return 0; } + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + public void onTraveralDone() { + } +} \ No newline at end of file diff --git a/src/edu/mit/broad/sting/atk/modules/PileupWalker.java b/src/edu/mit/broad/sting/atk/modules/PileupWalker.java new file mode 100644 index 0000000000..eef13ee279 --- /dev/null +++ b/src/edu/mit/broad/sting/atk/modules/PileupWalker.java @@ -0,0 +1,69 @@ +package edu.mit.broad.sting.atk.modules; + +import edu.mit.broad.sting.atk.LocusWalker; +import edu.mit.broad.sting.atk.LocusIterator; +import edu.mit.broad.sam.SAMRecord; + +import java.util.List; + +/** + * Created by IntelliJ IDEA. 
+ * User: mdepristo + * Date: Feb 22, 2009 + * Time: 3:22:14 PM + * To change this template use File | Settings | File Templates. + */ +public class PileupWalker implements LocusWalker { + public void initialize() { + } + + public String walkerType() { return "ByLocus"; } + + // Do we actually want to operate on the context? + public boolean filter(char ref, LocusIterator context) { + return true; // We are keeping all the reads + } + + // Map over the edu.mit.broad.sting.atk.LocusContext + public Integer map(char ref, LocusIterator context) { + //System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size()); + //for ( SAMRecord read : context.getReads() ) { + // System.out.println(" -> " + read.getReadName()); + //} + + List reads = context.getReads(); + List offsets = context.getOffsets(); + String bases = ""; + String quals = ""; + //String offsetString = ""; + for ( int i = 0; i < reads.size(); i++ ) { + SAMRecord read = reads.get(i); + int offset = offsets.get(i); + + //if ( offset >= read.getReadString().length() ) + // System.out.printf(" [%2d] [%s] %s%n", offset, read.format(), read.getReadString()); + + bases += read.getReadString().charAt(offset); + quals += read.getBaseQualityString().charAt(offset); + //offsetString += i; + //System.out.printf(" [%2d] [%s] %s%n", offset, read.getReadString().charAt(offset), read.getReadString()); + } + + if ( context.getPosition() % 10 == 0 ) + System.out.printf("%s:%d: %s %s %s%n", context.getContig(), context.getPosition(), ref, bases, quals); + + //for ( int offset : context.getOffsets() ) { + // System.out.println(" -> " + read.getReadName()); + //} + return 1; + } + + // Given result of map function + public Integer reduceInit() { return 0; } + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + public void onTraveralDone() { + } +} \ No newline at end of file diff --git a/src/edu/mit/broad/sting/atk/modules/ReadWalkerTest.java 
b/src/edu/mit/broad/sting/atk/modules/ReadWalkerTest.java new file mode 100755 index 0000000000..75dd66182c --- /dev/null +++ b/src/edu/mit/broad/sting/atk/modules/ReadWalkerTest.java @@ -0,0 +1,51 @@ +package edu.mit.broad.sting.atk.modules; + +import edu.mit.broad.sam.SAMRecord; +import edu.mit.broad.sting.atk.ReadWalker; +import edu.mit.broad.sting.atk.LocusContext; + +/** + * Created by IntelliJ IDEA. + * User: mdepristo + * Date: Feb 22, 2009 + * Time: 3:22:14 PM + * To change this template use File | Settings | File Templates. + */ +public class ReadWalkerTest implements ReadWalker { + long[] qualCounts = new long[100]; + + public void initialize() { + for ( int i = 0; i < this.qualCounts.length; i++ ) { + this.qualCounts[i] = 0; + } + } + + public String walkerType() { return "ByRead"; } + + // Do we actually want to operate on the context? + public boolean filter(LocusContext context, SAMRecord read) { + return true; // We are keeping all the reads + } + + // Map over the edu.mit.broad.sting.atk.LocusContext + public Integer map(LocusContext context, SAMRecord read) { + for ( byte qual : read.getBaseQualities() ) { + //System.out.println(qual); + this.qualCounts[qual]++; + } + //System.out.println(read.getReadName()); + return 1; + } + + // Given result of map function + public Integer reduceInit() { return 0; } + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + public void onTraveralDone() { + for ( int i = 0; i < this.qualCounts.length; i++ ) { + System.out.printf("%3d : %10d%n", i, this.qualCounts[i]); + } + } +} diff --git a/src/edu/mit/broad/sting/utils/EndlessIterator.java b/src/edu/mit/broad/sting/utils/EndlessIterator.java new file mode 100755 index 0000000000..144473986c --- /dev/null +++ b/src/edu/mit/broad/sting/utils/EndlessIterator.java @@ -0,0 +1,30 @@ +package edu.mit.broad.sting.utils; + +import java.util.Iterator; + +/** + * Created by IntelliJ IDEA. 
+ * User: depristo + * Date: Feb 24, 2009 + * Time: 10:24:38 AM + * To change this template use File | Settings | File Templates. + */ +public class EndlessIterator implements Iterator { + private T value; + + public EndlessIterator(T value) { + this.value = value; + } + + public boolean hasNext() { + return true; + } + + public T next() { + return this.value; + } + + public void remove () { + throw new UnsupportedOperationException(); + } +} \ No newline at end of file diff --git a/src/edu/mit/broad/sting/utils/Predicate.java b/src/edu/mit/broad/sting/utils/Predicate.java new file mode 100755 index 0000000000..1d015534e3 --- /dev/null +++ b/src/edu/mit/broad/sting/utils/Predicate.java @@ -0,0 +1,13 @@ +package edu.mit.broad.sting.utils; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: Feb 24, 2009 + * Time: 10:15:19 AM + * To change this template use File | Settings | File Templates. + */ +public interface Predicate { + public boolean apply(T arg); +} + diff --git a/src/edu/mit/broad/sting/utils/PushbackIterator.java b/src/edu/mit/broad/sting/utils/PushbackIterator.java new file mode 100755 index 0000000000..37dfe6bef5 --- /dev/null +++ b/src/edu/mit/broad/sting/utils/PushbackIterator.java @@ -0,0 +1,46 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. Neither +* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+*/ +package edu.mit.broad.sting.utils; + +import java.util.Iterator; + +public class PushbackIterator implements Iterator { + Iterator underlyingIterator; + T pushedElement = null; + + public PushbackIterator(final Iterator underlyingIterator) { + this.underlyingIterator = underlyingIterator; + } + + public boolean hasNext() { + return pushedElement != null || underlyingIterator.hasNext(); + } + + public T next() { + if (pushedElement != null) { + final T ret = pushedElement; + pushedElement = null; + return ret; + } + return underlyingIterator.next(); + } + + public void pushback(T elt) { + pushedElement = elt; + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + public Iterator getUnderlyingIterator() { + return underlyingIterator; + } +} \ No newline at end of file diff --git a/src/edu/mit/broad/sting/utils/ReferenceIterator.java b/src/edu/mit/broad/sting/utils/ReferenceIterator.java new file mode 100755 index 0000000000..514d3dd203 --- /dev/null +++ b/src/edu/mit/broad/sting/utils/ReferenceIterator.java @@ -0,0 +1,148 @@ +package edu.mit.broad.sting.utils; + +import edu.mit.broad.picard.reference.ReferenceSequenceFile; +import edu.mit.broad.picard.reference.ReferenceSequence; +import edu.mit.broad.sam.util.StringUtil; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: Feb 24, 2009 + * Time: 10:45:01 AM + * To change this template use File | Settings | File Templates. 
+ */ +public class ReferenceIterator implements Iterator { + + // The reference sequence file generator + private ReferenceSequenceFile refFile; + + private ReferenceSequence currentContig = null; + private ReferenceSequence nextContig = null; + private int offset = -1; + + public ReferenceIterator( ReferenceSequenceFile refFile ) { + this.refFile = refFile; + } + + // -------------------------------------------------------------------------------------------------------------- + // + // Accessing data + // + // -------------------------------------------------------------------------------------------------------------- + public byte getBaseAsByte() { return currentContig.getBases()[offset]; } + public String getBaseAsString() { return StringUtil.bytesToString(currentContig.getBases(), offset, 1); } + public char getBaseAsChar() { return getBaseAsString().charAt(0); } + public ReferenceSequence getCurrentContig() { return currentContig; } + public int getPosition() { return offset + 1; } + + // -------------------------------------------------------------------------------------------------------------- + // + // Iterator routines + // + // -------------------------------------------------------------------------------------------------------------- + public boolean hasNext() { + if ( currentContig == null || offset + 1 < currentContig.length() ) { + return true; + } + else { + return loadNextContig(); + } + } + + public ReferenceIterator next() { + if ( currentContig != null ) { + //System.out.printf(" -> %s:%d %d%n", currentContig.getName(), offset, currentContig.length()); + } + offset++; // move on to the next position + + if ( currentContig == null || offset >= currentContig.length() ) { + // We need to update the contig + //System.out.printf(" -> Updating length%n"); + if ( nextContig != null ) { + // We've already loaded the next contig, swap it in, and recursively call next + swapNextContig(); + return next(); + } + else if ( loadNextContig() ){ + // We 
sucessfully loaded the next contig, recursively call next + offset = -1; + return next(); + } + else { + throw new NoSuchElementException(); + } + } + else { + // We're good to go -- we're in the current contig + return this; + } + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + + // -------------------------------------------------------------------------------------------------------------- + // + // Jumping forward + // + // -------------------------------------------------------------------------------------------------------------- + public ReferenceIterator seekForward(final String contigName, final int pos) { + return seekForwardOffset(contigName, pos - 1); + } + + private ReferenceIterator seekForwardOffset(final String contigName, final int seekOffset) { + // jumps us forward in the sequence to the contig / pos + if ( currentContig == null ) + next(); + + //System.out.printf(" -> Seeking to %s %d from %s %d%n", contigName, seekOffset, currentContig.getName(), offset); + if ( contigName.equals(currentContig.getName()) ) { + // we're somewhere on this contig + if ( seekOffset < offset || seekOffset >= currentContig.length() ) { + // bad boy -- can't go backward safely or just beyond the contig length + throw new IllegalArgumentException("Bad seek to " + seekOffset + " current: " + offset); + //return null; + } + else { + offset = seekOffset - 1; + return next(); + } + } + else { + while (true) { + // go searching through the reference + if ( ! 
loadNextContig() ) { + // never found anything + return null; + } + else if ( nextContig.getName().equals(contigName) ) { + swapNextContig(); + return seekForward(contigName, seekOffset); + } + } + } + } + + + // -------------------------------------------------------------------------------------------------------------- + // + // Interal state manipulation + // + // -------------------------------------------------------------------------------------------------------------- + protected boolean loadNextContig() { + // returns true if we had another contig to load + nextContig = refFile.nextSequence(); + return nextContig != null; + } + + protected void swapNextContig() { + currentContig = nextContig; + nextContig = null; + offset = -1; + } +} diff --git a/src/edu/mit/broad/sting/utils/Utils.java b/src/edu/mit/broad/sting/utils/Utils.java new file mode 100755 index 0000000000..094893448a --- /dev/null +++ b/src/edu/mit/broad/sting/utils/Utils.java @@ -0,0 +1,55 @@ +package edu.mit.broad.sting.utils; + +import edu.mit.broad.sam.SAMRecord; + +import java.util.*; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: Feb 24, 2009 + * Time: 10:12:31 AM + * To change this template use File | Settings | File Templates. 
+ */ +public class Utils { + public static List filter(Predicate pred, Collection c) { + List filtered = new ArrayList(); + // loop through all the elements in c + for (T obj : c) { + // if the predicate is true for the current element + if (pred.apply(obj)) { + // append it to the result list + filtered.add(obj); + } + } + return filtered; + } + + private static final Map readFlagNames + = new HashMap(); + + static { + readFlagNames.put(0x1, "Paired"); + readFlagNames.put(0x2, "Proper"); + readFlagNames.put(0x4, "Unmapped"); + readFlagNames.put(0x8, "MateUnmapped"); + readFlagNames.put(0x10, "Forward"); + //readFlagNames.put(0x20, "MateForward"); + readFlagNames.put(0x4, "FirstOfPair"); + readFlagNames.put(0x8, "SecondOfPair"); + readFlagNames.put(0x100, "NotPrimary"); + readFlagNames.put(0x200, "NON-PF"); + readFlagNames.put(0x400, "Duplicate"); + } + + public static String readFlagsAsString(SAMRecord rec) { + String flags = ""; + for ( int flag : readFlagNames.keySet() ) { + if ( ( rec.getFlags() & flag ) != 0 ) { + flags += readFlagNames.get(flag) + " "; + } + } + return flags; + } + +} diff --git a/src/scripts/TraverseTest.sh b/src/scripts/TraverseTest.sh new file mode 100755 index 0000000000..3238c40438 --- /dev/null +++ b/src/scripts/TraverseTest.sh @@ -0,0 +1 @@ +java -cp out/production/AnalysisTK:../../jars/broad.jar edu.mit.broad.sting.atk.AnalysisTK $* diff --git a/src/scripts/TraverseTestProf.sh b/src/scripts/TraverseTestProf.sh new file mode 100755 index 0000000000..ff10a50cd4 --- /dev/null +++ b/src/scripts/TraverseTestProf.sh @@ -0,0 +1 @@ +java -agentlib:hprof=cpu=samples -cp out/production/AnalysisTK:../../jars/broad.jar edu.mit.broad.sting.atk.AnalysisTK $*