diff --git a/build.xml b/build.xml
index 91eb1f8e9a..fe4c7a3f47 100644
--- a/build.xml
+++ b/build.xml
@@ -70,8 +70,6 @@
-
-
@@ -147,11 +145,7 @@
-
-
-
-
-
@@ -179,13 +173,23 @@
-
-
+
+
+
+
+
-
+
+
+
+
+
+
+
+
@@ -215,12 +219,6 @@
-
-
-
-
-
-
@@ -371,7 +369,6 @@
-
@@ -388,7 +385,6 @@
-
@@ -696,7 +692,6 @@
-
@@ -1046,6 +1041,7 @@
+
diff --git a/ivy.xml b/ivy.xml
index ce724bc3cc..3f3d1c97fc 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -48,6 +48,9 @@
+
+
+
diff --git a/public/c/SeparateQltout.cc b/public/c/SeparateQltout.cc
new file mode 100644
index 0000000000..7644c96037
--- /dev/null
+++ b/public/c/SeparateQltout.cc
@@ -0,0 +1,70 @@
+#include "MainTools.h"
+#include "Basevector.h"
+#include "lookup/LookAlign.h"
+#include "lookup/SerialQltout.h"
+
+unsigned int MatchingEnd(look_align &la, vecbasevector &candidates, vecbasevector &ref) {
+ //la.PrintParseable(cout);
+
+ for (int i = 0; i < candidates.size(); i++) {
+ look_align newla = la;
+
+ if (newla.rc1) { candidates[i].ReverseComplement(); }
+ newla.ResetFromAlign(newla.a, candidates[i], ref[la.target_id]);
+
+ //newla.PrintParseable(cout, &candidates[i], &ref[newla.target_id]);
+ //cout << newla.Errors() << " " << la.Errors() << endl;
+
+ if (newla.Errors() == la.Errors()) {
+ return i;
+ }
+ }
+
+ //FatalErr("Query id " + ToString(la.query_id) + " had no matches.");
+
+ return candidates.size() + 1;
+}
+
+int main(int argc, char **argv) {
+ RunTime();
+
+ BeginCommandArguments;
+ CommandArgument_String(ALIGNS);
+ CommandArgument_String(FASTB_END_1);
+ CommandArgument_String(FASTB_END_2);
+ CommandArgument_String(REFERENCE);
+
+ CommandArgument_String(ALIGNS_END_1_OUT);
+ CommandArgument_String(ALIGNS_END_2_OUT);
+ EndCommandArguments;
+
+ vecbasevector ref(REFERENCE);
+ vecbasevector reads1(FASTB_END_1);
+ vecbasevector reads2(FASTB_END_2);
+
+ ofstream aligns1stream(ALIGNS_END_1_OUT.c_str());
+ ofstream aligns2stream(ALIGNS_END_2_OUT.c_str());
+
+ basevector bv;
+
+ SerialQltout sqltout(ALIGNS);
+ look_align la;
+ while (sqltout.Next(la)) {
+ vecbasevector candidates(2);
+ candidates[0] = reads1[la.query_id];
+ candidates[1] = reads2[la.query_id];
+
+ unsigned int matchingend = MatchingEnd(la, candidates, ref);
+ if (matchingend < 2) {
+ bv = (matchingend == 0) ? reads1[la.query_id] : reads2[la.query_id];
+
+ //la.PrintParseable(cout, &bv, &ref[la.target_id]);
+ la.PrintParseable(((matchingend == 0) ? aligns1stream : aligns2stream), &bv, &ref[la.target_id]);
+ }
+ }
+
+ aligns1stream.close();
+ aligns2stream.close();
+
+ return 0;
+}
diff --git a/public/c/bwa/Makefile b/public/c/bwa/Makefile
new file mode 100644
index 0000000000..6399a0e6d6
--- /dev/null
+++ b/public/c/bwa/Makefile
@@ -0,0 +1,21 @@
+CXX=g++
+CXXFLAGS=-g -Wall -O2 -m64 -fPIC
+
+.cpp.o:
+ $(CXX) -c $(CXXFLAGS) -I$(BWA_HOME) -I$(JAVA_INCLUDE) $< -o $@
+
+all: init lib
+
+init:
+ @echo Please make sure the following platforms are set correctly on your machine.
+ @echo BWA_HOME=$(BWA_HOME)
+ @echo JAVA_INCLUDE=$(JAVA_INCLUDE)
+ @echo TARGET_LIB=$(TARGET_LIB)
+ @echo EXTRA_LIBS=$(EXTRA_LIBS)
+ @echo LIBTOOL_COMMAND=$(LIBTOOL_COMMAND)
+
+lib: org_broadinstitute_sting_alignment_bwa_c_BWACAligner.o bwa_gateway.o
+ $(LIBTOOL_COMMAND) $? -o $(TARGET_LIB) -L$(BWA_HOME) -lbwacore $(EXTRA_LIBS)
+
+clean:
+ rm *.o libbwa.*
diff --git a/public/c/bwa/build_linux.sh b/public/c/bwa/build_linux.sh
new file mode 100755
index 0000000000..b3631a28df
--- /dev/null
+++ b/public/c/bwa/build_linux.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+export BWA_HOME="/humgen/gsa-scr1/hanna/src/bwa-trunk/bwa"
+export JAVA_INCLUDE="/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include -I/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include/linux"
+export TARGET_LIB="libbwa.so"
+export EXTRA_LIBS="-lc -lz -lstdc++ -lpthread"
+export LIBTOOL_COMMAND="g++ -shared -Wl,-soname,libbwa.so"
+make
diff --git a/public/c/bwa/build_mac.sh b/public/c/bwa/build_mac.sh
new file mode 100644
index 0000000000..bfed900bba
--- /dev/null
+++ b/public/c/bwa/build_mac.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+export BWA_HOME="/Users/mhanna/src/bwa"
+export JAVA_INCLUDE="/System/Library/Frameworks/JavaVM.framework/Headers"
+export TARGET_LIB="libbwa.dylib"
+export EXTRA_LIBS="-lc -lz -lsupc++"
+export LIBTOOL_COMMAND="libtool -dynamic"
+make
diff --git a/public/c/bwa/bwa_gateway.cpp b/public/c/bwa/bwa_gateway.cpp
new file mode 100644
index 0000000000..00f5aa5bcd
--- /dev/null
+++ b/public/c/bwa/bwa_gateway.cpp
@@ -0,0 +1,277 @@
+#include
+#include
+#include
+
+#include "bwase.h"
+#include "bwa_gateway.h"
+
+BWA::BWA(const char* ann_filename,
+ const char* amb_filename,
+ const char* pac_filename,
+ const char* forward_bwt_filename,
+ const char* forward_sa_filename,
+ const char* reverse_bwt_filename,
+ const char* reverse_sa_filename)
+{
+ // Load the bns (?) and reference
+ bns = bns_restore_core(ann_filename,amb_filename,pac_filename);
+ reference = new ubyte_t[bns->l_pac/4+1];
+ rewind(bns->fp_pac);
+ fread(reference, 1, bns->l_pac/4+1, bns->fp_pac);
+ fclose(bns->fp_pac);
+ bns->fp_pac = NULL;
+
+ // Load the BWTs (both directions) and suffix arrays (both directions)
+ bwts[0] = bwt_restore_bwt(forward_bwt_filename);
+ bwt_restore_sa(forward_sa_filename, bwts[0]);
+ bwts[1] = bwt_restore_bwt(reverse_bwt_filename);
+ bwt_restore_sa(reverse_sa_filename, bwts[1]);
+ load_default_options();
+
+ // Always reinitialize the random seed whenever a new set of files are loaded.
+ initialize_random_seed();
+
+ // initialize the bwase subsystem
+ bwase_initialize();
+}
+
+BWA::~BWA() {
+ delete[] reference;
+ bns_destroy(bns);
+ bwt_destroy(bwts[0]);
+ bwt_destroy(bwts[1]);
+}
+
+void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& paths, unsigned& num_paths, unsigned& best_path_count, unsigned& second_best_path_count)
+{
+ bwa_seq_t* sequence = create_sequence(bases, read_length);
+
+ // Calculate the suffix array interval for each sequence, storing the result in sequence->aln (and sequence->n_aln).
+ // This method will destroy the contents of seq and rseq.
+ bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options);
+
+ paths = new bwt_aln1_t[sequence->n_aln];
+ memcpy(paths,sequence->aln,sequence->n_aln*sizeof(bwt_aln1_t));
+ num_paths = sequence->n_aln;
+
+ // Call aln2seq to initialize the type of match present.
+ bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
+ best_path_count = sequence->c1;
+ second_best_path_count = sequence->c2;
+
+ bwa_free_read_seq(1,sequence);
+}
+
+Alignment* BWA::generate_single_alignment(const char* bases, const unsigned read_length) {
+ bwa_seq_t* sequence = create_sequence(bases,read_length);
+
+ // Calculate paths.
+ bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options);
+
+ // Check for no alignments found and return null.
+ if(sequence->n_aln == 0) {
+ bwa_free_read_seq(1,sequence);
+ return NULL;
+ }
+
+ // bwa_cal_sa_reg_gap destroys the bases / read length. Copy them back in.
+ copy_bases_into_sequence(sequence,bases,read_length);
+
+ // Pick best alignment and propagate its information into the sequence.
+ bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
+
+ // Generate the best alignment from the sequence.
+ Alignment* alignment = new Alignment;
+ *alignment = generate_final_alignment_from_sequence(sequence);
+
+ bwa_free_read_seq(1,sequence);
+
+ return alignment;
+}
+
+void BWA::generate_alignments_from_paths(const char* bases,
+ const unsigned read_length,
+ bwt_aln1_t* paths,
+ const unsigned num_paths,
+ const unsigned best_count,
+ const unsigned second_best_count,
+ Alignment*& alignments,
+ unsigned& num_alignments)
+{
+ bwa_seq_t* sequence = create_sequence(bases,read_length);
+
+ sequence->aln = paths;
+ sequence->n_aln = num_paths;
+
+ // (Ab)use bwa_aln2seq to propagate values stored in the path out into the sequence itself.
+ bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
+
+ // But overwrite key parts of the sequence in case the user passed back only a smaller subset
+ // of the paths.
+ sequence->c1 = best_count;
+ sequence->c2 = second_best_count;
+ sequence->type = sequence->c1 > 1 ? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE;
+
+ num_alignments = 0;
+ for(unsigned i = 0; i < (unsigned)sequence->n_aln; i++)
+ num_alignments += (sequence->aln + i)->l - (sequence->aln + i)->k + 1;
+
+ alignments = new Alignment[num_alignments];
+ unsigned alignment_idx = 0;
+
+ for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
+ // Stub in a 'working' path, so that only the desired alignment is local-aligned.
+ const bwt_aln1_t* path = paths + path_idx;
+ bwt_aln1_t working_path = *path;
+
+ // Loop through all alignments, aligning each one individually.
+ for(unsigned sa_idx = path->k; sa_idx <= path->l; sa_idx++) {
+ working_path.k = working_path.l = sa_idx;
+ sequence->aln = &working_path;
+ sequence->n_aln = 1;
+
+ sequence->sa = sa_idx;
+ sequence->strand = path->a;
+ sequence->score = path->score;
+
+ // Each time through bwa_refine_gapped, seq gets reversed. Revert the reverse.
+ // TODO: Fix the interface to bwa_refine_gapped so its easier to work with.
+ if(alignment_idx > 0)
+ seq_reverse(sequence->len, sequence->seq, 0);
+
+ // Copy the local alignment data into the alignment object.
+ *(alignments + alignment_idx) = generate_final_alignment_from_sequence(sequence);
+
+ alignment_idx++;
+ }
+ }
+
+ sequence->aln = NULL;
+ sequence->n_aln = 0;
+
+ bwa_free_read_seq(1,sequence);
+}
+
+Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) {
+ // Calculate the local coordinate and local alignment.
+ bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr);
+ bwa_refine_gapped(bns, 1, sequence, reference, NULL);
+
+ // Copy the local alignment data into the alignment object.
+ Alignment alignment;
+
+ // Populate basic path info
+ alignment.edit_distance = sequence->nm;
+ alignment.num_mismatches = sequence->n_mm;
+ alignment.num_gap_opens = sequence->n_gapo;
+ alignment.num_gap_extensions = sequence->n_gape;
+ alignment.num_best = sequence->c1;
+ alignment.num_second_best = sequence->c2;
+
+ // Final alignment position.
+ alignment.type = sequence->type;
+ bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig);
+ alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1;
+ alignment.negative_strand = sequence->strand;
+ alignment.mapping_quality = sequence->mapQ;
+
+ // Cigar step.
+ alignment.cigar = NULL;
+ if(sequence->cigar) {
+ alignment.cigar = new uint16_t[sequence->n_cigar];
+ memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t));
+ }
+ alignment.n_cigar = sequence->n_cigar;
+
+ // MD tag with a better breakdown of differences in the cigar
+ alignment.md = strdup(sequence->md);
+ delete[] sequence->md;
+ sequence->md = NULL;
+
+ return alignment;
+}
+
+void BWA::load_default_options()
+{
+ options.s_mm = 3;
+ options.s_gapo = 11;
+ options.s_gape = 4;
+ options.mode = 3;
+ options.indel_end_skip = 5;
+ options.max_del_occ = 10;
+ options.max_entries = 2000000;
+ options.fnr = 0.04;
+ options.max_diff = -1;
+ options.max_gapo = 1;
+ options.max_gape = 6;
+ options.max_seed_diff = 2;
+ options.seed_len = 2147483647;
+ options.n_threads = 1;
+ options.max_top2 = 30;
+ options.trim_qual = 0;
+}
+
+void BWA::initialize_random_seed()
+{
+ srand48(bns->seed);
+}
+
+void BWA::set_max_edit_distance(float edit_distance) {
+ if(edit_distance > 0 && edit_distance < 1) {
+ options.fnr = edit_distance;
+ options.max_diff = -1;
+ }
+ else {
+ options.fnr = -1.0;
+ options.max_diff = (int)edit_distance;
+ }
+}
+
+void BWA::set_max_gap_opens(int max_gap_opens) { options.max_gapo = max_gap_opens; }
+void BWA::set_max_gap_extensions(int max_gap_extensions) { options.max_gape = max_gap_extensions; }
+void BWA::set_disallow_indel_within_range(int indel_range) { options.indel_end_skip = indel_range; }
+void BWA::set_mismatch_penalty(int penalty) { options.s_mm = penalty; }
+void BWA::set_gap_open_penalty(int penalty) { options.s_gapo = penalty; }
+void BWA::set_gap_extension_penalty(int penalty) { options.s_gape = penalty; }
+
+/**
+ * Create a sequence with a set of reasonable initial defaults.
+ * Will leave seq and rseq empty.
+ */
+bwa_seq_t* BWA::create_sequence(const char* bases, const unsigned read_length)
+{
+ bwa_seq_t* sequence = new bwa_seq_t;
+
+ sequence->tid = -1;
+
+ sequence->name = 0;
+
+ copy_bases_into_sequence(sequence, bases, read_length);
+
+ sequence->qual = 0;
+ sequence->aln = 0;
+ sequence->md = 0;
+
+ sequence->cigar = NULL;
+ sequence->n_cigar = 0;
+
+ sequence->multi = NULL;
+ sequence->n_multi = 0;
+
+ return sequence;
+}
+
+void BWA::copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length)
+{
+ // seq, rseq will ultimately be freed by bwa_cal_sa_reg_gap
+ sequence->seq = new ubyte_t[read_length];
+ sequence->rseq = new ubyte_t[read_length];
+ for(unsigned i = 0; i < read_length; i++) sequence->seq[i] = nst_nt4_table[(unsigned)bases[i]];
+ memcpy(sequence->rseq,sequence->seq,read_length);
+
+ // BWA expects the read bases to arrive reversed.
+ seq_reverse(read_length,sequence->seq,0);
+ seq_reverse(read_length,sequence->rseq,1);
+
+ sequence->full_len = sequence->len = read_length;
+}
diff --git a/public/c/bwa/bwa_gateway.h b/public/c/bwa/bwa_gateway.h
new file mode 100644
index 0000000000..2d26ec6509
--- /dev/null
+++ b/public/c/bwa/bwa_gateway.h
@@ -0,0 +1,83 @@
+#ifndef BWA_GATEWAY
+#define BWA_GATEWAY
+
+#include
+
+#include "bntseq.h"
+#include "bwt.h"
+#include "bwtaln.h"
+
+class Alignment {
+ public:
+ uint32_t type;
+ int contig;
+ bwtint_t pos;
+ bool negative_strand;
+ uint32_t mapping_quality;
+
+ uint16_t *cigar;
+ int n_cigar;
+
+ uint8_t num_mismatches;
+ uint8_t num_gap_opens;
+ uint8_t num_gap_extensions;
+ uint16_t edit_distance;
+
+ uint32_t num_best;
+ uint32_t num_second_best;
+
+ char* md;
+};
+
+class BWA {
+ private:
+ bntseq_t *bns;
+ ubyte_t* reference;
+ bwt_t* bwts[2];
+ gap_opt_t options;
+
+ void load_default_options();
+ void initialize_random_seed();
+ bwa_seq_t* create_sequence(const char* bases, const unsigned read_length);
+ void copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length);
+ Alignment generate_final_alignment_from_sequence(bwa_seq_t* sequence);
+
+ public:
+ BWA(const char* ann_filename,
+ const char* amb_filename,
+ const char* pac_filename,
+ const char* forward_bwt_filename,
+ const char* forward_sa_filename,
+ const char* reverse_bwt_filename,
+ const char* reverse_sa_filename);
+ ~BWA();
+
+ // Parameterize the aligner.
+ void set_max_edit_distance(float edit_distance);
+ void set_max_gap_opens(int max_gap_opens);
+ void set_max_gap_extensions(int max_gap_extensions);
+ void set_disallow_indel_within_range(int indel_range);
+ void set_mismatch_penalty(int penalty);
+ void set_gap_open_penalty(int penalty);
+ void set_gap_extension_penalty(int penalty);
+
+ // Perform the alignment
+ Alignment* generate_single_alignment(const char* bases,
+ const unsigned read_length);
+ void find_paths(const char* bases,
+ const unsigned read_length,
+ bwt_aln1_t*& paths,
+ unsigned& num_paths,
+ unsigned& best_path_count,
+ unsigned& second_best_path_count);
+ void generate_alignments_from_paths(const char* bases,
+ const unsigned read_length,
+ bwt_aln1_t* paths,
+ const unsigned num_paths,
+ const unsigned best_count,
+ const unsigned second_best_count,
+ Alignment*& alignments,
+ unsigned& num_alignments);
+};
+
+#endif // BWA_GATEWAY
diff --git a/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp
new file mode 100644
index 0000000000..1ccbef0d41
--- /dev/null
+++ b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp
@@ -0,0 +1,437 @@
+#include
+#include
+#include
+
+#include "bntseq.h"
+#include "bwt.h"
+#include "bwtaln.h"
+#include "bwa_gateway.h"
+#include "org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h"
+
+typedef void (BWA::*int_setter)(int value);
+typedef void (BWA::*float_setter)(float value);
+
+static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment);
+static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name);
+static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter);
+static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter);
+static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message);
+
+JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create(JNIEnv* env, jobject instance, jobject bwtFiles, jobject configuration)
+{
+ jstring java_ann = get_configuration_file(env,bwtFiles,"annFile");
+ if(java_ann == NULL) return 0L;
+ jstring java_amb = get_configuration_file(env,bwtFiles,"ambFile");
+ if(java_amb == NULL) return 0L;
+ jstring java_pac = get_configuration_file(env,bwtFiles,"pacFile");
+ if(java_pac == NULL) return 0L;
+ jstring java_forward_bwt = get_configuration_file(env,bwtFiles,"forwardBWTFile");
+ if(java_forward_bwt == NULL) return 0L;
+ jstring java_forward_sa = get_configuration_file(env,bwtFiles,"forwardSAFile");
+ if(java_forward_sa == NULL) return 0L;
+ jstring java_reverse_bwt = get_configuration_file(env,bwtFiles,"reverseBWTFile");
+ if(java_reverse_bwt == NULL) return 0L;
+ jstring java_reverse_sa = get_configuration_file(env,bwtFiles,"reverseSAFile");
+ if(java_reverse_sa == NULL) return 0L;
+
+ const char* ann_filename = env->GetStringUTFChars(java_ann,JNI_FALSE);
+ if(env->ExceptionCheck()) return 0L;
+ const char* amb_filename = env->GetStringUTFChars(java_amb,JNI_FALSE);
+ if(env->ExceptionCheck()) return 0L;
+ const char* pac_filename = env->GetStringUTFChars(java_pac,JNI_FALSE);
+ if(env->ExceptionCheck()) return 0L;
+ const char* forward_bwt_filename = env->GetStringUTFChars(java_forward_bwt,JNI_FALSE);
+ if(env->ExceptionCheck()) return 0L;
+ const char* forward_sa_filename = env->GetStringUTFChars(java_forward_sa,JNI_FALSE);
+ if(env->ExceptionCheck()) return 0L;
+ const char* reverse_bwt_filename = env->GetStringUTFChars(java_reverse_bwt,JNI_FALSE);
+ if(env->ExceptionCheck()) return 0L;
+ const char* reverse_sa_filename = env->GetStringUTFChars(java_reverse_sa,JNI_FALSE);
+ if(env->ExceptionCheck()) return 0L;
+
+ BWA* bwa = new BWA(ann_filename,
+ amb_filename,
+ pac_filename,
+ forward_bwt_filename,
+ forward_sa_filename,
+ reverse_bwt_filename,
+ reverse_sa_filename);
+
+ Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(env,instance,(jlong)bwa,configuration);
+ if(env->ExceptionCheck()) return 0L;
+
+ env->ReleaseStringUTFChars(java_ann,ann_filename);
+ if(env->ExceptionCheck()) return 0L;
+ env->ReleaseStringUTFChars(java_amb,amb_filename);
+ if(env->ExceptionCheck()) return 0L;
+ env->ReleaseStringUTFChars(java_pac,pac_filename);
+ if(env->ExceptionCheck()) return 0L;
+ env->ReleaseStringUTFChars(java_forward_bwt,forward_bwt_filename);
+ if(env->ExceptionCheck()) return 0L;
+ env->ReleaseStringUTFChars(java_forward_sa,forward_sa_filename);
+ if(env->ExceptionCheck()) return 0L;
+ env->ReleaseStringUTFChars(java_reverse_bwt,reverse_bwt_filename);
+ if(env->ExceptionCheck()) return 0L;
+ env->ReleaseStringUTFChars(java_reverse_sa,reverse_sa_filename);
+ if(env->ExceptionCheck()) return 0L;
+
+ return (jlong)bwa;
+}
+
+JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_destroy(JNIEnv* env, jobject instance, jlong java_bwa)
+{
+ BWA* bwa = (BWA*)java_bwa;
+ delete bwa;
+}
+
+JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(JNIEnv *env, jobject instance, jlong java_bwa, jobject configuration) {
+ BWA* bwa = (BWA*)java_bwa;
+ set_float_configuration_param(env, configuration, "maximumEditDistance", bwa, &BWA::set_max_edit_distance);
+ if(env->ExceptionCheck()) return;
+ set_int_configuration_param(env, configuration, "maximumGapOpens", bwa, &BWA::set_max_gap_opens);
+ if(env->ExceptionCheck()) return;
+ set_int_configuration_param(env, configuration, "maximumGapExtensions", bwa, &BWA::set_max_gap_extensions);
+ if(env->ExceptionCheck()) return;
+ set_int_configuration_param(env, configuration, "disallowIndelWithinRange", bwa, &BWA::set_disallow_indel_within_range);
+ if(env->ExceptionCheck()) return;
+ set_int_configuration_param(env, configuration, "mismatchPenalty", bwa, &BWA::set_mismatch_penalty);
+ if(env->ExceptionCheck()) return;
+ set_int_configuration_param(env, configuration, "gapOpenPenalty", bwa, &BWA::set_gap_open_penalty);
+ if(env->ExceptionCheck()) return;
+ set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty);
+ if(env->ExceptionCheck()) return;
+}
+
+JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases)
+{
+ BWA* bwa = (BWA*)java_bwa;
+
+ const jsize read_length = env->GetArrayLength(java_bases);
+ if(env->ExceptionCheck()) return NULL;
+
+ jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
+ if(read_bases == NULL) return NULL;
+
+ bwt_aln1_t* paths = NULL;
+ unsigned num_paths = 0;
+
+ unsigned best_path_count, second_best_path_count;
+ bwa->find_paths((const char*)read_bases,read_length,paths,num_paths,best_path_count,second_best_path_count);
+
+ jobjectArray java_paths = env->NewObjectArray(num_paths, env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath"), NULL);
+ if(java_paths == NULL) return NULL;
+
+ for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
+ bwt_aln1_t& path = *(paths + path_idx);
+
+ jclass java_path_class = env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath");
+ if(java_path_class == NULL) return NULL;
+
+ jmethodID java_path_constructor = env->GetMethodID(java_path_class, "", "(IIIZJJIII)V");
+ if(java_path_constructor == NULL) return NULL;
+
+ // Note that k/l are being cast to long. Bad things will happen if JNI assumes that they're ints.
+ jobject java_path = env->NewObject(java_path_class,
+ java_path_constructor,
+ path.n_mm,
+ path.n_gapo,
+ path.n_gape,
+ path.a,
+ (jlong)path.k,
+ (jlong)path.l,
+ path.score,
+ best_path_count,
+ second_best_path_count);
+ if(java_path == NULL) return NULL;
+
+ env->SetObjectArrayElement(java_paths,path_idx,java_path);
+ if(env->ExceptionCheck()) return NULL;
+
+ env->DeleteLocalRef(java_path_class);
+ if(env->ExceptionCheck()) return NULL;
+ }
+
+ delete[] paths;
+
+ env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);
+
+ return env->ExceptionCheck() ? NULL : java_paths;
+}
+
+JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases, jobjectArray java_paths)
+{
+ BWA* bwa = (BWA*)java_bwa;
+
+ const jsize read_length = env->GetArrayLength(java_bases);
+ if(env->ExceptionCheck()) return NULL;
+
+ jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
+ if(read_bases == NULL) return NULL;
+
+ const jsize num_paths = env->GetArrayLength(java_paths);
+ bwt_aln1_t* paths = new bwt_aln1_t[num_paths];
+ unsigned best_count = 0, second_best_count = 0;
+
+ for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
+ jobject java_path = env->GetObjectArrayElement(java_paths,path_idx);
+ jclass java_path_class = env->GetObjectClass(java_path);
+ if(java_path_class == NULL) return NULL;
+
+ bwt_aln1_t& path = *(paths + path_idx);
+
+ jfieldID mismatches_field = env->GetFieldID(java_path_class, "numMismatches", "I");
+ if(mismatches_field == NULL) return NULL;
+ path.n_mm = env->GetIntField(java_path,mismatches_field);
+ if(env->ExceptionCheck()) return NULL;
+
+ jfieldID gap_opens_field = env->GetFieldID(java_path_class, "numGapOpens", "I");
+ if(gap_opens_field == NULL) return NULL;
+ path.n_gapo = env->GetIntField(java_path,gap_opens_field);
+ if(env->ExceptionCheck()) return NULL;
+
+ jfieldID gap_extensions_field = env->GetFieldID(java_path_class, "numGapExtensions", "I");
+ if(gap_extensions_field == NULL) return NULL;
+ path.n_gape = env->GetIntField(java_path,gap_extensions_field);
+ if(env->ExceptionCheck()) return NULL;
+
+ jfieldID negative_strand_field = env->GetFieldID(java_path_class, "negativeStrand", "Z");
+ if(negative_strand_field == NULL) return NULL;
+ path.a = env->GetBooleanField(java_path,negative_strand_field);
+ if(env->ExceptionCheck()) return NULL;
+
+ jfieldID k_field = env->GetFieldID(java_path_class, "k", "J");
+ if(k_field == NULL) return NULL;
+ path.k = env->GetLongField(java_path,k_field);
+ if(env->ExceptionCheck()) return NULL;
+
+ jfieldID l_field = env->GetFieldID(java_path_class, "l", "J");
+ if(l_field == NULL) return NULL;
+ path.l = env->GetLongField(java_path,l_field);
+ if(env->ExceptionCheck()) return NULL;
+
+ jfieldID score_field = env->GetFieldID(java_path_class, "score", "I");
+ if(score_field == NULL) return NULL;
+ path.score = env->GetIntField(java_path,score_field);
+ if(env->ExceptionCheck()) return NULL;
+
+ jfieldID best_count_field = env->GetFieldID(java_path_class, "bestCount", "I");
+ if(best_count_field == NULL) return NULL;
+ best_count = env->GetIntField(java_path,best_count_field);
+ if(env->ExceptionCheck()) return NULL;
+
+ jfieldID second_best_count_field = env->GetFieldID(java_path_class, "secondBestCount", "I");
+ if(second_best_count_field == NULL) return NULL;
+ second_best_count = env->GetIntField(java_path,second_best_count_field);
+ if(env->ExceptionCheck()) return NULL;
+ }
+
+ Alignment* alignments = NULL;
+ unsigned num_alignments = 0;
+ bwa->generate_alignments_from_paths((const char*)read_bases,read_length,paths,num_paths,best_count,second_best_count,alignments,num_alignments);
+
+ jobjectArray java_alignments = env->NewObjectArray(num_alignments, env->FindClass("org/broadinstitute/sting/alignment/Alignment"), NULL);
+ if(java_alignments == NULL) return NULL;
+
+ for(unsigned alignment_idx = 0; alignment_idx < (unsigned)num_alignments; alignment_idx++) {
+ Alignment& alignment = *(alignments + alignment_idx);
+ jobject java_alignment = convert_to_java_alignment(env,read_bases,read_length,alignment);
+ if(java_alignment == NULL) return NULL;
+ env->SetObjectArrayElement(java_alignments,alignment_idx,java_alignment);
+ if(env->ExceptionCheck()) return NULL;
+ }
+
+ delete[] alignments;
+ delete[] paths;
+
+ env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);
+
+ return env->ExceptionCheck() ? NULL : java_alignments;
+}
+
+JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) {
+ BWA* bwa = (BWA*)java_bwa;
+
+ const jsize read_length = env->GetArrayLength(java_bases);
+ if(env->ExceptionCheck()) return NULL;
+
+ jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
+ if(read_bases == NULL) return NULL;
+
+ Alignment* best_alignment = bwa->generate_single_alignment((const char*)read_bases,read_length);
+ jobject java_best_alignment = (best_alignment != NULL) ? convert_to_java_alignment(env,read_bases,read_length,*best_alignment) : NULL;
+ delete best_alignment;
+
+ env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);
+
+ return java_best_alignment;
+}
+
+static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment) {
+ unsigned cigar_length;
+ if(alignment.type == BWA_TYPE_NO_MATCH) cigar_length = 0;
+ else if(!alignment.cigar) cigar_length = 1;
+ else cigar_length = alignment.n_cigar;
+
+ jcharArray java_cigar_operators = env->NewCharArray(cigar_length);
+ if(java_cigar_operators == NULL) return NULL;
+ jintArray java_cigar_lengths = env->NewIntArray(cigar_length);
+ if(java_cigar_lengths == NULL) return NULL;
+
+ if(alignment.cigar) {
+ for(unsigned cigar_idx = 0; cigar_idx < (unsigned)alignment.n_cigar; ++cigar_idx) {
+ jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14];
+ jint cigar_length = alignment.cigar[cigar_idx]&0x3fff;
+
+ env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator);
+ if(env->ExceptionCheck()) return NULL;
+ env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length);
+ if(env->ExceptionCheck()) return NULL;
+ }
+ }
+ else {
+ if(alignment.type != BWA_TYPE_NO_MATCH) {
+ jchar cigar_operator = 'M';
+ env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator);
+ if(env->ExceptionCheck()) return NULL;
+ env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length);
+ if(env->ExceptionCheck()) return NULL;
+ }
+ }
+ delete[] alignment.cigar;
+
+ jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment");
+ if(java_alignment_class == NULL) return NULL;
+
+ jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "", "(IIZI[C[IILjava/lang/String;IIIII)V");
+ if(java_alignment_constructor == NULL) return NULL;
+
+ jstring java_md = env->NewStringUTF(alignment.md);
+ if(java_md == NULL) return NULL;
+ delete[] alignment.md;
+
+ jobject java_alignment = env->NewObject(java_alignment_class,
+ java_alignment_constructor,
+ alignment.contig,
+ alignment.pos,
+ alignment.negative_strand,
+ alignment.mapping_quality,
+ java_cigar_operators,
+ java_cigar_lengths,
+ alignment.edit_distance,
+ java_md,
+ alignment.num_mismatches,
+ alignment.num_gap_opens,
+ alignment.num_gap_extensions,
+ alignment.num_best,
+ alignment.num_second_best);
+ if(java_alignment == NULL) return NULL;
+
+ env->DeleteLocalRef(java_alignment_class);
+ if(env->ExceptionCheck()) return NULL;
+
+ return java_alignment;
+}
+
+static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name) {
+ jclass configuration_class = env->GetObjectClass(configuration);
+ if(configuration_class == NULL) return NULL;
+
+ jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/io/File;");
+ if(configuration_field == NULL) return NULL;
+
+ jobject configuration_file = (jobject)env->GetObjectField(configuration,configuration_field);
+
+ jclass file_class = env->FindClass("java/io/File");
+ if(file_class == NULL) return NULL;
+
+ jmethodID path_extractor = env->GetMethodID(file_class,"getAbsolutePath", "()Ljava/lang/String;");
+ if(path_extractor == NULL) return NULL;
+
+ jstring path = (jstring)env->CallObjectMethod(configuration_file,path_extractor);
+ if(path == NULL) return NULL;
+
+ env->DeleteLocalRef(configuration_class);
+ env->DeleteLocalRef(file_class);
+ env->DeleteLocalRef(configuration_file);
+
+ return path;
+}
+
+static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter) {
+ jclass configuration_class = env->GetObjectClass(configuration);
+ if(configuration_class == NULL) return;
+
+ jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Integer;");
+ if(configuration_field == NULL) return;
+
+ jobject boxed_value = env->GetObjectField(configuration,configuration_field);
+ if(env->ExceptionCheck()) return;
+
+ if(boxed_value != NULL) {
+ jclass int_box_class = env->FindClass("java/lang/Integer");
+ if(int_box_class == NULL) return;
+
+ jmethodID int_extractor = env->GetMethodID(int_box_class,"intValue", "()I");
+ if(int_extractor == NULL) return;
+
+ jint value = env->CallIntMethod(boxed_value,int_extractor);
+ if(env->ExceptionCheck()) return;
+
+ if(value < 0)
+ {
+ throw_config_value_exception(env,field_name,"cannot be set to a negative value");
+ return;
+ }
+
+ (bwa->*setter)(value);
+
+ env->DeleteLocalRef(int_box_class);
+ }
+
+ env->DeleteLocalRef(boxed_value);
+ env->DeleteLocalRef(configuration_class);
+}
+
+static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter)
+{
+ jclass configuration_class = env->GetObjectClass(configuration);
+ if(configuration_class == NULL) return;
+
+ jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Float;");
+ if(configuration_field == NULL) return;
+
+ jobject boxed_value = env->GetObjectField(configuration,configuration_field);
+ if(boxed_value != NULL) {
+ jclass float_box_class = env->FindClass("java/lang/Float");
+ if(float_box_class == NULL) return;
+
+ jmethodID float_extractor = env->GetMethodID(float_box_class,"floatValue", "()F");
+ if(float_extractor == NULL) return;
+
+ jfloat value = env->CallFloatMethod(boxed_value,float_extractor);
+ if(env->ExceptionCheck()) return;
+
+ if(value < 0)
+ {
+ throw_config_value_exception(env,field_name,"cannot be set to a negative value");
+ return;
+ }
+
+ (bwa->*setter)(value);
+
+ env->DeleteLocalRef(float_box_class);
+ }
+
+ env->DeleteLocalRef(boxed_value);
+ env->DeleteLocalRef(configuration_class);
+}
+
+static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message)
+{
+ char* buffer = new char[strlen(field_name)+1+strlen(message)+1];
+ sprintf(buffer,"%s %s",field_name,message);
+ jclass sting_exception_class = env->FindClass("org/broadinstitute/sting/utils/StingException");
+ if(sting_exception_class == NULL) return;
+ env->ThrowNew(sting_exception_class, buffer);
+ delete[] buffer;
+}
diff --git a/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h
new file mode 100644
index 0000000000..0c44e430ac
--- /dev/null
+++ b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h
@@ -0,0 +1,61 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include
+/* Header for class org_broadinstitute_sting_alignment_bwa_c_BWACAligner */
+
+#ifndef _Included_org_broadinstitute_sting_alignment_bwa_c_BWACAligner
+#define _Included_org_broadinstitute_sting_alignment_bwa_c_BWACAligner
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
+ * Method: create
+ * Signature: (Lorg/broadinstitute/sting/alignment/bwa/BWTFiles;Lorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)J
+ */
+JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create
+ (JNIEnv *, jobject, jobject, jobject);
+
+/*
+ * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
+ * Method: updateConfiguration
+ * Signature: (JLorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)V
+ */
+JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration
+ (JNIEnv *, jobject, jlong, jobject);
+
+/*
+ * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
+ * Method: destroy
+ * Signature: (J)V
+ */
+JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_destroy
+ (JNIEnv *, jobject, jlong);
+
+/*
+ * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
+ * Method: getPaths
+ * Signature: (J[B)[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath;
+ */
+JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths
+ (JNIEnv *, jobject, jlong, jbyteArray);
+
+/*
+ * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
+ * Method: convertPathsToAlignments
+ * Signature: (J[B[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath;)[Lorg/broadinstitute/sting/alignment/Alignment;
+ */
+JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments
+ (JNIEnv *, jobject, jlong, jbyteArray, jobjectArray);
+
+/*
+ * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
+ * Method: getBestAlignment
+ * Signature: (J[B)Lorg/broadinstitute/sting/alignment/Alignment;
+ */
+JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment
+ (JNIEnv *, jobject, jlong, jbyteArray);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/public/c/libenvironhack/Makefile b/public/c/libenvironhack/Makefile
new file mode 100644
index 0000000000..302ff8e31e
--- /dev/null
+++ b/public/c/libenvironhack/Makefile
@@ -0,0 +1,10 @@
+CC=gcc
+CCFLAGS=-Wall -dynamiclib -arch i386 -arch x86_64
+
+libenvironhack.dylib: libenvironhack.c
+ $(CC) $(CCFLAGS) -init _init_environ $< -o $@
+
+all: libenvironhack.dylib
+
+clean:
+ rm -f libenvironhack.dylib
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java b/public/c/libenvironhack/libenvironhack.c
old mode 100755
new mode 100644
similarity index 52%
rename from public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java
rename to public/c/libenvironhack/libenvironhack.c
index 1d4ae0aa8b..8b2a2640e1
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java
+++ b/public/c/libenvironhack/libenvironhack.c
@@ -22,25 +22,16 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-package org.broadinstitute.sting.gatk.walkers.variantutils;
+/*
+LSF 7.0.6 on the mac is missing the unsatisfied exported symbol for environ which was removed on MacOS X 10.5+.
+nm $LSF_LIBDIR/liblsf.dylib | grep environ
+See "man environ" for more info, along with http://lists.apple.com/archives/java-dev/2007/Dec/msg00096.html
+*/
-import org.broadinstitute.sting.WalkerTest;
-import org.testng.annotations.Test;
+#include
-import java.io.File;
-import java.util.Arrays;
+char **environ = (char **)0;
-public class BatchMergeIntegrationTest extends WalkerTest {
- @Test
- public void testBatchMerge1() {
- String bam = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
- String alleles = validationDataLocation + "batch.merge.alleles.vcf";
- WalkerTestSpec spec = new WalkerTestSpec(
- "-T UnifiedGenotyper -NO_HEADER -BTI alleles -stand_call_conf 0.0 -glm BOTH -G none -nsl -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -o %s -R " + b37KGReference
- + " -B:alleles,VCF " + alleles
- + " -I " + bam,
- 1,
- Arrays.asList("8bd114ceaf6a94e96a677fcc52350713"));
- executeTest("testBatchMerge UG genotype given alleles:" + new File(bam).getName() + " with " + new File(alleles).getName(), spec);
- }
+void init_environ(void) {
+ environ = (*_NSGetEnviron());
}
diff --git a/public/c/libenvironhack/libenvironhack.dylib b/public/c/libenvironhack/libenvironhack.dylib
new file mode 100755
index 0000000000..a45e038b49
Binary files /dev/null and b/public/c/libenvironhack/libenvironhack.dylib differ
diff --git a/public/java/src/org/broadinstitute/sting/alignment/Aligner.java b/public/java/src/org/broadinstitute/sting/alignment/Aligner.java
new file mode 100644
index 0000000000..4bf05cb75e
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/Aligner.java
@@ -0,0 +1,49 @@
+package org.broadinstitute.sting.alignment;
+
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMRecord;
+
+/**
+ * Create perfect alignments from the read to the genome represented by the given BWT / suffix array.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public interface Aligner {
+ /**
+ * Close this instance of the BWA pointer and delete its resources.
+ */
+ public void close();
+
+ /**
+ * Allow the aligner to choose one alignment randomly from the pile of best alignments.
+ * @param bases Bases to align.
+ * @return An align
+ */
+ public Alignment getBestAlignment(final byte[] bases);
+
+ /**
+ * Align the read to the reference.
+ * @param read Read to align.
+ * @param header Optional header to drop in place.
+ * @return A list of the alignments.
+ */
+ public SAMRecord align(final SAMRecord read, final SAMFileHeader header);
+
+ /**
+ * Get a iterator of alignments, batched by mapping quality.
+ * @param bases List of bases.
+ * @return Iterator to alignments.
+ */
+ public Iterable getAllAlignments(final byte[] bases);
+
+ /**
+ * Get a iterator of aligned reads, batched by mapping quality.
+ * @param read Read to align.
+ * @param newHeader Optional new header to use when aligning the read. If present, it must be null.
+ * @return Iterator to alignments.
+ */
+ public Iterable alignAll(final SAMRecord read, final SAMFileHeader newHeader);
+}
+
+
diff --git a/public/java/src/org/broadinstitute/sting/alignment/Alignment.java b/public/java/src/org/broadinstitute/sting/alignment/Alignment.java
new file mode 100644
index 0000000000..c63f5615fe
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/Alignment.java
@@ -0,0 +1,221 @@
+package org.broadinstitute.sting.alignment;
+
+import net.sf.samtools.*;
+import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+/**
+ * Represents an alignment of a read to a site in the reference genome.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class Alignment {
+ protected int contigIndex;
+ protected long alignmentStart;
+ protected boolean negativeStrand;
+ protected int mappingQuality;
+
+ protected char[] cigarOperators;
+ protected int[] cigarLengths;
+
+ protected int editDistance;
+ protected String mismatchingPositions;
+
+ protected int numMismatches;
+ protected int numGapOpens;
+ protected int numGapExtensions;
+ protected int bestCount;
+ protected int secondBestCount;
+
+ /**
+ * Gets the index of the given contig.
+ * @return the inde
+ */
+ public int getContigIndex() { return contigIndex; }
+
+ /**
+ * Gets the starting position for the given alignment.
+ * @return Starting position.
+ */
+ public long getAlignmentStart() { return alignmentStart; }
+
+ /**
+ * Is the given alignment on the reverse strand?
+ * @return True if the alignment is on the reverse strand.
+ */
+ public boolean isNegativeStrand() { return negativeStrand; }
+
+ /**
+ * Gets the score of this alignment.
+ * @return The score.
+ */
+ public int getMappingQuality() { return mappingQuality; }
+
+ /**
+ * Gets the edit distance; will eventually end up in the NM SAM tag
+ * if this alignment makes it that far.
+ * @return The edit distance.
+ */
+ public int getEditDistance() { return editDistance; }
+
+ /**
+ * A string representation of which positions mismatch; contents of MD tag.
+ * @return String representation of mismatching positions.
+ */
+ public String getMismatchingPositions() { return mismatchingPositions; }
+
+ /**
+ * Gets the number of mismatches in the read.
+ * @return Number of mismatches.
+ */
+ public int getNumMismatches() { return numMismatches; }
+
+ /**
+ * Get the number of gap opens.
+ * @return Number of gap opens.
+ */
+ public int getNumGapOpens() { return numGapOpens; }
+
+ /**
+ * Get the number of gap extensions.
+ * @return Number of gap extensions.
+ */
+ public int getNumGapExtensions() { return numGapExtensions; }
+
+ /**
+ * Get the number of best alignments.
+ * @return Number of top scoring alignments.
+ */
+ public int getBestCount() { return bestCount; }
+
+ /**
+ * Get the number of second best alignments.
+ * @return Number of second best scoring alignments.
+ */
+ public int getSecondBestCount() { return secondBestCount; }
+
+ /**
+ * Gets the cigar for this alignment.
+ * @return sam-jdk formatted alignment.
+ */
+ public Cigar getCigar() {
+ Cigar cigar = new Cigar();
+ for(int i = 0; i < cigarOperators.length; i++) {
+ CigarOperator operator = CigarOperator.characterToEnum(cigarOperators[i]);
+ cigar.add(new CigarElement(cigarLengths[i],operator));
+ }
+ return cigar;
+ }
+
+ /**
+ * Temporarily implement getCigarString() for debugging; the TextCigarCodec is unfortunately
+ * package-protected.
+ * @return
+ */
+ public String getCigarString() {
+ Cigar cigar = getCigar();
+ if(cigar.isEmpty()) return "*";
+
+ StringBuilder cigarString = new StringBuilder();
+ for(CigarElement element: cigar.getCigarElements()) {
+ cigarString.append(element.getLength());
+ cigarString.append(element.getOperator());
+ }
+ return cigarString.toString();
+ }
+
+ /**
+ * Stub for inheritance.
+ */
+ public Alignment() {}
+
+ /**
+ * Create a new alignment object.
+ * @param contigIndex The contig to which this read aligned.
+ * @param alignmentStart The point within the contig to which this read aligned.
+ * @param negativeStrand Forward or reverse alignment of the given read.
+ * @param mappingQuality How good does BWA think this mapping is?
+ * @param cigarOperators The ordered operators in the cigar string.
+ * @param cigarLengths The lengths to which each operator applies.
+ * @param editDistance The edit distance (cumulative) of the read.
+ * @param mismatchingPositions String representation of which bases in the read mismatch.
+ * @param numMismatches Number of total mismatches in the read.
+ * @param numGapOpens Number of gap opens in the read.
+ * @param numGapExtensions Number of gap extensions in the read.
+ * @param bestCount Number of best alignments in the read.
+ * @param secondBestCount Number of second best alignments in the read.
+ */
+ public Alignment(int contigIndex,
+ int alignmentStart,
+ boolean negativeStrand,
+ int mappingQuality,
+ char[] cigarOperators,
+ int[] cigarLengths,
+ int editDistance,
+ String mismatchingPositions,
+ int numMismatches,
+ int numGapOpens,
+ int numGapExtensions,
+ int bestCount,
+ int secondBestCount) {
+ this.contigIndex = contigIndex;
+ this.alignmentStart = alignmentStart;
+ this.negativeStrand = negativeStrand;
+ this.mappingQuality = mappingQuality;
+ this.cigarOperators = cigarOperators;
+ this.cigarLengths = cigarLengths;
+ this.editDistance = editDistance;
+ this.mismatchingPositions = mismatchingPositions;
+ this.numMismatches = numMismatches;
+ this.numGapOpens = numGapOpens;
+ this.numGapExtensions = numGapExtensions;
+ this.bestCount = bestCount;
+ this.secondBestCount = secondBestCount;
+ }
+
+ /**
+ * Creates a read directly from an alignment.
+ * @param alignment The alignment to convert to a read.
+ * @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags.
+ * @param newSAMHeader The new SAM header to use in creating this read. Can be null, but if so, the sequence
+ * dictionary in the
+ * @return A mapped alignment.
+ */
+ public static SAMRecord convertToRead(Alignment alignment, SAMRecord unmappedRead, SAMFileHeader newSAMHeader) {
+ SAMRecord read;
+ try {
+ read = (SAMRecord)unmappedRead.clone();
+ }
+ catch(CloneNotSupportedException ex) {
+ throw new ReviewedStingException("Unable to create aligned read from template.");
+ }
+
+ if(newSAMHeader != null)
+ read.setHeader(newSAMHeader);
+
+ // If we're realigning a previously aligned record, strip out the placement of the alignment.
+ read.setReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
+ read.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
+ read.setMateReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
+ read.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
+
+ if(alignment != null) {
+ read.setReadUnmappedFlag(false);
+ read.setReferenceIndex(alignment.getContigIndex());
+ read.setAlignmentStart((int)alignment.getAlignmentStart());
+ read.setReadNegativeStrandFlag(alignment.isNegativeStrand());
+ read.setMappingQuality(alignment.getMappingQuality());
+ read.setCigar(alignment.getCigar());
+ if(alignment.isNegativeStrand()) {
+ read.setReadBases(BaseUtils.simpleReverseComplement(read.getReadBases()));
+ read.setBaseQualities(Utils.reverse(read.getBaseQualities()));
+ }
+ read.setAttribute("NM",alignment.getEditDistance());
+ read.setAttribute("MD",alignment.getMismatchingPositions());
+ }
+
+ return read;
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java
new file mode 100644
index 0000000000..c6755e8789
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.alignment;
+
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
+import org.broadinstitute.sting.alignment.bwa.BWTFiles;
+import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
+import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.ReadWalker;
+import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.Iterator;
+
+/**
+ * Validates consistency of the aligner interface by taking reads already aligned by BWA in a BAM file, stripping them
+ * of their alignment data, realigning them, and making sure one of the best resulting realignments matches the original
+ * alignment from the input file.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class AlignmentValidationWalker extends ReadWalker {
+ /**
+ * The supporting BWT index generated using BWT.
+ */
+ @Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false)
+ private String prefix = null;
+
+ /**
+ * The instance used to generate alignments.
+ */
+ private BWACAligner aligner = null;
+
+ /**
+ * Create an aligner object. The aligner object will load and hold the BWT until close() is called.
+ */
+ @Override
+ public void initialize() {
+ if(prefix == null)
+ prefix = getToolkit().getArguments().referenceFile.getAbsolutePath();
+ BWTFiles bwtFiles = new BWTFiles(prefix);
+ BWAConfiguration configuration = new BWAConfiguration();
+ aligner = new BWACAligner(bwtFiles,configuration);
+ }
+
+ /**
+ * Aligns a read to the given reference.
+ * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
+ * @param read Read to align.
+ * @return Number of reads aligned by this map (aka 1).
+ */
+ @Override
+ public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
+ //logger.info(String.format("examining read %s", read.getReadName()));
+
+ byte[] bases = read.getReadBases();
+ if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases);
+
+ boolean matches = true;
+ Iterable alignments = aligner.getAllAlignments(bases);
+ Iterator alignmentIterator = alignments.iterator();
+
+ if(!alignmentIterator.hasNext()) {
+ matches = read.getReadUnmappedFlag();
+ }
+ else {
+ Alignment[] alignmentsOfBestQuality = alignmentIterator.next();
+ for(Alignment alignment: alignmentsOfBestQuality) {
+ matches = (alignment.getContigIndex() == read.getReferenceIndex());
+ matches &= (alignment.getAlignmentStart() == read.getAlignmentStart());
+ matches &= (alignment.isNegativeStrand() == read.getReadNegativeStrandFlag());
+ matches &= (alignment.getCigar().equals(read.getCigar()));
+ matches &= (alignment.getMappingQuality() == read.getMappingQuality());
+ if(matches) break;
+ }
+ }
+
+ if(!matches) {
+ logger.error("Found mismatch!");
+ logger.error(String.format("Read %s:",read.getReadName()));
+ logger.error(String.format(" Contig index: %d",read.getReferenceIndex()));
+ logger.error(String.format(" Alignment start: %d", read.getAlignmentStart()));
+ logger.error(String.format(" Negative strand: %b", read.getReadNegativeStrandFlag()));
+ logger.error(String.format(" Cigar: %s%n", read.getCigarString()));
+ logger.error(String.format(" Mapping quality: %s%n", read.getMappingQuality()));
+ for(Alignment[] alignmentsByScore: alignments) {
+ for(int i = 0; i < alignmentsByScore.length; i++) {
+ logger.error(String.format("Alignment %d:",i));
+ logger.error(String.format(" Contig index: %d",alignmentsByScore[i].getContigIndex()));
+ logger.error(String.format(" Alignment start: %d", alignmentsByScore[i].getAlignmentStart()));
+ logger.error(String.format(" Negative strand: %b", alignmentsByScore[i].isNegativeStrand()));
+ logger.error(String.format(" Cigar: %s", alignmentsByScore[i].getCigarString()));
+ logger.error(String.format(" Mapping quality: %s%n", alignmentsByScore[i].getMappingQuality()));
+ }
+ }
+ throw new ReviewedStingException(String.format("Read %s mismatches!", read.getReadName()));
+ }
+
+ return 1;
+ }
+
+ /**
+ * Initial value for reduce. In this case, validated reads will be counted.
+ * @return 0, indicating no reads yet validated.
+ */
+ @Override
+ public Integer reduceInit() { return 0; }
+
+ /**
+ * Calculates the number of reads processed.
+ * @param value Number of reads processed by this map.
+ * @param sum Number of reads processed before this map.
+ * @return Number of reads processed up to and including this map.
+ */
+ @Override
+ public Integer reduce(Integer value, Integer sum) {
+ return value + sum;
+ }
+
+ /**
+ * Cleanup.
+ * @param result Number of reads processed.
+ */
+ @Override
+ public void onTraversalDone(Integer result) {
+ aligner.close();
+ super.onTraversalDone(result);
+ }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java
new file mode 100644
index 0000000000..7064e637fe
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.alignment;
+
+import net.sf.picard.reference.ReferenceSequenceFileFactory;
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMSequenceDictionary;
+import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
+import org.broadinstitute.sting.alignment.bwa.BWTFiles;
+import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
+import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
+import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.ReadWalker;
+import org.broadinstitute.sting.gatk.walkers.WalkerName;
+
+import java.io.File;
+
+/**
+ * Aligns reads to a given reference using Heng Li's BWA aligner, presenting the resulting alignments in SAM or BAM format.
+ * Mimics the steps 'bwa aln' followed by 'bwa samse' using the BWA/C implementation.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+@WalkerName("Align")
+public class AlignmentWalker extends ReadWalker {
+ @Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " +
+ "generated by bwa index -d bwtsw. If unspecified, will default " +
+ "to the reference specified via the -R argument.",required=false)
+ private File targetReferenceFile = null;
+
+ @Output
+ private StingSAMFileWriter out = null;
+
+ /**
+ * The actual aligner.
+ */
+ private BWACAligner aligner = null;
+
+ /**
+ * New header to use, if desired.
+ */
+ private SAMFileHeader header;
+
+ /**
+ * Create an aligner object. The aligner object will load and hold the BWT until close() is called.
+ */
+ @Override
+ public void initialize() {
+ if(targetReferenceFile == null)
+ targetReferenceFile = getToolkit().getArguments().referenceFile;
+ BWTFiles bwtFiles = new BWTFiles(targetReferenceFile.getAbsolutePath());
+ BWAConfiguration configuration = new BWAConfiguration();
+ aligner = new BWACAligner(bwtFiles,configuration);
+
+ // Take the header of the SAM file, tweak it by adding in the reference dictionary and specifying that the target file is unsorted.
+ header = getToolkit().getSAMFileHeader().clone();
+ SAMSequenceDictionary referenceDictionary =
+ ReferenceSequenceFileFactory.getReferenceSequenceFile(targetReferenceFile).getSequenceDictionary();
+ header.setSequenceDictionary(referenceDictionary);
+ header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
+
+ out.writeHeader(header);
+ }
+
+ /**
+ * Aligns a read to the given reference.
+ * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
+ * @param read Read to align.
+ * @return Number of alignments found for this read.
+ */
+ @Override
+ public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
+ SAMRecord alignedRead = aligner.align(read,header);
+ out.addAlignment(alignedRead);
+ return 1;
+ }
+
+ /**
+ * Initial value for reduce. In this case, alignments will be counted.
+ * @return 0, indicating no alignments yet found.
+ */
+ @Override
+ public Integer reduceInit() { return 0; }
+
+ /**
+ * Calculates the number of alignments found.
+ * @param value Number of alignments found by this map.
+ * @param sum Number of alignments found before this map.
+ * @return Number of alignments found up to and including this map.
+ */
+ @Override
+ public Integer reduce(Integer value, Integer sum) {
+ return value + sum;
+ }
+
+ /**
+ * Cleanup.
+ * @param result Number of reads processed.
+ */
+ @Override
+ public void onTraversalDone(Integer result) {
+ aligner.close();
+ super.onTraversalDone(result);
+ }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java
new file mode 100644
index 0000000000..57d92319f5
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.alignment;
+
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
+import org.broadinstitute.sting.alignment.bwa.BWTFiles;
+import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
+import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.ReadWalker;
+
+import java.io.PrintStream;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * Counts the number of best alignments as presented by BWA and outputs a histogram of number of placements vs. the
+ * frequency of that number of placements.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class CountBestAlignmentsWalker extends ReadWalker {
+ /**
+ * The supporting BWT index generated using BWT.
+ */
+ @Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false)
+ private String prefix = null;
+
+ @Output
+ private PrintStream out = null;
+
+ /**
+ * The actual aligner.
+ */
+ private Aligner aligner = null;
+
+ private SortedMap alignmentFrequencies = new TreeMap();
+
+ /**
+ * Create an aligner object. The aligner object will load and hold the BWT until close() is called.
+ */
+ @Override
+ public void initialize() {
+ if(prefix == null)
+ prefix = getToolkit().getArguments().referenceFile.getAbsolutePath();
+ BWTFiles bwtFiles = new BWTFiles(prefix);
+ BWAConfiguration configuration = new BWAConfiguration();
+ aligner = new BWACAligner(bwtFiles,configuration);
+ }
+
+ /**
+ * Aligns a read to the given reference.
+ * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
+ * @param read Read to align.
+ * @return Number of alignments found for this read.
+ */
+ @Override
+ public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
+ Iterator alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator();
+ if(alignmentIterator.hasNext()) {
+ int numAlignments = alignmentIterator.next().length;
+ if(alignmentFrequencies.containsKey(numAlignments))
+ alignmentFrequencies.put(numAlignments,alignmentFrequencies.get(numAlignments)+1);
+ else
+ alignmentFrequencies.put(numAlignments,1);
+ }
+ return 1;
+ }
+
+ /**
+ * Initial value for reduce. In this case, validated reads will be counted.
+ * @return 0, indicating no reads yet validated.
+ */
+ @Override
+ public Integer reduceInit() { return 0; }
+
+ /**
+ * Calculates the number of reads processed.
+ * @param value Number of reads processed by this map.
+ * @param sum Number of reads processed before this map.
+ * @return Number of reads processed up to and including this map.
+ */
+ @Override
+ public Integer reduce(Integer value, Integer sum) {
+ return value + sum;
+ }
+
+ /**
+ * Cleanup.
+ * @param result Number of reads processed.
+ */
+ @Override
+ public void onTraversalDone(Integer result) {
+ aligner.close();
+ for(Map.Entry alignmentFrequency: alignmentFrequencies.entrySet())
+ out.printf("%d\t%d%n", alignmentFrequency.getKey(), alignmentFrequency.getValue());
+ super.onTraversalDone(result);
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java
new file mode 100644
index 0000000000..ddbf784f57
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java
@@ -0,0 +1,38 @@
+package org.broadinstitute.sting.alignment.bwa;
+
+import org.broadinstitute.sting.alignment.Aligner;
+
+/**
+ * Align reads using BWA.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public abstract class BWAAligner implements Aligner {
+ /**
+ * The supporting files used by BWA.
+ */
+ protected BWTFiles bwtFiles;
+
+ /**
+ * The current configuration for the BWA aligner.
+ */
+ protected BWAConfiguration configuration;
+
+ /**
+ * Create a new BWAAligner. Purpose of this call is to ensure that all BWA constructors accept the correct
+ * parameters.
+ * @param bwtFiles The many files representing BWTs persisted to disk.
+ * @param configuration Configuration parameters for the alignment.
+ */
+ public BWAAligner(BWTFiles bwtFiles, BWAConfiguration configuration) {
+ this.bwtFiles = bwtFiles;
+ this.configuration = configuration;
+ }
+
+ /**
+ * Update the configuration passed to the BWA aligner.
+ * @param configuration New configuration to set.
+ */
+ public abstract void updateConfiguration(BWAConfiguration configuration);
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java
new file mode 100644
index 0000000000..73441cb6a4
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java
@@ -0,0 +1,44 @@
+package org.broadinstitute.sting.alignment.bwa;
+
+/**
+ * Configuration for the BWA/C aligner.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class BWAConfiguration {
+ /**
+ * The maximum edit distance used by BWA.
+ */
+ public Float maximumEditDistance = null;
+
+ /**
+ * How many gap opens are acceptable within this alignment?
+ */
+ public Integer maximumGapOpens = null;
+
+ /**
+ * How many gap extensions are acceptable within this alignment?
+ */
+ public Integer maximumGapExtensions = null;
+
+ /**
+ * Do we disallow indels within a certain range from the start / end?
+ */
+ public Integer disallowIndelWithinRange = null;
+
+ /**
+ * What is the scoring penalty for a mismatch?
+ */
+ public Integer mismatchPenalty = null;
+
+ /**
+ * What is the scoring penalty for a gap open?
+ */
+ public Integer gapOpenPenalty = null;
+
+ /**
+ * What is the scoring penalty for a gap extension?
+ */
+ public Integer gapExtensionPenalty = null;
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java
new file mode 100644
index 0000000000..a0589ac842
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java
@@ -0,0 +1,234 @@
+package org.broadinstitute.sting.alignment.bwa;
+
+import net.sf.samtools.SAMSequenceDictionary;
+import net.sf.samtools.SAMSequenceRecord;
+import net.sf.samtools.util.StringUtil;
+import org.broadinstitute.sting.alignment.reference.bwt.*;
+import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * Support files for BWT.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class BWTFiles {
+ /**
+ * ANN (?) file name.
+ */
+ public final File annFile;
+
+ /**
+ * AMB (?) file name.
+ */
+ public final File ambFile;
+
+ /**
+ * Packed reference sequence file.
+ */
+ public final File pacFile;
+
+ /**
+ * Reverse of packed reference sequence file.
+ */
+ public final File rpacFile;
+
+ /**
+ * Forward BWT file.
+ */
+ public final File forwardBWTFile;
+
+ /**
+ * Forward suffix array file.
+ */
+ public final File forwardSAFile;
+
+ /**
+ * Reverse BWT file.
+ */
+ public final File reverseBWTFile;
+
+ /**
+ * Reverse suffix array file.
+ */
+ public final File reverseSAFile;
+
+ /**
+ * Where these files autogenerated on the fly?
+ */
+ public final boolean autogenerated;
+
+ /**
+ * Create a new BWA configuration file using the given prefix.
+ * @param prefix Prefix to use when creating the configuration. Must not be null.
+ */
+ public BWTFiles(String prefix) {
+ if(prefix == null)
+ throw new ReviewedStingException("Prefix must not be null.");
+ annFile = new File(prefix + ".ann");
+ ambFile = new File(prefix + ".amb");
+ pacFile = new File(prefix + ".pac");
+ rpacFile = new File(prefix + ".rpac");
+ forwardBWTFile = new File(prefix + ".bwt");
+ forwardSAFile = new File(prefix + ".sa");
+ reverseBWTFile = new File(prefix + ".rbwt");
+ reverseSAFile = new File(prefix + ".rsa");
+ autogenerated = false;
+ }
+
+ /**
+ * Hand-create a new BWTFiles object, specifying a unique file object for each type.
+ * @param annFile ANN (alternate dictionary) file.
+ * @param ambFile AMB (holes) files.
+ * @param pacFile Packed representation of the forward reference sequence.
+ * @param forwardBWTFile BWT representation of the forward reference sequence.
+ * @param forwardSAFile SA representation of the forward reference sequence.
+ * @param rpacFile Packed representation of the reversed reference sequence.
+ * @param reverseBWTFile BWT representation of the reversed reference sequence.
+ * @param reverseSAFile SA representation of the reversed reference sequence.
+ */
+ private BWTFiles(File annFile,
+ File ambFile,
+ File pacFile,
+ File forwardBWTFile,
+ File forwardSAFile,
+ File rpacFile,
+ File reverseBWTFile,
+ File reverseSAFile) {
+ this.annFile = annFile;
+ this.ambFile = ambFile;
+ this.pacFile = pacFile;
+ this.forwardBWTFile = forwardBWTFile;
+ this.forwardSAFile = forwardSAFile;
+ this.rpacFile = rpacFile;
+ this.reverseBWTFile = reverseBWTFile;
+ this.reverseSAFile = reverseSAFile;
+ autogenerated = true;
+ }
+
+ /**
+ * Close out this files object, in the process deleting any temporary filse
+ * that were created.
+ */
+ public void close() {
+ if(autogenerated) {
+ boolean success = true;
+ success = annFile.delete();
+ success &= ambFile.delete();
+ success &= pacFile.delete();
+ success &= forwardBWTFile.delete();
+ success &= forwardSAFile.delete();
+ success &= rpacFile.delete();
+ success &= reverseBWTFile.delete();
+ success &= reverseSAFile.delete();
+
+ if(!success)
+ throw new ReviewedStingException("Unable to clean up autogenerated representation");
+ }
+ }
+
+ /**
+ * Create a new set of BWT files from the given reference sequence.
+ * @param referenceSequence Sequence from which to build metadata.
+ * @return A new object representing encoded representations of each sequence.
+ */
+ public static BWTFiles createFromReferenceSequence(byte[] referenceSequence) {
+ byte[] normalizedReferenceSequence = new byte[referenceSequence.length];
+ System.arraycopy(referenceSequence,0,normalizedReferenceSequence,0,referenceSequence.length);
+ normalizeReferenceSequence(normalizedReferenceSequence);
+
+ File annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile;
+ try {
+ // Write the ann and amb for this reference sequence.
+ annFile = File.createTempFile("bwt",".ann");
+ ambFile = File.createTempFile("bwt",".amb");
+
+ SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
+ dictionary.addSequence(new SAMSequenceRecord("autogenerated",normalizedReferenceSequence.length));
+
+ ANNWriter annWriter = new ANNWriter(annFile);
+ annWriter.write(dictionary);
+ annWriter.close();
+
+ AMBWriter ambWriter = new AMBWriter(ambFile);
+ ambWriter.writeEmpty(dictionary);
+ ambWriter.close();
+
+ // Write the encoded files for the forward version of this reference sequence.
+ pacFile = File.createTempFile("bwt",".pac");
+ bwtFile = File.createTempFile("bwt",".bwt");
+ saFile = File.createTempFile("bwt",".sa");
+
+ writeEncodedReferenceSequence(normalizedReferenceSequence,pacFile,bwtFile,saFile);
+
+ // Write the encoded files for the reverse version of this reference sequence.
+ byte[] reverseReferenceSequence = Utils.reverse(normalizedReferenceSequence);
+
+ rpacFile = File.createTempFile("bwt",".rpac");
+ rbwtFile = File.createTempFile("bwt",".rbwt");
+ rsaFile = File.createTempFile("bwt",".rsa");
+
+ writeEncodedReferenceSequence(reverseReferenceSequence,rpacFile,rbwtFile,rsaFile);
+ }
+ catch(IOException ex) {
+ throw new ReviewedStingException("Unable to write autogenerated reference sequence to temporary files");
+ }
+
+ // Make sure that, at the very least, all temporary files are deleted on exit.
+ annFile.deleteOnExit();
+ ambFile.deleteOnExit();
+ pacFile.deleteOnExit();
+ bwtFile.deleteOnExit();
+ saFile.deleteOnExit();
+ rpacFile.deleteOnExit();
+ rbwtFile.deleteOnExit();
+ rsaFile.deleteOnExit();
+
+ return new BWTFiles(annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile);
+ }
+
+ /**
+ * Write the encoded form of the reference sequence. In the case of BWA, the encoded reference
+ * sequence is the reference itself in PAC format, the BWT, and the suffix array.
+ * @param referenceSequence The reference sequence to encode.
+ * @param pacFile Target for the PAC-encoded reference.
+ * @param bwtFile Target for the BWT representation of the reference.
+ * @param suffixArrayFile Target for the suffix array encoding of the reference.
+ * @throws java.io.IOException In case of issues writing to the file.
+ */
+ private static void writeEncodedReferenceSequence(byte[] referenceSequence,
+ File pacFile,
+ File bwtFile,
+ File suffixArrayFile) throws IOException {
+ PackUtils.writeReferenceSequence(pacFile,referenceSequence);
+
+ BWT bwt = BWT.createFromReferenceSequence(referenceSequence);
+ BWTWriter bwtWriter = new BWTWriter(bwtFile);
+ bwtWriter.write(bwt);
+ bwtWriter.close();
+
+ SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence);
+ SuffixArrayWriter suffixArrayWriter = new SuffixArrayWriter(suffixArrayFile);
+ suffixArrayWriter.write(suffixArray);
+ suffixArrayWriter.close();
+ }
+
+ /**
+ * Convert the given reference sequence into a form suitable for building into
+ * on-the-fly sequences.
+ * @param referenceSequence The reference sequence to normalize.
+ * @throws org.broadinstitute.sting.utils.exceptions.ReviewedStingException if normalized sequence cannot be generated.
+ */
+ private static void normalizeReferenceSequence(byte[] referenceSequence) {
+ StringUtil.toUpperCase(referenceSequence);
+ for(byte base: referenceSequence) {
+ if(base != 'A' && base != 'C' && base != 'G' && base != 'T')
+ throw new ReviewedStingException(String.format("Base type %c is not supported when building references on-the-fly",(char)base));
+ }
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java
new file mode 100644
index 0000000000..165314259f
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java
@@ -0,0 +1,259 @@
+package org.broadinstitute.sting.alignment.bwa.c;
+
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.alignment.Alignment;
+import org.broadinstitute.sting.alignment.bwa.BWAAligner;
+import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
+import org.broadinstitute.sting.alignment.bwa.BWTFiles;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.Arrays;
+import java.util.Iterator;
+
+/**
+ * An aligner using the BWA/C implementation.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class BWACAligner extends BWAAligner {
+ static {
+ System.loadLibrary("bwa");
+ }
+
+ /**
+ * A pointer to the C++ object representing the BWA engine.
+ */
+ private long thunkPointer = 0;
+
+ public BWACAligner(BWTFiles bwtFiles, BWAConfiguration configuration) {
+ super(bwtFiles,configuration);
+ if(thunkPointer != 0)
+ throw new ReviewedStingException("BWA/C attempting to reinitialize.");
+
+ if(!bwtFiles.annFile.exists()) throw new ReviewedStingException("ANN file is missing; please rerun 'bwa aln' to regenerate it.");
+ if(!bwtFiles.ambFile.exists()) throw new ReviewedStingException("AMB file is missing; please rerun 'bwa aln' to regenerate it.");
+ if(!bwtFiles.pacFile.exists()) throw new ReviewedStingException("PAC file is missing; please rerun 'bwa aln' to regenerate it.");
+ if(!bwtFiles.forwardBWTFile.exists()) throw new ReviewedStingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it.");
+ if(!bwtFiles.forwardSAFile.exists()) throw new ReviewedStingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it.");
+ if(!bwtFiles.reverseBWTFile.exists()) throw new ReviewedStingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it.");
+ if(!bwtFiles.reverseSAFile.exists()) throw new ReviewedStingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it.");
+
+ thunkPointer = create(bwtFiles,configuration);
+ }
+
+ /**
+ * Create an aligner object using an array of bytes as a reference.
+ * @param referenceSequence Reference sequence to encode ad-hoc.
+ * @param configuration Configuration for the given aligner.
+ */
+ public BWACAligner(byte[] referenceSequence, BWAConfiguration configuration) {
+ this(BWTFiles.createFromReferenceSequence(referenceSequence),configuration);
+ // Now that the temporary files are created, the temporary files can be destroyed.
+ bwtFiles.close();
+ }
+
+ /**
+ * Update the configuration passed to the BWA aligner.
+ * @param configuration New configuration to set.
+ */
+ @Override
+ public void updateConfiguration(BWAConfiguration configuration) {
+ if(thunkPointer == 0)
+ throw new ReviewedStingException("BWA/C: attempting to update configuration of uninitialized aligner.");
+ updateConfiguration(thunkPointer,configuration);
+ }
+
+ /**
+ * Close this instance of the BWA pointer and delete its resources.
+ */
+ @Override
+ public void close() {
+ if(thunkPointer == 0)
+ throw new ReviewedStingException("BWA/C close attempted, but BWA/C is not properly initialized.");
+ destroy(thunkPointer);
+ }
+
+ /**
+ * Allow the aligner to choose one alignment randomly from the pile of best alignments.
+ * @param bases Bases to align.
+ * @return An align
+ */
+ @Override
+ public Alignment getBestAlignment(final byte[] bases) {
+ if(thunkPointer == 0)
+ throw new ReviewedStingException("BWA/C getBestAlignment attempted, but BWA/C is not properly initialized.");
+ return getBestAlignment(thunkPointer,bases);
+ }
+
+ /**
+ * Get the best aligned read, chosen randomly from the pile of best alignments.
+ * @param read Read to align.
+ * @param newHeader New header to apply to this SAM file. Can be null, but if so, read header must be valid.
+ * @return Read with injected alignment data.
+ */
+ @Override
+ public SAMRecord align(final SAMRecord read, final SAMFileHeader newHeader) {
+ if(bwtFiles.autogenerated)
+ throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable");
+ return Alignment.convertToRead(getBestAlignment(read.getReadBases()),read,newHeader);
+ }
+
+ /**
+ * Get a iterator of alignments, batched by mapping quality.
+ * @param bases List of bases.
+ * @return Iterator to alignments.
+ */
+ @Override
+ public Iterable getAllAlignments(final byte[] bases) {
+ final BWAPath[] paths = getPaths(bases);
+ return new Iterable() {
+ public Iterator iterator() {
+ return new Iterator() {
+ /**
+ * The last position accessed.
+ */
+ private int position = 0;
+
+ /**
+ * Whether all alignments have been seen based on the current position.
+ * @return True if any more alignments are pending. False otherwise.
+ */
+ public boolean hasNext() { return position < paths.length; }
+
+ /**
+ * Return the next cross-section of alignments, based on mapping quality.
+ * @return Array of the next set of alignments of a given mapping quality.
+ */
+ public Alignment[] next() {
+ if(position >= paths.length)
+ throw new UnsupportedOperationException("Out of alignments to return.");
+ int score = paths[position].score;
+ int startingPosition = position;
+ while(position < paths.length && paths[position].score == score) position++;
+ return convertPathsToAlignments(bases,Arrays.copyOfRange(paths,startingPosition,position));
+ }
+
+ /**
+ * Unsupported.
+ */
+ public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); }
+ };
+ }
+ };
+ }
+
+ /**
+ * Get a iterator of aligned reads, batched by mapping quality.
+ * @param read Read to align.
+ * @param newHeader Optional new header to use when aligning the read. If present, it must be null.
+ * @return Iterator to alignments.
+ */
+ @Override
+ public Iterable alignAll(final SAMRecord read, final SAMFileHeader newHeader) {
+ if(bwtFiles.autogenerated)
+ throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable");
+ final Iterable alignments = getAllAlignments(read.getReadBases());
+ return new Iterable() {
+ public Iterator iterator() {
+ final Iterator alignmentIterator = alignments.iterator();
+ return new Iterator() {
+ /**
+ * Whether all alignments have been seen based on the current position.
+ * @return True if any more alignments are pending. False otherwise.
+ */
+ public boolean hasNext() { return alignmentIterator.hasNext(); }
+
+ /**
+ * Return the next cross-section of alignments, based on mapping quality.
+ * @return Array of the next set of alignments of a given mapping quality.
+ */
+ public SAMRecord[] next() {
+ Alignment[] alignmentsOfQuality = alignmentIterator.next();
+ SAMRecord[] reads = new SAMRecord[alignmentsOfQuality.length];
+ for(int i = 0; i < alignmentsOfQuality.length; i++) {
+ reads[i] = Alignment.convertToRead(alignmentsOfQuality[i],read,newHeader);
+ }
+ return reads;
+ }
+
+ /**
+ * Unsupported.
+ */
+ public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); }
+ };
+ }
+ };
+ }
+
+ /**
+ * Get the paths associated with the given base string.
+ * @param bases List of bases.
+ * @return A set of paths through the BWA.
+ */
+ public BWAPath[] getPaths(byte[] bases) {
+ if(thunkPointer == 0)
+ throw new ReviewedStingException("BWA/C getPaths attempted, but BWA/C is not properly initialized.");
+ return getPaths(thunkPointer,bases);
+ }
+
+ /**
+ * Create a pointer to the BWA/C thunk.
+ * @param files BWT source files.
+ * @param configuration Configuration of the aligner.
+ * @return Pointer to the BWA/C thunk.
+ */
+ protected native long create(BWTFiles files, BWAConfiguration configuration);
+
+ /**
+ * Update the configuration passed to the BWA aligner. For internal use only.
+ * @param thunkPointer pointer to BWA object.
+ * @param configuration New configuration to set.
+ */
+ protected native void updateConfiguration(long thunkPointer, BWAConfiguration configuration);
+
+ /**
+ * Destroy the BWA/C thunk.
+ * @param thunkPointer Pointer to the allocated thunk.
+ */
+ protected native void destroy(long thunkPointer);
+
+ /**
+ * Do the extra steps involved in converting a local alignment to a global alignment.
+ * @param bases ASCII representation of byte array.
+ * @param paths Paths through the current BWT.
+ * @return A list of alignments.
+ */
+ protected Alignment[] convertPathsToAlignments(byte[] bases, BWAPath[] paths) {
+ if(thunkPointer == 0)
+ throw new ReviewedStingException("BWA/C convertPathsToAlignments attempted, but BWA/C is not properly initialized.");
+ return convertPathsToAlignments(thunkPointer,bases,paths);
+ }
+
+ /**
+ * Caller to the path generation functionality within BWA/C. Call this method's getPaths() wrapper (above) instead.
+ * @param thunkPointer pointer to the C++ object managing BWA/C.
+ * @param bases ASCII representation of byte array.
+ * @return A list of paths through the specified BWT.
+ */
+ protected native BWAPath[] getPaths(long thunkPointer, byte[] bases);
+
+ /**
+ * Do the extra steps involved in converting a local alignment to a global alignment.
+ * Call this method's convertPathsToAlignments() wrapper (above) instead.
+ * @param thunkPointer pointer to the C++ object managing BWA/C.
+ * @param bases ASCII representation of byte array.
+ * @param paths Paths through the current BWT.
+ * @return A list of alignments.
+ */
+ protected native Alignment[] convertPathsToAlignments(long thunkPointer, byte[] bases, BWAPath[] paths);
+
+ /**
+ * Gets the best alignment from BWA/C, randomly selected from all best-aligned reads.
+ * @param thunkPointer Pointer to BWA thunk.
+ * @param bases bases to align.
+ * @return The best alignment from BWA/C.
+ */
+ protected native Alignment getBestAlignment(long thunkPointer, byte[] bases);
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWAPath.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWAPath.java
new file mode 100755
index 0000000000..347d4344ff
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWAPath.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.alignment.bwa.c;
+
+/**
+ * Models a BWA path.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class BWAPath {
+ /**
+ * Number of mismatches encountered along this path.
+ */
+ public final int numMismatches;
+
+ /**
+ * Number of gap opens encountered along this path.
+ */
+ public final int numGapOpens;
+
+ /**
+ * Number of gap extensions along this path.
+ */
+ public final int numGapExtensions;
+
+ /**
+ * Whether this alignment was found on the positive or negative strand.
+ */
+ public final boolean negativeStrand;
+
+ /**
+ * Starting coordinate in the BWT.
+ */
+ public final long k;
+
+ /**
+ * Ending coordinate in the BWT.
+ */
+ public final long l;
+
+ /**
+ * The score of this path.
+ */
+ public final int score;
+
+ /**
+ * The number of best alignments seen along this path.
+ */
+ public final int bestCount;
+
+ /**
+ * The number of second best alignments seen along this path.
+ */
+ public final int secondBestCount;
+
+ /**
+ * Create a new path with the given attributes.
+ * @param numMismatches Number of mismatches along path.
+ * @param numGapOpens Number of gap opens along path.
+ * @param numGapExtensions Number of gap extensions along path.
+ * @param k Index to first coordinate within BWT.
+ * @param l Index to last coordinate within BWT.
+ * @param score Score of this alignment. Not the mapping quality.
+ */
+ public BWAPath(int numMismatches, int numGapOpens, int numGapExtensions, boolean negativeStrand, long k, long l, int score, int bestCount, int secondBestCount) {
+ this.numMismatches = numMismatches;
+ this.numGapOpens = numGapOpens;
+ this.numGapExtensions = numGapExtensions;
+ this.negativeStrand = negativeStrand;
+ this.k = k;
+ this.l = l;
+ this.score = score;
+ this.bestCount = bestCount;
+ this.secondBestCount = secondBestCount;
+ }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java
new file mode 100644
index 0000000000..2d568a96a4
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java
@@ -0,0 +1,164 @@
+package org.broadinstitute.sting.alignment.bwa.java;
+
+import net.sf.picard.reference.IndexedFastaSequenceFile;
+import net.sf.samtools.*;
+import org.broadinstitute.sting.alignment.Aligner;
+import org.broadinstitute.sting.alignment.Alignment;
+import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+
+/**
+ * A test harness to ensure that the perfect aligner works.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class AlignerTestHarness {
+ public static void main( String argv[] ) throws FileNotFoundException {
+ if( argv.length != 6 ) {
+ System.out.println("PerfectAlignerTestHarness ");
+ System.exit(1);
+ }
+
+ File referenceFile = new File(argv[0]);
+ File bwtFile = new File(argv[1]);
+ File rbwtFile = new File(argv[2]);
+ File suffixArrayFile = new File(argv[3]);
+ File reverseSuffixArrayFile = new File(argv[4]);
+ File bamFile = new File(argv[5]);
+
+ align(referenceFile,bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile,bamFile);
+ }
+
+ private static void align(File referenceFile, File bwtFile, File rbwtFile, File suffixArrayFile, File reverseSuffixArrayFile, File bamFile) throws FileNotFoundException {
+ Aligner aligner = new BWAJavaAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile);
+ int count = 0;
+
+ SAMFileReader reader = new SAMFileReader(bamFile);
+ reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
+
+ int mismatches = 0;
+ int failures = 0;
+
+ for(SAMRecord read: reader) {
+ count++;
+ if( count > 200000 ) break;
+ //if( count < 366000 ) continue;
+ //if( count > 2 ) break;
+ //if( !read.getReadName().endsWith("SL-XBC:1:82:506:404#0") )
+ // continue;
+ //if( !read.getReadName().endsWith("SL-XBC:1:36:30:1926#0") )
+ // continue;
+ //if( !read.getReadName().endsWith("SL-XBC:1:60:1342:1340#0") )
+ // continue;
+
+ SAMRecord alignmentCleaned = null;
+ try {
+ alignmentCleaned = (SAMRecord)read.clone();
+ }
+ catch( CloneNotSupportedException ex ) {
+ throw new ReviewedStingException("SAMRecord clone not supported", ex);
+ }
+
+ if( alignmentCleaned.getReadNegativeStrandFlag() )
+ alignmentCleaned.setReadBases(BaseUtils.simpleReverseComplement(alignmentCleaned.getReadBases()));
+
+ alignmentCleaned.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ alignmentCleaned.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
+ alignmentCleaned.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
+ alignmentCleaned.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR);
+
+ // Clear everything except flags pertaining to pairing and set 'unmapped' status to true.
+ alignmentCleaned.setFlags(alignmentCleaned.getFlags() & 0x00A1 | 0x000C);
+
+ Iterable alignments = aligner.getAllAlignments(alignmentCleaned.getReadBases());
+ if(!alignments.iterator().hasNext() ) {
+ //throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count));
+ System.out.printf("Unable to align read %s to reference; count = %d%n",read.getReadName(),count);
+ failures++;
+ }
+
+ Alignment foundAlignment = null;
+ for(Alignment[] alignmentsOfQuality: alignments) {
+ for(Alignment alignment: alignmentsOfQuality) {
+ if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() )
+ continue;
+ if( read.getAlignmentStart() != alignment.getAlignmentStart() )
+ continue;
+
+ foundAlignment = alignment;
+ }
+ }
+
+ if( foundAlignment != null ) {
+ //System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), foundAlignment.getAlignmentStart(), foundAlignment.getMismatches(), foundAlignment.getGapOpens(), foundAlignment.getGapExtensions());
+ }
+ else {
+ System.out.printf("Error aligning read %s%n", read.getReadName());
+
+ mismatches++;
+
+ IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
+
+ System.out.printf("read = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(read.getReadString(),read.getCigar(),CigarOperator.DELETION),
+ read.getAlignmentStart(),
+ read.getReadNegativeStrandFlag());
+ int numDeletions = numDeletionsInCigar(read.getCigar());
+ String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()+numDeletions-1).getBases());
+ System.out.printf("expected ref = %s%n", formatBasesBasedOnCigar(expectedRef,read.getCigar(),CigarOperator.INSERTION));
+
+ for(Alignment[] alignmentsOfQuality: alignments) {
+ for(Alignment alignment: alignmentsOfQuality) {
+ System.out.println();
+
+ Cigar cigar = ((BWAAlignment)alignment).getCigar();
+
+ System.out.printf("read = %s%n", formatBasesBasedOnCigar(read.getReadString(),cigar,CigarOperator.DELETION));
+
+ int deletionCount = ((BWAAlignment)alignment).getNumberOfBasesMatchingState(AlignmentState.DELETION);
+ String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignment.getAlignmentStart(),alignment.getAlignmentStart()+read.getReadLength()+deletionCount-1).getBases());
+ System.out.printf("actual ref = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(alignedRef,cigar,CigarOperator.INSERTION),
+ alignment.getAlignmentStart(),
+ alignment.isNegativeStrand());
+ }
+ }
+
+ //throw new StingException(String.format("Read %s was placed at incorrect location; count = %d%n",read.getReadName(),count));
+ }
+
+
+ if( count % 1000 == 0 )
+ System.out.printf("%d reads examined.%n",count);
+ }
+
+ System.out.printf("%d reads examined; %d mismatches; %d failures.%n",count,mismatches,failures);
+ }
+
+ private static String formatBasesBasedOnCigar( String bases, Cigar cigar, CigarOperator toBlank ) {
+ StringBuilder formatted = new StringBuilder();
+ int readIndex = 0;
+ for(CigarElement cigarElement: cigar.getCigarElements()) {
+ if(cigarElement.getOperator() == toBlank) {
+ int number = cigarElement.getLength();
+ while( number-- > 0 ) formatted.append(' ');
+ }
+ else {
+ int number = cigarElement.getLength();
+ while( number-- > 0 ) formatted.append(bases.charAt(readIndex++));
+ }
+ }
+ return formatted.toString();
+ }
+
+ private static int numDeletionsInCigar( Cigar cigar ) {
+ int numDeletions = 0;
+ for(CigarElement cigarElement: cigar.getCigarElements()) {
+ if(cigarElement.getOperator() == CigarOperator.DELETION)
+ numDeletions += cigarElement.getLength();
+ }
+ return numDeletions;
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentMatchSequence.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentMatchSequence.java
new file mode 100644
index 0000000000..f1e3c31b67
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentMatchSequence.java
@@ -0,0 +1,150 @@
+package org.broadinstitute.sting.alignment.bwa.java;
+
+import net.sf.samtools.Cigar;
+import net.sf.samtools.CigarElement;
+import net.sf.samtools.CigarOperator;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.Iterator;
+
+/**
+ * Represents a sequence of matches.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class AlignmentMatchSequence implements Cloneable {
+ /**
+ * Stores the particular match entries in the order they occur.
+ */
+ private Deque entries = new ArrayDeque();
+
+ /**
+ * Clone the given match sequence.
+ * @return A deep copy of the current match sequence.
+ */
+ public AlignmentMatchSequence clone() {
+ AlignmentMatchSequence copy = null;
+ try {
+ copy = (AlignmentMatchSequence)super.clone();
+ }
+ catch( CloneNotSupportedException ex ) {
+ throw new ReviewedStingException("Unable to clone AlignmentMatchSequence.");
+ }
+
+ copy.entries = new ArrayDeque();
+ for( AlignmentMatchSequenceEntry entry: entries )
+ copy.entries.add(entry.clone());
+
+ return copy;
+ }
+
+ public Cigar convertToCigar(boolean negativeStrand) {
+ Cigar cigar = new Cigar();
+ Iterator iterator = negativeStrand ? entries.descendingIterator() : entries.iterator();
+ while( iterator.hasNext() ) {
+ AlignmentMatchSequenceEntry entry = iterator.next();
+ CigarOperator operator;
+ switch( entry.getAlignmentState() ) {
+ case MATCH_MISMATCH: operator = CigarOperator.MATCH_OR_MISMATCH; break;
+ case INSERTION: operator = CigarOperator.INSERTION; break;
+ case DELETION: operator = CigarOperator.DELETION; break;
+ default: throw new ReviewedStingException("convertToCigar: cannot process state: " + entry.getAlignmentState());
+ }
+ cigar.add( new CigarElement(entry.count,operator) );
+ }
+ return cigar;
+ }
+
+ /**
+ * All a new alignment of the given state.
+ * @param state State to add to the sequence.
+ */
+ public void addNext( AlignmentState state ) {
+ AlignmentMatchSequenceEntry last = entries.peekLast();
+ // If the last entry is the same as this one, increment it. Otherwise, add a new entry.
+ if( last != null && last.alignmentState == state )
+ last.increment();
+ else
+ entries.add(new AlignmentMatchSequenceEntry(state));
+ }
+
+ /**
+ * Gets the current state of this alignment (what's the state of the last base?)
+ * @return State of the most recently aligned base.
+ */
+ public AlignmentState getCurrentState() {
+ if( entries.size() == 0 )
+ return AlignmentState.MATCH_MISMATCH;
+ return entries.peekLast().getAlignmentState();
+ }
+
+ /**
+ * How many bases in the read match the given state.
+ * @param state State to test.
+ * @return number of bases which match that state.
+ */
+ public int getNumberOfBasesMatchingState(AlignmentState state) {
+ int matches = 0;
+ for( AlignmentMatchSequenceEntry entry: entries ) {
+ if( entry.getAlignmentState() == state )
+ matches += entry.count;
+ }
+ return matches;
+ }
+
+ /**
+ * Stores an individual match sequence entry.
+ */
+ private class AlignmentMatchSequenceEntry implements Cloneable {
+ /**
+ * The state of the alignment throughout a given point in the sequence.
+ */
+ private final AlignmentState alignmentState;
+
+ /**
+ * The number of bases having this particular state.
+ */
+ private int count;
+
+ /**
+ * Create a new sequence entry with the given state.
+ * @param alignmentState The state that this sequence should contain.
+ */
+ AlignmentMatchSequenceEntry( AlignmentState alignmentState ) {
+ this.alignmentState = alignmentState;
+ this.count = 1;
+ }
+
+ /**
+ * Clone the given match sequence entry.
+ * @return A deep copy of the current match sequence entry.
+ */
+ public AlignmentMatchSequenceEntry clone() {
+ try {
+ return (AlignmentMatchSequenceEntry)super.clone();
+ }
+ catch( CloneNotSupportedException ex ) {
+ throw new ReviewedStingException("Unable to clone AlignmentMatchSequenceEntry.");
+ }
+ }
+
+ /**
+ * Retrieves the current state of the alignment.
+ * @return The state of the current sequence.
+ */
+ AlignmentState getAlignmentState() {
+ return alignmentState;
+ }
+
+ /**
+ * Increment the count of alignments having this particular state.
+ */
+ void increment() {
+ count++;
+ }
+ }
+}
+
diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentState.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentState.java
new file mode 100644
index 0000000000..92c603335a
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentState.java
@@ -0,0 +1,13 @@
+package org.broadinstitute.sting.alignment.bwa.java;
+
+/**
+ * The state of a given base in the alignment.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public enum AlignmentState {
+ MATCH_MISMATCH,
+ INSERTION,
+ DELETION
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java
new file mode 100644
index 0000000000..f3b515dba3
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java
@@ -0,0 +1,190 @@
+package org.broadinstitute.sting.alignment.bwa.java;
+
+import net.sf.samtools.Cigar;
+import org.broadinstitute.sting.alignment.Alignment;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+/**
+ * An alignment object to be used incrementally as the BWA aligner
+ * inspects the read.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class BWAAlignment extends Alignment implements Cloneable {
+ /**
+ * Track the number of alignments that have been created.
+ */
+ private static long numCreated;
+
+ /**
+ * Which number alignment is this?
+ */
+ private long creationNumber;
+
+ /**
+ * The aligner performing the alignments.
+ */
+ protected BWAJavaAligner aligner;
+
+ /**
+ * The sequence of matches/mismatches/insertions/deletions.
+ */
+ private AlignmentMatchSequence alignmentMatchSequence = new AlignmentMatchSequence();
+
+ /**
+ * Working variable. How many bases have been matched at this point.
+ */
+ protected int position;
+
+ /**
+ * Working variable. How many mismatches have been encountered at this point.
+ */
+ private int mismatches;
+
+ /**
+ * Number of gap opens in alignment.
+ */
+ private int gapOpens;
+
+ /**
+ * Number of gap extensions in alignment.
+ */
+ private int gapExtensions;
+
+ /**
+ * Working variable. The lower bound of the alignment within the BWT.
+ */
+ protected long loBound;
+
+ /**
+ * Working variable. The upper bound of the alignment within the BWT.
+ */
+ protected long hiBound;
+
+ protected void setAlignmentStart(long position) {
+ this.alignmentStart = position;
+ }
+
+ protected void setNegativeStrand(boolean negativeStrand) {
+ this.negativeStrand = negativeStrand;
+ }
+
+ /**
+ * Cache the score.
+ */
+ private int score;
+
+ public Cigar getCigar() {
+ return alignmentMatchSequence.convertToCigar(isNegativeStrand());
+ }
+
+ /**
+ * Gets the current state of this alignment (state of the last base viewed)..
+ * @return Current state of the alignment.
+ */
+ public AlignmentState getCurrentState() {
+ return alignmentMatchSequence.getCurrentState();
+ }
+
+ /**
+ * Adds the given state to the current alignment.
+ * @param state State to add to the given alignment.
+ */
+ public void addState( AlignmentState state ) {
+ alignmentMatchSequence.addNext(state);
+ }
+
+ /**
+ * Gets the BWA score of this alignment.
+ * @return BWA-style scores. 0 is best.
+ */
+ public int getScore() {
+ return score;
+ }
+
+ public int getMismatches() { return mismatches; }
+ public int getGapOpens() { return gapOpens; }
+ public int getGapExtensions() { return gapExtensions; }
+
+ public void incrementMismatches() {
+ this.mismatches++;
+ updateScore();
+ }
+
+ public void incrementGapOpens() {
+ this.gapOpens++;
+ updateScore();
+ }
+
+ public void incrementGapExtensions() {
+ this.gapExtensions++;
+ updateScore();
+ }
+
+ /**
+ * Updates the score based on new information about matches / mismatches.
+ */
+ private void updateScore() {
+ score = mismatches*aligner.MISMATCH_PENALTY + gapOpens*aligner.GAP_OPEN_PENALTY + gapExtensions*aligner.GAP_EXTENSION_PENALTY;
+ }
+
+ /**
+ * Create a new alignment with the given parent aligner.
+ * @param aligner Aligner being used.
+ */
+ public BWAAlignment( BWAJavaAligner aligner ) {
+ this.aligner = aligner;
+ this.creationNumber = numCreated++;
+ }
+
+ /**
+ * Clone the alignment.
+ * @return New instance of the alignment.
+ */
+ public BWAAlignment clone() {
+ BWAAlignment newAlignment = null;
+ try {
+ newAlignment = (BWAAlignment)super.clone();
+ }
+ catch( CloneNotSupportedException ex ) {
+ throw new ReviewedStingException("Unable to clone BWAAlignment.");
+ }
+ newAlignment.creationNumber = numCreated++;
+ newAlignment.alignmentMatchSequence = alignmentMatchSequence.clone();
+
+ return newAlignment;
+ }
+
+ /**
+ * How many bases in the read match the given state.
+ * @param state State to test.
+ * @return number of bases which match that state.
+ */
+ public int getNumberOfBasesMatchingState(AlignmentState state) {
+ return alignmentMatchSequence.getNumberOfBasesMatchingState(state);
+ }
+
+ /**
+ * Compare this alignment to another alignment.
+ * @param rhs Other alignment to which to compare.
+ * @return < 0 if this < other, == 0 if this == other, > 0 if this > other
+ */
+ public int compareTo(Alignment rhs) {
+ BWAAlignment other = (BWAAlignment)rhs;
+
+ // If the scores are different, disambiguate using the score.
+ if(score != other.score)
+ return score > other.score ? 1 : -1;
+
+ // Otherwise, use the order in which the elements were created.
+ if(creationNumber != other.creationNumber)
+ return creationNumber > other.creationNumber ? -1 : 1;
+
+ return 0;
+ }
+
+ public String toString() {
+ return String.format("position: %d, strand: %b, state: %s, mismatches: %d, gap opens: %d, gap extensions: %d, loBound: %d, hiBound: %d, score: %d, creationNumber: %d", position, negativeStrand, alignmentMatchSequence.getCurrentState(), mismatches, gapOpens, gapExtensions, loBound, hiBound, getScore(), creationNumber);
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java
new file mode 100644
index 0000000000..fbeac91925
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java
@@ -0,0 +1,393 @@
+package org.broadinstitute.sting.alignment.bwa.java;
+
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.alignment.Alignment;
+import org.broadinstitute.sting.alignment.bwa.BWAAligner;
+import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
+import org.broadinstitute.sting.alignment.reference.bwt.*;
+import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.Utils;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.PriorityQueue;
+
+/**
+ * Create imperfect alignments from the read to the genome represented by the given BWT / suffix array.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class BWAJavaAligner extends BWAAligner {
+ /**
+ * BWT in the forward direction.
+ */
+ private BWT forwardBWT;
+
+ /**
+ * BWT in the reverse direction.
+ */
+ private BWT reverseBWT;
+
+ /**
+ * Suffix array in the forward direction.
+ */
+ private SuffixArray forwardSuffixArray;
+
+ /**
+ * Suffix array in the reverse direction.
+ */
+ private SuffixArray reverseSuffixArray;
+
+ /**
+ * Maximum edit distance (-n option from original BWA).
+ */
+ private final int MAXIMUM_EDIT_DISTANCE = 4;
+
+ /**
+ * Maximum number of gap opens (-o option from original BWA).
+ */
+ private final int MAXIMUM_GAP_OPENS = 1;
+
+ /**
+ * Maximum number of gap extensions (-e option from original BWA).
+ */
+ private final int MAXIMUM_GAP_EXTENSIONS = 6;
+
+ /**
+ * Penalty for straight mismatches (-M option from original BWA).
+ */
+ public final int MISMATCH_PENALTY = 3;
+
+ /**
+ * Penalty for gap opens (-O option from original BWA).
+ */
+ public final int GAP_OPEN_PENALTY = 11;
+
+ /**
+ * Penalty for gap extensions (-E option from original BWA).
+ */
+ public final int GAP_EXTENSION_PENALTY = 4;
+
+ /**
+ * Skip the ends of indels.
+ */
+ public final int INDEL_END_SKIP = 5;
+
+ public BWAJavaAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) {
+ super(null,null);
+ forwardBWT = new BWTReader(forwardBWTFile).read();
+ reverseBWT = new BWTReader(reverseBWTFile).read();
+ forwardSuffixArray = new SuffixArrayReader(forwardSuffixArrayFile,forwardBWT).read();
+ reverseSuffixArray = new SuffixArrayReader(reverseSuffixArrayFile,reverseBWT).read();
+ }
+
+ /**
+ * Close this instance of the BWA pointer and delete its resources.
+ */
+ @Override
+ public void close() {
+ throw new UnsupportedOperationException("BWA aligner can't currently be closed.");
+ }
+
+ /**
+ * Update the current parameters of this aligner.
+ * @param configuration New configuration to set.
+ */
+ public void updateConfiguration(BWAConfiguration configuration) {
+ throw new UnsupportedOperationException("Configuration of the BWA aligner can't currently be changed.");
+ }
+
+ /**
+ * Allow the aligner to choose one alignment randomly from the pile of best alignments.
+ * @param bases Bases to align.
+ * @return An align
+ */
+ public Alignment getBestAlignment(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
+
+ /**
+ * Align the read to the reference.
+ * @param read Read to align.
+ * @param header Optional header to drop in place.
+ * @return A list of the alignments.
+ */
+ public SAMRecord align(final SAMRecord read, final SAMFileHeader header) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
+
+ /**
+ * Get a iterator of alignments, batched by mapping quality.
+ * @param bases List of bases.
+ * @return Iterator to alignments.
+ */
+ public Iterable getAllAlignments(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
+
+ /**
+ * Get a iterator of aligned reads, batched by mapping quality.
+ * @param read Read to align.
+ * @param newHeader Optional new header to use when aligning the read. If present, it must be null.
+ * @return Iterator to alignments.
+ */
+ public Iterable alignAll(final SAMRecord read, final SAMFileHeader newHeader) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
+
+
+ public List align( SAMRecord read ) {
+ List successfulMatches = new ArrayList();
+
+ Byte[] uncomplementedBases = normalizeBases(read.getReadBases());
+ Byte[] complementedBases = normalizeBases(Utils.reverse(BaseUtils.simpleReverseComplement(read.getReadBases())));
+
+ List forwardLowerBounds = LowerBound.create(uncomplementedBases,forwardBWT);
+ List reverseLowerBounds = LowerBound.create(complementedBases,reverseBWT);
+
+ // Seed the best score with any score that won't overflow on comparison.
+ int bestScore = Integer.MAX_VALUE - MISMATCH_PENALTY;
+ int bestDiff = MAXIMUM_EDIT_DISTANCE+1;
+ int maxDiff = MAXIMUM_EDIT_DISTANCE;
+
+ PriorityQueue alignments = new PriorityQueue();
+
+ // Create a fictional initial alignment, with the position just off the end of the read, and the limits
+ // set as the entire BWT.
+ alignments.add(createSeedAlignment(reverseBWT));
+ alignments.add(createSeedAlignment(forwardBWT));
+
+ while(!alignments.isEmpty()) {
+ BWAAlignment alignment = alignments.remove();
+
+ // From bwtgap.c in the original BWT; if the rank is worse than the best score + the mismatch PENALTY, move on.
+ if( alignment.getScore() > bestScore + MISMATCH_PENALTY )
+ break;
+
+ Byte[] bases = alignment.isNegativeStrand() ? complementedBases : uncomplementedBases;
+ BWT bwt = alignment.isNegativeStrand() ? forwardBWT : reverseBWT;
+ List lowerBounds = alignment.isNegativeStrand() ? reverseLowerBounds : forwardLowerBounds;
+
+ // if z < D(i) then return {}
+ int mismatches = maxDiff - alignment.getMismatches() - alignment.getGapOpens() - alignment.getGapExtensions();
+ if( alignment.position < lowerBounds.size()-1 && mismatches < lowerBounds.get(alignment.position+1).value )
+ continue;
+
+ if(mismatches == 0) {
+ exactMatch(alignment,bases,bwt);
+ if(alignment.loBound > alignment.hiBound)
+ continue;
+ }
+
+ // Found a valid alignment; store it and move on.
+ if(alignment.position >= read.getReadLength()-1) {
+ for(long bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++) {
+ BWAAlignment finalAlignment = alignment.clone();
+
+ if( finalAlignment.isNegativeStrand() )
+ finalAlignment.setAlignmentStart(forwardSuffixArray.get(bwtIndex) + 1);
+ else {
+ int sizeAlongReference = read.getReadLength() -
+ finalAlignment.getNumberOfBasesMatchingState(AlignmentState.INSERTION) +
+ finalAlignment.getNumberOfBasesMatchingState(AlignmentState.DELETION);
+ finalAlignment.setAlignmentStart(reverseBWT.length() - reverseSuffixArray.get(bwtIndex) - sizeAlongReference + 1);
+ }
+
+ successfulMatches.add(finalAlignment);
+
+ bestScore = Math.min(finalAlignment.getScore(),bestScore);
+ bestDiff = Math.min(finalAlignment.getMismatches()+finalAlignment.getGapOpens()+finalAlignment.getGapExtensions(),bestDiff);
+ maxDiff = bestDiff + 1;
+ }
+
+ continue;
+ }
+
+ //System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d, base = %s%n", alignments.size(), alignment, lowerBounds.get(alignment.position+1).value, alignment.position >= 0 ? (char)bases[alignment.position].byteValue() : "");
+ /*
+ System.out.printf("#1\t[%d,%d,%d,%c]\t[%d,%d,%d]\t[%d,%d]\t[%d,%d]%n",alignments.size(),
+ alignment.negativeStrand?1:0,
+ bases.length-alignment.position-1,
+ alignment.getCurrentState().toString().charAt(0),
+ alignment.getMismatches(),
+ alignment.getGapOpens(),
+ alignment.getGapExtensions(),
+ lowerBounds.get(alignment.position+1).value,
+ lowerBounds.get(alignment.position+1).width,
+ alignment.loBound,
+ alignment.hiBound);
+ */
+
+ // Temporary -- look ahead to see if the next alignment is bounded.
+ boolean allowDifferences = mismatches > 0;
+ boolean allowMismatches = mismatches > 0;
+
+ if( allowDifferences &&
+ alignment.position+1 >= INDEL_END_SKIP-1+alignment.getGapOpens()+alignment.getGapExtensions() &&
+ read.getReadLength()-1-(alignment.position+1) >= INDEL_END_SKIP+alignment.getGapOpens()+alignment.getGapExtensions() ) {
+ if( alignment.getCurrentState() == AlignmentState.MATCH_MISMATCH ) {
+ if( alignment.getGapOpens() < MAXIMUM_GAP_OPENS ) {
+ // Add a potential insertion extension.
+ BWAAlignment insertionAlignment = createInsertionAlignment(alignment);
+ insertionAlignment.incrementGapOpens();
+ alignments.add(insertionAlignment);
+
+ // Add a potential deletion by marking a deletion and augmenting the position.
+ List deletionAlignments = createDeletionAlignments(bwt,alignment);
+ for( BWAAlignment deletionAlignment: deletionAlignments )
+ deletionAlignment.incrementGapOpens();
+ alignments.addAll(deletionAlignments);
+ }
+ }
+ else if( alignment.getCurrentState() == AlignmentState.INSERTION ) {
+ if( alignment.getGapExtensions() < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) {
+ // Add a potential insertion extension.
+ BWAAlignment insertionAlignment = createInsertionAlignment(alignment);
+ insertionAlignment.incrementGapExtensions();
+ alignments.add(insertionAlignment);
+ }
+ }
+ else if( alignment.getCurrentState() == AlignmentState.DELETION ) {
+ if( alignment.getGapExtensions() < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) {
+ // Add a potential deletion by marking a deletion and augmenting the position.
+ List deletionAlignments = createDeletionAlignments(bwt,alignment);
+ for( BWAAlignment deletionAlignment: deletionAlignments )
+ deletionAlignment.incrementGapExtensions();
+ alignments.addAll(deletionAlignments);
+ }
+ }
+ }
+
+ // Mismatches
+ alignments.addAll(createMatchedAlignments(bwt,alignment,bases,allowDifferences&&allowMismatches));
+ }
+
+ return successfulMatches;
+ }
+
+ /**
+ * Create an seeding alignment to use as a starting point when traversing.
+ * @param bwt source BWT.
+ * @return Seed alignment.
+ */
+ private BWAAlignment createSeedAlignment(BWT bwt) {
+ BWAAlignment seed = new BWAAlignment(this);
+ seed.setNegativeStrand(bwt == forwardBWT);
+ seed.position = -1;
+ seed.loBound = 0;
+ seed.hiBound = bwt.length();
+ return seed;
+ }
+
+ /**
+ * Creates a new alignments representing direct matches / mismatches.
+ * @param bwt Source BWT with which to work.
+ * @param alignment Alignment for the previous position.
+ * @param bases The bases in the read.
+ * @param allowMismatch Should mismatching bases be allowed?
+ * @return New alignment representing this position if valid; null otherwise.
+ */
+ private List createMatchedAlignments( BWT bwt, BWAAlignment alignment, Byte[] bases, boolean allowMismatch ) {
+ List newAlignments = new ArrayList();
+
+ List baseChoices = new ArrayList();
+ Byte thisBase = bases[alignment.position+1];
+
+ if( allowMismatch )
+ baseChoices.addAll(Bases.allOf());
+ else
+ baseChoices.add(thisBase);
+
+ if( thisBase != null ) {
+ // Keep rotating the current base to the last position until we've hit the current base.
+ for( ;; ) {
+ baseChoices.add(baseChoices.remove(0));
+ if( thisBase.equals(baseChoices.get(baseChoices.size()-1)) )
+ break;
+
+ }
+ }
+
+ for(byte base: baseChoices) {
+ BWAAlignment newAlignment = alignment.clone();
+
+ newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
+ newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
+
+ // If this alignment is valid, skip it.
+ if( newAlignment.loBound > newAlignment.hiBound )
+ continue;
+
+ newAlignment.position++;
+ newAlignment.addState(AlignmentState.MATCH_MISMATCH);
+ if( bases[newAlignment.position] == null || base != bases[newAlignment.position] )
+ newAlignment.incrementMismatches();
+
+ newAlignments.add(newAlignment);
+ }
+
+ return newAlignments;
+ }
+
+ /**
+ * Create a new alignment representing an insertion at this point in the read.
+ * @param alignment Alignment from which to derive the insertion.
+ * @return New alignment reflecting the insertion.
+ */
+ private BWAAlignment createInsertionAlignment( BWAAlignment alignment ) {
+ // Add a potential insertion extension.
+ BWAAlignment newAlignment = alignment.clone();
+ newAlignment.position++;
+ newAlignment.addState(AlignmentState.INSERTION);
+ return newAlignment;
+ }
+
+ /**
+ * Create new alignments representing a deletion at this point in the read.
+ * @param bwt source BWT for inferring deletion info.
+ * @param alignment Alignment from which to derive the deletion.
+ * @return New alignments reflecting all possible deletions.
+ */
+ private List createDeletionAlignments( BWT bwt, BWAAlignment alignment) {
+ List newAlignments = new ArrayList();
+ for(byte base: Bases.instance) {
+ BWAAlignment newAlignment = alignment.clone();
+
+ newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
+ newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
+
+ // If this alignment is valid, skip it.
+ if( newAlignment.loBound > newAlignment.hiBound )
+ continue;
+
+ newAlignment.addState(AlignmentState.DELETION);
+
+ newAlignments.add(newAlignment);
+ }
+
+ return newAlignments;
+ }
+
+ /**
+ * Exactly match the given alignment against the given BWT.
+ * @param alignment Alignment to match.
+ * @param bases Bases to use.
+ * @param bwt BWT to use.
+ */
+ private void exactMatch( BWAAlignment alignment, Byte[] bases, BWT bwt ) {
+ while( ++alignment.position < bases.length ) {
+ byte base = bases[alignment.position];
+ alignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
+ alignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
+ if( alignment.loBound > alignment.hiBound )
+ return;
+ }
+ }
+
+ /**
+ * Make each base into A/C/G/T or null if unknown.
+ * @param bases Base string to normalize.
+ * @return Array of normalized bases.
+ */
+ private Byte[] normalizeBases( byte[] bases ) {
+ Byte[] normalBases = new Byte[bases.length];
+ for(int i = 0; i < bases.length; i++)
+ normalBases[i] = Bases.fromASCII(bases[i]);
+ return normalBases;
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/LowerBound.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/LowerBound.java
new file mode 100644
index 0000000000..be75142553
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/LowerBound.java
@@ -0,0 +1,88 @@
+package org.broadinstitute.sting.alignment.bwa.java;
+
+import org.broadinstitute.sting.alignment.reference.bwt.BWT;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * At any point along the given read, what is a good lower bound for the
+ * total number of differences?
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class LowerBound {
+ /**
+ * Lower bound of the suffix array.
+ */
+ public final long loIndex;
+
+ /**
+ * Upper bound of the suffix array.
+ */
+ public final long hiIndex;
+
+ /**
+ * Width of the bwt from loIndex -> hiIndex, inclusive.
+ */
+ public final long width;
+
+ /**
+ * The lower bound at the given point.
+ */
+ public final int value;
+
+ /**
+ * Create a new lower bound with the given value.
+ * @param loIndex The lower bound of the BWT.
+ * @param hiIndex The upper bound of the BWT.
+ * @param value Value for the lower bound at this site.
+ */
+ private LowerBound(long loIndex, long hiIndex, int value) {
+ this.loIndex = loIndex;
+ this.hiIndex = hiIndex;
+ this.width = hiIndex - loIndex + 1;
+ this.value = value;
+ }
+
+ /**
+ * Create a non-optimal bound according to the algorithm specified in Figure 3 of the BWA paper.
+ * @param bases Bases of the read to use when creating a new BWT.
+ * @param bwt BWT to check against.
+ * @return A list of lower bounds at every point in the reference.
+ *
+ */
+ public static List create(Byte[] bases, BWT bwt) {
+ List bounds = new ArrayList();
+
+ long loIndex = 0, hiIndex = bwt.length();
+ int mismatches = 0;
+ for( int i = bases.length-1; i >= 0; i-- ) {
+ Byte base = bases[i];
+
+ // Ignore non-ACGT bases.
+ if( base != null ) {
+ loIndex = bwt.counts(base) + bwt.occurrences(base,loIndex-1) + 1;
+ hiIndex = bwt.counts(base) + bwt.occurrences(base,hiIndex);
+ }
+
+ if( base == null || loIndex > hiIndex ) {
+ loIndex = 0;
+ hiIndex = bwt.length();
+ mismatches++;
+ }
+ bounds.add(0,new LowerBound(loIndex,hiIndex,mismatches));
+ }
+
+ return bounds;
+ }
+
+ /**
+ * Create a string representation of this bound.
+ * @return String version of this bound.
+ */
+ public String toString() {
+ return String.format("LowerBound: w = %d, value = %d",width,value);
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/package-info.java b/public/java/src/org/broadinstitute/sting/alignment/package-info.java
new file mode 100644
index 0000000000..60cf1e425a
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Analyses used to validate the correctness and performance the BWA Java bindings.
+ */
+package org.broadinstitute.sting.alignment;
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/AMBWriter.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/AMBWriter.java
new file mode 100644
index 0000000000..ec10415dd1
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/AMBWriter.java
@@ -0,0 +1,68 @@
+package org.broadinstitute.sting.alignment.reference.bwt;
+
+import net.sf.samtools.SAMSequenceDictionary;
+import net.sf.samtools.SAMSequenceRecord;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
+
+/**
+ * Writes .amb files - a file indicating where 'holes' (indeterminant bases)
+ * exist in the contig. Currently, only empty, placeholder AMBs are supported.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class AMBWriter {
+ /**
+ * Number of holes is fixed at zero.
+ */
+ private static final int NUM_HOLES = 0;
+
+ /**
+ * Input stream from which to read BWT data.
+ */
+ private final PrintStream out;
+
+ /**
+ * Create a new ANNWriter targeting the given file.
+ * @param file file into which ANN data should be written.
+ * @throws java.io.IOException if there is a problem opening the output file.
+ */
+ public AMBWriter(File file) throws IOException {
+ out = new PrintStream(file);
+ }
+
+ /**
+ * Create a new ANNWriter targeting the given OutputStream.
+ * @param stream Stream into which ANN data should be written.
+ */
+ public AMBWriter(OutputStream stream) {
+ out = new PrintStream(stream);
+ }
+
+ /**
+ * Write the contents of the given dictionary into the AMB file.
+ * Assumes that there are no holes in the dictionary.
+ * @param dictionary Dictionary to write.
+ */
+ public void writeEmpty(SAMSequenceDictionary dictionary) {
+ long genomeLength = 0L;
+ for(SAMSequenceRecord sequence: dictionary.getSequences())
+ genomeLength += sequence.getSequenceLength();
+
+ int sequences = dictionary.getSequences().size();
+
+ // Write the header
+ out.printf("%d %d %d%n",genomeLength,sequences,NUM_HOLES);
+ }
+
+ /**
+ * Close the given output stream.
+ */
+ public void close() {
+ out.close();
+ }
+}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/ANNWriter.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/ANNWriter.java
new file mode 100644
index 0000000000..8d692a9e77
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/ANNWriter.java
@@ -0,0 +1,95 @@
+package org.broadinstitute.sting.alignment.reference.bwt;
+
+import net.sf.samtools.SAMSequenceDictionary;
+import net.sf.samtools.SAMSequenceRecord;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
+
+/**
+ * Writes .ann files - an alternate sequence dictionary format
+ * used by BWA/C. For best results, the input sequence dictionary
+ * should be created with Picard's CreateSequenceDictionary.jar,
+ * TRUNCATE_NAMES_AT_WHITESPACE=false.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class ANNWriter {
+ /**
+ * BWA uses a fixed seed of 11, written into every file.
+ */
+ private static final int BNS_SEED = 11;
+
+ /**
+ * A seemingly unused value that appears in every contig in the ANN.
+ */
+ private static final int GI = 0;
+
+ /**
+ * Input stream from which to read BWT data.
+ */
+ private final PrintStream out;
+
+ /**
+ * Create a new ANNWriter targeting the given file.
+ * @param file file into which ANN data should be written.
+ * @throws IOException if there is a problem opening the output file.
+ */
+ public ANNWriter(File file) throws IOException {
+ out = new PrintStream(file);
+ }
+
+ /**
+ * Create a new ANNWriter targeting the given OutputStream.
+ * @param stream Stream into which ANN data should be written.
+ */
+ public ANNWriter(OutputStream stream) {
+ out = new PrintStream(stream);
+ }
+
+ /**
+ * Write the contents of the given dictionary into the ANN file.
+ * Assumes that no ambs (blocks of indeterminate base) are present in the dictionary.
+ * @param dictionary Dictionary to write.
+ */
+ public void write(SAMSequenceDictionary dictionary) {
+ long genomeLength = 0L;
+ for(SAMSequenceRecord sequence: dictionary.getSequences())
+ genomeLength += sequence.getSequenceLength();
+
+ int sequences = dictionary.getSequences().size();
+
+ // Write the header
+ out.printf("%d %d %d%n",genomeLength,sequences,BNS_SEED);
+
+ for(SAMSequenceRecord sequence: dictionary.getSequences()) {
+ String fullSequenceName = sequence.getSequenceName();
+ String trimmedSequenceName = fullSequenceName;
+ String sequenceComment = "(null)";
+
+ long offset = 0;
+
+ // Separate the sequence name from the sequence comment, based on BWA's definition.
+ // BWA's definition appears to accept a zero-length contig name, so mimic that behavior.
+ if(fullSequenceName.indexOf(' ') >= 0) {
+ trimmedSequenceName = fullSequenceName.substring(0,fullSequenceName.indexOf(' '));
+ sequenceComment = fullSequenceName.substring(fullSequenceName.indexOf(' ')+1);
+ }
+
+ // Write the sequence GI (?), name, and comment.
+ out.printf("%d %s %s%n",GI,trimmedSequenceName,sequenceComment);
+ // Write the sequence offset, length, and ambs (currently fixed at 0).
+ out.printf("%d %d %d%n",offset,sequence.getSequenceLength(),0);
+ }
+ }
+
+ /**
+ * Close the given output stream.
+ */
+ public void close() {
+ out.close();
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWT.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWT.java
new file mode 100644
index 0000000000..7f8c48253d
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWT.java
@@ -0,0 +1,172 @@
+package org.broadinstitute.sting.alignment.reference.bwt;
+
+import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+/**
+ * Represents the Burrows-Wheeler Transform of a reference sequence.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class BWT {
+ /**
+ * Write an occurrence table after every SEQUENCE_BLOCK_SIZE bases.
+ * For this implementation to behave correctly, SEQUENCE_BLOCK_SIZE % 8 == 0
+ */
+ public static final int SEQUENCE_BLOCK_SIZE = 128;
+
+ /**
+ * The inverse SA, used as a placeholder for determining where the special EOL character sits.
+ */
+ protected final long inverseSA0;
+
+ /**
+ * Cumulative counts for the entire BWT.
+ */
+ protected final Counts counts;
+
+ /**
+ * The individual sequence blocks, modelling how they appear on disk.
+ */
+ protected final SequenceBlock[] sequenceBlocks;
+
+ /**
+ * Creates a new BWT with the given inverse SA, counts, and sequence (in ASCII).
+ * @param inverseSA0 Inverse SA entry for the first element. Will be missing from the BWT sequence.
+ * @param counts Cumulative count of bases, in A,C,G,T order.
+ * @param sequenceBlocks The full BWT sequence, sans the '$'.
+ */
+ public BWT( long inverseSA0, Counts counts, SequenceBlock[] sequenceBlocks ) {
+ this.inverseSA0 = inverseSA0;
+ this.counts = counts;
+ this.sequenceBlocks = sequenceBlocks;
+ }
+
+ /**
+ * Creates a new BWT with the given inverse SA, occurrences, and sequence (in ASCII).
+ * @param inverseSA0 Inverse SA entry for the first element. Will be missing from the BWT sequence.
+ * @param counts Count of bases, in A,C,G,T order.
+ * @param sequence The full BWT sequence, sans the '$'.
+ */
+ public BWT( long inverseSA0, Counts counts, byte[] sequence ) {
+ this(inverseSA0,counts,generateSequenceBlocks(sequence));
+ }
+
+ /**
+ * Extract the full sequence from the list of block.
+ * @return The full BWT string as a byte array.
+ */
+ public byte[] getSequence() {
+ byte[] sequence = new byte[(int)counts.getTotal()];
+ for( SequenceBlock block: sequenceBlocks )
+ System.arraycopy(block.sequence,0,sequence,block.sequenceStart,block.sequenceLength);
+ return sequence;
+ }
+
+ /**
+ * Get the total counts of bases lexicographically smaller than the given base, for Ferragina and Manzini's search.
+ * @param base The base.
+ * @return Total counts for all bases lexicographically smaller than this base.
+ */
+ public long counts(byte base) {
+ return counts.getCumulative(base);
+ }
+
+ /**
+ * Get the total counts of bases lexicographically smaller than the given base, for Ferragina and Manzini's search.
+ * @param base The base.
+ * @param index The position to search within the BWT.
+ * @return Total counts for all bases lexicographically smaller than this base.
+ */
+ public long occurrences(byte base,long index) {
+ SequenceBlock block = getSequenceBlock(index);
+ int position = getSequencePosition(index);
+ long accumulator = block.occurrences.get(base);
+ for(int i = 0; i <= position; i++) {
+ if(base == block.sequence[i])
+ accumulator++;
+ }
+ return accumulator;
+ }
+
+ /**
+ * The number of bases in the BWT as a whole.
+ * @return Number of bases.
+ */
+ public long length() {
+ return counts.getTotal();
+ }
+
+ /**
+ * Create a new BWT from the given reference sequence.
+ * @param referenceSequence Sequence from which to derive the BWT.
+ * @return reference sequence-derived BWT.
+ */
+ public static BWT createFromReferenceSequence(byte[] referenceSequence) {
+ SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence);
+
+ byte[] bwt = new byte[(int)suffixArray.length()-1];
+ int bwtIndex = 0;
+ for(long suffixArrayIndex = 0; suffixArrayIndex < suffixArray.length(); suffixArrayIndex++) {
+ if(suffixArray.get(suffixArrayIndex) == 0)
+ continue;
+ bwt[bwtIndex++] = referenceSequence[(int)suffixArray.get(suffixArrayIndex)-1];
+ }
+
+ return new BWT(suffixArray.inverseSA0,suffixArray.occurrences,bwt);
+ }
+
+ /**
+ * Gets the base at a given position in the BWT.
+ * @param index The index to use.
+ * @return The base at that location.
+ */
+ protected byte getBase(long index) {
+ if(index == inverseSA0)
+ throw new ReviewedStingException(String.format("Base at index %d does not have a text representation",index));
+
+ SequenceBlock block = getSequenceBlock(index);
+ int position = getSequencePosition(index);
+ return block.sequence[position];
+ }
+
+ private SequenceBlock getSequenceBlock(long index) {
+ // If the index is above the SA-1[0], remap it to the appropriate coordinate space.
+ if(index > inverseSA0) index--;
+ return sequenceBlocks[(int)(index/SEQUENCE_BLOCK_SIZE)];
+ }
+
+ private int getSequencePosition(long index) {
+ // If the index is above the SA-1[0], remap it to the appropriate coordinate space.
+ if(index > inverseSA0) index--;
+ return (int)(index%SEQUENCE_BLOCK_SIZE);
+ }
+
+ /**
+ * Create a set of sequence blocks from one long sequence.
+ * @param sequence Sequence from which to derive blocks.
+ * @return Array of sequence blocks containing data from the sequence.
+ */
+ private static SequenceBlock[] generateSequenceBlocks( byte[] sequence ) {
+ Counts occurrences = new Counts();
+
+ int numSequenceBlocks = PackUtils.numberOfPartitions(sequence.length,SEQUENCE_BLOCK_SIZE);
+ SequenceBlock[] sequenceBlocks = new SequenceBlock[numSequenceBlocks];
+
+ for( int block = 0; block < numSequenceBlocks; block++ ) {
+ int blockStart = block*SEQUENCE_BLOCK_SIZE;
+ int blockLength = Math.min(SEQUENCE_BLOCK_SIZE, sequence.length-blockStart);
+ byte[] subsequence = new byte[blockLength];
+
+ System.arraycopy(sequence,blockStart,subsequence,0,blockLength);
+
+ sequenceBlocks[block] = new SequenceBlock(blockStart,blockLength,occurrences.clone(),subsequence);
+
+ for( byte base: subsequence )
+ occurrences.increment(base);
+ }
+
+ return sequenceBlocks;
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTReader.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTReader.java
new file mode 100644
index 0000000000..5c4f6d39d8
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTReader.java
@@ -0,0 +1,89 @@
+package org.broadinstitute.sting.alignment.reference.bwt;
+
+import org.broadinstitute.sting.alignment.reference.packing.BasePackedInputStream;
+import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
+import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedInputStream;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.ByteOrder;
+/**
+ * Reads a BWT from a given file.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class BWTReader {
+ /**
+ * Input stream from which to read BWT data.
+ */
+ private FileInputStream inputStream;
+
+ /**
+ * Create a new BWT reader.
+ * @param inputFile File in which the BWT is stored.
+ */
+ public BWTReader( File inputFile ) {
+ try {
+ this.inputStream = new FileInputStream(inputFile);
+ }
+ catch( FileNotFoundException ex ) {
+ throw new ReviewedStingException("Unable to open input file", ex);
+ }
+ }
+
+ /**
+ * Read a BWT from the input stream.
+ * @return The BWT stored in the input stream.
+ */
+ public BWT read() {
+ UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN);
+ BasePackedInputStream basePackedInputStream = new BasePackedInputStream(Integer.class, inputStream, ByteOrder.LITTLE_ENDIAN);
+
+ long inverseSA0;
+ long[] count;
+ SequenceBlock[] sequenceBlocks;
+
+ try {
+ inverseSA0 = uintPackedInputStream.read();
+ count = new long[PackUtils.ALPHABET_SIZE];
+ uintPackedInputStream.read(count);
+
+ long bwtSize = count[PackUtils.ALPHABET_SIZE-1];
+ sequenceBlocks = new SequenceBlock[PackUtils.numberOfPartitions(bwtSize,BWT.SEQUENCE_BLOCK_SIZE)];
+
+ for( int block = 0; block < sequenceBlocks.length; block++ ) {
+ int sequenceStart = block* BWT.SEQUENCE_BLOCK_SIZE;
+ int sequenceLength = (int)Math.min(BWT.SEQUENCE_BLOCK_SIZE,bwtSize-sequenceStart);
+
+ long[] occurrences = new long[PackUtils.ALPHABET_SIZE];
+ byte[] bwt = new byte[sequenceLength];
+
+ uintPackedInputStream.read(occurrences);
+ basePackedInputStream.read(bwt);
+
+ sequenceBlocks[block] = new SequenceBlock(sequenceStart,sequenceLength,new Counts(occurrences,false),bwt);
+ }
+ }
+ catch( IOException ex ) {
+ throw new ReviewedStingException("Unable to read BWT from input stream.", ex);
+ }
+
+ return new BWT(inverseSA0, new Counts(count,true), sequenceBlocks);
+ }
+
+ /**
+ * Close the input stream.
+ */
+ public void close() {
+ try {
+ inputStream.close();
+ }
+ catch( IOException ex ) {
+ throw new ReviewedStingException("Unable to close input file", ex);
+ }
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTSupplementaryFileGenerator.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTSupplementaryFileGenerator.java
new file mode 100644
index 0000000000..3370f79c89
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTSupplementaryFileGenerator.java
@@ -0,0 +1,60 @@
+package org.broadinstitute.sting.alignment.reference.bwt;
+
+import net.sf.picard.reference.ReferenceSequenceFile;
+import net.sf.picard.reference.ReferenceSequenceFileFactory;
+import net.sf.samtools.SAMSequenceDictionary;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * Generate BWA supplementary files (.ann, .amb) from the command line.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class BWTSupplementaryFileGenerator {
+ enum SupplementaryFileType { ANN, AMB }
+
+ public static void main(String[] args) throws IOException {
+ if(args.length < 3)
+ usage("Incorrect number of arguments supplied");
+
+ File fastaFile = new File(args[0]);
+ File outputFile = new File(args[1]);
+ SupplementaryFileType outputType = null;
+ try {
+ outputType = Enum.valueOf(SupplementaryFileType.class,args[2]);
+ }
+ catch(IllegalArgumentException ex) {
+ usage("Invalid output type: " + args[2]);
+ }
+
+ ReferenceSequenceFile sequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(fastaFile);
+ SAMSequenceDictionary dictionary = sequenceFile.getSequenceDictionary();
+
+ switch(outputType) {
+ case ANN:
+ ANNWriter annWriter = new ANNWriter(outputFile);
+ annWriter.write(dictionary);
+ annWriter.close();
+ break;
+ case AMB:
+ AMBWriter ambWriter = new AMBWriter(outputFile);
+ ambWriter.writeEmpty(dictionary);
+ ambWriter.close();
+ break;
+ default:
+ usage("Unsupported output type: " + outputType);
+ }
+ }
+
+ /**
+ * Print usage information and exit.
+ */
+ private static void usage(String message) {
+ System.err.println(message);
+ System.err.println("Usage: BWTSupplementaryFileGenerator