diff --git a/build.xml b/build.xml index 91eb1f8e9a..fe4c7a3f47 100644 --- a/build.xml +++ b/build.xml @@ -70,8 +70,6 @@ - - @@ -147,11 +145,7 @@ - - - - - @@ -179,13 +173,23 @@ - - + + + + + - + + + + + + + + @@ -215,12 +219,6 @@ - - - - - - @@ -371,7 +369,6 @@ - @@ -388,7 +385,6 @@ - @@ -696,7 +692,6 @@ - @@ -1046,6 +1041,7 @@ + diff --git a/ivy.xml b/ivy.xml index ce724bc3cc..3f3d1c97fc 100644 --- a/ivy.xml +++ b/ivy.xml @@ -48,6 +48,9 @@ + + + diff --git a/public/c/SeparateQltout.cc b/public/c/SeparateQltout.cc new file mode 100644 index 0000000000..7644c96037 --- /dev/null +++ b/public/c/SeparateQltout.cc @@ -0,0 +1,70 @@ +#include "MainTools.h" +#include "Basevector.h" +#include "lookup/LookAlign.h" +#include "lookup/SerialQltout.h" + +unsigned int MatchingEnd(look_align &la, vecbasevector &candidates, vecbasevector &ref) { + //la.PrintParseable(cout); + + for (int i = 0; i < candidates.size(); i++) { + look_align newla = la; + + if (newla.rc1) { candidates[i].ReverseComplement(); } + newla.ResetFromAlign(newla.a, candidates[i], ref[la.target_id]); + + //newla.PrintParseable(cout, &candidates[i], &ref[newla.target_id]); + //cout << newla.Errors() << " " << la.Errors() << endl; + + if (newla.Errors() == la.Errors()) { + return i; + } + } + + //FatalErr("Query id " + ToString(la.query_id) + " had no matches."); + + return candidates.size() + 1; +} + +int main(int argc, char **argv) { + RunTime(); + + BeginCommandArguments; + CommandArgument_String(ALIGNS); + CommandArgument_String(FASTB_END_1); + CommandArgument_String(FASTB_END_2); + CommandArgument_String(REFERENCE); + + CommandArgument_String(ALIGNS_END_1_OUT); + CommandArgument_String(ALIGNS_END_2_OUT); + EndCommandArguments; + + vecbasevector ref(REFERENCE); + vecbasevector reads1(FASTB_END_1); + vecbasevector reads2(FASTB_END_2); + + ofstream aligns1stream(ALIGNS_END_1_OUT.c_str()); + ofstream aligns2stream(ALIGNS_END_2_OUT.c_str()); + + basevector bv; + + SerialQltout sqltout(ALIGNS); + look_align la; + while (sqltout.Next(la)) { + vecbasevector candidates(2); + candidates[0] = reads1[la.query_id]; + candidates[1] = reads2[la.query_id]; + + unsigned int matchingend = MatchingEnd(la, candidates, ref); + if (matchingend < 2) { + bv = (matchingend == 0) ? reads1[la.query_id] : reads2[la.query_id]; + + //la.PrintParseable(cout, &bv, &ref[la.target_id]); + la.PrintParseable(((matchingend == 0) ? aligns1stream : aligns2stream), &bv, &ref[la.target_id]); + } + } + + aligns1stream.close(); + aligns2stream.close(); + + return 0; +} diff --git a/public/c/bwa/Makefile b/public/c/bwa/Makefile new file mode 100644 index 0000000000..6399a0e6d6 --- /dev/null +++ b/public/c/bwa/Makefile @@ -0,0 +1,21 @@ +CXX=g++ +CXXFLAGS=-g -Wall -O2 -m64 -fPIC + +.cpp.o: + $(CXX) -c $(CXXFLAGS) -I$(BWA_HOME) -I$(JAVA_INCLUDE) $< -o $@ + +all: init lib + +init: + @echo Please make sure the following platforms are set correctly on your machine. + @echo BWA_HOME=$(BWA_HOME) + @echo JAVA_INCLUDE=$(JAVA_INCLUDE) + @echo TARGET_LIB=$(TARGET_LIB) + @echo EXTRA_LIBS=$(EXTRA_LIBS) + @echo LIBTOOL_COMMAND=$(LIBTOOL_COMMAND) + +lib: org_broadinstitute_sting_alignment_bwa_c_BWACAligner.o bwa_gateway.o + $(LIBTOOL_COMMAND) $? -o $(TARGET_LIB) -L$(BWA_HOME) -lbwacore $(EXTRA_LIBS) + +clean: + rm *.o libbwa.* diff --git a/public/c/bwa/build_linux.sh b/public/c/bwa/build_linux.sh new file mode 100755 index 0000000000..b3631a28df --- /dev/null +++ b/public/c/bwa/build_linux.sh @@ -0,0 +1,7 @@ +#!/bin/sh +export BWA_HOME="/humgen/gsa-scr1/hanna/src/bwa-trunk/bwa" +export JAVA_INCLUDE="/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include -I/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include/linux" +export TARGET_LIB="libbwa.so" +export EXTRA_LIBS="-lc -lz -lstdc++ -lpthread" +export LIBTOOL_COMMAND="g++ -shared -Wl,-soname,libbwa.so" +make diff --git a/public/c/bwa/build_mac.sh b/public/c/bwa/build_mac.sh new file mode 100644 index 0000000000..bfed900bba --- /dev/null +++ b/public/c/bwa/build_mac.sh @@ -0,0 +1,7 @@ +#!/bin/sh +export BWA_HOME="/Users/mhanna/src/bwa" +export JAVA_INCLUDE="/System/Library/Frameworks/JavaVM.framework/Headers" +export TARGET_LIB="libbwa.dylib" +export EXTRA_LIBS="-lc -lz -lsupc++" +export LIBTOOL_COMMAND="libtool -dynamic" +make diff --git a/public/c/bwa/bwa_gateway.cpp b/public/c/bwa/bwa_gateway.cpp new file mode 100644 index 0000000000..00f5aa5bcd --- /dev/null +++ b/public/c/bwa/bwa_gateway.cpp @@ -0,0 +1,277 @@ +#include +#include +#include + +#include "bwase.h" +#include "bwa_gateway.h" + +BWA::BWA(const char* ann_filename, + const char* amb_filename, + const char* pac_filename, + const char* forward_bwt_filename, + const char* forward_sa_filename, + const char* reverse_bwt_filename, + const char* reverse_sa_filename) +{ + // Load the bns (?) and reference + bns = bns_restore_core(ann_filename,amb_filename,pac_filename); + reference = new ubyte_t[bns->l_pac/4+1]; + rewind(bns->fp_pac); + fread(reference, 1, bns->l_pac/4+1, bns->fp_pac); + fclose(bns->fp_pac); + bns->fp_pac = NULL; + + // Load the BWTs (both directions) and suffix arrays (both directions) + bwts[0] = bwt_restore_bwt(forward_bwt_filename); + bwt_restore_sa(forward_sa_filename, bwts[0]); + bwts[1] = bwt_restore_bwt(reverse_bwt_filename); + bwt_restore_sa(reverse_sa_filename, bwts[1]); + load_default_options(); + + // Always reinitialize the random seed whenever a new set of files are loaded. + initialize_random_seed(); + + // initialize the bwase subsystem + bwase_initialize(); +} + +BWA::~BWA() { + delete[] reference; + bns_destroy(bns); + bwt_destroy(bwts[0]); + bwt_destroy(bwts[1]); +} + +void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& paths, unsigned& num_paths, unsigned& best_path_count, unsigned& second_best_path_count) +{ + bwa_seq_t* sequence = create_sequence(bases, read_length); + + // Calculate the suffix array interval for each sequence, storing the result in sequence->aln (and sequence->n_aln). + // This method will destroy the contents of seq and rseq. + bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options); + + paths = new bwt_aln1_t[sequence->n_aln]; + memcpy(paths,sequence->aln,sequence->n_aln*sizeof(bwt_aln1_t)); + num_paths = sequence->n_aln; + + // Call aln2seq to initialize the type of match present. + bwa_aln2seq(sequence->n_aln,sequence->aln,sequence); + best_path_count = sequence->c1; + second_best_path_count = sequence->c2; + + bwa_free_read_seq(1,sequence); +} + +Alignment* BWA::generate_single_alignment(const char* bases, const unsigned read_length) { + bwa_seq_t* sequence = create_sequence(bases,read_length); + + // Calculate paths. + bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options); + + // Check for no alignments found and return null. + if(sequence->n_aln == 0) { + bwa_free_read_seq(1,sequence); + return NULL; + } + + // bwa_cal_sa_reg_gap destroys the bases / read length. Copy them back in. + copy_bases_into_sequence(sequence,bases,read_length); + + // Pick best alignment and propagate its information into the sequence. + bwa_aln2seq(sequence->n_aln,sequence->aln,sequence); + + // Generate the best alignment from the sequence. + Alignment* alignment = new Alignment; + *alignment = generate_final_alignment_from_sequence(sequence); + + bwa_free_read_seq(1,sequence); + + return alignment; +} + +void BWA::generate_alignments_from_paths(const char* bases, + const unsigned read_length, + bwt_aln1_t* paths, + const unsigned num_paths, + const unsigned best_count, + const unsigned second_best_count, + Alignment*& alignments, + unsigned& num_alignments) +{ + bwa_seq_t* sequence = create_sequence(bases,read_length); + + sequence->aln = paths; + sequence->n_aln = num_paths; + + // (Ab)use bwa_aln2seq to propagate values stored in the path out into the sequence itself. + bwa_aln2seq(sequence->n_aln,sequence->aln,sequence); + + // But overwrite key parts of the sequence in case the user passed back only a smaller subset + // of the paths. + sequence->c1 = best_count; + sequence->c2 = second_best_count; + sequence->type = sequence->c1 > 1 ? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE; + + num_alignments = 0; + for(unsigned i = 0; i < (unsigned)sequence->n_aln; i++) + num_alignments += (sequence->aln + i)->l - (sequence->aln + i)->k + 1; + + alignments = new Alignment[num_alignments]; + unsigned alignment_idx = 0; + + for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) { + // Stub in a 'working' path, so that only the desired alignment is local-aligned. + const bwt_aln1_t* path = paths + path_idx; + bwt_aln1_t working_path = *path; + + // Loop through all alignments, aligning each one individually. + for(unsigned sa_idx = path->k; sa_idx <= path->l; sa_idx++) { + working_path.k = working_path.l = sa_idx; + sequence->aln = &working_path; + sequence->n_aln = 1; + + sequence->sa = sa_idx; + sequence->strand = path->a; + sequence->score = path->score; + + // Each time through bwa_refine_gapped, seq gets reversed. Revert the reverse. + // TODO: Fix the interface to bwa_refine_gapped so its easier to work with. + if(alignment_idx > 0) + seq_reverse(sequence->len, sequence->seq, 0); + + // Copy the local alignment data into the alignment object. + *(alignments + alignment_idx) = generate_final_alignment_from_sequence(sequence); + + alignment_idx++; + } + } + + sequence->aln = NULL; + sequence->n_aln = 0; + + bwa_free_read_seq(1,sequence); +} + +Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) { + // Calculate the local coordinate and local alignment. + bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr); + bwa_refine_gapped(bns, 1, sequence, reference, NULL); + + // Copy the local alignment data into the alignment object. + Alignment alignment; + + // Populate basic path info + alignment.edit_distance = sequence->nm; + alignment.num_mismatches = sequence->n_mm; + alignment.num_gap_opens = sequence->n_gapo; + alignment.num_gap_extensions = sequence->n_gape; + alignment.num_best = sequence->c1; + alignment.num_second_best = sequence->c2; + + // Final alignment position. + alignment.type = sequence->type; + bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig); + alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1; + alignment.negative_strand = sequence->strand; + alignment.mapping_quality = sequence->mapQ; + + // Cigar step. + alignment.cigar = NULL; + if(sequence->cigar) { + alignment.cigar = new uint16_t[sequence->n_cigar]; + memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t)); + } + alignment.n_cigar = sequence->n_cigar; + + // MD tag with a better breakdown of differences in the cigar + alignment.md = strdup(sequence->md); + delete[] sequence->md; + sequence->md = NULL; + + return alignment; +} + +void BWA::load_default_options() +{ + options.s_mm = 3; + options.s_gapo = 11; + options.s_gape = 4; + options.mode = 3; + options.indel_end_skip = 5; + options.max_del_occ = 10; + options.max_entries = 2000000; + options.fnr = 0.04; + options.max_diff = -1; + options.max_gapo = 1; + options.max_gape = 6; + options.max_seed_diff = 2; + options.seed_len = 2147483647; + options.n_threads = 1; + options.max_top2 = 30; + options.trim_qual = 0; +} + +void BWA::initialize_random_seed() +{ + srand48(bns->seed); +} + +void BWA::set_max_edit_distance(float edit_distance) { + if(edit_distance > 0 && edit_distance < 1) { + options.fnr = edit_distance; + options.max_diff = -1; + } + else { + options.fnr = -1.0; + options.max_diff = (int)edit_distance; + } +} + +void BWA::set_max_gap_opens(int max_gap_opens) { options.max_gapo = max_gap_opens; } +void BWA::set_max_gap_extensions(int max_gap_extensions) { options.max_gape = max_gap_extensions; } +void BWA::set_disallow_indel_within_range(int indel_range) { options.indel_end_skip = indel_range; } +void BWA::set_mismatch_penalty(int penalty) { options.s_mm = penalty; } +void BWA::set_gap_open_penalty(int penalty) { options.s_gapo = penalty; } +void BWA::set_gap_extension_penalty(int penalty) { options.s_gape = penalty; } + +/** + * Create a sequence with a set of reasonable initial defaults. + * Will leave seq and rseq empty. + */ +bwa_seq_t* BWA::create_sequence(const char* bases, const unsigned read_length) +{ + bwa_seq_t* sequence = new bwa_seq_t; + + sequence->tid = -1; + + sequence->name = 0; + + copy_bases_into_sequence(sequence, bases, read_length); + + sequence->qual = 0; + sequence->aln = 0; + sequence->md = 0; + + sequence->cigar = NULL; + sequence->n_cigar = 0; + + sequence->multi = NULL; + sequence->n_multi = 0; + + return sequence; +} + +void BWA::copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length) +{ + // seq, rseq will ultimately be freed by bwa_cal_sa_reg_gap + sequence->seq = new ubyte_t[read_length]; + sequence->rseq = new ubyte_t[read_length]; + for(unsigned i = 0; i < read_length; i++) sequence->seq[i] = nst_nt4_table[(unsigned)bases[i]]; + memcpy(sequence->rseq,sequence->seq,read_length); + + // BWA expects the read bases to arrive reversed. + seq_reverse(read_length,sequence->seq,0); + seq_reverse(read_length,sequence->rseq,1); + + sequence->full_len = sequence->len = read_length; +} diff --git a/public/c/bwa/bwa_gateway.h b/public/c/bwa/bwa_gateway.h new file mode 100644 index 0000000000..2d26ec6509 --- /dev/null +++ b/public/c/bwa/bwa_gateway.h @@ -0,0 +1,83 @@ +#ifndef BWA_GATEWAY +#define BWA_GATEWAY + +#include + +#include "bntseq.h" +#include "bwt.h" +#include "bwtaln.h" + +class Alignment { + public: + uint32_t type; + int contig; + bwtint_t pos; + bool negative_strand; + uint32_t mapping_quality; + + uint16_t *cigar; + int n_cigar; + + uint8_t num_mismatches; + uint8_t num_gap_opens; + uint8_t num_gap_extensions; + uint16_t edit_distance; + + uint32_t num_best; + uint32_t num_second_best; + + char* md; +}; + +class BWA { + private: + bntseq_t *bns; + ubyte_t* reference; + bwt_t* bwts[2]; + gap_opt_t options; + + void load_default_options(); + void initialize_random_seed(); + bwa_seq_t* create_sequence(const char* bases, const unsigned read_length); + void copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length); + Alignment generate_final_alignment_from_sequence(bwa_seq_t* sequence); + + public: + BWA(const char* ann_filename, + const char* amb_filename, + const char* pac_filename, + const char* forward_bwt_filename, + const char* forward_sa_filename, + const char* reverse_bwt_filename, + const char* reverse_sa_filename); + ~BWA(); + + // Parameterize the aligner. + void set_max_edit_distance(float edit_distance); + void set_max_gap_opens(int max_gap_opens); + void set_max_gap_extensions(int max_gap_extensions); + void set_disallow_indel_within_range(int indel_range); + void set_mismatch_penalty(int penalty); + void set_gap_open_penalty(int penalty); + void set_gap_extension_penalty(int penalty); + + // Perform the alignment + Alignment* generate_single_alignment(const char* bases, + const unsigned read_length); + void find_paths(const char* bases, + const unsigned read_length, + bwt_aln1_t*& paths, + unsigned& num_paths, + unsigned& best_path_count, + unsigned& second_best_path_count); + void generate_alignments_from_paths(const char* bases, + const unsigned read_length, + bwt_aln1_t* paths, + const unsigned num_paths, + const unsigned best_count, + const unsigned second_best_count, + Alignment*& alignments, + unsigned& num_alignments); +}; + +#endif // BWA_GATEWAY diff --git a/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp new file mode 100644 index 0000000000..1ccbef0d41 --- /dev/null +++ b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp @@ -0,0 +1,437 @@ +#include +#include +#include + +#include "bntseq.h" +#include "bwt.h" +#include "bwtaln.h" +#include "bwa_gateway.h" +#include "org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h" + +typedef void (BWA::*int_setter)(int value); +typedef void (BWA::*float_setter)(float value); + +static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment); +static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name); +static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter); +static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter); +static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message); + +JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create(JNIEnv* env, jobject instance, jobject bwtFiles, jobject configuration) +{ + jstring java_ann = get_configuration_file(env,bwtFiles,"annFile"); + if(java_ann == NULL) return 0L; + jstring java_amb = get_configuration_file(env,bwtFiles,"ambFile"); + if(java_amb == NULL) return 0L; + jstring java_pac = get_configuration_file(env,bwtFiles,"pacFile"); + if(java_pac == NULL) return 0L; + jstring java_forward_bwt = get_configuration_file(env,bwtFiles,"forwardBWTFile"); + if(java_forward_bwt == NULL) return 0L; + jstring java_forward_sa = get_configuration_file(env,bwtFiles,"forwardSAFile"); + if(java_forward_sa == NULL) return 0L; + jstring java_reverse_bwt = get_configuration_file(env,bwtFiles,"reverseBWTFile"); + if(java_reverse_bwt == NULL) return 0L; + jstring java_reverse_sa = get_configuration_file(env,bwtFiles,"reverseSAFile"); + if(java_reverse_sa == NULL) return 0L; + + const char* ann_filename = env->GetStringUTFChars(java_ann,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* amb_filename = env->GetStringUTFChars(java_amb,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* pac_filename = env->GetStringUTFChars(java_pac,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* forward_bwt_filename = env->GetStringUTFChars(java_forward_bwt,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* forward_sa_filename = env->GetStringUTFChars(java_forward_sa,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* reverse_bwt_filename = env->GetStringUTFChars(java_reverse_bwt,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + const char* reverse_sa_filename = env->GetStringUTFChars(java_reverse_sa,JNI_FALSE); + if(env->ExceptionCheck()) return 0L; + + BWA* bwa = new BWA(ann_filename, + amb_filename, + pac_filename, + forward_bwt_filename, + forward_sa_filename, + reverse_bwt_filename, + reverse_sa_filename); + + Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(env,instance,(jlong)bwa,configuration); + if(env->ExceptionCheck()) return 0L; + + env->ReleaseStringUTFChars(java_ann,ann_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_amb,amb_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_pac,pac_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_forward_bwt,forward_bwt_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_forward_sa,forward_sa_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_reverse_bwt,reverse_bwt_filename); + if(env->ExceptionCheck()) return 0L; + env->ReleaseStringUTFChars(java_reverse_sa,reverse_sa_filename); + if(env->ExceptionCheck()) return 0L; + + return (jlong)bwa; +} + +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_destroy(JNIEnv* env, jobject instance, jlong java_bwa) +{ + BWA* bwa = (BWA*)java_bwa; + delete bwa; +} + +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(JNIEnv *env, jobject instance, jlong java_bwa, jobject configuration) { + BWA* bwa = (BWA*)java_bwa; + set_float_configuration_param(env, configuration, "maximumEditDistance", bwa, &BWA::set_max_edit_distance); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "maximumGapOpens", bwa, &BWA::set_max_gap_opens); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "maximumGapExtensions", bwa, &BWA::set_max_gap_extensions); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "disallowIndelWithinRange", bwa, &BWA::set_disallow_indel_within_range); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "mismatchPenalty", bwa, &BWA::set_mismatch_penalty); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "gapOpenPenalty", bwa, &BWA::set_gap_open_penalty); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty); + if(env->ExceptionCheck()) return; +} + +JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) +{ + BWA* bwa = (BWA*)java_bwa; + + const jsize read_length = env->GetArrayLength(java_bases); + if(env->ExceptionCheck()) return NULL; + + jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE); + if(read_bases == NULL) return NULL; + + bwt_aln1_t* paths = NULL; + unsigned num_paths = 0; + + unsigned best_path_count, second_best_path_count; + bwa->find_paths((const char*)read_bases,read_length,paths,num_paths,best_path_count,second_best_path_count); + + jobjectArray java_paths = env->NewObjectArray(num_paths, env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath"), NULL); + if(java_paths == NULL) return NULL; + + for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) { + bwt_aln1_t& path = *(paths + path_idx); + + jclass java_path_class = env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath"); + if(java_path_class == NULL) return NULL; + + jmethodID java_path_constructor = env->GetMethodID(java_path_class, "", "(IIIZJJIII)V"); + if(java_path_constructor == NULL) return NULL; + + // Note that k/l are being cast to long. Bad things will happen if JNI assumes that they're ints. + jobject java_path = env->NewObject(java_path_class, + java_path_constructor, + path.n_mm, + path.n_gapo, + path.n_gape, + path.a, + (jlong)path.k, + (jlong)path.l, + path.score, + best_path_count, + second_best_path_count); + if(java_path == NULL) return NULL; + + env->SetObjectArrayElement(java_paths,path_idx,java_path); + if(env->ExceptionCheck()) return NULL; + + env->DeleteLocalRef(java_path_class); + if(env->ExceptionCheck()) return NULL; + } + + delete[] paths; + + env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE); + + return env->ExceptionCheck() ? NULL : java_paths; +} + +JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases, jobjectArray java_paths) +{ + BWA* bwa = (BWA*)java_bwa; + + const jsize read_length = env->GetArrayLength(java_bases); + if(env->ExceptionCheck()) return NULL; + + jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE); + if(read_bases == NULL) return NULL; + + const jsize num_paths = env->GetArrayLength(java_paths); + bwt_aln1_t* paths = new bwt_aln1_t[num_paths]; + unsigned best_count = 0, second_best_count = 0; + + for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) { + jobject java_path = env->GetObjectArrayElement(java_paths,path_idx); + jclass java_path_class = env->GetObjectClass(java_path); + if(java_path_class == NULL) return NULL; + + bwt_aln1_t& path = *(paths + path_idx); + + jfieldID mismatches_field = env->GetFieldID(java_path_class, "numMismatches", "I"); + if(mismatches_field == NULL) return NULL; + path.n_mm = env->GetIntField(java_path,mismatches_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID gap_opens_field = env->GetFieldID(java_path_class, "numGapOpens", "I"); + if(gap_opens_field == NULL) return NULL; + path.n_gapo = env->GetIntField(java_path,gap_opens_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID gap_extensions_field = env->GetFieldID(java_path_class, "numGapExtensions", "I"); + if(gap_extensions_field == NULL) return NULL; + path.n_gape = env->GetIntField(java_path,gap_extensions_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID negative_strand_field = env->GetFieldID(java_path_class, "negativeStrand", "Z"); + if(negative_strand_field == NULL) return NULL; + path.a = env->GetBooleanField(java_path,negative_strand_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID k_field = env->GetFieldID(java_path_class, "k", "J"); + if(k_field == NULL) return NULL; + path.k = env->GetLongField(java_path,k_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID l_field = env->GetFieldID(java_path_class, "l", "J"); + if(l_field == NULL) return NULL; + path.l = env->GetLongField(java_path,l_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID score_field = env->GetFieldID(java_path_class, "score", "I"); + if(score_field == NULL) return NULL; + path.score = env->GetIntField(java_path,score_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID best_count_field = env->GetFieldID(java_path_class, "bestCount", "I"); + if(best_count_field == NULL) return NULL; + best_count = env->GetIntField(java_path,best_count_field); + if(env->ExceptionCheck()) return NULL; + + jfieldID second_best_count_field = env->GetFieldID(java_path_class, "secondBestCount", "I"); + if(second_best_count_field == NULL) return NULL; + second_best_count = env->GetIntField(java_path,second_best_count_field); + if(env->ExceptionCheck()) return NULL; + } + + Alignment* alignments = NULL; + unsigned num_alignments = 0; + bwa->generate_alignments_from_paths((const char*)read_bases,read_length,paths,num_paths,best_count,second_best_count,alignments,num_alignments); + + jobjectArray java_alignments = env->NewObjectArray(num_alignments, env->FindClass("org/broadinstitute/sting/alignment/Alignment"), NULL); + if(java_alignments == NULL) return NULL; + + for(unsigned alignment_idx = 0; alignment_idx < (unsigned)num_alignments; alignment_idx++) { + Alignment& alignment = *(alignments + alignment_idx); + jobject java_alignment = convert_to_java_alignment(env,read_bases,read_length,alignment); + if(java_alignment == NULL) return NULL; + env->SetObjectArrayElement(java_alignments,alignment_idx,java_alignment); + if(env->ExceptionCheck()) return NULL; + } + + delete[] alignments; + delete[] paths; + + env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE); + + return env->ExceptionCheck() ? NULL : java_alignments; +} + +JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) { + BWA* bwa = (BWA*)java_bwa; + + const jsize read_length = env->GetArrayLength(java_bases); + if(env->ExceptionCheck()) return NULL; + + jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE); + if(read_bases == NULL) return NULL; + + Alignment* best_alignment = bwa->generate_single_alignment((const char*)read_bases,read_length); + jobject java_best_alignment = (best_alignment != NULL) ? convert_to_java_alignment(env,read_bases,read_length,*best_alignment) : NULL; + delete best_alignment; + + env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE); + + return java_best_alignment; +} + +static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment) { + unsigned cigar_length; + if(alignment.type == BWA_TYPE_NO_MATCH) cigar_length = 0; + else if(!alignment.cigar) cigar_length = 1; + else cigar_length = alignment.n_cigar; + + jcharArray java_cigar_operators = env->NewCharArray(cigar_length); + if(java_cigar_operators == NULL) return NULL; + jintArray java_cigar_lengths = env->NewIntArray(cigar_length); + if(java_cigar_lengths == NULL) return NULL; + + if(alignment.cigar) { + for(unsigned cigar_idx = 0; cigar_idx < (unsigned)alignment.n_cigar; ++cigar_idx) { + jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14]; + jint cigar_length = alignment.cigar[cigar_idx]&0x3fff; + + env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator); + if(env->ExceptionCheck()) return NULL; + env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length); + if(env->ExceptionCheck()) return NULL; + } + } + else { + if(alignment.type != BWA_TYPE_NO_MATCH) { + jchar cigar_operator = 'M'; + env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator); + if(env->ExceptionCheck()) return NULL; + env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length); + if(env->ExceptionCheck()) return NULL; + } + } + delete[] alignment.cigar; + + jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment"); + if(java_alignment_class == NULL) return NULL; + + jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "", "(IIZI[C[IILjava/lang/String;IIIII)V"); + if(java_alignment_constructor == NULL) return NULL; + + jstring java_md = env->NewStringUTF(alignment.md); + if(java_md == NULL) return NULL; + delete[] alignment.md; + + jobject java_alignment = env->NewObject(java_alignment_class, + java_alignment_constructor, + alignment.contig, + alignment.pos, + alignment.negative_strand, + alignment.mapping_quality, + java_cigar_operators, + java_cigar_lengths, + alignment.edit_distance, + java_md, + alignment.num_mismatches, + alignment.num_gap_opens, + alignment.num_gap_extensions, + alignment.num_best, + alignment.num_second_best); + if(java_alignment == NULL) return NULL; + + env->DeleteLocalRef(java_alignment_class); + if(env->ExceptionCheck()) return NULL; + + return java_alignment; +} + +static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name) { + jclass configuration_class = env->GetObjectClass(configuration); + if(configuration_class == NULL) return NULL; + + jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/io/File;"); + if(configuration_field == NULL) return NULL; + + jobject configuration_file = (jobject)env->GetObjectField(configuration,configuration_field); + + jclass file_class = env->FindClass("java/io/File"); + if(file_class == NULL) return NULL; + + jmethodID path_extractor = env->GetMethodID(file_class,"getAbsolutePath", "()Ljava/lang/String;"); + if(path_extractor == NULL) return NULL; + + jstring path = (jstring)env->CallObjectMethod(configuration_file,path_extractor); + if(path == NULL) return NULL; + + env->DeleteLocalRef(configuration_class); + env->DeleteLocalRef(file_class); + env->DeleteLocalRef(configuration_file); + + return path; +} + +static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter) { + jclass configuration_class = env->GetObjectClass(configuration); + if(configuration_class == NULL) return; + + jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Integer;"); + if(configuration_field == NULL) return; + + jobject boxed_value = env->GetObjectField(configuration,configuration_field); + if(env->ExceptionCheck()) return; + + if(boxed_value != NULL) { + jclass int_box_class = env->FindClass("java/lang/Integer"); + if(int_box_class == NULL) return; + + jmethodID int_extractor = env->GetMethodID(int_box_class,"intValue", "()I"); + if(int_extractor == NULL) return; + + jint value = env->CallIntMethod(boxed_value,int_extractor); + if(env->ExceptionCheck()) return; + + if(value < 0) + { + throw_config_value_exception(env,field_name,"cannot be set to a negative value"); + return; + } + + (bwa->*setter)(value); + + env->DeleteLocalRef(int_box_class); + } + + env->DeleteLocalRef(boxed_value); + env->DeleteLocalRef(configuration_class); +} + +static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter) +{ + jclass configuration_class = env->GetObjectClass(configuration); + if(configuration_class == NULL) return; + + jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Float;"); + if(configuration_field == NULL) return; + + jobject boxed_value = env->GetObjectField(configuration,configuration_field); + if(boxed_value != NULL) { + jclass float_box_class = env->FindClass("java/lang/Float"); + if(float_box_class == NULL) return; + + jmethodID float_extractor = env->GetMethodID(float_box_class,"floatValue", "()F"); + if(float_extractor == NULL) return; + + jfloat value = env->CallFloatMethod(boxed_value,float_extractor); + if(env->ExceptionCheck()) return; + + if(value < 0) + { + throw_config_value_exception(env,field_name,"cannot be set to a negative value"); + return; + } + + (bwa->*setter)(value); + + env->DeleteLocalRef(float_box_class); + } + + env->DeleteLocalRef(boxed_value); + env->DeleteLocalRef(configuration_class); +} + +static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message) +{ + char* buffer = new char[strlen(field_name)+1+strlen(message)+1]; + sprintf(buffer,"%s %s",field_name,message); + jclass sting_exception_class = env->FindClass("org/broadinstitute/sting/utils/StingException"); + if(sting_exception_class == NULL) return; + env->ThrowNew(sting_exception_class, buffer); + delete[] buffer; +} diff --git a/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h new file mode 100644 index 0000000000..0c44e430ac --- /dev/null +++ b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h @@ -0,0 +1,61 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_broadinstitute_sting_alignment_bwa_c_BWACAligner */ + +#ifndef _Included_org_broadinstitute_sting_alignment_bwa_c_BWACAligner +#define _Included_org_broadinstitute_sting_alignment_bwa_c_BWACAligner +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: create + * Signature: (Lorg/broadinstitute/sting/alignment/bwa/BWTFiles;Lorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)J + */ +JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create + (JNIEnv *, jobject, jobject, jobject); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: updateConfiguration + * Signature: (JLorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration + (JNIEnv *, jobject, jlong, jobject); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: destroy + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_destroy + (JNIEnv *, jobject, jlong); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: getPaths + * Signature: (J[B)[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath; + */ +JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths + (JNIEnv *, jobject, jlong, jbyteArray); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: convertPathsToAlignments + * Signature: (J[B[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath;)[Lorg/broadinstitute/sting/alignment/Alignment; + */ +JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments + (JNIEnv *, jobject, jlong, jbyteArray, jobjectArray); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: getBestAlignment + * Signature: (J[B)Lorg/broadinstitute/sting/alignment/Alignment; + */ +JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment + (JNIEnv *, jobject, jlong, jbyteArray); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/public/c/libenvironhack/Makefile b/public/c/libenvironhack/Makefile new file mode 100644 index 0000000000..302ff8e31e --- /dev/null +++ b/public/c/libenvironhack/Makefile @@ -0,0 +1,10 @@ +CC=gcc +CCFLAGS=-Wall -dynamiclib -arch i386 -arch x86_64 + +libenvironhack.dylib: libenvironhack.c + $(CC) $(CCFLAGS) -init _init_environ $< -o $@ + +all: libenvironhack.dylib + +clean: + rm -f libenvironhack.dylib diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java b/public/c/libenvironhack/libenvironhack.c old mode 100755 new mode 100644 similarity index 52% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java rename to public/c/libenvironhack/libenvironhack.c index 1d4ae0aa8b..8b2a2640e1 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java +++ b/public/c/libenvironhack/libenvironhack.c @@ -22,25 +22,16 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.walkers.variantutils; +/* +LSF 7.0.6 on the mac is missing the unsatisfied exported symbol for environ which was removed on MacOS X 10.5+. +nm $LSF_LIBDIR/liblsf.dylib | grep environ +See "man environ" for more info, along with http://lists.apple.com/archives/java-dev/2007/Dec/msg00096.html +*/ -import org.broadinstitute.sting.WalkerTest; -import org.testng.annotations.Test; +#include -import java.io.File; -import java.util.Arrays; +char **environ = (char **)0; -public class BatchMergeIntegrationTest extends WalkerTest { - @Test - public void testBatchMerge1() { - String bam = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam"; - String alleles = validationDataLocation + "batch.merge.alleles.vcf"; - WalkerTestSpec spec = new WalkerTestSpec( - "-T UnifiedGenotyper -NO_HEADER -BTI alleles -stand_call_conf 0.0 -glm BOTH -G none -nsl -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -o %s -R " + b37KGReference - + " -B:alleles,VCF " + alleles - + " -I " + bam, - 1, - Arrays.asList("8bd114ceaf6a94e96a677fcc52350713")); - executeTest("testBatchMerge UG genotype given alleles:" + new File(bam).getName() + " with " + new File(alleles).getName(), spec); - } +void init_environ(void) { + environ = (*_NSGetEnviron()); } diff --git a/public/c/libenvironhack/libenvironhack.dylib b/public/c/libenvironhack/libenvironhack.dylib new file mode 100755 index 0000000000..a45e038b49 Binary files /dev/null and b/public/c/libenvironhack/libenvironhack.dylib differ diff --git a/public/java/src/org/broadinstitute/sting/alignment/Aligner.java b/public/java/src/org/broadinstitute/sting/alignment/Aligner.java new file mode 100644 index 0000000000..4bf05cb75e --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/Aligner.java @@ -0,0 +1,49 @@ +package org.broadinstitute.sting.alignment; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; + +/** + * Create perfect alignments from the read to the genome represented by the given BWT / suffix array. + * + * @author mhanna + * @version 0.1 + */ +public interface Aligner { + /** + * Close this instance of the BWA pointer and delete its resources. + */ + public void close(); + + /** + * Allow the aligner to choose one alignment randomly from the pile of best alignments. + * @param bases Bases to align. + * @return An align + */ + public Alignment getBestAlignment(final byte[] bases); + + /** + * Align the read to the reference. + * @param read Read to align. + * @param header Optional header to drop in place. + * @return A list of the alignments. + */ + public SAMRecord align(final SAMRecord read, final SAMFileHeader header); + + /** + * Get a iterator of alignments, batched by mapping quality. + * @param bases List of bases. + * @return Iterator to alignments. + */ + public Iterable getAllAlignments(final byte[] bases); + + /** + * Get a iterator of aligned reads, batched by mapping quality. + * @param read Read to align. + * @param newHeader Optional new header to use when aligning the read. If present, it must be null. + * @return Iterator to alignments. + */ + public Iterable alignAll(final SAMRecord read, final SAMFileHeader newHeader); +} + + diff --git a/public/java/src/org/broadinstitute/sting/alignment/Alignment.java b/public/java/src/org/broadinstitute/sting/alignment/Alignment.java new file mode 100644 index 0000000000..c63f5615fe --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/Alignment.java @@ -0,0 +1,221 @@ +package org.broadinstitute.sting.alignment; + +import net.sf.samtools.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +/** + * Represents an alignment of a read to a site in the reference genome. + * + * @author mhanna + * @version 0.1 + */ +public class Alignment { + protected int contigIndex; + protected long alignmentStart; + protected boolean negativeStrand; + protected int mappingQuality; + + protected char[] cigarOperators; + protected int[] cigarLengths; + + protected int editDistance; + protected String mismatchingPositions; + + protected int numMismatches; + protected int numGapOpens; + protected int numGapExtensions; + protected int bestCount; + protected int secondBestCount; + + /** + * Gets the index of the given contig. + * @return the inde + */ + public int getContigIndex() { return contigIndex; } + + /** + * Gets the starting position for the given alignment. + * @return Starting position. + */ + public long getAlignmentStart() { return alignmentStart; } + + /** + * Is the given alignment on the reverse strand? + * @return True if the alignment is on the reverse strand. + */ + public boolean isNegativeStrand() { return negativeStrand; } + + /** + * Gets the score of this alignment. + * @return The score. + */ + public int getMappingQuality() { return mappingQuality; } + + /** + * Gets the edit distance; will eventually end up in the NM SAM tag + * if this alignment makes it that far. + * @return The edit distance. + */ + public int getEditDistance() { return editDistance; } + + /** + * A string representation of which positions mismatch; contents of MD tag. + * @return String representation of mismatching positions. + */ + public String getMismatchingPositions() { return mismatchingPositions; } + + /** + * Gets the number of mismatches in the read. + * @return Number of mismatches. + */ + public int getNumMismatches() { return numMismatches; } + + /** + * Get the number of gap opens. + * @return Number of gap opens. + */ + public int getNumGapOpens() { return numGapOpens; } + + /** + * Get the number of gap extensions. + * @return Number of gap extensions. + */ + public int getNumGapExtensions() { return numGapExtensions; } + + /** + * Get the number of best alignments. + * @return Number of top scoring alignments. + */ + public int getBestCount() { return bestCount; } + + /** + * Get the number of second best alignments. + * @return Number of second best scoring alignments. + */ + public int getSecondBestCount() { return secondBestCount; } + + /** + * Gets the cigar for this alignment. + * @return sam-jdk formatted alignment. + */ + public Cigar getCigar() { + Cigar cigar = new Cigar(); + for(int i = 0; i < cigarOperators.length; i++) { + CigarOperator operator = CigarOperator.characterToEnum(cigarOperators[i]); + cigar.add(new CigarElement(cigarLengths[i],operator)); + } + return cigar; + } + + /** + * Temporarily implement getCigarString() for debugging; the TextCigarCodec is unfortunately + * package-protected. + * @return + */ + public String getCigarString() { + Cigar cigar = getCigar(); + if(cigar.isEmpty()) return "*"; + + StringBuilder cigarString = new StringBuilder(); + for(CigarElement element: cigar.getCigarElements()) { + cigarString.append(element.getLength()); + cigarString.append(element.getOperator()); + } + return cigarString.toString(); + } + + /** + * Stub for inheritance. + */ + public Alignment() {} + + /** + * Create a new alignment object. + * @param contigIndex The contig to which this read aligned. + * @param alignmentStart The point within the contig to which this read aligned. + * @param negativeStrand Forward or reverse alignment of the given read. + * @param mappingQuality How good does BWA think this mapping is? + * @param cigarOperators The ordered operators in the cigar string. + * @param cigarLengths The lengths to which each operator applies. + * @param editDistance The edit distance (cumulative) of the read. + * @param mismatchingPositions String representation of which bases in the read mismatch. + * @param numMismatches Number of total mismatches in the read. + * @param numGapOpens Number of gap opens in the read. + * @param numGapExtensions Number of gap extensions in the read. + * @param bestCount Number of best alignments in the read. + * @param secondBestCount Number of second best alignments in the read. + */ + public Alignment(int contigIndex, + int alignmentStart, + boolean negativeStrand, + int mappingQuality, + char[] cigarOperators, + int[] cigarLengths, + int editDistance, + String mismatchingPositions, + int numMismatches, + int numGapOpens, + int numGapExtensions, + int bestCount, + int secondBestCount) { + this.contigIndex = contigIndex; + this.alignmentStart = alignmentStart; + this.negativeStrand = negativeStrand; + this.mappingQuality = mappingQuality; + this.cigarOperators = cigarOperators; + this.cigarLengths = cigarLengths; + this.editDistance = editDistance; + this.mismatchingPositions = mismatchingPositions; + this.numMismatches = numMismatches; + this.numGapOpens = numGapOpens; + this.numGapExtensions = numGapExtensions; + this.bestCount = bestCount; + this.secondBestCount = secondBestCount; + } + + /** + * Creates a read directly from an alignment. + * @param alignment The alignment to convert to a read. + * @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags. + * @param newSAMHeader The new SAM header to use in creating this read. Can be null, but if so, the sequence + * dictionary in the + * @return A mapped alignment. + */ + public static SAMRecord convertToRead(Alignment alignment, SAMRecord unmappedRead, SAMFileHeader newSAMHeader) { + SAMRecord read; + try { + read = (SAMRecord)unmappedRead.clone(); + } + catch(CloneNotSupportedException ex) { + throw new ReviewedStingException("Unable to create aligned read from template."); + } + + if(newSAMHeader != null) + read.setHeader(newSAMHeader); + + // If we're realigning a previously aligned record, strip out the placement of the alignment. + read.setReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME); + read.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START); + read.setMateReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME); + read.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START); + + if(alignment != null) { + read.setReadUnmappedFlag(false); + read.setReferenceIndex(alignment.getContigIndex()); + read.setAlignmentStart((int)alignment.getAlignmentStart()); + read.setReadNegativeStrandFlag(alignment.isNegativeStrand()); + read.setMappingQuality(alignment.getMappingQuality()); + read.setCigar(alignment.getCigar()); + if(alignment.isNegativeStrand()) { + read.setReadBases(BaseUtils.simpleReverseComplement(read.getReadBases())); + read.setBaseQualities(Utils.reverse(read.getBaseQualities())); + } + read.setAttribute("NM",alignment.getEditDistance()); + read.setAttribute("MD",alignment.getMismatchingPositions()); + } + + return read; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java new file mode 100644 index 0000000000..c6755e8789 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.Iterator; + +/** + * Validates consistency of the aligner interface by taking reads already aligned by BWA in a BAM file, stripping them + * of their alignment data, realigning them, and making sure one of the best resulting realignments matches the original + * alignment from the input file. + * + * @author mhanna + * @version 0.1 + */ +public class AlignmentValidationWalker extends ReadWalker { + /** + * The supporting BWT index generated using BWT. + */ + @Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false) + private String prefix = null; + + /** + * The instance used to generate alignments. + */ + private BWACAligner aligner = null; + + /** + * Create an aligner object. The aligner object will load and hold the BWT until close() is called. + */ + @Override + public void initialize() { + if(prefix == null) + prefix = getToolkit().getArguments().referenceFile.getAbsolutePath(); + BWTFiles bwtFiles = new BWTFiles(prefix); + BWAConfiguration configuration = new BWAConfiguration(); + aligner = new BWACAligner(bwtFiles,configuration); + } + + /** + * Aligns a read to the given reference. + * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. + * @param read Read to align. + * @return Number of reads aligned by this map (aka 1). + */ + @Override + public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + //logger.info(String.format("examining read %s", read.getReadName())); + + byte[] bases = read.getReadBases(); + if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases); + + boolean matches = true; + Iterable alignments = aligner.getAllAlignments(bases); + Iterator alignmentIterator = alignments.iterator(); + + if(!alignmentIterator.hasNext()) { + matches = read.getReadUnmappedFlag(); + } + else { + Alignment[] alignmentsOfBestQuality = alignmentIterator.next(); + for(Alignment alignment: alignmentsOfBestQuality) { + matches = (alignment.getContigIndex() == read.getReferenceIndex()); + matches &= (alignment.getAlignmentStart() == read.getAlignmentStart()); + matches &= (alignment.isNegativeStrand() == read.getReadNegativeStrandFlag()); + matches &= (alignment.getCigar().equals(read.getCigar())); + matches &= (alignment.getMappingQuality() == read.getMappingQuality()); + if(matches) break; + } + } + + if(!matches) { + logger.error("Found mismatch!"); + logger.error(String.format("Read %s:",read.getReadName())); + logger.error(String.format(" Contig index: %d",read.getReferenceIndex())); + logger.error(String.format(" Alignment start: %d", read.getAlignmentStart())); + logger.error(String.format(" Negative strand: %b", read.getReadNegativeStrandFlag())); + logger.error(String.format(" Cigar: %s%n", read.getCigarString())); + logger.error(String.format(" Mapping quality: %s%n", read.getMappingQuality())); + for(Alignment[] alignmentsByScore: alignments) { + for(int i = 0; i < alignmentsByScore.length; i++) { + logger.error(String.format("Alignment %d:",i)); + logger.error(String.format(" Contig index: %d",alignmentsByScore[i].getContigIndex())); + logger.error(String.format(" Alignment start: %d", alignmentsByScore[i].getAlignmentStart())); + logger.error(String.format(" Negative strand: %b", alignmentsByScore[i].isNegativeStrand())); + logger.error(String.format(" Cigar: %s", alignmentsByScore[i].getCigarString())); + logger.error(String.format(" Mapping quality: %s%n", alignmentsByScore[i].getMappingQuality())); + } + } + throw new ReviewedStingException(String.format("Read %s mismatches!", read.getReadName())); + } + + return 1; + } + + /** + * Initial value for reduce. In this case, validated reads will be counted. + * @return 0, indicating no reads yet validated. + */ + @Override + public Integer reduceInit() { return 0; } + + /** + * Calculates the number of reads processed. + * @param value Number of reads processed by this map. + * @param sum Number of reads processed before this map. + * @return Number of reads processed up to and including this map. + */ + @Override + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + /** + * Cleanup. + * @param result Number of reads processed. + */ + @Override + public void onTraversalDone(Integer result) { + aligner.close(); + super.onTraversalDone(result); + } + +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java new file mode 100644 index 0000000000..7064e637fe --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment; + +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMSequenceDictionary; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.WalkerName; + +import java.io.File; + +/** + * Aligns reads to a given reference using Heng Li's BWA aligner, presenting the resulting alignments in SAM or BAM format. + * Mimics the steps 'bwa aln' followed by 'bwa samse' using the BWA/C implementation. + * + * @author mhanna + * @version 0.1 + */ +@WalkerName("Align") +public class AlignmentWalker extends ReadWalker { + @Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " + + "generated by bwa index -d bwtsw. If unspecified, will default " + + "to the reference specified via the -R argument.",required=false) + private File targetReferenceFile = null; + + @Output + private StingSAMFileWriter out = null; + + /** + * The actual aligner. + */ + private BWACAligner aligner = null; + + /** + * New header to use, if desired. + */ + private SAMFileHeader header; + + /** + * Create an aligner object. The aligner object will load and hold the BWT until close() is called. + */ + @Override + public void initialize() { + if(targetReferenceFile == null) + targetReferenceFile = getToolkit().getArguments().referenceFile; + BWTFiles bwtFiles = new BWTFiles(targetReferenceFile.getAbsolutePath()); + BWAConfiguration configuration = new BWAConfiguration(); + aligner = new BWACAligner(bwtFiles,configuration); + + // Take the header of the SAM file, tweak it by adding in the reference dictionary and specifying that the target file is unsorted. + header = getToolkit().getSAMFileHeader().clone(); + SAMSequenceDictionary referenceDictionary = + ReferenceSequenceFileFactory.getReferenceSequenceFile(targetReferenceFile).getSequenceDictionary(); + header.setSequenceDictionary(referenceDictionary); + header.setSortOrder(SAMFileHeader.SortOrder.unsorted); + + out.writeHeader(header); + } + + /** + * Aligns a read to the given reference. + * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. + * @param read Read to align. + * @return Number of alignments found for this read. + */ + @Override + public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + SAMRecord alignedRead = aligner.align(read,header); + out.addAlignment(alignedRead); + return 1; + } + + /** + * Initial value for reduce. In this case, alignments will be counted. + * @return 0, indicating no alignments yet found. + */ + @Override + public Integer reduceInit() { return 0; } + + /** + * Calculates the number of alignments found. + * @param value Number of alignments found by this map. + * @param sum Number of alignments found before this map. + * @return Number of alignments found up to and including this map. + */ + @Override + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + /** + * Cleanup. + * @param result Number of reads processed. + */ + @Override + public void onTraversalDone(Integer result) { + aligner.close(); + super.onTraversalDone(result); + } + +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java new file mode 100644 index 0000000000..57d92319f5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; + +import java.io.PrintStream; +import java.util.Iterator; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +/** + * Counts the number of best alignments as presented by BWA and outputs a histogram of number of placements vs. the + * frequency of that number of placements. + * + * @author mhanna + * @version 0.1 + */ +public class CountBestAlignmentsWalker extends ReadWalker { + /** + * The supporting BWT index generated using BWT. + */ + @Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false) + private String prefix = null; + + @Output + private PrintStream out = null; + + /** + * The actual aligner. + */ + private Aligner aligner = null; + + private SortedMap alignmentFrequencies = new TreeMap(); + + /** + * Create an aligner object. The aligner object will load and hold the BWT until close() is called. + */ + @Override + public void initialize() { + if(prefix == null) + prefix = getToolkit().getArguments().referenceFile.getAbsolutePath(); + BWTFiles bwtFiles = new BWTFiles(prefix); + BWAConfiguration configuration = new BWAConfiguration(); + aligner = new BWACAligner(bwtFiles,configuration); + } + + /** + * Aligns a read to the given reference. + * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. + * @param read Read to align. + * @return Number of alignments found for this read. + */ + @Override + public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + Iterator alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator(); + if(alignmentIterator.hasNext()) { + int numAlignments = alignmentIterator.next().length; + if(alignmentFrequencies.containsKey(numAlignments)) + alignmentFrequencies.put(numAlignments,alignmentFrequencies.get(numAlignments)+1); + else + alignmentFrequencies.put(numAlignments,1); + } + return 1; + } + + /** + * Initial value for reduce. In this case, validated reads will be counted. + * @return 0, indicating no reads yet validated. + */ + @Override + public Integer reduceInit() { return 0; } + + /** + * Calculates the number of reads processed. + * @param value Number of reads processed by this map. + * @param sum Number of reads processed before this map. + * @return Number of reads processed up to and including this map. + */ + @Override + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + /** + * Cleanup. + * @param result Number of reads processed. + */ + @Override + public void onTraversalDone(Integer result) { + aligner.close(); + for(Map.Entry alignmentFrequency: alignmentFrequencies.entrySet()) + out.printf("%d\t%d%n", alignmentFrequency.getKey(), alignmentFrequency.getValue()); + super.onTraversalDone(result); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java new file mode 100644 index 0000000000..ddbf784f57 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java @@ -0,0 +1,38 @@ +package org.broadinstitute.sting.alignment.bwa; + +import org.broadinstitute.sting.alignment.Aligner; + +/** + * Align reads using BWA. + * + * @author mhanna + * @version 0.1 + */ +public abstract class BWAAligner implements Aligner { + /** + * The supporting files used by BWA. + */ + protected BWTFiles bwtFiles; + + /** + * The current configuration for the BWA aligner. + */ + protected BWAConfiguration configuration; + + /** + * Create a new BWAAligner. Purpose of this call is to ensure that all BWA constructors accept the correct + * parameters. + * @param bwtFiles The many files representing BWTs persisted to disk. + * @param configuration Configuration parameters for the alignment. + */ + public BWAAligner(BWTFiles bwtFiles, BWAConfiguration configuration) { + this.bwtFiles = bwtFiles; + this.configuration = configuration; + } + + /** + * Update the configuration passed to the BWA aligner. + * @param configuration New configuration to set. + */ + public abstract void updateConfiguration(BWAConfiguration configuration); +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java new file mode 100644 index 0000000000..73441cb6a4 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java @@ -0,0 +1,44 @@ +package org.broadinstitute.sting.alignment.bwa; + +/** + * Configuration for the BWA/C aligner. + * + * @author mhanna + * @version 0.1 + */ +public class BWAConfiguration { + /** + * The maximum edit distance used by BWA. + */ + public Float maximumEditDistance = null; + + /** + * How many gap opens are acceptable within this alignment? + */ + public Integer maximumGapOpens = null; + + /** + * How many gap extensions are acceptable within this alignment? + */ + public Integer maximumGapExtensions = null; + + /** + * Do we disallow indels within a certain range from the start / end? + */ + public Integer disallowIndelWithinRange = null; + + /** + * What is the scoring penalty for a mismatch? + */ + public Integer mismatchPenalty = null; + + /** + * What is the scoring penalty for a gap open? + */ + public Integer gapOpenPenalty = null; + + /** + * What is the scoring penalty for a gap extension? + */ + public Integer gapExtensionPenalty = null; +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java new file mode 100644 index 0000000000..a0589ac842 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java @@ -0,0 +1,234 @@ +package org.broadinstitute.sting.alignment.bwa; + +import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.SAMSequenceRecord; +import net.sf.samtools.util.StringUtil; +import org.broadinstitute.sting.alignment.reference.bwt.*; +import org.broadinstitute.sting.alignment.reference.packing.PackUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.IOException; + +/** + * Support files for BWT. + * + * @author mhanna + * @version 0.1 + */ +public class BWTFiles { + /** + * ANN (?) file name. + */ + public final File annFile; + + /** + * AMB (?) file name. + */ + public final File ambFile; + + /** + * Packed reference sequence file. + */ + public final File pacFile; + + /** + * Reverse of packed reference sequence file. + */ + public final File rpacFile; + + /** + * Forward BWT file. + */ + public final File forwardBWTFile; + + /** + * Forward suffix array file. + */ + public final File forwardSAFile; + + /** + * Reverse BWT file. + */ + public final File reverseBWTFile; + + /** + * Reverse suffix array file. + */ + public final File reverseSAFile; + + /** + * Where these files autogenerated on the fly? + */ + public final boolean autogenerated; + + /** + * Create a new BWA configuration file using the given prefix. + * @param prefix Prefix to use when creating the configuration. Must not be null. + */ + public BWTFiles(String prefix) { + if(prefix == null) + throw new ReviewedStingException("Prefix must not be null."); + annFile = new File(prefix + ".ann"); + ambFile = new File(prefix + ".amb"); + pacFile = new File(prefix + ".pac"); + rpacFile = new File(prefix + ".rpac"); + forwardBWTFile = new File(prefix + ".bwt"); + forwardSAFile = new File(prefix + ".sa"); + reverseBWTFile = new File(prefix + ".rbwt"); + reverseSAFile = new File(prefix + ".rsa"); + autogenerated = false; + } + + /** + * Hand-create a new BWTFiles object, specifying a unique file object for each type. + * @param annFile ANN (alternate dictionary) file. + * @param ambFile AMB (holes) files. + * @param pacFile Packed representation of the forward reference sequence. + * @param forwardBWTFile BWT representation of the forward reference sequence. + * @param forwardSAFile SA representation of the forward reference sequence. + * @param rpacFile Packed representation of the reversed reference sequence. + * @param reverseBWTFile BWT representation of the reversed reference sequence. + * @param reverseSAFile SA representation of the reversed reference sequence. + */ + private BWTFiles(File annFile, + File ambFile, + File pacFile, + File forwardBWTFile, + File forwardSAFile, + File rpacFile, + File reverseBWTFile, + File reverseSAFile) { + this.annFile = annFile; + this.ambFile = ambFile; + this.pacFile = pacFile; + this.forwardBWTFile = forwardBWTFile; + this.forwardSAFile = forwardSAFile; + this.rpacFile = rpacFile; + this.reverseBWTFile = reverseBWTFile; + this.reverseSAFile = reverseSAFile; + autogenerated = true; + } + + /** + * Close out this files object, in the process deleting any temporary filse + * that were created. + */ + public void close() { + if(autogenerated) { + boolean success = true; + success = annFile.delete(); + success &= ambFile.delete(); + success &= pacFile.delete(); + success &= forwardBWTFile.delete(); + success &= forwardSAFile.delete(); + success &= rpacFile.delete(); + success &= reverseBWTFile.delete(); + success &= reverseSAFile.delete(); + + if(!success) + throw new ReviewedStingException("Unable to clean up autogenerated representation"); + } + } + + /** + * Create a new set of BWT files from the given reference sequence. + * @param referenceSequence Sequence from which to build metadata. + * @return A new object representing encoded representations of each sequence. + */ + public static BWTFiles createFromReferenceSequence(byte[] referenceSequence) { + byte[] normalizedReferenceSequence = new byte[referenceSequence.length]; + System.arraycopy(referenceSequence,0,normalizedReferenceSequence,0,referenceSequence.length); + normalizeReferenceSequence(normalizedReferenceSequence); + + File annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile; + try { + // Write the ann and amb for this reference sequence. + annFile = File.createTempFile("bwt",".ann"); + ambFile = File.createTempFile("bwt",".amb"); + + SAMSequenceDictionary dictionary = new SAMSequenceDictionary(); + dictionary.addSequence(new SAMSequenceRecord("autogenerated",normalizedReferenceSequence.length)); + + ANNWriter annWriter = new ANNWriter(annFile); + annWriter.write(dictionary); + annWriter.close(); + + AMBWriter ambWriter = new AMBWriter(ambFile); + ambWriter.writeEmpty(dictionary); + ambWriter.close(); + + // Write the encoded files for the forward version of this reference sequence. + pacFile = File.createTempFile("bwt",".pac"); + bwtFile = File.createTempFile("bwt",".bwt"); + saFile = File.createTempFile("bwt",".sa"); + + writeEncodedReferenceSequence(normalizedReferenceSequence,pacFile,bwtFile,saFile); + + // Write the encoded files for the reverse version of this reference sequence. + byte[] reverseReferenceSequence = Utils.reverse(normalizedReferenceSequence); + + rpacFile = File.createTempFile("bwt",".rpac"); + rbwtFile = File.createTempFile("bwt",".rbwt"); + rsaFile = File.createTempFile("bwt",".rsa"); + + writeEncodedReferenceSequence(reverseReferenceSequence,rpacFile,rbwtFile,rsaFile); + } + catch(IOException ex) { + throw new ReviewedStingException("Unable to write autogenerated reference sequence to temporary files"); + } + + // Make sure that, at the very least, all temporary files are deleted on exit. + annFile.deleteOnExit(); + ambFile.deleteOnExit(); + pacFile.deleteOnExit(); + bwtFile.deleteOnExit(); + saFile.deleteOnExit(); + rpacFile.deleteOnExit(); + rbwtFile.deleteOnExit(); + rsaFile.deleteOnExit(); + + return new BWTFiles(annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile); + } + + /** + * Write the encoded form of the reference sequence. In the case of BWA, the encoded reference + * sequence is the reference itself in PAC format, the BWT, and the suffix array. + * @param referenceSequence The reference sequence to encode. + * @param pacFile Target for the PAC-encoded reference. + * @param bwtFile Target for the BWT representation of the reference. + * @param suffixArrayFile Target for the suffix array encoding of the reference. + * @throws java.io.IOException In case of issues writing to the file. + */ + private static void writeEncodedReferenceSequence(byte[] referenceSequence, + File pacFile, + File bwtFile, + File suffixArrayFile) throws IOException { + PackUtils.writeReferenceSequence(pacFile,referenceSequence); + + BWT bwt = BWT.createFromReferenceSequence(referenceSequence); + BWTWriter bwtWriter = new BWTWriter(bwtFile); + bwtWriter.write(bwt); + bwtWriter.close(); + + SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence); + SuffixArrayWriter suffixArrayWriter = new SuffixArrayWriter(suffixArrayFile); + suffixArrayWriter.write(suffixArray); + suffixArrayWriter.close(); + } + + /** + * Convert the given reference sequence into a form suitable for building into + * on-the-fly sequences. + * @param referenceSequence The reference sequence to normalize. + * @throws org.broadinstitute.sting.utils.exceptions.ReviewedStingException if normalized sequence cannot be generated. + */ + private static void normalizeReferenceSequence(byte[] referenceSequence) { + StringUtil.toUpperCase(referenceSequence); + for(byte base: referenceSequence) { + if(base != 'A' && base != 'C' && base != 'G' && base != 'T') + throw new ReviewedStingException(String.format("Base type %c is not supported when building references on-the-fly",(char)base)); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java new file mode 100644 index 0000000000..165314259f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java @@ -0,0 +1,259 @@ +package org.broadinstitute.sting.alignment.bwa.c; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.alignment.bwa.BWAAligner; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.Arrays; +import java.util.Iterator; + +/** + * An aligner using the BWA/C implementation. + * + * @author mhanna + * @version 0.1 + */ +public class BWACAligner extends BWAAligner { + static { + System.loadLibrary("bwa"); + } + + /** + * A pointer to the C++ object representing the BWA engine. + */ + private long thunkPointer = 0; + + public BWACAligner(BWTFiles bwtFiles, BWAConfiguration configuration) { + super(bwtFiles,configuration); + if(thunkPointer != 0) + throw new ReviewedStingException("BWA/C attempting to reinitialize."); + + if(!bwtFiles.annFile.exists()) throw new ReviewedStingException("ANN file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.ambFile.exists()) throw new ReviewedStingException("AMB file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.pacFile.exists()) throw new ReviewedStingException("PAC file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.forwardBWTFile.exists()) throw new ReviewedStingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.forwardSAFile.exists()) throw new ReviewedStingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.reverseBWTFile.exists()) throw new ReviewedStingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it."); + if(!bwtFiles.reverseSAFile.exists()) throw new ReviewedStingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it."); + + thunkPointer = create(bwtFiles,configuration); + } + + /** + * Create an aligner object using an array of bytes as a reference. + * @param referenceSequence Reference sequence to encode ad-hoc. + * @param configuration Configuration for the given aligner. + */ + public BWACAligner(byte[] referenceSequence, BWAConfiguration configuration) { + this(BWTFiles.createFromReferenceSequence(referenceSequence),configuration); + // Now that the temporary files are created, the temporary files can be destroyed. + bwtFiles.close(); + } + + /** + * Update the configuration passed to the BWA aligner. + * @param configuration New configuration to set. + */ + @Override + public void updateConfiguration(BWAConfiguration configuration) { + if(thunkPointer == 0) + throw new ReviewedStingException("BWA/C: attempting to update configuration of uninitialized aligner."); + updateConfiguration(thunkPointer,configuration); + } + + /** + * Close this instance of the BWA pointer and delete its resources. + */ + @Override + public void close() { + if(thunkPointer == 0) + throw new ReviewedStingException("BWA/C close attempted, but BWA/C is not properly initialized."); + destroy(thunkPointer); + } + + /** + * Allow the aligner to choose one alignment randomly from the pile of best alignments. + * @param bases Bases to align. + * @return An align + */ + @Override + public Alignment getBestAlignment(final byte[] bases) { + if(thunkPointer == 0) + throw new ReviewedStingException("BWA/C getBestAlignment attempted, but BWA/C is not properly initialized."); + return getBestAlignment(thunkPointer,bases); + } + + /** + * Get the best aligned read, chosen randomly from the pile of best alignments. + * @param read Read to align. + * @param newHeader New header to apply to this SAM file. Can be null, but if so, read header must be valid. + * @return Read with injected alignment data. + */ + @Override + public SAMRecord align(final SAMRecord read, final SAMFileHeader newHeader) { + if(bwtFiles.autogenerated) + throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable"); + return Alignment.convertToRead(getBestAlignment(read.getReadBases()),read,newHeader); + } + + /** + * Get a iterator of alignments, batched by mapping quality. + * @param bases List of bases. + * @return Iterator to alignments. + */ + @Override + public Iterable getAllAlignments(final byte[] bases) { + final BWAPath[] paths = getPaths(bases); + return new Iterable() { + public Iterator iterator() { + return new Iterator() { + /** + * The last position accessed. + */ + private int position = 0; + + /** + * Whether all alignments have been seen based on the current position. + * @return True if any more alignments are pending. False otherwise. + */ + public boolean hasNext() { return position < paths.length; } + + /** + * Return the next cross-section of alignments, based on mapping quality. + * @return Array of the next set of alignments of a given mapping quality. + */ + public Alignment[] next() { + if(position >= paths.length) + throw new UnsupportedOperationException("Out of alignments to return."); + int score = paths[position].score; + int startingPosition = position; + while(position < paths.length && paths[position].score == score) position++; + return convertPathsToAlignments(bases,Arrays.copyOfRange(paths,startingPosition,position)); + } + + /** + * Unsupported. + */ + public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); } + }; + } + }; + } + + /** + * Get a iterator of aligned reads, batched by mapping quality. + * @param read Read to align. + * @param newHeader Optional new header to use when aligning the read. If present, it must be null. + * @return Iterator to alignments. + */ + @Override + public Iterable alignAll(final SAMRecord read, final SAMFileHeader newHeader) { + if(bwtFiles.autogenerated) + throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable"); + final Iterable alignments = getAllAlignments(read.getReadBases()); + return new Iterable() { + public Iterator iterator() { + final Iterator alignmentIterator = alignments.iterator(); + return new Iterator() { + /** + * Whether all alignments have been seen based on the current position. + * @return True if any more alignments are pending. False otherwise. + */ + public boolean hasNext() { return alignmentIterator.hasNext(); } + + /** + * Return the next cross-section of alignments, based on mapping quality. + * @return Array of the next set of alignments of a given mapping quality. + */ + public SAMRecord[] next() { + Alignment[] alignmentsOfQuality = alignmentIterator.next(); + SAMRecord[] reads = new SAMRecord[alignmentsOfQuality.length]; + for(int i = 0; i < alignmentsOfQuality.length; i++) { + reads[i] = Alignment.convertToRead(alignmentsOfQuality[i],read,newHeader); + } + return reads; + } + + /** + * Unsupported. + */ + public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); } + }; + } + }; + } + + /** + * Get the paths associated with the given base string. + * @param bases List of bases. + * @return A set of paths through the BWA. + */ + public BWAPath[] getPaths(byte[] bases) { + if(thunkPointer == 0) + throw new ReviewedStingException("BWA/C getPaths attempted, but BWA/C is not properly initialized."); + return getPaths(thunkPointer,bases); + } + + /** + * Create a pointer to the BWA/C thunk. + * @param files BWT source files. + * @param configuration Configuration of the aligner. + * @return Pointer to the BWA/C thunk. + */ + protected native long create(BWTFiles files, BWAConfiguration configuration); + + /** + * Update the configuration passed to the BWA aligner. For internal use only. + * @param thunkPointer pointer to BWA object. + * @param configuration New configuration to set. + */ + protected native void updateConfiguration(long thunkPointer, BWAConfiguration configuration); + + /** + * Destroy the BWA/C thunk. + * @param thunkPointer Pointer to the allocated thunk. + */ + protected native void destroy(long thunkPointer); + + /** + * Do the extra steps involved in converting a local alignment to a global alignment. + * @param bases ASCII representation of byte array. + * @param paths Paths through the current BWT. + * @return A list of alignments. + */ + protected Alignment[] convertPathsToAlignments(byte[] bases, BWAPath[] paths) { + if(thunkPointer == 0) + throw new ReviewedStingException("BWA/C convertPathsToAlignments attempted, but BWA/C is not properly initialized."); + return convertPathsToAlignments(thunkPointer,bases,paths); + } + + /** + * Caller to the path generation functionality within BWA/C. Call this method's getPaths() wrapper (above) instead. + * @param thunkPointer pointer to the C++ object managing BWA/C. + * @param bases ASCII representation of byte array. + * @return A list of paths through the specified BWT. + */ + protected native BWAPath[] getPaths(long thunkPointer, byte[] bases); + + /** + * Do the extra steps involved in converting a local alignment to a global alignment. + * Call this method's convertPathsToAlignments() wrapper (above) instead. + * @param thunkPointer pointer to the C++ object managing BWA/C. + * @param bases ASCII representation of byte array. + * @param paths Paths through the current BWT. + * @return A list of alignments. + */ + protected native Alignment[] convertPathsToAlignments(long thunkPointer, byte[] bases, BWAPath[] paths); + + /** + * Gets the best alignment from BWA/C, randomly selected from all best-aligned reads. + * @param thunkPointer Pointer to BWA thunk. + * @param bases bases to align. + * @return The best alignment from BWA/C. + */ + protected native Alignment getBestAlignment(long thunkPointer, byte[] bases); +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWAPath.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWAPath.java new file mode 100755 index 0000000000..347d4344ff --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/c/BWAPath.java @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment.bwa.c; + +/** + * Models a BWA path. + * + * @author mhanna + * @version 0.1 + */ +public class BWAPath { + /** + * Number of mismatches encountered along this path. + */ + public final int numMismatches; + + /** + * Number of gap opens encountered along this path. + */ + public final int numGapOpens; + + /** + * Number of gap extensions along this path. + */ + public final int numGapExtensions; + + /** + * Whether this alignment was found on the positive or negative strand. + */ + public final boolean negativeStrand; + + /** + * Starting coordinate in the BWT. + */ + public final long k; + + /** + * Ending coordinate in the BWT. + */ + public final long l; + + /** + * The score of this path. + */ + public final int score; + + /** + * The number of best alignments seen along this path. + */ + public final int bestCount; + + /** + * The number of second best alignments seen along this path. + */ + public final int secondBestCount; + + /** + * Create a new path with the given attributes. + * @param numMismatches Number of mismatches along path. + * @param numGapOpens Number of gap opens along path. + * @param numGapExtensions Number of gap extensions along path. + * @param k Index to first coordinate within BWT. + * @param l Index to last coordinate within BWT. + * @param score Score of this alignment. Not the mapping quality. + */ + public BWAPath(int numMismatches, int numGapOpens, int numGapExtensions, boolean negativeStrand, long k, long l, int score, int bestCount, int secondBestCount) { + this.numMismatches = numMismatches; + this.numGapOpens = numGapOpens; + this.numGapExtensions = numGapExtensions; + this.negativeStrand = negativeStrand; + this.k = k; + this.l = l; + this.score = score; + this.bestCount = bestCount; + this.secondBestCount = secondBestCount; + } + +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java new file mode 100644 index 0000000000..2d568a96a4 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java @@ -0,0 +1,164 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.*; +import org.broadinstitute.sting.alignment.Aligner; +import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.FileNotFoundException; + +/** + * A test harness to ensure that the perfect aligner works. + * + * @author mhanna + * @version 0.1 + */ +public class AlignerTestHarness { + public static void main( String argv[] ) throws FileNotFoundException { + if( argv.length != 6 ) { + System.out.println("PerfectAlignerTestHarness "); + System.exit(1); + } + + File referenceFile = new File(argv[0]); + File bwtFile = new File(argv[1]); + File rbwtFile = new File(argv[2]); + File suffixArrayFile = new File(argv[3]); + File reverseSuffixArrayFile = new File(argv[4]); + File bamFile = new File(argv[5]); + + align(referenceFile,bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile,bamFile); + } + + private static void align(File referenceFile, File bwtFile, File rbwtFile, File suffixArrayFile, File reverseSuffixArrayFile, File bamFile) throws FileNotFoundException { + Aligner aligner = new BWAJavaAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile); + int count = 0; + + SAMFileReader reader = new SAMFileReader(bamFile); + reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT); + + int mismatches = 0; + int failures = 0; + + for(SAMRecord read: reader) { + count++; + if( count > 200000 ) break; + //if( count < 366000 ) continue; + //if( count > 2 ) break; + //if( !read.getReadName().endsWith("SL-XBC:1:82:506:404#0") ) + // continue; + //if( !read.getReadName().endsWith("SL-XBC:1:36:30:1926#0") ) + // continue; + //if( !read.getReadName().endsWith("SL-XBC:1:60:1342:1340#0") ) + // continue; + + SAMRecord alignmentCleaned = null; + try { + alignmentCleaned = (SAMRecord)read.clone(); + } + catch( CloneNotSupportedException ex ) { + throw new ReviewedStingException("SAMRecord clone not supported", ex); + } + + if( alignmentCleaned.getReadNegativeStrandFlag() ) + alignmentCleaned.setReadBases(BaseUtils.simpleReverseComplement(alignmentCleaned.getReadBases())); + + alignmentCleaned.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX); + alignmentCleaned.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START); + alignmentCleaned.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY); + alignmentCleaned.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR); + + // Clear everything except flags pertaining to pairing and set 'unmapped' status to true. + alignmentCleaned.setFlags(alignmentCleaned.getFlags() & 0x00A1 | 0x000C); + + Iterable alignments = aligner.getAllAlignments(alignmentCleaned.getReadBases()); + if(!alignments.iterator().hasNext() ) { + //throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count)); + System.out.printf("Unable to align read %s to reference; count = %d%n",read.getReadName(),count); + failures++; + } + + Alignment foundAlignment = null; + for(Alignment[] alignmentsOfQuality: alignments) { + for(Alignment alignment: alignmentsOfQuality) { + if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() ) + continue; + if( read.getAlignmentStart() != alignment.getAlignmentStart() ) + continue; + + foundAlignment = alignment; + } + } + + if( foundAlignment != null ) { + //System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), foundAlignment.getAlignmentStart(), foundAlignment.getMismatches(), foundAlignment.getGapOpens(), foundAlignment.getGapExtensions()); + } + else { + System.out.printf("Error aligning read %s%n", read.getReadName()); + + mismatches++; + + IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile); + + System.out.printf("read = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(read.getReadString(),read.getCigar(),CigarOperator.DELETION), + read.getAlignmentStart(), + read.getReadNegativeStrandFlag()); + int numDeletions = numDeletionsInCigar(read.getCigar()); + String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()+numDeletions-1).getBases()); + System.out.printf("expected ref = %s%n", formatBasesBasedOnCigar(expectedRef,read.getCigar(),CigarOperator.INSERTION)); + + for(Alignment[] alignmentsOfQuality: alignments) { + for(Alignment alignment: alignmentsOfQuality) { + System.out.println(); + + Cigar cigar = ((BWAAlignment)alignment).getCigar(); + + System.out.printf("read = %s%n", formatBasesBasedOnCigar(read.getReadString(),cigar,CigarOperator.DELETION)); + + int deletionCount = ((BWAAlignment)alignment).getNumberOfBasesMatchingState(AlignmentState.DELETION); + String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignment.getAlignmentStart(),alignment.getAlignmentStart()+read.getReadLength()+deletionCount-1).getBases()); + System.out.printf("actual ref = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(alignedRef,cigar,CigarOperator.INSERTION), + alignment.getAlignmentStart(), + alignment.isNegativeStrand()); + } + } + + //throw new StingException(String.format("Read %s was placed at incorrect location; count = %d%n",read.getReadName(),count)); + } + + + if( count % 1000 == 0 ) + System.out.printf("%d reads examined.%n",count); + } + + System.out.printf("%d reads examined; %d mismatches; %d failures.%n",count,mismatches,failures); + } + + private static String formatBasesBasedOnCigar( String bases, Cigar cigar, CigarOperator toBlank ) { + StringBuilder formatted = new StringBuilder(); + int readIndex = 0; + for(CigarElement cigarElement: cigar.getCigarElements()) { + if(cigarElement.getOperator() == toBlank) { + int number = cigarElement.getLength(); + while( number-- > 0 ) formatted.append(' '); + } + else { + int number = cigarElement.getLength(); + while( number-- > 0 ) formatted.append(bases.charAt(readIndex++)); + } + } + return formatted.toString(); + } + + private static int numDeletionsInCigar( Cigar cigar ) { + int numDeletions = 0; + for(CigarElement cigarElement: cigar.getCigarElements()) { + if(cigarElement.getOperator() == CigarOperator.DELETION) + numDeletions += cigarElement.getLength(); + } + return numDeletions; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentMatchSequence.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentMatchSequence.java new file mode 100644 index 0000000000..f1e3c31b67 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentMatchSequence.java @@ -0,0 +1,150 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.Iterator; + +/** + * Represents a sequence of matches. + * + * @author mhanna + * @version 0.1 + */ +public class AlignmentMatchSequence implements Cloneable { + /** + * Stores the particular match entries in the order they occur. + */ + private Deque entries = new ArrayDeque(); + + /** + * Clone the given match sequence. + * @return A deep copy of the current match sequence. + */ + public AlignmentMatchSequence clone() { + AlignmentMatchSequence copy = null; + try { + copy = (AlignmentMatchSequence)super.clone(); + } + catch( CloneNotSupportedException ex ) { + throw new ReviewedStingException("Unable to clone AlignmentMatchSequence."); + } + + copy.entries = new ArrayDeque(); + for( AlignmentMatchSequenceEntry entry: entries ) + copy.entries.add(entry.clone()); + + return copy; + } + + public Cigar convertToCigar(boolean negativeStrand) { + Cigar cigar = new Cigar(); + Iterator iterator = negativeStrand ? entries.descendingIterator() : entries.iterator(); + while( iterator.hasNext() ) { + AlignmentMatchSequenceEntry entry = iterator.next(); + CigarOperator operator; + switch( entry.getAlignmentState() ) { + case MATCH_MISMATCH: operator = CigarOperator.MATCH_OR_MISMATCH; break; + case INSERTION: operator = CigarOperator.INSERTION; break; + case DELETION: operator = CigarOperator.DELETION; break; + default: throw new ReviewedStingException("convertToCigar: cannot process state: " + entry.getAlignmentState()); + } + cigar.add( new CigarElement(entry.count,operator) ); + } + return cigar; + } + + /** + * All a new alignment of the given state. + * @param state State to add to the sequence. + */ + public void addNext( AlignmentState state ) { + AlignmentMatchSequenceEntry last = entries.peekLast(); + // If the last entry is the same as this one, increment it. Otherwise, add a new entry. + if( last != null && last.alignmentState == state ) + last.increment(); + else + entries.add(new AlignmentMatchSequenceEntry(state)); + } + + /** + * Gets the current state of this alignment (what's the state of the last base?) + * @return State of the most recently aligned base. + */ + public AlignmentState getCurrentState() { + if( entries.size() == 0 ) + return AlignmentState.MATCH_MISMATCH; + return entries.peekLast().getAlignmentState(); + } + + /** + * How many bases in the read match the given state. + * @param state State to test. + * @return number of bases which match that state. + */ + public int getNumberOfBasesMatchingState(AlignmentState state) { + int matches = 0; + for( AlignmentMatchSequenceEntry entry: entries ) { + if( entry.getAlignmentState() == state ) + matches += entry.count; + } + return matches; + } + + /** + * Stores an individual match sequence entry. + */ + private class AlignmentMatchSequenceEntry implements Cloneable { + /** + * The state of the alignment throughout a given point in the sequence. + */ + private final AlignmentState alignmentState; + + /** + * The number of bases having this particular state. + */ + private int count; + + /** + * Create a new sequence entry with the given state. + * @param alignmentState The state that this sequence should contain. + */ + AlignmentMatchSequenceEntry( AlignmentState alignmentState ) { + this.alignmentState = alignmentState; + this.count = 1; + } + + /** + * Clone the given match sequence entry. + * @return A deep copy of the current match sequence entry. + */ + public AlignmentMatchSequenceEntry clone() { + try { + return (AlignmentMatchSequenceEntry)super.clone(); + } + catch( CloneNotSupportedException ex ) { + throw new ReviewedStingException("Unable to clone AlignmentMatchSequenceEntry."); + } + } + + /** + * Retrieves the current state of the alignment. + * @return The state of the current sequence. + */ + AlignmentState getAlignmentState() { + return alignmentState; + } + + /** + * Increment the count of alignments having this particular state. + */ + void increment() { + count++; + } + } +} + diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentState.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentState.java new file mode 100644 index 0000000000..92c603335a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignmentState.java @@ -0,0 +1,13 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +/** + * The state of a given base in the alignment. + * + * @author mhanna + * @version 0.1 + */ +public enum AlignmentState { + MATCH_MISMATCH, + INSERTION, + DELETION +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java new file mode 100644 index 0000000000..f3b515dba3 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java @@ -0,0 +1,190 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +import net.sf.samtools.Cigar; +import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +/** + * An alignment object to be used incrementally as the BWA aligner + * inspects the read. + * + * @author mhanna + * @version 0.1 + */ +public class BWAAlignment extends Alignment implements Cloneable { + /** + * Track the number of alignments that have been created. + */ + private static long numCreated; + + /** + * Which number alignment is this? + */ + private long creationNumber; + + /** + * The aligner performing the alignments. + */ + protected BWAJavaAligner aligner; + + /** + * The sequence of matches/mismatches/insertions/deletions. + */ + private AlignmentMatchSequence alignmentMatchSequence = new AlignmentMatchSequence(); + + /** + * Working variable. How many bases have been matched at this point. + */ + protected int position; + + /** + * Working variable. How many mismatches have been encountered at this point. + */ + private int mismatches; + + /** + * Number of gap opens in alignment. + */ + private int gapOpens; + + /** + * Number of gap extensions in alignment. + */ + private int gapExtensions; + + /** + * Working variable. The lower bound of the alignment within the BWT. + */ + protected long loBound; + + /** + * Working variable. The upper bound of the alignment within the BWT. + */ + protected long hiBound; + + protected void setAlignmentStart(long position) { + this.alignmentStart = position; + } + + protected void setNegativeStrand(boolean negativeStrand) { + this.negativeStrand = negativeStrand; + } + + /** + * Cache the score. + */ + private int score; + + public Cigar getCigar() { + return alignmentMatchSequence.convertToCigar(isNegativeStrand()); + } + + /** + * Gets the current state of this alignment (state of the last base viewed).. + * @return Current state of the alignment. + */ + public AlignmentState getCurrentState() { + return alignmentMatchSequence.getCurrentState(); + } + + /** + * Adds the given state to the current alignment. + * @param state State to add to the given alignment. + */ + public void addState( AlignmentState state ) { + alignmentMatchSequence.addNext(state); + } + + /** + * Gets the BWA score of this alignment. + * @return BWA-style scores. 0 is best. + */ + public int getScore() { + return score; + } + + public int getMismatches() { return mismatches; } + public int getGapOpens() { return gapOpens; } + public int getGapExtensions() { return gapExtensions; } + + public void incrementMismatches() { + this.mismatches++; + updateScore(); + } + + public void incrementGapOpens() { + this.gapOpens++; + updateScore(); + } + + public void incrementGapExtensions() { + this.gapExtensions++; + updateScore(); + } + + /** + * Updates the score based on new information about matches / mismatches. + */ + private void updateScore() { + score = mismatches*aligner.MISMATCH_PENALTY + gapOpens*aligner.GAP_OPEN_PENALTY + gapExtensions*aligner.GAP_EXTENSION_PENALTY; + } + + /** + * Create a new alignment with the given parent aligner. + * @param aligner Aligner being used. + */ + public BWAAlignment( BWAJavaAligner aligner ) { + this.aligner = aligner; + this.creationNumber = numCreated++; + } + + /** + * Clone the alignment. + * @return New instance of the alignment. + */ + public BWAAlignment clone() { + BWAAlignment newAlignment = null; + try { + newAlignment = (BWAAlignment)super.clone(); + } + catch( CloneNotSupportedException ex ) { + throw new ReviewedStingException("Unable to clone BWAAlignment."); + } + newAlignment.creationNumber = numCreated++; + newAlignment.alignmentMatchSequence = alignmentMatchSequence.clone(); + + return newAlignment; + } + + /** + * How many bases in the read match the given state. + * @param state State to test. + * @return number of bases which match that state. + */ + public int getNumberOfBasesMatchingState(AlignmentState state) { + return alignmentMatchSequence.getNumberOfBasesMatchingState(state); + } + + /** + * Compare this alignment to another alignment. + * @param rhs Other alignment to which to compare. + * @return < 0 if this < other, == 0 if this == other, > 0 if this > other + */ + public int compareTo(Alignment rhs) { + BWAAlignment other = (BWAAlignment)rhs; + + // If the scores are different, disambiguate using the score. + if(score != other.score) + return score > other.score ? 1 : -1; + + // Otherwise, use the order in which the elements were created. + if(creationNumber != other.creationNumber) + return creationNumber > other.creationNumber ? -1 : 1; + + return 0; + } + + public String toString() { + return String.format("position: %d, strand: %b, state: %s, mismatches: %d, gap opens: %d, gap extensions: %d, loBound: %d, hiBound: %d, score: %d, creationNumber: %d", position, negativeStrand, alignmentMatchSequence.getCurrentState(), mismatches, gapOpens, gapExtensions, loBound, hiBound, getScore(), creationNumber); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java new file mode 100644 index 0000000000..fbeac91925 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java @@ -0,0 +1,393 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.alignment.bwa.BWAAligner; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.reference.bwt.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.PriorityQueue; + +/** + * Create imperfect alignments from the read to the genome represented by the given BWT / suffix array. + * + * @author mhanna + * @version 0.1 + */ +public class BWAJavaAligner extends BWAAligner { + /** + * BWT in the forward direction. + */ + private BWT forwardBWT; + + /** + * BWT in the reverse direction. + */ + private BWT reverseBWT; + + /** + * Suffix array in the forward direction. + */ + private SuffixArray forwardSuffixArray; + + /** + * Suffix array in the reverse direction. + */ + private SuffixArray reverseSuffixArray; + + /** + * Maximum edit distance (-n option from original BWA). + */ + private final int MAXIMUM_EDIT_DISTANCE = 4; + + /** + * Maximum number of gap opens (-o option from original BWA). + */ + private final int MAXIMUM_GAP_OPENS = 1; + + /** + * Maximum number of gap extensions (-e option from original BWA). + */ + private final int MAXIMUM_GAP_EXTENSIONS = 6; + + /** + * Penalty for straight mismatches (-M option from original BWA). + */ + public final int MISMATCH_PENALTY = 3; + + /** + * Penalty for gap opens (-O option from original BWA). + */ + public final int GAP_OPEN_PENALTY = 11; + + /** + * Penalty for gap extensions (-E option from original BWA). + */ + public final int GAP_EXTENSION_PENALTY = 4; + + /** + * Skip the ends of indels. + */ + public final int INDEL_END_SKIP = 5; + + public BWAJavaAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) { + super(null,null); + forwardBWT = new BWTReader(forwardBWTFile).read(); + reverseBWT = new BWTReader(reverseBWTFile).read(); + forwardSuffixArray = new SuffixArrayReader(forwardSuffixArrayFile,forwardBWT).read(); + reverseSuffixArray = new SuffixArrayReader(reverseSuffixArrayFile,reverseBWT).read(); + } + + /** + * Close this instance of the BWA pointer and delete its resources. + */ + @Override + public void close() { + throw new UnsupportedOperationException("BWA aligner can't currently be closed."); + } + + /** + * Update the current parameters of this aligner. + * @param configuration New configuration to set. + */ + public void updateConfiguration(BWAConfiguration configuration) { + throw new UnsupportedOperationException("Configuration of the BWA aligner can't currently be changed."); + } + + /** + * Allow the aligner to choose one alignment randomly from the pile of best alignments. + * @param bases Bases to align. + * @return An align + */ + public Alignment getBestAlignment(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + /** + * Align the read to the reference. + * @param read Read to align. + * @param header Optional header to drop in place. + * @return A list of the alignments. + */ + public SAMRecord align(final SAMRecord read, final SAMFileHeader header) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + /** + * Get a iterator of alignments, batched by mapping quality. + * @param bases List of bases. + * @return Iterator to alignments. + */ + public Iterable getAllAlignments(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + /** + * Get a iterator of aligned reads, batched by mapping quality. + * @param read Read to align. + * @param newHeader Optional new header to use when aligning the read. If present, it must be null. + * @return Iterator to alignments. + */ + public Iterable alignAll(final SAMRecord read, final SAMFileHeader newHeader) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + + public List align( SAMRecord read ) { + List successfulMatches = new ArrayList(); + + Byte[] uncomplementedBases = normalizeBases(read.getReadBases()); + Byte[] complementedBases = normalizeBases(Utils.reverse(BaseUtils.simpleReverseComplement(read.getReadBases()))); + + List forwardLowerBounds = LowerBound.create(uncomplementedBases,forwardBWT); + List reverseLowerBounds = LowerBound.create(complementedBases,reverseBWT); + + // Seed the best score with any score that won't overflow on comparison. + int bestScore = Integer.MAX_VALUE - MISMATCH_PENALTY; + int bestDiff = MAXIMUM_EDIT_DISTANCE+1; + int maxDiff = MAXIMUM_EDIT_DISTANCE; + + PriorityQueue alignments = new PriorityQueue(); + + // Create a fictional initial alignment, with the position just off the end of the read, and the limits + // set as the entire BWT. + alignments.add(createSeedAlignment(reverseBWT)); + alignments.add(createSeedAlignment(forwardBWT)); + + while(!alignments.isEmpty()) { + BWAAlignment alignment = alignments.remove(); + + // From bwtgap.c in the original BWT; if the rank is worse than the best score + the mismatch PENALTY, move on. + if( alignment.getScore() > bestScore + MISMATCH_PENALTY ) + break; + + Byte[] bases = alignment.isNegativeStrand() ? complementedBases : uncomplementedBases; + BWT bwt = alignment.isNegativeStrand() ? forwardBWT : reverseBWT; + List lowerBounds = alignment.isNegativeStrand() ? reverseLowerBounds : forwardLowerBounds; + + // if z < D(i) then return {} + int mismatches = maxDiff - alignment.getMismatches() - alignment.getGapOpens() - alignment.getGapExtensions(); + if( alignment.position < lowerBounds.size()-1 && mismatches < lowerBounds.get(alignment.position+1).value ) + continue; + + if(mismatches == 0) { + exactMatch(alignment,bases,bwt); + if(alignment.loBound > alignment.hiBound) + continue; + } + + // Found a valid alignment; store it and move on. + if(alignment.position >= read.getReadLength()-1) { + for(long bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++) { + BWAAlignment finalAlignment = alignment.clone(); + + if( finalAlignment.isNegativeStrand() ) + finalAlignment.setAlignmentStart(forwardSuffixArray.get(bwtIndex) + 1); + else { + int sizeAlongReference = read.getReadLength() - + finalAlignment.getNumberOfBasesMatchingState(AlignmentState.INSERTION) + + finalAlignment.getNumberOfBasesMatchingState(AlignmentState.DELETION); + finalAlignment.setAlignmentStart(reverseBWT.length() - reverseSuffixArray.get(bwtIndex) - sizeAlongReference + 1); + } + + successfulMatches.add(finalAlignment); + + bestScore = Math.min(finalAlignment.getScore(),bestScore); + bestDiff = Math.min(finalAlignment.getMismatches()+finalAlignment.getGapOpens()+finalAlignment.getGapExtensions(),bestDiff); + maxDiff = bestDiff + 1; + } + + continue; + } + + //System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d, base = %s%n", alignments.size(), alignment, lowerBounds.get(alignment.position+1).value, alignment.position >= 0 ? (char)bases[alignment.position].byteValue() : ""); + /* + System.out.printf("#1\t[%d,%d,%d,%c]\t[%d,%d,%d]\t[%d,%d]\t[%d,%d]%n",alignments.size(), + alignment.negativeStrand?1:0, + bases.length-alignment.position-1, + alignment.getCurrentState().toString().charAt(0), + alignment.getMismatches(), + alignment.getGapOpens(), + alignment.getGapExtensions(), + lowerBounds.get(alignment.position+1).value, + lowerBounds.get(alignment.position+1).width, + alignment.loBound, + alignment.hiBound); + */ + + // Temporary -- look ahead to see if the next alignment is bounded. + boolean allowDifferences = mismatches > 0; + boolean allowMismatches = mismatches > 0; + + if( allowDifferences && + alignment.position+1 >= INDEL_END_SKIP-1+alignment.getGapOpens()+alignment.getGapExtensions() && + read.getReadLength()-1-(alignment.position+1) >= INDEL_END_SKIP+alignment.getGapOpens()+alignment.getGapExtensions() ) { + if( alignment.getCurrentState() == AlignmentState.MATCH_MISMATCH ) { + if( alignment.getGapOpens() < MAXIMUM_GAP_OPENS ) { + // Add a potential insertion extension. + BWAAlignment insertionAlignment = createInsertionAlignment(alignment); + insertionAlignment.incrementGapOpens(); + alignments.add(insertionAlignment); + + // Add a potential deletion by marking a deletion and augmenting the position. + List deletionAlignments = createDeletionAlignments(bwt,alignment); + for( BWAAlignment deletionAlignment: deletionAlignments ) + deletionAlignment.incrementGapOpens(); + alignments.addAll(deletionAlignments); + } + } + else if( alignment.getCurrentState() == AlignmentState.INSERTION ) { + if( alignment.getGapExtensions() < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) { + // Add a potential insertion extension. + BWAAlignment insertionAlignment = createInsertionAlignment(alignment); + insertionAlignment.incrementGapExtensions(); + alignments.add(insertionAlignment); + } + } + else if( alignment.getCurrentState() == AlignmentState.DELETION ) { + if( alignment.getGapExtensions() < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) { + // Add a potential deletion by marking a deletion and augmenting the position. + List deletionAlignments = createDeletionAlignments(bwt,alignment); + for( BWAAlignment deletionAlignment: deletionAlignments ) + deletionAlignment.incrementGapExtensions(); + alignments.addAll(deletionAlignments); + } + } + } + + // Mismatches + alignments.addAll(createMatchedAlignments(bwt,alignment,bases,allowDifferences&&allowMismatches)); + } + + return successfulMatches; + } + + /** + * Create an seeding alignment to use as a starting point when traversing. + * @param bwt source BWT. + * @return Seed alignment. + */ + private BWAAlignment createSeedAlignment(BWT bwt) { + BWAAlignment seed = new BWAAlignment(this); + seed.setNegativeStrand(bwt == forwardBWT); + seed.position = -1; + seed.loBound = 0; + seed.hiBound = bwt.length(); + return seed; + } + + /** + * Creates a new alignments representing direct matches / mismatches. + * @param bwt Source BWT with which to work. + * @param alignment Alignment for the previous position. + * @param bases The bases in the read. + * @param allowMismatch Should mismatching bases be allowed? + * @return New alignment representing this position if valid; null otherwise. + */ + private List createMatchedAlignments( BWT bwt, BWAAlignment alignment, Byte[] bases, boolean allowMismatch ) { + List newAlignments = new ArrayList(); + + List baseChoices = new ArrayList(); + Byte thisBase = bases[alignment.position+1]; + + if( allowMismatch ) + baseChoices.addAll(Bases.allOf()); + else + baseChoices.add(thisBase); + + if( thisBase != null ) { + // Keep rotating the current base to the last position until we've hit the current base. + for( ;; ) { + baseChoices.add(baseChoices.remove(0)); + if( thisBase.equals(baseChoices.get(baseChoices.size()-1)) ) + break; + + } + } + + for(byte base: baseChoices) { + BWAAlignment newAlignment = alignment.clone(); + + newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1; + newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound); + + // If this alignment is valid, skip it. + if( newAlignment.loBound > newAlignment.hiBound ) + continue; + + newAlignment.position++; + newAlignment.addState(AlignmentState.MATCH_MISMATCH); + if( bases[newAlignment.position] == null || base != bases[newAlignment.position] ) + newAlignment.incrementMismatches(); + + newAlignments.add(newAlignment); + } + + return newAlignments; + } + + /** + * Create a new alignment representing an insertion at this point in the read. + * @param alignment Alignment from which to derive the insertion. + * @return New alignment reflecting the insertion. + */ + private BWAAlignment createInsertionAlignment( BWAAlignment alignment ) { + // Add a potential insertion extension. + BWAAlignment newAlignment = alignment.clone(); + newAlignment.position++; + newAlignment.addState(AlignmentState.INSERTION); + return newAlignment; + } + + /** + * Create new alignments representing a deletion at this point in the read. + * @param bwt source BWT for inferring deletion info. + * @param alignment Alignment from which to derive the deletion. + * @return New alignments reflecting all possible deletions. + */ + private List createDeletionAlignments( BWT bwt, BWAAlignment alignment) { + List newAlignments = new ArrayList(); + for(byte base: Bases.instance) { + BWAAlignment newAlignment = alignment.clone(); + + newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1; + newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound); + + // If this alignment is valid, skip it. + if( newAlignment.loBound > newAlignment.hiBound ) + continue; + + newAlignment.addState(AlignmentState.DELETION); + + newAlignments.add(newAlignment); + } + + return newAlignments; + } + + /** + * Exactly match the given alignment against the given BWT. + * @param alignment Alignment to match. + * @param bases Bases to use. + * @param bwt BWT to use. + */ + private void exactMatch( BWAAlignment alignment, Byte[] bases, BWT bwt ) { + while( ++alignment.position < bases.length ) { + byte base = bases[alignment.position]; + alignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1; + alignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound); + if( alignment.loBound > alignment.hiBound ) + return; + } + } + + /** + * Make each base into A/C/G/T or null if unknown. + * @param bases Base string to normalize. + * @return Array of normalized bases. + */ + private Byte[] normalizeBases( byte[] bases ) { + Byte[] normalBases = new Byte[bases.length]; + for(int i = 0; i < bases.length; i++) + normalBases[i] = Bases.fromASCII(bases[i]); + return normalBases; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/LowerBound.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/LowerBound.java new file mode 100644 index 0000000000..be75142553 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/LowerBound.java @@ -0,0 +1,88 @@ +package org.broadinstitute.sting.alignment.bwa.java; + +import org.broadinstitute.sting.alignment.reference.bwt.BWT; + +import java.util.ArrayList; +import java.util.List; + +/** + * At any point along the given read, what is a good lower bound for the + * total number of differences? + * + * @author mhanna + * @version 0.1 + */ +public class LowerBound { + /** + * Lower bound of the suffix array. + */ + public final long loIndex; + + /** + * Upper bound of the suffix array. + */ + public final long hiIndex; + + /** + * Width of the bwt from loIndex -> hiIndex, inclusive. + */ + public final long width; + + /** + * The lower bound at the given point. + */ + public final int value; + + /** + * Create a new lower bound with the given value. + * @param loIndex The lower bound of the BWT. + * @param hiIndex The upper bound of the BWT. + * @param value Value for the lower bound at this site. + */ + private LowerBound(long loIndex, long hiIndex, int value) { + this.loIndex = loIndex; + this.hiIndex = hiIndex; + this.width = hiIndex - loIndex + 1; + this.value = value; + } + + /** + * Create a non-optimal bound according to the algorithm specified in Figure 3 of the BWA paper. + * @param bases Bases of the read to use when creating a new BWT. + * @param bwt BWT to check against. + * @return A list of lower bounds at every point in the reference. + * + */ + public static List create(Byte[] bases, BWT bwt) { + List bounds = new ArrayList(); + + long loIndex = 0, hiIndex = bwt.length(); + int mismatches = 0; + for( int i = bases.length-1; i >= 0; i-- ) { + Byte base = bases[i]; + + // Ignore non-ACGT bases. + if( base != null ) { + loIndex = bwt.counts(base) + bwt.occurrences(base,loIndex-1) + 1; + hiIndex = bwt.counts(base) + bwt.occurrences(base,hiIndex); + } + + if( base == null || loIndex > hiIndex ) { + loIndex = 0; + hiIndex = bwt.length(); + mismatches++; + } + bounds.add(0,new LowerBound(loIndex,hiIndex,mismatches)); + } + + return bounds; + } + + /** + * Create a string representation of this bound. + * @return String version of this bound. + */ + public String toString() { + return String.format("LowerBound: w = %d, value = %d",width,value); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/package-info.java b/public/java/src/org/broadinstitute/sting/alignment/package-info.java new file mode 100644 index 0000000000..60cf1e425a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/package-info.java @@ -0,0 +1,4 @@ +/** + * Analyses used to validate the correctness and performance the BWA Java bindings. + */ +package org.broadinstitute.sting.alignment; \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/AMBWriter.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/AMBWriter.java new file mode 100644 index 0000000000..ec10415dd1 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/AMBWriter.java @@ -0,0 +1,68 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.SAMSequenceRecord; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; + +/** + * Writes .amb files - a file indicating where 'holes' (indeterminant bases) + * exist in the contig. Currently, only empty, placeholder AMBs are supported. + * + * @author mhanna + * @version 0.1 + */ +public class AMBWriter { + /** + * Number of holes is fixed at zero. + */ + private static final int NUM_HOLES = 0; + + /** + * Input stream from which to read BWT data. + */ + private final PrintStream out; + + /** + * Create a new ANNWriter targeting the given file. + * @param file file into which ANN data should be written. + * @throws java.io.IOException if there is a problem opening the output file. + */ + public AMBWriter(File file) throws IOException { + out = new PrintStream(file); + } + + /** + * Create a new ANNWriter targeting the given OutputStream. + * @param stream Stream into which ANN data should be written. + */ + public AMBWriter(OutputStream stream) { + out = new PrintStream(stream); + } + + /** + * Write the contents of the given dictionary into the AMB file. + * Assumes that there are no holes in the dictionary. + * @param dictionary Dictionary to write. + */ + public void writeEmpty(SAMSequenceDictionary dictionary) { + long genomeLength = 0L; + for(SAMSequenceRecord sequence: dictionary.getSequences()) + genomeLength += sequence.getSequenceLength(); + + int sequences = dictionary.getSequences().size(); + + // Write the header + out.printf("%d %d %d%n",genomeLength,sequences,NUM_HOLES); + } + + /** + * Close the given output stream. + */ + public void close() { + out.close(); + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/ANNWriter.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/ANNWriter.java new file mode 100644 index 0000000000..8d692a9e77 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/ANNWriter.java @@ -0,0 +1,95 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.SAMSequenceRecord; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; + +/** + * Writes .ann files - an alternate sequence dictionary format + * used by BWA/C. For best results, the input sequence dictionary + * should be created with Picard's CreateSequenceDictionary.jar, + * TRUNCATE_NAMES_AT_WHITESPACE=false. + * + * @author mhanna + * @version 0.1 + */ +public class ANNWriter { + /** + * BWA uses a fixed seed of 11, written into every file. + */ + private static final int BNS_SEED = 11; + + /** + * A seemingly unused value that appears in every contig in the ANN. + */ + private static final int GI = 0; + + /** + * Input stream from which to read BWT data. + */ + private final PrintStream out; + + /** + * Create a new ANNWriter targeting the given file. + * @param file file into which ANN data should be written. + * @throws IOException if there is a problem opening the output file. + */ + public ANNWriter(File file) throws IOException { + out = new PrintStream(file); + } + + /** + * Create a new ANNWriter targeting the given OutputStream. + * @param stream Stream into which ANN data should be written. + */ + public ANNWriter(OutputStream stream) { + out = new PrintStream(stream); + } + + /** + * Write the contents of the given dictionary into the ANN file. + * Assumes that no ambs (blocks of indeterminate base) are present in the dictionary. + * @param dictionary Dictionary to write. + */ + public void write(SAMSequenceDictionary dictionary) { + long genomeLength = 0L; + for(SAMSequenceRecord sequence: dictionary.getSequences()) + genomeLength += sequence.getSequenceLength(); + + int sequences = dictionary.getSequences().size(); + + // Write the header + out.printf("%d %d %d%n",genomeLength,sequences,BNS_SEED); + + for(SAMSequenceRecord sequence: dictionary.getSequences()) { + String fullSequenceName = sequence.getSequenceName(); + String trimmedSequenceName = fullSequenceName; + String sequenceComment = "(null)"; + + long offset = 0; + + // Separate the sequence name from the sequence comment, based on BWA's definition. + // BWA's definition appears to accept a zero-length contig name, so mimic that behavior. + if(fullSequenceName.indexOf(' ') >= 0) { + trimmedSequenceName = fullSequenceName.substring(0,fullSequenceName.indexOf(' ')); + sequenceComment = fullSequenceName.substring(fullSequenceName.indexOf(' ')+1); + } + + // Write the sequence GI (?), name, and comment. + out.printf("%d %s %s%n",GI,trimmedSequenceName,sequenceComment); + // Write the sequence offset, length, and ambs (currently fixed at 0). + out.printf("%d %d %d%n",offset,sequence.getSequenceLength(),0); + } + } + + /** + * Close the given output stream. + */ + public void close() { + out.close(); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWT.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWT.java new file mode 100644 index 0000000000..7f8c48253d --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWT.java @@ -0,0 +1,172 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.alignment.reference.packing.PackUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +/** + * Represents the Burrows-Wheeler Transform of a reference sequence. + * + * @author mhanna + * @version 0.1 + */ +public class BWT { + /** + * Write an occurrence table after every SEQUENCE_BLOCK_SIZE bases. + * For this implementation to behave correctly, SEQUENCE_BLOCK_SIZE % 8 == 0 + */ + public static final int SEQUENCE_BLOCK_SIZE = 128; + + /** + * The inverse SA, used as a placeholder for determining where the special EOL character sits. + */ + protected final long inverseSA0; + + /** + * Cumulative counts for the entire BWT. + */ + protected final Counts counts; + + /** + * The individual sequence blocks, modelling how they appear on disk. + */ + protected final SequenceBlock[] sequenceBlocks; + + /** + * Creates a new BWT with the given inverse SA, counts, and sequence (in ASCII). + * @param inverseSA0 Inverse SA entry for the first element. Will be missing from the BWT sequence. + * @param counts Cumulative count of bases, in A,C,G,T order. + * @param sequenceBlocks The full BWT sequence, sans the '$'. + */ + public BWT( long inverseSA0, Counts counts, SequenceBlock[] sequenceBlocks ) { + this.inverseSA0 = inverseSA0; + this.counts = counts; + this.sequenceBlocks = sequenceBlocks; + } + + /** + * Creates a new BWT with the given inverse SA, occurrences, and sequence (in ASCII). + * @param inverseSA0 Inverse SA entry for the first element. Will be missing from the BWT sequence. + * @param counts Count of bases, in A,C,G,T order. + * @param sequence The full BWT sequence, sans the '$'. + */ + public BWT( long inverseSA0, Counts counts, byte[] sequence ) { + this(inverseSA0,counts,generateSequenceBlocks(sequence)); + } + + /** + * Extract the full sequence from the list of block. + * @return The full BWT string as a byte array. + */ + public byte[] getSequence() { + byte[] sequence = new byte[(int)counts.getTotal()]; + for( SequenceBlock block: sequenceBlocks ) + System.arraycopy(block.sequence,0,sequence,block.sequenceStart,block.sequenceLength); + return sequence; + } + + /** + * Get the total counts of bases lexicographically smaller than the given base, for Ferragina and Manzini's search. + * @param base The base. + * @return Total counts for all bases lexicographically smaller than this base. + */ + public long counts(byte base) { + return counts.getCumulative(base); + } + + /** + * Get the total counts of bases lexicographically smaller than the given base, for Ferragina and Manzini's search. + * @param base The base. + * @param index The position to search within the BWT. + * @return Total counts for all bases lexicographically smaller than this base. + */ + public long occurrences(byte base,long index) { + SequenceBlock block = getSequenceBlock(index); + int position = getSequencePosition(index); + long accumulator = block.occurrences.get(base); + for(int i = 0; i <= position; i++) { + if(base == block.sequence[i]) + accumulator++; + } + return accumulator; + } + + /** + * The number of bases in the BWT as a whole. + * @return Number of bases. + */ + public long length() { + return counts.getTotal(); + } + + /** + * Create a new BWT from the given reference sequence. + * @param referenceSequence Sequence from which to derive the BWT. + * @return reference sequence-derived BWT. + */ + public static BWT createFromReferenceSequence(byte[] referenceSequence) { + SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence); + + byte[] bwt = new byte[(int)suffixArray.length()-1]; + int bwtIndex = 0; + for(long suffixArrayIndex = 0; suffixArrayIndex < suffixArray.length(); suffixArrayIndex++) { + if(suffixArray.get(suffixArrayIndex) == 0) + continue; + bwt[bwtIndex++] = referenceSequence[(int)suffixArray.get(suffixArrayIndex)-1]; + } + + return new BWT(suffixArray.inverseSA0,suffixArray.occurrences,bwt); + } + + /** + * Gets the base at a given position in the BWT. + * @param index The index to use. + * @return The base at that location. + */ + protected byte getBase(long index) { + if(index == inverseSA0) + throw new ReviewedStingException(String.format("Base at index %d does not have a text representation",index)); + + SequenceBlock block = getSequenceBlock(index); + int position = getSequencePosition(index); + return block.sequence[position]; + } + + private SequenceBlock getSequenceBlock(long index) { + // If the index is above the SA-1[0], remap it to the appropriate coordinate space. + if(index > inverseSA0) index--; + return sequenceBlocks[(int)(index/SEQUENCE_BLOCK_SIZE)]; + } + + private int getSequencePosition(long index) { + // If the index is above the SA-1[0], remap it to the appropriate coordinate space. + if(index > inverseSA0) index--; + return (int)(index%SEQUENCE_BLOCK_SIZE); + } + + /** + * Create a set of sequence blocks from one long sequence. + * @param sequence Sequence from which to derive blocks. + * @return Array of sequence blocks containing data from the sequence. + */ + private static SequenceBlock[] generateSequenceBlocks( byte[] sequence ) { + Counts occurrences = new Counts(); + + int numSequenceBlocks = PackUtils.numberOfPartitions(sequence.length,SEQUENCE_BLOCK_SIZE); + SequenceBlock[] sequenceBlocks = new SequenceBlock[numSequenceBlocks]; + + for( int block = 0; block < numSequenceBlocks; block++ ) { + int blockStart = block*SEQUENCE_BLOCK_SIZE; + int blockLength = Math.min(SEQUENCE_BLOCK_SIZE, sequence.length-blockStart); + byte[] subsequence = new byte[blockLength]; + + System.arraycopy(sequence,blockStart,subsequence,0,blockLength); + + sequenceBlocks[block] = new SequenceBlock(blockStart,blockLength,occurrences.clone(),subsequence); + + for( byte base: subsequence ) + occurrences.increment(base); + } + + return sequenceBlocks; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTReader.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTReader.java new file mode 100644 index 0000000000..5c4f6d39d8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTReader.java @@ -0,0 +1,89 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.alignment.reference.packing.BasePackedInputStream; +import org.broadinstitute.sting.alignment.reference.packing.PackUtils; +import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedInputStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteOrder; +/** + * Reads a BWT from a given file. + * + * @author mhanna + * @version 0.1 + */ +public class BWTReader { + /** + * Input stream from which to read BWT data. + */ + private FileInputStream inputStream; + + /** + * Create a new BWT reader. + * @param inputFile File in which the BWT is stored. + */ + public BWTReader( File inputFile ) { + try { + this.inputStream = new FileInputStream(inputFile); + } + catch( FileNotFoundException ex ) { + throw new ReviewedStingException("Unable to open input file", ex); + } + } + + /** + * Read a BWT from the input stream. + * @return The BWT stored in the input stream. + */ + public BWT read() { + UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN); + BasePackedInputStream basePackedInputStream = new BasePackedInputStream(Integer.class, inputStream, ByteOrder.LITTLE_ENDIAN); + + long inverseSA0; + long[] count; + SequenceBlock[] sequenceBlocks; + + try { + inverseSA0 = uintPackedInputStream.read(); + count = new long[PackUtils.ALPHABET_SIZE]; + uintPackedInputStream.read(count); + + long bwtSize = count[PackUtils.ALPHABET_SIZE-1]; + sequenceBlocks = new SequenceBlock[PackUtils.numberOfPartitions(bwtSize,BWT.SEQUENCE_BLOCK_SIZE)]; + + for( int block = 0; block < sequenceBlocks.length; block++ ) { + int sequenceStart = block* BWT.SEQUENCE_BLOCK_SIZE; + int sequenceLength = (int)Math.min(BWT.SEQUENCE_BLOCK_SIZE,bwtSize-sequenceStart); + + long[] occurrences = new long[PackUtils.ALPHABET_SIZE]; + byte[] bwt = new byte[sequenceLength]; + + uintPackedInputStream.read(occurrences); + basePackedInputStream.read(bwt); + + sequenceBlocks[block] = new SequenceBlock(sequenceStart,sequenceLength,new Counts(occurrences,false),bwt); + } + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to read BWT from input stream.", ex); + } + + return new BWT(inverseSA0, new Counts(count,true), sequenceBlocks); + } + + /** + * Close the input stream. + */ + public void close() { + try { + inputStream.close(); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to close input file", ex); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTSupplementaryFileGenerator.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTSupplementaryFileGenerator.java new file mode 100644 index 0000000000..3370f79c89 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTSupplementaryFileGenerator.java @@ -0,0 +1,60 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import net.sf.samtools.SAMSequenceDictionary; + +import java.io.File; +import java.io.IOException; + +/** + * Generate BWA supplementary files (.ann, .amb) from the command line. + * + * @author mhanna + * @version 0.1 + */ +public class BWTSupplementaryFileGenerator { + enum SupplementaryFileType { ANN, AMB } + + public static void main(String[] args) throws IOException { + if(args.length < 3) + usage("Incorrect number of arguments supplied"); + + File fastaFile = new File(args[0]); + File outputFile = new File(args[1]); + SupplementaryFileType outputType = null; + try { + outputType = Enum.valueOf(SupplementaryFileType.class,args[2]); + } + catch(IllegalArgumentException ex) { + usage("Invalid output type: " + args[2]); + } + + ReferenceSequenceFile sequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(fastaFile); + SAMSequenceDictionary dictionary = sequenceFile.getSequenceDictionary(); + + switch(outputType) { + case ANN: + ANNWriter annWriter = new ANNWriter(outputFile); + annWriter.write(dictionary); + annWriter.close(); + break; + case AMB: + AMBWriter ambWriter = new AMBWriter(outputFile); + ambWriter.writeEmpty(dictionary); + ambWriter.close(); + break; + default: + usage("Unsupported output type: " + outputType); + } + } + + /** + * Print usage information and exit. + */ + private static void usage(String message) { + System.err.println(message); + System.err.println("Usage: BWTSupplementaryFileGenerator "); + System.exit(1); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTWriter.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTWriter.java new file mode 100644 index 0000000000..a813cdc9a4 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/BWTWriter.java @@ -0,0 +1,71 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.alignment.reference.packing.BasePackedOutputStream; +import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedOutputStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.*; +import java.nio.ByteOrder; + +/** + * Writes an in-memory BWT to an outputstream. + * + * @author mhanna + * @version 0.1 + */ +public class BWTWriter { + /** + * Input stream from which to read BWT data. + */ + private final OutputStream outputStream; + + /** + * Create a new BWT writer. + * @param outputFile File in which the BWT is stored. + */ + public BWTWriter( File outputFile ) { + try { + this.outputStream = new BufferedOutputStream(new FileOutputStream(outputFile)); + } + catch( FileNotFoundException ex ) { + throw new ReviewedStingException("Unable to open output file", ex); + } + } + + /** + * Write a BWT to the output stream. + * @param bwt Transform to be written to the output stream. + */ + public void write( BWT bwt ) { + UnsignedIntPackedOutputStream intPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN); + BasePackedOutputStream basePackedOutputStream = new BasePackedOutputStream(Integer.class, outputStream, ByteOrder.LITTLE_ENDIAN); + + try { + intPackedOutputStream.write(bwt.inverseSA0); + intPackedOutputStream.write(bwt.counts.toArray(true)); + + for( SequenceBlock block: bwt.sequenceBlocks ) { + intPackedOutputStream.write(block.occurrences.toArray(false)); + basePackedOutputStream.write(block.sequence); + } + + // The last block is the last set of counts in the structure. + intPackedOutputStream.write(bwt.counts.toArray(false)); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to read BWT from input stream.", ex); + } + } + + /** + * Close the input stream. + */ + public void close() { + try { + outputStream.close(); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to close input file", ex); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java new file mode 100644 index 0000000000..bc0a5b63d6 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java @@ -0,0 +1,108 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.*; + +/** + * Enhanced enum representation of a base. + * + * @author mhanna + * @version 0.1 + */ +public class Bases implements Iterable +{ + public static byte A = 'A'; + public static byte C = 'C'; + public static byte G = 'G'; + public static byte T = 'T'; + + public static final Bases instance = new Bases(); + + private static final List allBases; + + /** + * Representation of the base broken down by packed value. + */ + private static final Map basesByPack = new HashMap(); + + static { + List bases = new ArrayList(); + bases.add(A); + bases.add(C); + bases.add(G); + bases.add(T); + allBases = Collections.unmodifiableList(bases); + + for(int i = 0; i < allBases.size(); i++) + basesByPack.put(i,allBases.get(i)); + } + + /** + * Create a new base with the given ascii representation and + * pack value. + */ + private Bases() { + } + + /** + * Return all possible bases. + * @return Byte representation of all bases. + */ + public static Collection allOf() { + return allBases; + } + + /** + * Gets the number of known bases. + * @return The number of known bases. + */ + public static int size() { + return allBases.size(); + } + + /** + * Gets an iterator over the total number of known base types. + * @return Iterator over all known bases. + */ + public Iterator iterator() { + return basesByPack.values().iterator(); + } + + /** + * Get the given base from the packed representation. + * @param pack Packed representation. + * @return base. + */ + public static byte fromPack( int pack ) { return basesByPack.get(pack); } + + /** + * Convert the given base to its packed value. + * @param ascii ASCII representation of the base. + * @return Packed value. + */ + public static int toPack( byte ascii ) + { + for( Map.Entry entry: basesByPack.entrySet() ) { + if( entry.getValue().equals(ascii) ) + return entry.getKey(); + } + throw new ReviewedStingException(String.format("Base %c is an invalid base to pack", (char)ascii)); + } + + /** + * Convert the ASCII representation of a base to its 'normalized' representation. + * @param base The base itself. + * @return The byte, if present. Null if unknown. + */ + public static Byte fromASCII( byte base ) { + Byte found = null; + for( Byte normalized: allBases ) { + if( normalized.equals(base) ) { + found = normalized; + break; + } + } + return found; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Counts.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Counts.java new file mode 100644 index 0000000000..268b11ac4d --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Counts.java @@ -0,0 +1,151 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.HashMap; +import java.util.Map; + +/** + * Counts of how many bases of each type have been seen. + * + * @author mhanna + * @version 0.1 + */ +public class Counts implements Cloneable { + /** + * Internal representation of counts, broken down by ASCII value. + */ + private Map counts = new HashMap(); + + /** + * Internal representation of cumulative counts, broken down by ASCII value. + */ + private Map cumulativeCounts = new HashMap(); + + /** + * Create an empty Counts object with values A=0,C=0,G=0,T=0. + */ + public Counts() + { + for(byte base: Bases.instance) { + counts.put(base,0L); + cumulativeCounts.put(base,0L); + } + } + + /** + * Create a counts data structure with the given initial values. + * @param data Count data, broken down by base. + * @param cumulative Whether the counts are cumulative, (count_G=numA+numC+numG,for example). + */ + public Counts( long[] data, boolean cumulative ) { + if(cumulative) { + long priorCount = 0; + for(byte base: Bases.instance) { + long count = data[Bases.toPack(base)]; + counts.put(base,count-priorCount); + cumulativeCounts.put(base,priorCount); + priorCount = count; + } + } + else { + long priorCount = 0; + for(byte base: Bases.instance) { + long count = data[Bases.toPack(base)]; + counts.put(base,count); + cumulativeCounts.put(base,priorCount); + priorCount += count; + } + } + } + + /** + * Convert to an array for persistence. + * @param cumulative Use a cumulative representation. + * @return Array of count values. + */ + public long[] toArray(boolean cumulative) { + long[] countArray = new long[counts.size()]; + if(cumulative) { + int index = 0; + boolean first = true; + for(byte base: Bases.instance) { + if(first) { + first = false; + continue; + } + countArray[index++] = getCumulative(base); + } + countArray[countArray.length-1] = getTotal(); + } + else { + int index = 0; + for(byte base: Bases.instance) + countArray[index++] = counts.get(base); + } + return countArray; + } + + /** + * Create a unique copy of the current object. + * @return A duplicate of this object. + */ + public Counts clone() { + Counts other; + try { + other = (Counts)super.clone(); + } + catch(CloneNotSupportedException ex) { + throw new ReviewedStingException("Unable to clone counts object", ex); + } + other.counts = new HashMap(counts); + other.cumulativeCounts = new HashMap(cumulativeCounts); + return other; + } + + /** + * Increment the number of bases seen at the given location. + * @param base Base to increment. + */ + public void increment(byte base) { + counts.put(base,counts.get(base)+1); + boolean increment = false; + for(byte cumulative: Bases.instance) { + if(increment) cumulativeCounts.put(cumulative,cumulativeCounts.get(cumulative)+1); + increment |= (cumulative == base); + } + } + + /** + * Gets a count of the number of bases seen at a given location. + * Note that counts in this case are not cumulative (counts for A,C,G,T + * are independent). + * @param base Base for which to query counts. + * @return Number of bases of this type seen. + */ + public long get(byte base) { + return counts.get(base); + } + + /** + * Gets a count of the number of bases seen before this base. + * Note that counts in this case are cumulative. + * @param base Base for which to query counts. + * @return Number of bases of this type seen. + */ + public long getCumulative(byte base) { + return cumulativeCounts.get(base); + } + + /** + * How many total bases are represented by this count structure? + * @return Total bases represented. + */ + public long getTotal() { + int accumulator = 0; + for(byte base: Bases.instance) { + accumulator += get(base); + } + return accumulator; + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/CreateBWTFromReference.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/CreateBWTFromReference.java new file mode 100755 index 0000000000..801ab3a0b5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/CreateBWTFromReference.java @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITHoc THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment.reference.bwt; + +import net.sf.picard.reference.ReferenceSequence; +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import org.broadinstitute.sting.alignment.reference.packing.PackUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.IOException; + +/** + * Create a suffix array data structure. + * + * @author mhanna + * @version 0.1 + */ +public class CreateBWTFromReference { + private byte[] loadReference( File inputFile ) { + // Read in the first sequence in the input file + ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile); + ReferenceSequence sequence = reference.nextSequence(); + return sequence.getBases(); + } + + private byte[] loadReverseReference( File inputFile ) { + ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile); + ReferenceSequence sequence = reference.nextSequence(); + PackUtils.reverse(sequence.getBases()); + return sequence.getBases(); + } + + private Counts countOccurrences( byte[] sequence ) { + Counts occurrences = new Counts(); + for( byte base: sequence ) + occurrences.increment(base); + return occurrences; + } + + private long[] createSuffixArray( byte[] sequence ) { + return SuffixArray.createFromReferenceSequence(sequence).sequence; + } + + private long[] invertSuffixArray( long[] suffixArray ) { + long[] inverseSuffixArray = new long[suffixArray.length]; + for( int i = 0; i < suffixArray.length; i++ ) + inverseSuffixArray[(int)suffixArray[i]] = i; + return inverseSuffixArray; + } + + private long[] createCompressedSuffixArray( int[] suffixArray, int[] inverseSuffixArray ) { + long[] compressedSuffixArray = new long[suffixArray.length]; + compressedSuffixArray[0] = inverseSuffixArray[0]; + for( int i = 1; i < suffixArray.length; i++ ) + compressedSuffixArray[i] = inverseSuffixArray[suffixArray[i]+1]; + return compressedSuffixArray; + } + + private long[] createInversedCompressedSuffixArray( int[] compressedSuffixArray ) { + long[] inverseCompressedSuffixArray = new long[compressedSuffixArray.length]; + for( int i = 0; i < compressedSuffixArray.length; i++ ) + inverseCompressedSuffixArray[compressedSuffixArray[i]] = i; + return inverseCompressedSuffixArray; + } + + public static void main( String argv[] ) throws IOException { + if( argv.length != 5 ) { + System.out.println("USAGE: CreateBWTFromReference .fasta "); + return; + } + + String inputFileName = argv[0]; + File inputFile = new File(inputFileName); + + String bwtFileName = argv[1]; + File bwtFile = new File(bwtFileName); + + String rbwtFileName = argv[2]; + File rbwtFile = new File(rbwtFileName); + + String saFileName = argv[3]; + File saFile = new File(saFileName); + + String rsaFileName = argv[4]; + File rsaFile = new File(rsaFileName); + + CreateBWTFromReference creator = new CreateBWTFromReference(); + + byte[] sequence = creator.loadReference(inputFile); + byte[] reverseSequence = creator.loadReverseReference(inputFile); + + // Count the occurences of each given base. + Counts occurrences = creator.countOccurrences(sequence); + System.out.printf("Occurrences: a=%d, c=%d, g=%d, t=%d%n",occurrences.getCumulative(Bases.A), + occurrences.getCumulative(Bases.C), + occurrences.getCumulative(Bases.G), + occurrences.getCumulative(Bases.T)); + + // Generate the suffix array and print diagnostics. + long[] suffixArrayData = creator.createSuffixArray(sequence); + long[] reverseSuffixArrayData = creator.createSuffixArray(reverseSequence); + + // Invert the suffix array and print diagnostics. + long[] inverseSuffixArray = creator.invertSuffixArray(suffixArrayData); + long[] reverseInverseSuffixArray = creator.invertSuffixArray(reverseSuffixArrayData); + + SuffixArray suffixArray = new SuffixArray( inverseSuffixArray[0], occurrences, suffixArrayData ); + SuffixArray reverseSuffixArray = new SuffixArray( reverseInverseSuffixArray[0], occurrences, reverseSuffixArrayData ); + + /* + // Create the data structure for the compressed suffix array and print diagnostics. + int[] compressedSuffixArray = creator.createCompressedSuffixArray(suffixArray.sequence,inverseSuffixArray); + int reconstructedInverseSA = compressedSuffixArray[0]; + for( int i = 0; i < 8; i++ ) { + System.out.printf("compressedSuffixArray[%d] = %d (SA-1[%d] = %d)%n", i, compressedSuffixArray[i], i, reconstructedInverseSA); + reconstructedInverseSA = compressedSuffixArray[reconstructedInverseSA]; + } + + // Create the data structure for the inverse compressed suffix array and print diagnostics. + int[] inverseCompressedSuffixArray = creator.createInversedCompressedSuffixArray(compressedSuffixArray); + for( int i = 0; i < 8; i++ ) { + System.out.printf("inverseCompressedSuffixArray[%d] = %d%n", i, inverseCompressedSuffixArray[i]); + } + */ + + // Create the BWT. + BWT bwt = BWT.createFromReferenceSequence(sequence); + BWT reverseBWT = BWT.createFromReferenceSequence(reverseSequence); + + byte[] bwtSequence = bwt.getSequence(); + System.out.printf("BWT: %s... (length = %d)%n", new String(bwtSequence,0,80),bwt.length()); + + BWTWriter bwtWriter = new BWTWriter(bwtFile); + bwtWriter.write(bwt); + bwtWriter.close(); + + BWTWriter reverseBWTWriter = new BWTWriter(rbwtFile); + reverseBWTWriter.write(reverseBWT); + reverseBWTWriter.close(); + + /* + SuffixArrayWriter saWriter = new SuffixArrayWriter(saFile); + saWriter.write(suffixArray); + saWriter.close(); + + SuffixArrayWriter reverseSAWriter = new SuffixArrayWriter(rsaFile); + reverseSAWriter.write(reverseSuffixArray); + reverseSAWriter.close(); + */ + + File existingBWTFile = new File(inputFileName+".bwt"); + BWTReader existingBWTReader = new BWTReader(existingBWTFile); + BWT existingBWT = existingBWTReader.read(); + + byte[] existingBWTSequence = existingBWT.getSequence(); + System.out.printf("Existing BWT: %s... (length = %d)%n",new String(existingBWTSequence,0,80),existingBWT.length()); + + for( int i = 0; i < bwt.length(); i++ ) { + if( bwtSequence[i] != existingBWTSequence[i] ) + throw new ReviewedStingException("BWT mismatch at " + i); + } + + File existingSAFile = new File(inputFileName+".sa"); + SuffixArrayReader existingSuffixArrayReader = new SuffixArrayReader(existingSAFile,existingBWT); + SuffixArray existingSuffixArray = existingSuffixArrayReader.read(); + + for(int i = 0; i < suffixArray.length(); i++) { + if( i % 10000 == 0 ) + System.out.printf("Validating suffix array entry %d%n", i); + if( suffixArray.get(i) != existingSuffixArray.get(i) ) + throw new ReviewedStingException(String.format("Suffix array mismatch at %d; SA is %d; should be %d",i,existingSuffixArray.get(i),suffixArray.get(i))); + } + } + +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SequenceBlock.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SequenceBlock.java new file mode 100644 index 0000000000..13714de1ed --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SequenceBlock.java @@ -0,0 +1,41 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +/** + * Models a block of bases within the BWT. + */ +public class SequenceBlock { + /** + * Start position of this sequence within the BWT. + */ + public final int sequenceStart; + + /** + * Length of this sequence within the BWT. + */ + public final int sequenceLength; + + + /** + * Occurrences of each letter up to this sequence block. + */ + public final Counts occurrences; + + /** + * Sequence for this segment. + */ + public final byte[] sequence; + + /** + * Create a new block within this BWT. + * @param sequenceStart Starting position of this sequence within the BWT. + * @param sequenceLength Length of this sequence. + * @param occurrences How many of each base has been seen before this sequence began. + * @param sequence The actual sequence from the BWT. + */ + public SequenceBlock( int sequenceStart, int sequenceLength, Counts occurrences, byte[] sequence ) { + this.sequenceStart = sequenceStart; + this.sequenceLength = sequenceLength; + this.occurrences = occurrences; + this.sequence = sequence; + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArray.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArray.java new file mode 100644 index 0000000000..49af98bb9d --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArray.java @@ -0,0 +1,158 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import net.sf.samtools.util.StringUtil; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.Comparator; +import java.util.TreeSet; + +/** + * An in-memory representation of a suffix array. + * + * @author mhanna + * @version 0.1 + */ +public class SuffixArray { + public final long inverseSA0; + public final Counts occurrences; + + /** + * The elements of the sequence actually stored in memory. + */ + protected final long[] sequence; + + /** + * How often are individual elements in the sequence actually stored + * in memory, as opposed to being calculated on the fly? + */ + protected final int sequenceInterval; + + /** + * The BWT used to calculate missing portions of the sequence. + */ + protected final BWT bwt; + + public SuffixArray(long inverseSA0, Counts occurrences, long[] sequence) { + this(inverseSA0,occurrences,sequence,1,null); + } + + /** + * Creates a new sequence array with the given inverse SA, occurrences, and values. + * @param inverseSA0 Inverse SA entry for the first element. + * @param occurrences Cumulative number of occurrences of A,C,G,T, in order. + * @param sequence The full suffix array. + * @param sequenceInterval How frequently is the sequence interval stored. + * @param bwt bwt used to infer the remaining entries in the BWT. + */ + public SuffixArray(long inverseSA0, Counts occurrences, long[] sequence, int sequenceInterval, BWT bwt) { + this.inverseSA0 = inverseSA0; + this.occurrences = occurrences; + this.sequence = sequence; + this.sequenceInterval = sequenceInterval; + this.bwt = bwt; + + if(sequenceInterval != 1 && bwt == null) + throw new ReviewedStingException("A BWT must be provided if the sequence interval is not 1"); + } + + /** + * Retrieves the length of the sequence array. + * @return Length of the suffix array. + */ + public long length() { + if( bwt != null ) + return bwt.length()+1; + else + return sequence.length; + } + + /** + * Get the suffix array value at a given sequence. + * @param index Index at which to retrieve the suffix array vaule. + * @return The suffix array value at that entry. + */ + public long get(long index) { + int iterations = 0; + while(index%sequenceInterval != 0) { + // The inverseSA0 ('$') doesn't have a usable ASCII representation; it must be treated as a special case. + if(index == inverseSA0) + index = 0; + else { + byte base = bwt.getBase(index); + index = bwt.counts(base) + bwt.occurrences(base,index); + } + iterations++; + } + return (sequence[(int)(index/sequenceInterval)]+iterations) % length(); + } + + /** + * Create a suffix array from a given reference sequence. + * @param sequence The reference sequence to use when building the suffix array. + * @return a constructed suffix array. + */ + public static SuffixArray createFromReferenceSequence(byte[] sequence) { + // The builder for the suffix array. Use an integer in this case because + // Java arrays can only hold an integer. + TreeSet suffixArrayBuilder = new TreeSet(new SuffixArrayComparator(sequence)); + + Counts occurrences = new Counts(); + for( byte base: sequence ) + occurrences.increment(base); + + // Build out the suffix array using a custom comparator. + for( int i = 0; i <= sequence.length; i++ ) + suffixArrayBuilder.add(i); + + // Copy the suffix array into an array. + long[] suffixArray = new long[suffixArrayBuilder.size()]; + int i = 0; + for( Integer element: suffixArrayBuilder ) + suffixArray[i++] = element; + + // Find the first element in the inverse suffix array. + long inverseSA0 = -1; + for(i = 0; i < suffixArray.length; i++) { + if(suffixArray[i] == 0) + inverseSA0 = i; + } + if(inverseSA0 < 0) + throw new ReviewedStingException("Unable to find first inverse SA entry in generated suffix array."); + + return new SuffixArray(inverseSA0,occurrences,suffixArray); + } + + /** + * Compares two suffix arrays of the given sequence. Will return whichever string appears + * first in lexicographic order. + */ + private static class SuffixArrayComparator implements Comparator { + /** + * The data source for all suffix arrays. + */ + private final String sequence; + + /** + * Create a new comparator. + * @param sequence Reference sequence to use as basis for comparison. + */ + public SuffixArrayComparator( byte[] sequence ) { + // Processing the suffix array tends to be easier as a string. + this.sequence = StringUtil.bytesToString(sequence); + } + + /** + * Compare the two given suffix arrays. Criteria for comparison is the lexicographic order of + * the two substrings sequence[lhs:], sequence[rhs:]. + * @param lhs Left-hand side of comparison. + * @param rhs Right-hand side of comparison. + * @return How the suffix arrays represented by lhs, rhs compare. + */ + public int compare( Integer lhs, Integer rhs ) { + String lhsSuffixArray = sequence.substring(lhs); + String rhsSuffixArray = sequence.substring(rhs); + return lhsSuffixArray.compareTo(rhsSuffixArray); + } + } + +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayReader.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayReader.java new file mode 100644 index 0000000000..b48e4c69cc --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayReader.java @@ -0,0 +1,85 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.alignment.reference.packing.PackUtils; +import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedInputStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteOrder; + +/** + * A reader for suffix arrays in permanent storage. + * + * @author mhanna + * @version 0.1 + */ +public class SuffixArrayReader { + /** + * Input stream from which to read suffix array data. + */ + private FileInputStream inputStream; + + /** + * BWT to use to fill in missing data. + */ + private BWT bwt; + + /** + * Create a new suffix array reader. + * @param inputFile File in which the suffix array is stored. + * @param bwt BWT to use when filling in missing data. + */ + public SuffixArrayReader(File inputFile, BWT bwt) { + try { + this.inputStream = new FileInputStream(inputFile); + this.bwt = bwt; + } + catch( FileNotFoundException ex ) { + throw new ReviewedStingException("Unable to open input file", ex); + } + } + + /** + * Read a suffix array from the input stream. + * @return The suffix array stored in the input stream. + */ + public SuffixArray read() { + UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN); + + long inverseSA0; + long[] occurrences; + long[] suffixArray; + int suffixArrayInterval; + + try { + inverseSA0 = uintPackedInputStream.read(); + occurrences = new long[PackUtils.ALPHABET_SIZE]; + uintPackedInputStream.read(occurrences); + // Throw away the suffix array size in bytes and use the occurrences table directly. + suffixArrayInterval = (int)uintPackedInputStream.read(); + suffixArray = new long[(int)((occurrences[occurrences.length-1]+suffixArrayInterval-1)/suffixArrayInterval)]; + uintPackedInputStream.read(suffixArray); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to read BWT from input stream.", ex); + } + + return new SuffixArray(inverseSA0, new Counts(occurrences,true), suffixArray, suffixArrayInterval, bwt); + } + + + /** + * Close the input stream. + */ + public void close() { + try { + inputStream.close(); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to close input file", ex); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayWriter.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayWriter.java new file mode 100644 index 0000000000..b6f79be2f8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArrayWriter.java @@ -0,0 +1,67 @@ +package org.broadinstitute.sting.alignment.reference.bwt; + +import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedOutputStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.*; +import java.nio.ByteOrder; + +/** + * Javadoc goes here. + * + * @author mhanna + * @version 0.1 + */ +public class SuffixArrayWriter { + /** + * Input stream from which to read suffix array data. + */ + private OutputStream outputStream; + + /** + * Create a new suffix array reader. + * @param outputFile File in which the suffix array is stored. + */ + public SuffixArrayWriter( File outputFile ) { + try { + this.outputStream = new BufferedOutputStream(new FileOutputStream(outputFile)); + } + catch( FileNotFoundException ex ) { + throw new ReviewedStingException("Unable to open input file", ex); + } + } + + /** + * Write a suffix array to the output stream. + * @param suffixArray suffix array to write. + */ + public void write(SuffixArray suffixArray) { + UnsignedIntPackedOutputStream uintPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN); + + try { + uintPackedOutputStream.write(suffixArray.inverseSA0); + uintPackedOutputStream.write(suffixArray.occurrences.toArray(true)); + // How frequently the suffix array entry is placed. + uintPackedOutputStream.write(1); + // Length of the suffix array. + uintPackedOutputStream.write(suffixArray.length()-1); + uintPackedOutputStream.write(suffixArray.sequence,1,suffixArray.sequence.length-1); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to read BWT from input stream.", ex); + } + } + + + /** + * Close the input stream. + */ + public void close() { + try { + outputStream.close(); + } + catch( IOException ex ) { + throw new ReviewedStingException("Unable to close input file", ex); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedInputStream.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedInputStream.java new file mode 100644 index 0000000000..174a9853b5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedInputStream.java @@ -0,0 +1,95 @@ +package org.broadinstitute.sting.alignment.reference.packing; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; + +/** + * Reads a packed version of the input stream. + * + * @author mhanna + * @version 0.1 + */ +public class BasePackedInputStream { + /** + * Type of object to unpack. + */ + private final Class type; + + /** + * Ultimate source for packed bases. + */ + private final FileInputStream targetInputStream; + + /** + * Channel source for packed bases. + */ + private final FileChannel targetInputChannel; + + /** + * A fixed-size buffer for word-packed data. + */ + private final ByteOrder byteOrder; + + /** + * How many bases are in a given packed word. + */ + private final int basesPerPackedWord = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BASE; + + /** + * How many bytes in an integer? + */ + private final int bytesPerInteger = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE; + + + public BasePackedInputStream( Class type, File inputFile, ByteOrder byteOrder ) throws FileNotFoundException { + this(type,new FileInputStream(inputFile),byteOrder); + } + + public BasePackedInputStream( Class type, FileInputStream inputStream, ByteOrder byteOrder ) { + if( type != Integer.class ) + throw new ReviewedStingException("Only bases packed into 32-bit words are currently supported by this input stream. Type specified: " + type.getName()); + this.type = type; + this.targetInputStream = inputStream; + this.targetInputChannel = inputStream.getChannel(); + this.byteOrder = byteOrder; + } + + /** + * Read the entire contents of the input stream. + * @param bwt array into which bases should be read. + * @throws IOException if an I/O error occurs. + */ + public void read(byte[] bwt) throws IOException { + read(bwt,0,bwt.length); + } + + /** + * Read the next length bases into the bwt array, starting at the given offset. + * @param bwt array holding the given data. + * @param offset target position in the bases array into which bytes should be written. + * @param length number of bases to read from the stream. + * @throws IOException if an I/O error occurs. + */ + public void read(byte[] bwt, int offset, int length) throws IOException { + int bufferWidth = ((bwt.length+basesPerPackedWord-1)/basesPerPackedWord)*bytesPerInteger; + ByteBuffer buffer = ByteBuffer.allocate(bufferWidth).order(byteOrder); + targetInputChannel.read(buffer); + targetInputChannel.position(targetInputChannel.position()+buffer.remaining()); + buffer.flip(); + + int packedWord = 0; + int i = 0; + while(i < length) { + if(i % basesPerPackedWord == 0) packedWord = buffer.getInt(); + int position = basesPerPackedWord - i%basesPerPackedWord - 1; + bwt[offset+i++] = PackUtils.unpackBase((byte)((packedWord >> position*PackUtils.BITS_PER_BASE) & 0x3)); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedOutputStream.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedOutputStream.java new file mode 100644 index 0000000000..c62f40e512 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/BasePackedOutputStream.java @@ -0,0 +1,140 @@ +package org.broadinstitute.sting.alignment.reference.packing; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.*; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/** + * A general-purpose stream for writing packed bases. + * + * @author mhanna + * @version 0.1 + */ +public class BasePackedOutputStream { + /** + * Type of object to pack. + */ + private final Class type; + + /** + * How many bases can be stored in the given data structure? + */ + private final int basesPerType; + + /** + * Ultimate target for the packed bases. + */ + private final OutputStream targetOutputStream; + + /** + * A fixed-size buffer for word-packed data. + */ + private final ByteBuffer buffer; + + public BasePackedOutputStream( Class type, File outputFile, ByteOrder byteOrder ) throws FileNotFoundException { + this(type,new BufferedOutputStream(new FileOutputStream(outputFile)),byteOrder); + } + + /** + * Write packed bases to the given output stream. + * @param type Type of data to pack bases into. + * @param outputStream Output stream to which to write packed bases. + * @param byteOrder Switch between big endian / little endian when reading / writing files. + */ + public BasePackedOutputStream( Class type, OutputStream outputStream, ByteOrder byteOrder) { + this.targetOutputStream = outputStream; + this.type = type; + basesPerType = PackUtils.bitsInType(type)/PackUtils.BITS_PER_BASE; + this.buffer = ByteBuffer.allocate(basesPerType/PackUtils.ALPHABET_SIZE).order(byteOrder); + } + + /** + * Writes the given base to the output stream. Will write only this base; no packing will be performed. + * @param base List of bases to write. + * @throws IOException if an I/O error occurs. + */ + public void write( int base ) throws IOException { + write( new byte[] { (byte)base } ); + } + + /** + * Writes an array of bases to the target output stream. + * @param bases List of bases to write. + * @throws IOException if an I/O error occurs. + */ + public void write( byte[] bases ) throws IOException { + write(bases,0,bases.length); + } + + /** + * Writes a subset of the array of bases to the output stream. + * @param bases List of bases to write. + * @param offset site at which to start writing. + * @param length number of bases to write. + * @throws IOException if an I/O error occurs. + */ + public void write( byte[] bases, int offset, int length ) throws IOException { + int packedBases = 0; + int positionInPack = 0; + + for( int base = offset; base < offset+length; base++ ) { + packedBases = packBase(bases[base], packedBases, positionInPack); + + // Increment the packed counter. If all possible bases have been squeezed into this byte, write it out. + positionInPack = ++positionInPack % basesPerType; + if( positionInPack == 0 ) { + writePackedBases(packedBases); + packedBases = 0; + } + } + + if( positionInPack > 0 ) + writePackedBases(packedBases); + } + + /** + * Flush the contents of the OutputStream to disk. + * @throws IOException if an I/O error occurs. + */ + public void flush() throws IOException { + targetOutputStream.flush(); + } + + /** + * Closes the given output stream. + * @throws IOException if an I/O error occurs. + */ + public void close() throws IOException { + targetOutputStream.close(); + } + + /** + * Pack the given base into the basepack. + * @param base The base to pack. + * @param basePack Target for the pack operation. + * @param position Position within the pack to which to add the base. + * @return The packed integer. + */ + private int packBase( byte base, int basePack, int position ) { + basePack |= (PackUtils.packBase(base) << 2*(basesPerType-position-1)); + return basePack; + } + + /** + * Write the given packed base structure to the output file. + * @param packedBases Packed bases to write. + * @throws IOException on error writing to the file. + */ + private void writePackedBases(int packedBases) throws IOException { + buffer.rewind(); + if( type == Integer.class ) + buffer.putInt(packedBases); + else if( type == Byte.class ) + buffer.put((byte)packedBases); + else + throw new ReviewedStingException("Cannot pack bases into type " + type.getName()); + targetOutputStream.write(buffer.array()); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/CreatePACFromReference.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/CreatePACFromReference.java new file mode 100755 index 0000000000..561535e29f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/CreatePACFromReference.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment.reference.packing; + +import net.sf.picard.reference.ReferenceSequence; +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFileFactory; + +import java.io.File; +import java.io.IOException; + +/** + * Generate a .PAC file from a given reference. + * + * @author hanna + * @version 0.1 + */ + +public class CreatePACFromReference { + public static void main( String argv[] ) throws IOException { + if( argv.length != 3 ) { + System.out.println("USAGE: CreatePACFromReference .fasta "); + return; + } + + // Read in the first sequence in the input file + String inputFileName = argv[0]; + File inputFile = new File(inputFileName); + ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile); + ReferenceSequence sequence = reference.nextSequence(); + + // Target file for output + PackUtils.writeReferenceSequence( new File(argv[1]), sequence.getBases() ); + + // Reverse the bases in the reference + PackUtils.reverse(sequence.getBases()); + + // Target file for output + PackUtils.writeReferenceSequence( new File(argv[2]), sequence.getBases() ); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/PackUtils.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/PackUtils.java new file mode 100644 index 0000000000..972e31cf0f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/PackUtils.java @@ -0,0 +1,135 @@ +package org.broadinstitute.sting.alignment.reference.packing; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteOrder; + +/** + * Utilities designed for packing / unpacking bases. + * + * @author mhanna + * @version 0.1 + */ +public class PackUtils { + /** + * How many possible bases can be encoded? + */ + public static final int ALPHABET_SIZE = 4; + + /** + * How many bits does it take to store a single base? + */ + public static final int BITS_PER_BASE = (int)(Math.log(ALPHABET_SIZE)/Math.log(2)); + + /** + * How many bits fit into a single byte? + */ + public static final int BITS_PER_BYTE = 8; + + /** + * Writes a reference sequence to a PAC file. + * @param outputFile Filename for the PAC file. + * @param referenceSequence Reference sequence to write. + * @throws IOException If there's a problem writing to the output file. + */ + public static void writeReferenceSequence( File outputFile, byte[] referenceSequence ) throws IOException { + OutputStream outputStream = new FileOutputStream(outputFile); + + BasePackedOutputStream basePackedOutputStream = new BasePackedOutputStream(Byte.class, outputStream, ByteOrder.BIG_ENDIAN); + basePackedOutputStream.write(referenceSequence); + + outputStream.write(referenceSequence.length%PackUtils.ALPHABET_SIZE); + + outputStream.close(); + } + + + /** + * How many bits can a given type hold? + * @param type Type to test. + * @return Number of bits that the given type can hold. + */ + public static int bitsInType( Class type ) { + try { + long typeSize = type.getField("MAX_VALUE").getLong(null) - type.getField("MIN_VALUE").getLong(null)+1; + long intTypeSize = (long)Integer.MAX_VALUE - (long)Integer.MIN_VALUE + 1; + if( typeSize > intTypeSize ) + throw new ReviewedStingException("Cannot determine number of bits available in type: " + type.getName()); + return (int)(Math.log(typeSize)/Math.log(2)); + } + catch( NoSuchFieldException ex ) { + throw new ReviewedStingException("Cannot determine number of bits available in type: " + type.getName(),ex); + } + catch( IllegalAccessException ex ) { + throw new ReviewedStingException("Cannot determine number of bits available in type: " + type.getName(),ex); + } + } + + /** + * Gets the two-bit representation of a base. A=00b, C=01b, G=10b, T=11b. + * @param base ASCII value for the base to pack. + * @return A byte from 0-3 indicating the base's packed value. + */ + public static byte packBase(byte base) { + switch( base ) { + case 'A': + return 0; + case 'C': + return 1; + case 'G': + return 2; + case 'T': + return 3; + default: + throw new ReviewedStingException("Unknown base type: " + base); + } + } + + /** + * Converts a two-bit representation of a base into an ASCII representation of a base. + * @param pack Byte from 0-3 indicating which base is represented. + * @return An ASCII value representing the packed base. + */ + public static byte unpackBase(byte pack) { + switch( pack ) { + case 0: + return 'A'; + case 1: + return 'C'; + case 2: + return 'G'; + case 3: + return 'T'; + default: + throw new ReviewedStingException("Unknown pack type: " + pack); + } + } + + /** + * Reverses an unpacked sequence of bases. + * @param bases bases to reverse. + */ + public static void reverse( byte[] bases ) { + for( int i = 0, j = bases.length-1; i < j; i++, j-- ) { + byte temp = bases[j]; + bases[j] = bases[i]; + bases[i] = temp; + } + } + + /** + * Given a structure of size size that should be split + * into partitionSize partitions, how many partitions should + * be created? Size of last partition will be <= partitionSize. + * @param size Total size of the data structure. + * @param partitionSize Size of an individual partition. + * @return Number of partitions that would be created. + */ + public static int numberOfPartitions( long size, long partitionSize ) { + return (int)((size+partitionSize-1) / partitionSize); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedInputStream.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedInputStream.java new file mode 100644 index 0000000000..999e544513 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedInputStream.java @@ -0,0 +1,104 @@ +package org.broadinstitute.sting.alignment.reference.packing; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; + +/** + * Read a set of integers packed into + * + * @author mhanna + * @version 0.1 + */ +public class UnsignedIntPackedInputStream { + /** + * Ultimate target for the occurrence array. + */ + private final FileInputStream targetInputStream; + + /** + * Target channel from which to pull file data. + */ + private final FileChannel targetInputChannel; + + /** + * The byte order in which integer input data appears. + */ + private final ByteOrder byteOrder; + + /** + * How many bytes are required to store an integer? + */ + private final int bytesPerInteger = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE; + + /** + * Create a new PackedIntInputStream, writing to the given target file. + * @param inputFile target input file. + * @param byteOrder Endianness to use when writing a list of integers. + * @throws java.io.IOException if an I/O error occurs. + */ + public UnsignedIntPackedInputStream(File inputFile, ByteOrder byteOrder) throws IOException { + this(new FileInputStream(inputFile),byteOrder); + } + + /** + * Read ints from the given InputStream. + * @param inputStream Input stream from which to read ints. + * @param byteOrder Endianness to use when writing a list of integers. + */ + public UnsignedIntPackedInputStream(FileInputStream inputStream, ByteOrder byteOrder) { + this.targetInputStream = inputStream; + this.targetInputChannel = inputStream.getChannel(); + this.byteOrder = byteOrder; + } + + /** + * Read a datum from the input stream. + * @return The next input datum in the stream. + * @throws IOException if an I/O error occurs. + */ + public long read() throws IOException { + long[] data = new long[1]; + read(data); + return data[0]; + } + + /** + * Read the data from the input stream. + * @param data placeholder for input data. + * @throws IOException if an I/O error occurs. + */ + public void read( long[] data ) throws IOException { + read( data, 0, data.length ); + } + + /** + * Read the data from the input stream, starting at the given offset. + * @param data placeholder for input data. + * @param offset place in the array to start reading in data. + * @param length number of ints to read in. + * @throws IOException if an I/O error occurs. + */ + public void read( long[] data, int offset, int length ) throws IOException { + ByteBuffer readBuffer = ByteBuffer.allocate(bytesPerInteger*length).order(byteOrder); + + targetInputChannel.read(readBuffer,targetInputChannel.position()); + readBuffer.flip(); + targetInputChannel.position(targetInputChannel.position()+readBuffer.remaining()); + + int i = 0; + while(i < length) + data[offset+i++] = readBuffer.getInt() & 0xFFFFFFFFL; + } + + /** + * Closes the given output stream. + * @throws IOException if an I/O error occurs. + */ + public void close() throws IOException { + targetInputStream.close(); + } +} diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedOutputStream.java b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedOutputStream.java new file mode 100755 index 0000000000..b020243661 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/packing/UnsignedIntPackedOutputStream.java @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.alignment.reference.packing; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/** + * Writes an list of integers to the output file. + * + * @author mhanna + * @version 0.1 + */ +public class UnsignedIntPackedOutputStream { + /** + * Ultimate target for the occurrence array. + */ + private final OutputStream targetOutputStream; + + /** + * A fixed-size buffer for int-packed data. + */ + private final ByteBuffer buffer; + + /** + * Create a new PackedIntOutputStream, writing to the given target file. + * @param outputFile target output file. + * @param byteOrder Endianness to use when writing a list of integers. + * @throws IOException if an I/O error occurs. + */ + public UnsignedIntPackedOutputStream(File outputFile, ByteOrder byteOrder) throws IOException { + this(new FileOutputStream(outputFile),byteOrder); + } + + /** + * Write packed ints to the given OutputStream. + * @param outputStream Output stream to which to write packed ints. + * @param byteOrder Endianness to use when writing a list of integers. + */ + public UnsignedIntPackedOutputStream(OutputStream outputStream, ByteOrder byteOrder) { + this.targetOutputStream = outputStream; + buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder); + } + + /** + * Write the data to the output stream. + * @param datum datum to write. + * @throws IOException if an I/O error occurs. + */ + public void write( long datum ) throws IOException { + buffer.rewind(); + buffer.putInt((int)datum); + targetOutputStream.write(buffer.array()); + } + + /** + * Write the data to the output stream. + * @param data data to write. occurrences.length must match alphabet size. + * @throws IOException if an I/O error occurs. + */ + public void write( long[] data ) throws IOException { + for(long datum: data) + write(datum); + } + + /** + * Write the given chunk of data to the input stream. + * @param data data to write. + * @param offset position at which to start. + * @param length number of ints to write. + * @throws IOException if an I/O error occurs. + */ + public void write( long[] data, int offset, int length ) throws IOException { + for( int i = offset; i < offset+length; i++ ) + write(data[i]); + } + + /** + * Flush the contents of the OutputStream to disk. + * @throws IOException if an I/O error occurs. + */ + public void flush() throws IOException { + targetOutputStream.flush(); + } + + /** + * Closes the given output stream. + * @throws IOException if an I/O error occurs. + */ + public void close() throws IOException { + targetOutputStream.close(); + } + +} diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index f8e298d88e..b9e380295f 100755 --- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -25,20 +25,21 @@ package org.broadinstitute.sting.analyzecovariates; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.gatk.walkers.recalibration.*; +import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate; +import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum; +import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.commandline.CommandLineProgram; -import org.broadinstitute.sting.commandline.Argument; +import java.io.*; import java.util.ArrayList; import java.util.Collection; -import java.util.List; import java.util.Map; import java.util.regex.Pattern; -import java.io.*; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/commandline/Argument.java b/public/java/src/org/broadinstitute/sting/commandline/Argument.java index b2ee9d1fcb..33592287d3 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/Argument.java +++ b/public/java/src/org/broadinstitute/sting/commandline/Argument.java @@ -25,12 +25,7 @@ package org.broadinstitute.sting.commandline; -import java.lang.annotation.Documented; -import java.lang.annotation.ElementType; -import java.lang.annotation.Inherited; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; +import java.lang.annotation.*; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java index c36a8e04f1..b47677b08e 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java @@ -27,10 +27,10 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.List; -import java.util.Collections; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; +import java.util.List; /** * A group of argument definitions. diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java index 39e698ca33..9f92df6e08 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java @@ -27,10 +27,10 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.Set; -import java.util.HashSet; import java.util.Collection; +import java.util.HashSet; import java.util.Iterator; +import java.util.Set; /** * A collection of argument definitions. diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 6c50e17846..9c33e084d3 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -25,12 +25,12 @@ package org.broadinstitute.sting.commandline; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.walkers.Multiplex; import org.broadinstitute.sting.gatk.walkers.Multiplexer; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.lang.annotation.Annotation; diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index d404a2b6e7..aba4fc109f 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -25,14 +25,20 @@ package org.broadinstitute.sting.commandline; -import org.apache.log4j.*; +import org.apache.log4j.FileAppender; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.HelpFormatter; import java.io.IOException; -import java.util.*; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.Locale; public abstract class CommandLineProgram { diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java index 99608f1678..bd20063880 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java @@ -25,11 +25,17 @@ package org.broadinstitute.sting.commandline; -import org.apache.log4j.*; +import org.apache.log4j.Appender; +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; import java.lang.annotation.Annotation; +import java.util.Collections; +import java.util.Enumeration; +import java.util.LinkedHashMap; +import java.util.Map; /** * Static utility methods for working with command-line arguments. diff --git a/public/java/src/org/broadinstitute/sting/commandline/MissingArgumentValueException.java b/public/java/src/org/broadinstitute/sting/commandline/MissingArgumentValueException.java index 8029db7b3e..4e6c3a16fa 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/MissingArgumentValueException.java +++ b/public/java/src/org/broadinstitute/sting/commandline/MissingArgumentValueException.java @@ -26,8 +26,6 @@ import org.broadinstitute.sting.utils.Utils; -import java.util.Collection; - /** * Specifies that a value was missing when attempting to populate an argument. */ diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index 717c5c5228..8423bb2f2e 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -25,16 +25,16 @@ package org.broadinstitute.sting.commandline; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.HelpFormatter; -import org.apache.log4j.Logger; -import java.lang.reflect.*; +import java.lang.reflect.Field; import java.util.*; /** diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java index d1cda3ed95..a070cb5a11 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java @@ -27,10 +27,8 @@ import org.broadinstitute.sting.utils.Utils; -import java.util.regex.Pattern; import java.util.regex.Matcher; -import java.util.List; -import java.util.ArrayList; +import java.util.regex.Pattern; /** * Holds a pattern, along with how to get to the argument definitions that could match that pattern. diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index fd7e749c38..a080ab4390 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -25,30 +25,21 @@ package org.broadinstitute.sting.gatk; -import org.broadinstitute.sting.commandline.Tags; -import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; -import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.commandline.CommandLineProgram; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; -import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; -import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.Walker; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.*; - -import net.sf.picard.filter.SamRecordFilter; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.ListFileUtils; -import org.broadinstitute.sting.utils.text.XReadLines; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; /** * @author aaron diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index 7982f61e21..da2be74bf6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -26,13 +26,15 @@ package org.broadinstitute.sting.gatk; import org.broad.tribble.TribbleException; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.walkers.Attribution; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.help.ApplicationDetails; -import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index f8527c33bd..918bc12517 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -28,24 +28,14 @@ import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.*; import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.ArgumentException; -import org.broadinstitute.sting.commandline.ArgumentSource; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.commandline.CommandLineUtils; -import org.broadinstitute.sting.commandline.ParsingEngine; -import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.datasources.reads.*; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; -import org.broadinstitute.sting.gatk.datasources.reads.MonolithicShardStrategy; -import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; -import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategyFactory; -import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.gatk.executive.MicroScheduler; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.ReadFilter; @@ -65,6 +55,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java index 27a86ab3f2..7cb615f7fb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java @@ -25,12 +25,11 @@ package org.broadinstitute.sting.gatk; import net.sf.picard.filter.SamRecordFilter; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.Map; -import java.util.HashMap; import java.util.Collections; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import java.util.HashMap; +import java.util.Map; /** * Holds a bunch of basic information about the traversal. diff --git a/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java b/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java index 1a361029a1..93fa2d146c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java +++ b/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk; -import net.sf.picard.filter.SamRecordFilter; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileReader; diff --git a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index 9553f651ea..cf190835e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -25,19 +25,18 @@ package org.broadinstitute.sting.gatk; -import net.sf.picard.filter.SamRecordFilter; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.text.TextFormattingUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.help.DescriptionTaglet; import org.broadinstitute.sting.utils.help.DisplayNameTaglet; import org.broadinstitute.sting.utils.help.SummaryTaglet; -import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 93638f21d0..ee2e850253 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -26,17 +26,16 @@ package org.broadinstitute.sting.gatk.arguments; import net.sf.samtools.SAMFileReader; -import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; -import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.DownsamplingMethod; -import org.broadinstitute.sting.utils.interval.IntervalSetRule; +import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.simpleframework.xml.*; import org.simpleframework.xml.core.Persister; import org.simpleframework.xml.stream.Format; diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java index 337c2664cb..17e4a07432 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java @@ -26,13 +26,13 @@ package org.broadinstitute.sting.gatk.contexts; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import java.util.*; +import java.util.List; /** * Useful class for forwarding on locusContext data from this iterator diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java index 6a0d30837c..1f9a7d705a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java @@ -27,8 +27,8 @@ import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java index 760b3a7bc0..376064cdb5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java @@ -25,11 +25,12 @@ package org.broadinstitute.sting.gatk.contexts; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.BaseUtils; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import net.sf.samtools.util.StringUtil; -import com.google.java.contract.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; /** * The section of the reference that overlaps with the given diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java index b36c59a2c0..e92599494c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java @@ -1,16 +1,14 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.ArrayList; -import java.util.Collections; - -import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; import org.broadinstitute.sting.utils.GenomeLoc; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; + +import java.util.Collections; +import java.util.List; +import java.util.NoSuchElementException; /** * User: hanna * Date: May 13, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java index 330a9e4f70..ff312bcac0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java @@ -1,13 +1,12 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; +import net.sf.picard.reference.ReferenceSequence; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Window; -import org.broadinstitute.sting.gatk.walkers.Reference; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import net.sf.picard.reference.ReferenceSequence; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; /* * Copyright (c) 2009 The Broad Institute * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java index 72b9625222..55304da347 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java @@ -1,16 +1,15 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.iterators.LocusIterator; -import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Collection; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.GenomeLocParser; - /** * Presents data sharded by locus to the traversal engine. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java index b467a2ab5d..f9ed0cb747 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.DownsampleType; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java index a91e169c12..223659a46f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java @@ -5,10 +5,10 @@ import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.utils.GenomeLoc; -import java.util.List; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.List; /** * User: hanna * Date: May 21, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java index d2c097f5dd..3d62faf495 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java @@ -1,9 +1,8 @@ package org.broadinstitute.sting.gatk.datasources.providers; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.utils.GenomeLoc; /* * Copyright (c) 2009 The Broad Institute * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java index 5a672b09f9..7843e7518c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java @@ -1,14 +1,13 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Collection; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.GenomeLocParser; - /** * Present data sharded by read to a traversal engine. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java index 0c4b78a7c2..e809092d48 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadView.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import java.util.Collection; import java.util.Arrays; - -import net.sf.samtools.SAMRecord; +import java.util.Collection; /** * User: hanna * Date: May 22, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java index d6c938f361..efb92235fc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.utils.*; - -import java.util.Collections; -import java.util.Collection; -import java.util.Arrays; - -import net.sf.samtools.SAMSequenceRecord; -import net.sf.samtools.SAMRecord; -import net.sf.picard.reference.ReferenceSequence; import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.picard.reference.ReferenceSequence; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; /** * User: hanna * Date: May 22, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index feed2ab85b..39c632539f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java index a0ea32f9bc..803bd885b0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java @@ -1,15 +1,14 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; -import java.util.List; import java.util.Collection; - -import net.sf.picard.reference.IndexedFastaSequenceFile; +import java.util.List; /** * User: hanna * Date: May 8, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMBlockStartIterator.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMBlockStartIterator.java index a9e04e3574..de938e8453 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMBlockStartIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMBlockStartIterator.java @@ -24,9 +24,6 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; -import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.utils.exceptions.StingException; import java.io.File; @@ -35,9 +32,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; -import java.util.ArrayList; import java.util.Iterator; -import java.util.List; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java index 34693d501a..521bcd5a3d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java @@ -39,12 +39,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; +import java.util.*; /** * Writes schedules for a single BAM file to a target output file. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java index 266232c0fd..467aebac53 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java @@ -26,20 +26,10 @@ import net.sf.picard.util.PeekableIterator; import net.sf.samtools.GATKBAMFileSpan; -import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import java.io.File; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; +import java.util.*; /** * Assign intervals to the most appropriate blocks, keeping as little as possible in memory at once. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java index c014c19950..e4141f61c9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java @@ -29,19 +29,11 @@ import net.sf.samtools.SAMFileSpan; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.SortedMap; -import java.util.TreeMap; +import java.util.*; /** * Represents a small section of a BAM file, and every associated interval. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java index 8ebb8b1a89..5d0c38b783 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java @@ -23,24 +23,18 @@ */ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.Bin; - -import net.sf.samtools.GATKBAMFileSpan; -import net.sf.samtools.GATKBin; -import net.sf.samtools.GATKChunk; -import net.sf.samtools.LinearIndex; -import net.sf.samtools.SAMException; -import net.sf.samtools.util.RuntimeIOException; +import net.sf.samtools.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.FileInputStream; import java.io.IOException; -import java.lang.ref.SoftReference; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; /** * A basic interface for querying BAM indices. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexData.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexData.java index f9b998a60c..daf1b77e39 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexData.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexData.java @@ -24,11 +24,7 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.Bin; -import net.sf.samtools.GATKBAMFileSpan; -import net.sf.samtools.GATKBin; -import net.sf.samtools.GATKChunk; -import net.sf.samtools.LinearIndex; +import net.sf.samtools.*; import java.util.ArrayList; import java.util.Collections; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java index fc3f76ab77..4ddf28dced 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java @@ -35,16 +35,7 @@ import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.PriorityQueue; -import java.util.Queue; +import java.util.*; /** * Shard intervals based on position within the BAM file. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShard.java index 26af890b49..19d33aa6ba 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShard.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShard.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.datasources.reads; +import net.sf.samtools.SAMFileSpan; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; @@ -7,8 +8,6 @@ import java.util.List; import java.util.Map; -import net.sf.samtools.SAMFileSpan; - /** * Handles locus shards of BAM information. * @author aaron diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShardStrategy.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShardStrategy.java index 950d67428d..a5ca078534 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShardStrategy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShardStrategy.java @@ -24,17 +24,18 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.GATKBAMFileSpan; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMFileSpan; +import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import java.util.*; - -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMSequenceRecord; -import net.sf.samtools.SAMFileSpan; -import net.sf.picard.reference.IndexedFastaSequenceFile; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; /** * A sharding strategy for loci based on reading of the index. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShard.java index 7579c22f65..278eeb8989 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShard.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShard.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.datasources.reads; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShardStrategy.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShardStrategy.java index 10228ecd72..28b737f283 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShardStrategy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShardStrategy.java @@ -4,8 +4,8 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Iterator; -import java.util.NoSuchElementException; import java.util.List; +import java.util.NoSuchElementException; /** * Create a giant shard representing all the data in the input BAM(s). diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java index 9aecd77790..4d9c9092d6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java @@ -1,15 +1,17 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter; - -import java.util.*; - import net.sf.samtools.SAMFileSpan; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; + /** * * User: aaron diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java index da70a615bd..c2235ec73a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java @@ -25,12 +25,11 @@ package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.samtools.SAMFileSpan; - -import java.util.*; - import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import java.util.*; + /** * The sharding strategy for reads using a simple counting mechanism. Each read shard * has a specific number of reads (default to 10K) which is configured in the constructor. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index c2aa5f18e8..6064806f3e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -24,21 +24,19 @@ package org.broadinstitute.sting.gatk.datasources.reads; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.picard.sam.MergingSamRecordIterator; +import net.sf.picard.sam.SamFileHeaderMerger; import net.sf.samtools.*; import net.sf.samtools.util.CloseableIterator; -import net.sf.picard.filter.SamRecordFilter; -import net.sf.picard.sam.SamFileHeaderMerger; -import net.sf.picard.sam.MergingSamRecordIterator; -import net.sf.picard.reference.IndexedFastaSequenceFile; - import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.DownsamplingMethod; -import org.broadinstitute.sting.gatk.filters.ReadFilter; -import org.broadinstitute.sting.gatk.iterators.*; -import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.ReadMetrics; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator; +import org.broadinstitute.sting.gatk.filters.ReadFilter; +import org.broadinstitute.sting.gatk.iterators.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.baq.BAQ; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java index 418f5d3eeb..f8d941784a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java @@ -2,17 +2,13 @@ import net.sf.samtools.SAMFileSpan; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.HasGenomeLocation; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.io.Serializable; import java.util.Collections; import java.util.List; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategyFactory.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategyFactory.java index fa733ce126..780b41ef72 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategyFactory.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategyFactory.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.SAMSequenceDictionary; import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.SAMSequenceDictionary; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; /** * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMFileStat.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMFileStat.java index ae0f1cf430..07c13a76e7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMFileStat.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMFileStat.java @@ -25,17 +25,17 @@ package org.broadinstitute.sting.gatk.datasources.reads.utilities; -import org.broadinstitute.sting.commandline.CommandLineProgram; +import net.sf.samtools.BAMIndex; +import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.instrumentation.Sizeof; import java.io.File; import java.lang.reflect.Field; -import java.util.Map; import java.util.List; - -import net.sf.samtools.*; +import java.util.Map; /** * diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMTagRenamer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMTagRenamer.java index f03e2a44f5..54de043793 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMTagRenamer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMTagRenamer.java @@ -24,12 +24,12 @@ package org.broadinstitute.sting.gatk.datasources.reads.utilities; -import org.broadinstitute.sting.commandline.CommandLineProgram; -import org.broadinstitute.sting.commandline.Argument; -import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileWriterFactory; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.CommandLineProgram; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/PrintBGZFBounds.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/PrintBGZFBounds.java index df7dccaa95..773541d117 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/PrintBGZFBounds.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/PrintBGZFBounds.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.datasources.reads.utilities; -import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.CommandLineProgram; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java index c2d64ddd8b..ef69a8e5f7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java @@ -25,16 +25,15 @@ package org.broadinstitute.sting.gatk.datasources.reference; -import net.sf.picard.reference.ReferenceSequenceFileFactory; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import net.sf.picard.reference.FastaSequenceIndex; import net.sf.picard.reference.FastaSequenceIndexBuilder; -import net.sf.picard.sam.CreateSequenceDictionary; import net.sf.picard.reference.IndexedFastaSequenceFile; -import net.sf.picard.reference.FastaSequenceIndex; +import net.sf.picard.sam.CreateSequenceDictionary; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.file.FSLockWithShared; import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException; -import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ResourcePool.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ResourcePool.java index 934f4f997c..21f58d4804 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ResourcePool.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ResourcePool.java @@ -28,11 +28,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; +import java.util.*; /** * A pool of open resources, all of which can create a closeable iterator. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java index 0a5981a1bf..067bf3f720 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java @@ -3,11 +3,10 @@ import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.yaml.snakeyaml.TypeDescription; import org.yaml.snakeyaml.Yaml; import org.yaml.snakeyaml.constructor.Constructor; diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java b/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java index a1419ba70a..5dbd904051 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java @@ -1,15 +1,20 @@ package org.broadinstitute.sting.gatk.examples; -import net.sf.samtools.*; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.contexts.*; -import org.broadinstitute.sting.utils.pileup.*; +import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import java.util.*; import java.io.PrintStream; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; /** * Computes the coverage per sample. diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java index b96d0ffbfe..26205a203f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.examples; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -32,8 +34,6 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidSNPGenotypePriors; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java b/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java index 3e335733d8..b23782563f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java @@ -25,18 +25,18 @@ package org.broadinstitute.sting.gatk.executive; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; -import java.util.List; import java.util.Iterator; +import java.util.List; /** * Manages the */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index 8a27e008cf..59fb4aa9ef 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -1,27 +1,27 @@ package org.broadinstitute.sting.gatk.executive; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.io.*; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.io.OutputTracker; +import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor; -import java.util.Queue; -import java.util.LinkedList; import java.util.Collection; -import java.util.concurrent.Executors; +import java.util.LinkedList; +import java.util.Queue; import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; -import net.sf.picard.reference.IndexedFastaSequenceFile; - /** * A microscheduler that schedules shards according to a tree-like structure. * Requires a special walker tagged with a 'TreeReducible' interface. diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 4cb571c454..9466fdf756 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -1,22 +1,21 @@ package org.broadinstitute.sting.gatk.executive; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.io.DirectOutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.Walker; import java.util.Collection; -import net.sf.picard.reference.IndexedFastaSequenceFile; - /** A micro-scheduling manager for single-threaded execution of a traversal. */ public class LinearMicroScheduler extends MicroScheduler { diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index cfe0f81872..23e5769f1b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -25,26 +25,18 @@ package org.broadinstitute.sting.gatk.executive; +import net.sf.picard.reference.IndexedFastaSequenceFile; import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; -import org.broadinstitute.sting.gatk.traversals.*; -import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.io.OutputTracker; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.NullSAMIterator; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.ReadMetrics; - -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.PrintStream; -import java.lang.management.ManagementFactory; -import java.util.*; - -import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.gatk.traversals.*; +import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.threading.*; @@ -52,6 +44,11 @@ import javax.management.JMException; import javax.management.MBeanServer; import javax.management.ObjectName; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.PrintStream; +import java.lang.management.ManagementFactory; +import java.util.Collection; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/OutputMergeTask.java b/public/java/src/org/broadinstitute/sting/gatk/executive/OutputMergeTask.java index 76e0c1c8ac..7be37a616e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/OutputMergeTask.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/OutputMergeTask.java @@ -1,10 +1,9 @@ package org.broadinstitute.sting.gatk.executive; import org.broadinstitute.sting.gatk.io.storage.Storage; -import org.broadinstitute.sting.gatk.io.OutputTracker; -import java.util.Collection; import java.util.ArrayList; +import java.util.Collection; /** * User: hanna diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/ReduceTree.java b/public/java/src/org/broadinstitute/sting/gatk/executive/ReduceTree.java index 151a1ba264..7aac70b473 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/ReduceTree.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/ReduceTree.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.executive; -import java.util.Queue; -import java.util.List; import java.util.ArrayList; import java.util.LinkedList; +import java.util.List; +import java.util.Queue; import java.util.concurrent.Future; /** * User: hanna diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java index b78a4edc9b..6136bd68da 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.executive; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker; -import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.concurrent.Callable; diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java index 8fb4adb5d4..d36a3b5768 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java @@ -4,8 +4,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.concurrent.Callable; -import java.util.concurrent.Future; import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; /** * User: hanna * Date: Apr 29, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java index 5c341bb022..cfbce58ee9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ -1,17 +1,20 @@ package org.broadinstitute.sting.gatk.executive; +import net.sf.picard.util.PeekableIterator; +import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; +import org.broadinstitute.sting.gatk.iterators.LocusIterator; +import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.iterators.*; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; - -import java.util.*; - -import net.sf.picard.util.PeekableIterator; import org.broadinstitute.sting.utils.GenomeLocParser; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + /** * Buffer shards of data which may or may not contain multiple loci into * iterators of all data which cover an interval. Its existence is an homage diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java index b8a3ee9779..0987c5d749 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java @@ -24,8 +24,10 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.*; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; /** * Filter out reads with wonky cigar strings. diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/BadMateFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/BadMateFilter.java index 3b988c8fb3..8596e18eb6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/BadMateFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/BadMateFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java b/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java index 84390c1734..03fc2063b3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java @@ -23,16 +23,15 @@ */ package org.broadinstitute.sting.gatk.filters; +import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.util.CloseableIterator; import net.sf.samtools.util.CloserUtil; +import org.broadinstitute.sting.gatk.ReadMetrics; +import java.util.Collection; import java.util.Iterator; import java.util.NoSuchElementException; -import java.util.Collection; - -import net.sf.samtools.SAMRecord; -import net.sf.samtools.util.CloseableIterator; -import net.sf.picard.filter.SamRecordFilter; -import org.broadinstitute.sting.gatk.ReadMetrics; /** * Filtering Iterator which takes a filter and an iterator and iterates diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java index fb3c385828..589910fc7b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/DuplicateReadFilter.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /* diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java index b7806524a9..cd77a9e7e0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java b/public/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java index cda08fb665..67f82235dd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java @@ -25,11 +25,8 @@ package org.broadinstitute.sting.gatk.filters; -import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.classloader.PluginManager; -import net.sf.picard.filter.SamRecordFilter; - import java.util.Collection; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java index 4deeb09ee9..74deace9a9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java @@ -24,8 +24,8 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java index bcd473b158..75369b3066 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java new file mode 100644 index 0000000000..1afec36d15 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2009 The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.filters; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.QualityUtils; + +/** + * Filter out mapping quality zero reads. + * + * @author ebanks + * @version 0.1 + */ + +public class MappingQualityUnavailableReadFilter extends ReadFilter { + public boolean filterOut(SAMRecord rec) { + return (rec.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE); + } +} + diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ZeroMappingQualityReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java similarity index 90% rename from public/java/src/org/broadinstitute/sting/gatk/filters/ZeroMappingQualityReadFilter.java rename to public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java index 7e6fc5e828..e49d4117c5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ZeroMappingQualityReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java @@ -24,17 +24,16 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** - * Filter out zero mapping quality reads. + * Filter out mapping quality zero reads. * * @author hanna * @version 0.1 */ -public class ZeroMappingQualityReadFilter extends ReadFilter { +public class MappingQualityZeroReadFilter extends ReadFilter { public boolean filterOut(SAMRecord rec) { return (rec.getMappingQuality() == 0); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java index 584783d340..7bcee033f8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MaxInsertSizeFilter.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java index 8a2fd5af36..cd31da61a0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MissingReadGroupFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MissingReadGroupFilter.java index d5d40ec389..490a550400 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MissingReadGroupFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MissingReadGroupFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java index fc21538e88..29738e499c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /* diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java index cfbba03836..31c2144ce2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java index dcddebd55a..8ad91ac1c1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java @@ -25,9 +25,7 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; - import org.broadinstitute.sting.utils.sam.ReadUtils; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java index 691fd95f0d..30b2f828d0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java @@ -25,11 +25,9 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; - -import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.utils.sam.ReadUtils; /** * Filter out PL matching reads. diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java index 4ef88402d5..81044b888a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java @@ -1,13 +1,11 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.exceptions.UserException; -import java.util.Set; import java.util.HashSet; - -import org.broadinstitute.sting.utils.exceptions.UserException; +import java.util.Set; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java index 1d74ee20e7..2276377619 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilter.java index 69ebc8b041..0e5e8800ca 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilter.java @@ -25,17 +25,16 @@ package org.broadinstitute.sting.gatk.filters; -import java.util.*; -import java.util.Map.Entry; -import java.io.File; -import java.io.FileNotFoundException; - -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; +import java.io.File; +import java.io.FileNotFoundException; +import java.util.*; +import java.util.Map.Entry; + /** * Removes records matching the read group tag and exact match string. * For example, this filter value: diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadStrandFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadStrandFilter.java index 1b2a77f450..16eeed3ccf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadStrandFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadStrandFilter.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java index 682b22b1f1..99d6bc1544 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/SampleFilter.java @@ -25,9 +25,8 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java index 05b4720367..2f93cbcaed 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java @@ -25,9 +25,8 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMReadGroupRecord; -import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java index ac4f4853ad..e7ee345d2f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/UnmappedReadFilter.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/DirectOutputTracker.java b/public/java/src/org/broadinstitute/sting/gatk/io/DirectOutputTracker.java index 8655286884..658a28fbd4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/DirectOutputTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/DirectOutputTracker.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.gatk.io; -import org.broadinstitute.sting.gatk.io.stubs.Stub; -import org.broadinstitute.sting.gatk.io.storage.StorageFactory; import org.broadinstitute.sting.gatk.io.storage.Storage; +import org.broadinstitute.sting.gatk.io.storage.StorageFactory; +import org.broadinstitute.sting.gatk.io.stubs.Stub; /** * Maps creation of storage directly to output streams in parent. diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java b/public/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java index b68013aa44..f39ba2d8d4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java @@ -26,20 +26,20 @@ package org.broadinstitute.sting.gatk.io; import net.sf.samtools.SAMFileReader; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.commandline.ArgumentSource; -import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; -import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.gatk.io.storage.Storage; +import org.broadinstitute.sting.gatk.io.storage.StorageFactory; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub; import org.broadinstitute.sting.gatk.io.stubs.Stub; -import org.broadinstitute.sting.gatk.io.storage.StorageFactory; -import org.broadinstitute.sting.gatk.io.storage.Storage; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; -import java.io.*; +import java.io.OutputStream; import java.lang.reflect.Field; -import java.util.Map; import java.util.HashMap; +import java.util.Map; /** * Manages the output and err streams that are created specifically for walker diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/StingSAMFileWriter.java b/public/java/src/org/broadinstitute/sting/gatk/io/StingSAMFileWriter.java index 8701ecf3ca..a9a74925dc 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/StingSAMFileWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/StingSAMFileWriter.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.io; -import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMFileWriter; /** * A writer that will allow unsorted BAM files to be written diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java b/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java index 36960246a7..999deddd1f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java @@ -25,15 +25,16 @@ package org.broadinstitute.sting.gatk.io; -import org.broadinstitute.sting.gatk.io.stubs.Stub; -import org.broadinstitute.sting.gatk.io.storage.StorageFactory; -import org.broadinstitute.sting.gatk.io.storage.Storage; import org.broadinstitute.sting.gatk.executive.OutputMergeTask; +import org.broadinstitute.sting.gatk.io.storage.Storage; +import org.broadinstitute.sting.gatk.io.storage.StorageFactory; +import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.utils.exceptions.UserException; -import java.util.*; import java.io.File; import java.io.IOException; +import java.util.HashMap; +import java.util.Map; /** * An output tracker that can either track its output per-thread or directly, diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/OutputStreamStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/OutputStreamStorage.java index 4dc9762890..56c9c04651 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/OutputStreamStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/OutputStreamStorage.java @@ -25,14 +25,14 @@ package org.broadinstitute.sting.gatk.io.storage; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.*; +import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.channels.WritableByteChannel; -import java.nio.channels.Channels; public class OutputStreamStorage extends OutputStream implements Storage { /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java index 610db1d768..cb8786be10 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java @@ -27,17 +27,16 @@ import net.sf.samtools.*; import net.sf.samtools.util.CloseableIterator; - -import java.io.*; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; - import net.sf.samtools.util.RuntimeIOException; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.SimplifyingSAMFileWriter; +import java.io.File; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; + /** * Provides temporary storage for SAMFileWriters. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java index ee5c56524c..66907dd6b1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.gatk.io.storage; -import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub; +import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java index 74176ec350..1da03e9c27 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java @@ -1,16 +1,21 @@ package org.broadinstitute.sting.gatk.io.storage; +import net.sf.samtools.util.BlockCompressedOutputStream; import org.apache.log4j.Logger; import org.broad.tribble.source.BasicFeatureSource; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; - -import java.io.*; - -import net.sf.samtools.util.BlockCompressedOutputStream; +import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter; +import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; /** * Provides temporary and permanent storage for genotypes in VCF format. diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java index 00e78f3912..8bc97c886d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java @@ -30,8 +30,8 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.io.OutputStream; import java.io.File; +import java.io.OutputStream; import java.lang.reflect.Constructor; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamStub.java index 5cf84c5a28..27bcb8a1cf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamStub.java @@ -27,9 +27,9 @@ import org.broadinstitute.sting.gatk.io.OutputTracker; -import java.io.OutputStream; -import java.io.IOException; import java.io.File; +import java.io.IOException; +import java.io.OutputStream; /** * A stub for routing and management of anything backed by an OutputStream. diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java index d847015ed7..f124c23028 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java @@ -24,14 +24,14 @@ package org.broadinstitute.sting.gatk.io.stubs; -import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; -import org.broadinstitute.sting.commandline.ArgumentSource; +import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.commandline.ArgumentMatches; +import org.broadinstitute.sting.commandline.ArgumentSource; +import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; import org.broadinstitute.sting.commandline.ParsingEngine; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import net.sf.samtools.SAMFileReader; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index a9a2722206..38640eda05 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -25,18 +25,17 @@ package org.broadinstitute.sting.gatk.io.stubs; +import net.sf.samtools.SAMFileWriter; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; -import net.sf.samtools.SAMFileWriter; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import java.lang.annotation.Annotation; -import java.util.List; -import java.util.Arrays; import java.io.File; import java.io.OutputStream; +import java.lang.annotation.Annotation; +import java.util.Arrays; +import java.util.List; /** * Insert a SAMFileWriterStub instead of a full-fledged concrete OutputStream implementations. diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java index f5c1e0efc3..d8e59a3dd6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java @@ -25,19 +25,18 @@ package org.broadinstitute.sting.gatk.io.stubs; +import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileHeader; - -import java.io.File; -import java.io.OutputStream; - +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.baq.BAQ; + +import java.io.File; +import java.io.OutputStream; /** * A stub for routing and management of SAM file reading and writing. diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index 7521e754d7..615841f022 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -25,14 +25,17 @@ package org.broadinstitute.sting.gatk.io.stubs; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.OutputStream; -import java.util.*; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; /** * Injects new command-line arguments into the system providing support for the genotype writer. diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java index 1a79d27857..bb84f9457b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java @@ -25,19 +25,19 @@ package org.broadinstitute.sting.gatk.io.stubs; -import java.io.File; -import java.io.PrintStream; -import java.io.OutputStream; -import java.util.Collection; - -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.File; +import java.io.OutputStream; +import java.io.PrintStream; +import java.util.Collection; /** * A stub for routing and management of genotype reading and writing. diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java index b5643f8346..478675f9d1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java @@ -1,13 +1,11 @@ package org.broadinstitute.sting.gatk.iterators; +import net.sf.picard.sam.MergingSamRecordIterator; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; -import net.sf.picard.sam.MergingSamRecordIterator; import java.util.Iterator; -import org.broadinstitute.sting.gatk.ReadProperties; - /* * Copyright (c) 2009 The Broad Institute * diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/BufferingReadIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/BufferingReadIterator.java index f3a060be10..7eaf4be410 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/BufferingReadIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/BufferingReadIterator.java @@ -26,12 +26,11 @@ import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.Queue; import java.util.LinkedList; import java.util.NoSuchElementException; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import java.util.Queue; /** * Buffers access to a large stream of reads, replenishing the buffer only when the reads diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java index 1342f11fd2..835748ff0e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import java.util.Iterator; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; - public class DownsampleIterator implements StingSAMIterator { diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java index aa376a12a9..240564d347 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java @@ -3,8 +3,8 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import java.util.NoSuchElementException; import java.util.Iterator; +import java.util.NoSuchElementException; /** * User: hanna * Date: May 12, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java index 30c1cf5129..e177984caa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIterator.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.util.CloseableIterator; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import java.util.Iterator; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; - /** * Iterator that traverses a SAM File, accumulating information on a per-locus basis */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index 4e58813f55..e13c5a7641 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -25,19 +25,27 @@ package org.broadinstitute.sting.gatk.iterators; -import net.sf.samtools.*; import net.sf.picard.util.PeekableIterator; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.DownsamplingMethod; import org.broadinstitute.sting.gatk.DownsampleType; +import org.broadinstitute.sting.gatk.DownsamplingMethod; +import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.ReservoirDownsampler; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.pileup.*; +import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileupImpl; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java index ff458467f4..21b71c9e64 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.iterators; -import org.broadinstitute.sting.gatk.ReadProperties; import net.sf.samtools.SAMRecord; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java index c2d3976ea7..cc499b247f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/PositionTrackingIterator.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.iterators; -import org.broadinstitute.sting.gatk.ReadProperties; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java index 239392eec2..2f30d12a80 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java @@ -1,13 +1,8 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMTag; -import net.sf.samtools.SAMReadGroupRecord; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.apache.log4j.Logger; - -import java.util.List; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** * An iterator which does post-processing of a read, including potentially wrapping diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java index d294993d45..1b248d0970 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java @@ -2,7 +2,6 @@ import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.gatk.ReadProperties; /** * * User: aaron diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index a51ca92923..69c0b3e0a0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -26,19 +26,16 @@ import org.apache.log4j.Level; import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.CommandLineUtils; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.jets3t.service.S3Service; import org.jets3t.service.S3ServiceException; import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Bucket; import org.jets3t.service.model.S3Object; import org.jets3t.service.security.AWSCredentials; import org.simpleframework.xml.Element; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java index fa0c323b57..ce924fd875 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java @@ -25,17 +25,17 @@ package org.broadinstitute.sting.gatk.refdata; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.gatk.iterators.PushbackIterator; -import java.util.Iterator; -import java.util.regex.Pattern; -import java.io.FileNotFoundException; import java.io.File; +import java.io.FileNotFoundException; import java.lang.reflect.Constructor; +import java.util.Iterator; +import java.util.regex.Pattern; /** * This is a low-level iterator designed to provide system-wide generic support for reading record-oriented data diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java index cff97e4eec..59b273d386 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordListImpl.java @@ -2,9 +2,9 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 43bf6f8e0b..d03b122e23 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.refdata; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java index b3cb22a032..b7437e6e85 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java @@ -6,9 +6,9 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index c7c0468e76..1d622e2c7d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -3,17 +3,13 @@ import org.broad.tribble.Feature; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.gelitext.GeliTextFeature; -import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.MutableGenotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.*; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java index 59cd14a223..6bba754be1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java @@ -25,12 +25,6 @@ package org.broadinstitute.sting.gatk.refdata.features.annotator; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.StringTokenizer; - import org.apache.log4j.Logger; import org.broad.tribble.Feature; import org.broad.tribble.exception.CodecLineParsingException; @@ -41,6 +35,12 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.StringTokenizer; + public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec { private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class); diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java index d97e378fb1..d12badd28c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java @@ -25,13 +25,13 @@ package org.broadinstitute.sting.gatk.refdata.features.annotator; +import org.broad.tribble.Feature; + import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.Map; -import org.broad.tribble.Feature; - /** * This class represents a single record in an AnnotatorInputTable. */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java index 7f97451cf7..5e536d4c1b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java @@ -26,19 +26,19 @@ import org.broad.tribble.Feature; +import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.LineReader; +import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; + import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; -import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.broad.tribble.exception.CodecLineParsingException; -import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; public class BeagleCodec implements ReferenceDependentFeatureCodec { private String[] header; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java index c7bf865690..e6832754d0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java @@ -25,14 +25,11 @@ package org.broadinstitute.sting.gatk.refdata.features.beagle; import org.broad.tribble.Feature; - +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.ArrayList; import java.util.Map; -import net.sf.samtools.util.StringUtil; -import org.broadinstitute.sting.utils.variantcontext.Allele; - public class BeagleFeature implements Feature { private String chr; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java index 4648efd1e4..d12114f9ab 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java @@ -4,9 +4,8 @@ import org.broadinstitute.sting.gatk.refdata.Transcript; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupCodec.java index 00b7c45d96..43e2c3ff5b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupCodec.java @@ -25,15 +25,15 @@ package org.broadinstitute.sting.gatk.refdata.features.sampileup; -import org.broad.tribble.FeatureCodec; import org.broad.tribble.Feature; +import org.broad.tribble.FeatureCodec; import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.LineReader; import org.broad.tribble.util.ParsingUtils; import java.util.ArrayList; -import java.util.regex.Pattern; import java.util.regex.Matcher; +import java.util.regex.Pattern; import static org.broadinstitute.sting.gatk.refdata.features.sampileup.SAMPileupFeature.VariantType; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupFeature.java index a794c2704b..378f269347 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupFeature.java @@ -25,12 +25,11 @@ package org.broadinstitute.sting.gatk.refdata.features.sampileup; +import net.sf.samtools.util.StringUtil; import org.broad.tribble.Feature; import java.util.List; -import net.sf.samtools.util.StringUtil; - /** * A tribble feature representing a SAM pileup. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadCodec.java index 15f559d46d..039b8addea 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadCodec.java @@ -24,14 +24,14 @@ package org.broadinstitute.sting.gatk.refdata.features.samread; +import net.sf.samtools.Cigar; +import net.sf.samtools.TextCigarCodec; +import net.sf.samtools.util.StringUtil; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.LineReader; import org.broad.tribble.util.ParsingUtils; -import net.sf.samtools.util.StringUtil; -import net.sf.samtools.TextCigarCodec; -import net.sf.samtools.Cigar; /** * Decodes a simple SAM text string. diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/BedTableCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/BedTableCodec.java index b831606a3d..745ccdd9f9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/BedTableCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/BedTableCodec.java @@ -1,13 +1,8 @@ package org.broadinstitute.sting.gatk.refdata.features.table; import org.broad.tribble.Feature; -import org.broad.tribble.readers.LineReader; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.UserException; -import java.io.IOException; -import java.util.ArrayList; import java.util.Arrays; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java old mode 100644 new mode 100755 index 6f0a712bff..ab1ac59d8b --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java @@ -1,16 +1,14 @@ package org.broadinstitute.sting.gatk.refdata.features.table; import org.broad.tribble.Feature; -import org.broad.tribble.FeatureCodec; import org.broad.tribble.readers.LineReader; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.interval.IntervalUtils; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; /** * implementation of a simple table (tab or comma delimited format) input files diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java old mode 100644 new mode 100755 index 6ff0384a07..ca73ee960a --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java @@ -3,7 +3,7 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.utils.GenomeLoc; -import java.util.*; +import java.util.List; /** * A feature representing a single row out of a text table @@ -55,10 +55,14 @@ public GenomeLoc getLocation() { } public List getAllValues() { - return getValuesTo(values.size()-1); + return getValuesTo(values.size()); } public List getValuesTo(int columnPosition) { return values.subList(0,columnPosition); } + + public List getHeader() { + return keys; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java index 5bb65f9a2d..085d6b5b3c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java @@ -10,7 +10,6 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java index 19050ae117..731df997de 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java @@ -28,7 +28,6 @@ import org.broadinstitute.sting.utils.GenomeLoc; import java.io.IOException; -import java.util.Iterator; /** * @author aaron diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java index 3b9f8243fa..ba1ca674e2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java @@ -35,8 +35,6 @@ import java.io.File; import java.io.IOException; -import java.lang.reflect.Type; -import java.util.Iterator; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java index c2057ad5ef..19c91be1b8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java @@ -42,15 +42,17 @@ import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SequenceDictionaryUtils; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.file.FSLockWithShared; import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException; import org.broadinstitute.sting.utils.instrumentation.Sizeof; -import java.io.*; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java index 462bf98dfe..104ba87b54 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java @@ -28,8 +28,6 @@ import org.broad.tribble.iterators.CloseableTribbleIterator; import org.broadinstitute.sting.utils.GenomeLocParser; -import java.util.Iterator; - /** * diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java index 1553402a51..59e8471a37 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java @@ -28,8 +28,6 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.HasGenomeLocation; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/LocationAwareSeekableRODIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/LocationAwareSeekableRODIterator.java index 83aa5f056a..96086598ae 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/LocationAwareSeekableRODIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/LocationAwareSeekableRODIterator.java @@ -2,12 +2,8 @@ import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.utils.GenomeLoc; -import java.util.Iterator; -import java.util.List; - /** * @author aaron *

diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java index 101784d97d..fc7f7c58f8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java @@ -25,10 +25,10 @@ package org.broadinstitute.sting.gatk.refdata.utils; +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.bed.BedParser; -import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java index 75e7c1a320..3201769e0a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java @@ -3,8 +3,8 @@ import net.sf.samtools.util.SequenceUtil; import org.broad.tribble.annotation.Strand; import org.broad.tribble.dbsnp.DbSNPFeature; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Arrays; diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index f4c5653185..59d4968283 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -100,7 +100,11 @@ private void loadReport(File file) { * @param tableDescription the description of the table */ public void addTable(String tableName, String tableDescription) { - GATKReportTable table = new GATKReportTable(tableName, tableDescription); + addTable(tableName, tableDescription, true); + } + + public void addTable(String tableName, String tableDescription, boolean sortByPrimaryKey) { + GATKReportTable table = new GATKReportTable(tableName, tableDescription, sortByPrimaryKey); tables.put(tableName, table); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 0e503f92ac..f7ea256966 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -3,9 +3,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.PrintStream; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.TreeSet; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -96,8 +94,9 @@ public class GATKReportTable { private String tableDescription; private String primaryKeyName; - private TreeSet primaryKeyColumn; + private Collection primaryKeyColumn; private boolean primaryKeyDisplay; + boolean sortByPrimaryKey = true; private LinkedHashMap columns; @@ -121,12 +120,17 @@ private boolean isValidName(String name) { * @param tableDescription the description of the table */ public GATKReportTable(String tableName, String tableDescription) { - if (!isValidName(tableName)) { + this(tableName, tableDescription, true); + } + + public GATKReportTable(String tableName, String tableDescription, boolean sortByPrimaryKey) { + if (!isValidName(tableName)) { throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed."); } this.tableName = tableName; this.tableDescription = tableDescription; + this.sortByPrimaryKey = sortByPrimaryKey; columns = new LinkedHashMap(); } @@ -137,20 +141,14 @@ public GATKReportTable(String tableName, String tableDescription) { * @param primaryKeyName the name of the primary key column */ public void addPrimaryKey(String primaryKeyName) { - if (!isValidName(primaryKeyName)) { - throw new ReviewedStingException("Attempted to set a GATKReportTable primary key name of '" + primaryKeyName + "'. GATKReportTable primary key names must be purely alphanumeric - no spaces or special characters are allowed."); - } - - this.primaryKeyName = primaryKeyName; - - primaryKeyColumn = new TreeSet(); - primaryKeyDisplay = true; + addPrimaryKey(primaryKeyName, true); } /** * Add an optionally visible primary key column. This becomes the unique identifier for every column in the table, and will always be printed as the first column. * * @param primaryKeyName the name of the primary key column + * @param display should this primary key be displayed? */ public void addPrimaryKey(String primaryKeyName, boolean display) { if (!isValidName(primaryKeyName)) { @@ -159,7 +157,7 @@ public void addPrimaryKey(String primaryKeyName, boolean display) { this.primaryKeyName = primaryKeyName; - primaryKeyColumn = new TreeSet(); + primaryKeyColumn = sortByPrimaryKey ? new TreeSet() : new LinkedList(); primaryKeyDisplay = display; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index e1085c6b28..89a179d0ea 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -25,11 +25,11 @@ package org.broadinstitute.sting.gatk.traversals; import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.ReadMetrics; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -38,7 +38,10 @@ import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.PrintStream; -import java.util.*; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; public abstract class TraversalEngine,ProviderType extends ShardDataProvider> { // Time in milliseconds since we initialized this engine diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index 89ff688a7c..1ba48ca5f3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -28,12 +28,11 @@ import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.datasources.providers.ReadView; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ReadView; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.gatk.walkers.DuplicateWalker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index 240176f2fa..232989fb05 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java index f15a20cd38..196d540363 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java @@ -1,16 +1,18 @@ package org.broadinstitute.sting.gatk.traversals; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.ReadPairWalker; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMRecordCoordinateComparator; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadView; import org.broadinstitute.sting.gatk.datasources.reads.Shard; -import org.apache.log4j.Logger; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMRecordCoordinateComparator; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.ReadPairWalker; +import org.broadinstitute.sting.gatk.walkers.Requires; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; /** * Traverse over a collection of read pairs, assuming that a given shard will contain all pairs. diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index 670676b48c..06e4539c48 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -2,14 +2,16 @@ import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.ReadMetrics; +import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.providers.*; +import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView; +import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView; +import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ReadView; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLoc; /* diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java index a29e511892..2541921e9b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java @@ -1,11 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.Documented; -import java.lang.annotation.Inherited; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; -import java.lang.annotation.ElementType; +import java.lang.annotation.*; /** * User: hanna * Date: May 19, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java index 99dd46cbe4..03097887d3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java @@ -1,11 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.Documented; -import java.lang.annotation.Inherited; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; -import java.lang.annotation.ElementType; +import java.lang.annotation.*; /** * User: hanna diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/By.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/By.java index 25455b5872..8fa6a4c1b8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/By.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/By.java @@ -1,11 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.Documented; -import java.lang.annotation.Inherited; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; -import java.lang.annotation.ElementType; +import java.lang.annotation.*; /** * User: hanna * Date: May 14, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java index 1a3f87a7a2..ca4e3f5e3b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java @@ -25,29 +25,29 @@ package org.broadinstitute.sting.gatk.walkers; -import net.sf.samtools.*; -import net.sf.picard.reference.ReferenceSequenceFileFactory; -import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.ReferenceSequence; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.util.StringUtil; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.clipreads.ClippingOp; import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation; import org.broadinstitute.sting.utils.clipreads.ReadClipper; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.utils.sam.ReadUtils; -import java.util.*; -import java.util.regex.Pattern; -import java.util.regex.Matcher; import java.io.File; import java.io.PrintStream; - -import net.sf.samtools.util.StringUtil; -import org.broadinstitute.sting.utils.sam.ReadUtils; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * This ReadWalker provides simple, yet powerful read clipping capabilities. It allows the user to clip bases in reads diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java index 1fc606f079..4bfedb6724 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java @@ -1,16 +1,13 @@ package org.broadinstitute.sting.gatk.walkers; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter; +import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.utils.GenomeLoc; import java.util.List; import java.util.Set; -import java.util.Arrays; - -import net.sf.samtools.SAMRecord; -import net.sf.picard.filter.SamRecordFilter; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java index a272150c76..56287df31f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMFileWriter; -import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java index 13a55eaace..fcfcb81b58 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import java.io.PrintStream; import java.text.DecimalFormat; import java.text.NumberFormat; -import java.io.PrintStream; /* diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java index c0f469973a..68bea4dbad 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java @@ -28,13 +28,12 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; import java.io.PrintStream; -import java.util.*; +import java.util.List; /** * Walks along reference and calculates the GC content for each interval. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index fc4b403c55..b0b2687f4d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -1,9 +1,12 @@ package org.broadinstitute.sting.gatk.walkers; -import org.broadinstitute.sting.gatk.filters.*; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; +import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckReadFilter; +import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter; +import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index 84d868c1a3..508d1f6eed 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -26,22 +26,22 @@ package org.broadinstitute.sting.gatk.walkers; import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import java.io.PrintStream; import java.util.ArrayList; import java.util.List; -import java.io.PrintStream; /** * Prints the alignment in the pileup format. In the pileup format, each line represents a genomic position, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java index 9ac3fc0e6c..158992a220 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java @@ -25,15 +25,15 @@ package org.broadinstitute.sting.gatk.walkers; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.commandline.Output; -import java.util.Iterator; import java.io.PrintStream; +import java.util.Iterator; /** * Prints out all of the RODs in the input data set. Data is rendered using the toString() method diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index 07938d3220..7e1dcd707e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -28,14 +28,18 @@ import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; -import java.io.PrintStream; +import java.io.File; +import java.util.Collection; +import java.util.Set; +import java.util.TreeSet; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; /** * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear * in the input file. It can dynamically merge the contents of multiple input BAM files, resulting @@ -54,6 +58,13 @@ public class PrintReadsWalker extends ReadWalker { String platform = null; // E.g. ILLUMINA, 454 @Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false) int nReadsToPrint = -1; + @Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false) + public Set sampleFile = new TreeSet(); + @Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false) + public Set sampleNames = new TreeSet(); + + private TreeSet samplesToChoose = new TreeSet(); + private boolean SAMPLES_SPECIFIED = false; /** * The initialize function. @@ -61,6 +72,20 @@ public class PrintReadsWalker extends ReadWalker { public void initialize() { if ( platform != null ) platform = platform.toUpperCase(); + + Collection samplesFromFile; + if (!sampleFile.isEmpty()) { + samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFile); + samplesToChoose.addAll(samplesFromFile); + } + + if (!sampleNames.isEmpty()) + samplesToChoose.addAll(sampleNames); + + if(!samplesToChoose.isEmpty()) { + SAMPLES_SPECIFIED = true; + } + } /** @@ -87,6 +112,14 @@ public boolean filter(ReferenceContext ref, SAMRecord read) { if ( readPlatformAttr == null || !readPlatformAttr.toString().toUpperCase().contains(platform)) return false; } + if (SAMPLES_SPECIFIED ) { + // user specified samples to select + // todo - should be case-agnostic but for simplicity and speed this is ignored. + // todo - can check at initialization intersection of requested samples and samples in BAM header to further speedup. + if (!samplesToChoose.contains(read.getReadGroup().getSample())) + return false; + } + // check if we've reached the output limit if ( nReadsToPrint == 0 ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java index a5486fd9a2..db2038aa36 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Requires.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Requires.java index 6c1e64c4ed..e9a381a854 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Requires.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Requires.java @@ -1,11 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.Documented; -import java.lang.annotation.Inherited; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; -import java.lang.annotation.ElementType; +import java.lang.annotation.*; /** * User: hanna * Date: May 19, 2009 diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java index 014acff9c5..486d233b74 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java @@ -25,14 +25,20 @@ package org.broadinstitute.sting.gatk.walkers; -import net.sf.samtools.*; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; +import org.apache.log4j.Logger; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.apache.log4j.Logger; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** * Divides the input data set into separate BAM files, one for each sample in the input data set. The split diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index f0ba8bb465..3847423026 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -25,14 +25,14 @@ package org.broadinstitute.sting.gatk.walkers; -import java.util.List; - +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.filters.MalformedReadFilter; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.collections.Pair; -import org.apache.log4j.Logger; + +import java.util.List; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/WalkerName.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/WalkerName.java index 0e4d406759..4d46607e58 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/WalkerName.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/WalkerName.java @@ -1,10 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.Documented; -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; +import java.lang.annotation.*; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Window.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Window.java index 0b718071d5..9827fdf095 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Window.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Window.java @@ -25,7 +25,10 @@ package org.broadinstitute.sting.gatk.walkers; -import java.lang.annotation.*; +import java.lang.annotation.Documented; +import java.lang.annotation.Inherited; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; /** * Describes the size of the window into the genome. Has differing semantics based on diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index b02dcd8e21..3144098a83 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -25,21 +25,21 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class AlleleBalance implements InfoFieldAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java index 0be737897b..a99f87a704 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java @@ -1,12 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.gatk.contexts.*; -import org.broadinstitute.sting.utils.variantcontext.*; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; @@ -62,5 +66,5 @@ private Double annotateSNP(AlignmentContext stratifiedContext, VariantContext vc public List getKeyNames() { return Arrays.asList("AB"); } - public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), -1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); } + public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java index 51b5381dc3..6c14e74457 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java @@ -1,8 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.variantcontext.Genotype; + import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java index 244627154d..66416ce115 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java @@ -31,19 +31,19 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class BaseCounts implements InfoFieldAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index ff916bedd0..2a5c996f7d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; -import java.util.Arrays; public class BaseQualityRankSumTest extends RankSumTest { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 143722d7c4..74f7f9d804 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -25,24 +25,29 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import java.util.*; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation { private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; - private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"), - new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, -1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"), + private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"), + new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"), new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") }; public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index e56825dbe1..c384e0d09c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -1,18 +1,19 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class DepthOfCoverage implements InfoFieldAnnotation, StandardAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 754d28dfd3..e3e8bc258a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -1,20 +1,20 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCompoundHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; -import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; @@ -142,5 +142,5 @@ private String getAlleleRepresentation(Allele allele) { // public String getIndelBases() public List getKeyNames() { return Arrays.asList("AD"); } - public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFCompoundHeaderLine.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); } + public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 2115526a6b..97ed221e76 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -24,20 +24,20 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import cern.jet.math.Arithmetic; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import cern.jet.math.Arithmetic; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index 5eaa30bf31..48677bbe5d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -1,18 +1,19 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class GCContent implements InfoFieldAnnotation, ExperimentalAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java index 3a5db28844..cca0ad4bcf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java @@ -1,19 +1,20 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index bd8c51a414..b175579f1c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -24,25 +24,29 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.genotype.Haplotype; -import org.broadinstitute.sting.utils.pileup.*; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation { private final static boolean DEBUG = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java index ecebfbbd2a..d86728d5e7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -1,20 +1,21 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.util.popgen.HardyWeinbergCalculation; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class HardyWeinberg implements InfoFieldAnnotation, WorkInProgressAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 099780fa7b..02efd854ca 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -1,19 +1,19 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java index ee8b01d7db..2fd62ddf37 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.IndelUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; @@ -24,11 +24,27 @@ public class IndelType implements InfoFieldAnnotation, ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { int run; - if ( vc.isIndel() && vc.isBiallelic() ) { + if (vc.isMixed()) { + Map map = new HashMap(); + map.put(getKeyNames().get(0), String.format("%s", "MIXED")); + return map; + + } + else if ( vc.isIndel() ) { String type=""; - ArrayList inds = IndelUtils.findEventClassificationIndex(vc, ref); - for (int k : inds) { - type = type+ IndelUtils.getIndelClassificationName(k)+"."; + if (!vc.isBiallelic()) + type = "MULTIALLELIC_INDEL"; + else { + if (vc.isInsertion()) + type = "INS."; + else if (vc.isDeletion()) + type = "DEL."; + else + type = "OTHER."; + ArrayList inds = IndelUtils.findEventClassificationIndex(vc, ref); + for (int k : inds) { + type = type+ IndelUtils.getIndelClassificationName(k)+"."; + } } Map map = new HashMap(); map.put(getKeyNames().get(0), String.format("%s", type)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index 33069f1f5b..1d999c531a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -1,19 +1,19 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class LowMQ implements InfoFieldAnnotation { @@ -42,5 +42,5 @@ public Map annotate(RefMetaDataTracker tracker, ReferenceContext public List getKeyNames() { return Arrays.asList("LowMQ"); } - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 3, VCFHeaderLineType.Integer, "3-tuple: ,,")); } + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 3, VCFHeaderLineType.Float, "3-tuple: ,,")); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index 11f86b9726..cc62580a9b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -1,16 +1,17 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; -import java.util.Arrays; public class MappingQualityRankSumTest extends RankSumTest { @@ -21,7 +22,7 @@ public class MappingQualityRankSumTest extends RankSumTest { protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List refQuals, List altQuals) { for ( final PileupElement p : pileup ) { - if( isUsableBase(p) && p.getMappingQual() < 254 ) { // 254 and 255 are special mapping qualities used as a code by aligners + if ( isUsableBase(p) ) { if ( p.getBase() == ref ) { refQuals.add((double)p.getMappingQual()); } else if ( p.getBase() == alt ) { @@ -34,7 +35,7 @@ protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List re // equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ? HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); for (final PileupElement p: pileup) { - if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() < 254) { + if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() != 0 && p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) { // retrieve likelihood information corresponding to this read LinkedHashMap el = indelLikelihoodMap.get(p); // by design, first element in LinkedHashMap was ref allele @@ -54,8 +55,6 @@ protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List re refQuals.add((double)p.getMappingQual()); else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH) altQuals.add((double)p.getMappingQual()); - - } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index 25a7b286d1..f240d02bca 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java index 00cc303094..0ca53adf29 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java @@ -25,22 +25,22 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java index dc4934ade9..08a25a7e3d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java @@ -1,15 +1,15 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java index ba3e2cc8bb..1c70a1b33b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; @@ -47,5 +47,5 @@ public Map annotate(RefMetaDataTracker tracker, ReferenceContext public List getKeyNames() { return Arrays.asList("PercentNBaseSolid"); } - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBaseSolid", 4, VCFHeaderLineType.Float, "Percentage of N bases in the pileup (counting only SOLiD reads)")); } + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBaseSolid", 1, VCFHeaderLineType.Float, "Percentage of N bases in the pileup (counting only SOLiD reads)")); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index 7209848351..2175d39e6a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -1,19 +1,19 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class QualByDepth extends AnnotationByDepth implements InfoFieldAnnotation, StandardAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 6e80c7555c..d52f07b581 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -1,19 +1,23 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotation { @@ -38,8 +42,10 @@ else if (context.hasBasePileup()) pileup = context.getBasePileup(); if (pileup != null) { - for (PileupElement p : pileup ) - qualities[index++] = p.getRead().getMappingQuality(); + for (PileupElement p : pileup ) { + if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE ) + qualities[index++] = p.getMappingQual(); + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 1a967293fd..5466828f6a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -1,22 +1,23 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.MannWhitneyU; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.List; import java.util.ArrayList; -import java.util.Map; import java.util.HashMap; +import java.util.List; +import java.util.Map; @@ -106,6 +107,9 @@ else if (context.hasBasePileup()) protected abstract void fillIndelQualsFromPileup(ReadBackedPileup pileup, List refQuals, List altQuals); protected static boolean isUsableBase( final PileupElement p ) { - return !( p.isDeletion() || p.getMappingQual() == 0 || ((int)p.getQual()) < 6 ); // need the unBAQed quality score here + return !( p.isDeletion() || + p.getMappingQual() == 0 || + p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE || + ((int)p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE ); // need the unBAQed quality score here } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java index f287549bb8..c56e2622da 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java @@ -26,24 +26,24 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; -import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCompoundHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; /** * Created by IntelliJ IDEA. @@ -200,8 +200,8 @@ public List getDescriptions() { 1, VCFHeaderLineType.Integer, "Total read depth per sample, including MQ0"), - new VCFFormatHeaderLine(getKeyNames().get(1), - VCFCompoundHeaderLine.UNBOUNDED, + new VCFFormatHeaderLine(getKeyNames().get(1), + VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Float, "Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample")); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index 727904a3b9..aabfb29703 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -1,14 +1,17 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import net.sf.samtools.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java index fc769ac545..a5ebd8db21 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java index 82f16be429..ff9092a718 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java @@ -25,14 +25,15 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; @@ -65,5 +66,5 @@ public Map annotate(RefMetaDataTracker tracker, ReferenceContext public List getKeyNames() { return Arrays.asList("Samples"); } - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFInfoHeaderLine.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); } + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index 0b6cbcc2ed..a4668eeb61 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java new file mode 100755 index 0000000000..b46d82d8b6 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java @@ -0,0 +1,79 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Created by IntelliJ IDEA. + * User: delangel + * Date: 6/29/11 + * Time: 3:14 PM + * To change this template use File | Settings | File Templates. + */ +public class TechnologyComposition implements ExperimentalAnnotation,InfoFieldAnnotation { + private String nSLX = "NumSLX"; + private String n454 ="Num454"; + private String nSolid = "NumSOLiD"; + private String nOther = "NumOther"; + public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + if ( stratifiedContexts.size() == 0 ) + return null; + + int readsIllumina = 0; + int readsSolid = 0; + int reads454 = 0; + int readsOther = 0; + + for ( Map.Entry sample : stratifiedContexts.entrySet() ) { + AlignmentContext context = sample.getValue(); + + ReadBackedPileup pileup = null; + if (context.hasExtendedEventPileup()) + pileup = context.getExtendedEventPileup(); + else if (context.hasBasePileup()) + pileup = context.getBasePileup(); + + if (pileup != null) { + for (PileupElement p : pileup ) { + if(ReadUtils.is454Read(p.getRead())) + reads454++; + else if (ReadUtils.isSOLiDRead(p.getRead())) + readsSolid++; + else if (ReadUtils.isSLXRead(p.getRead())) + readsIllumina++; + else + readsOther++; + } + } + } + + Map map = new HashMap(); + map.put(nSLX, String.format("%d", readsIllumina)); + map.put(n454, String.format("%d", reads454)); + map.put(nSolid, String.format("%d", readsSolid)); + map.put(nOther, String.format("%d", readsOther)); + return map; + } + + public List getKeyNames() { return Arrays.asList(nSLX,n454,nSolid,nOther); } + + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(nSLX, 1, VCFHeaderLineType.Integer, "Number of SLX reads"), + new VCFInfoHeaderLine(n454, 1, VCFHeaderLineType.Integer, "Number of 454 reads"), + new VCFInfoHeaderLine(nSolid, 1, VCFHeaderLineType.Integer, "Number of SOLiD reads"), + new VCFInfoHeaderLine(nOther, 1, VCFHeaderLineType.Integer, "Number of Other technology reads")); } + +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index cd5b6694b1..acbeee3b24 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -25,24 +25,23 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 54c8be73a9..fdf498a3d6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -25,30 +25,27 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.Map.Entry; - -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.*; +import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.GenomicAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.JoinTable; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationInterfaceManager; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.*; +import java.util.Map.Entry; public class VariantAnnotatorEngine { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java index e02c62bafe..05c1b3c524 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java @@ -25,13 +25,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; -import java.util.*; -import java.util.Map.Entry; - -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -39,7 +32,14 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.*; +import java.util.Map.Entry; /** * This plugin for {@link VariantAnnotatorEngine} serves as the core diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java index 69a35a584c..b42310780e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java @@ -26,28 +26,28 @@ package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; -import java.io.File; -import java.io.IOException; -import java.util.*; -import java.util.Map.Entry; - -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.File; +import java.io.IOException; +import java.util.*; +import java.util.Map.Entry; /** * Annotates variant calls with information from user-specified tabular files. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java index c57aacb5bd..714f374cf7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java @@ -25,6 +25,9 @@ package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; + import java.io.BufferedReader; import java.io.File; import java.io.FileReader; @@ -33,9 +36,6 @@ import java.util.HashMap; import java.util.List; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - /** * This is a container that holds all data corresponding to a single join table as specified by one -J arg (ex: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2). * Some terminology: diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java index d3fcfd42a4..3b6c87f907 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java @@ -25,16 +25,16 @@ package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; + import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.UserException; - /** * Used to parse files passed to the GenomicAnnotator via the -J arg. * The files must be tab-delimited, and the first non-empty/non-commented line diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java index 0ed61fc487..0bbfa51b4c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java @@ -24,9 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; -import java.io.*; -import java.util.*; - import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -36,17 +33,15 @@ import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec; import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.gatk.walkers.By; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.RMD; -import org.broadinstitute.sting.gatk.walkers.Reference; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.io.IOException; +import java.io.PrintStream; +import java.util.*; + /** * Takes a table of transcripts (eg. UCSC refGene, knownGene, and CCDS tables) and generates the big table which contains * annotations for each possible variant at each transcript position (eg. 4 variants at each genomic position). diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java index 29b2564798..57bc44ab8b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; import java.util.List; +import java.util.Map; public interface GenotypeAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java index 5b33395b54..4e850d01bc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Map; import java.util.List; +import java.util.Map; public interface InfoFieldAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index aa23abc672..21c8ec4307 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -25,26 +25,26 @@ package org.broadinstitute.sting.gatk.walkers.beagle; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; + import static java.lang.Math.log10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 93ee0b0850..3eed12992e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -25,10 +25,6 @@ package org.broadinstitute.sting.gatk.walkers.beagle; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Input; @@ -36,17 +32,20 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java index 18aa3e2573..f6cd1d6360 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java @@ -25,28 +25,29 @@ package org.broadinstitute.sting.gatk.walkers.beagle; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.PrintStream; -import java.util.*; +import java.util.Arrays; +import java.util.Set; /** * Produces an input file to Beagle imputation engine, listing unphased, hard-called genotypes for a single sample diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java index 2c67265d69..90e6fcd77a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java @@ -22,6 +22,8 @@ package org.broadinstitute.sting.gatk.walkers.coverage; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -31,12 +33,10 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import java.io.File; -import java.io.PrintStream; import java.io.FileNotFoundException; +import java.io.PrintStream; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoarseCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoarseCoverageWalker.java index ae947eac1c..405a44c29f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoarseCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoarseCoverageWalker.java @@ -26,12 +26,11 @@ package org.broadinstitute.sting.gatk.walkers.coverage; import net.sf.samtools.SAMRecord; - -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java index 5a9c62b7f9..6b91b01980 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java @@ -22,19 +22,19 @@ package org.broadinstitute.sting.gatk.walkers.coverage; +import org.broad.tribble.bed.FullBEDFeature; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broad.tribble.bed.FullBEDFeature; import org.broadinstitute.sting.utils.exceptions.UserException; -import java.util.*; import java.io.PrintStream; +import java.util.Arrays; +import java.util.List; /** * Test routine for new VariantContext object diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java index 298aa90b9f..a41e551665 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java @@ -8,7 +8,9 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; -import java.util.*; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; /** * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index 91ae81cd50..c1956f1d73 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -26,6 +26,8 @@ package org.broadinstitute.sting.gatk.walkers.coverage; import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -34,12 +36,13 @@ import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; -import org.broadinstitute.sting.gatk.refdata.utils.*; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java new file mode 100644 index 0000000000..a1c0433652 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMRecordIterator; +import net.sf.samtools.util.BlockCompressedInputStream; + +import java.io.*; +import java.util.Arrays; + + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: 7/4/11 + * Time: 1:09 PM + * + * Class implementing diffnode reader for VCF + */ +public class BAMDiffableReader implements DiffableReader { + @Override + public String getName() { return "BAM"; } + + @Override + public DiffElement readFromFile(File file, int maxElementsToRead) { + final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index + reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT); + + DiffNode root = DiffNode.rooted(file.getName()); + SAMRecordIterator iterator = reader.iterator(); + + int count = 0; + while ( iterator.hasNext() ) { + if ( count++ > maxElementsToRead && maxElementsToRead != -1) + break; + final SAMRecord record = iterator.next(); + + // name is the read name + first of pair + String name = record.getReadName().replace('.', '_'); + if ( record.getReadPairedFlag() ) { + name += record.getFirstOfPairFlag() ? "_1" : "_2"; + } + + DiffNode readRoot = DiffNode.empty(name, root); + + // add fields + readRoot.add("NAME", record.getReadName()); + readRoot.add("FLAGS", record.getFlags()); + readRoot.add("RNAME", record.getReferenceName()); + readRoot.add("POS", record.getAlignmentStart()); + readRoot.add("MAPQ", record.getMappingQuality()); + readRoot.add("CIGAR", record.getCigarString()); + readRoot.add("RNEXT", record.getMateReferenceName()); + readRoot.add("PNEXT", record.getMateAlignmentStart()); + readRoot.add("TLEN", record.getInferredInsertSize()); + readRoot.add("SEQ", record.getReadString()); + readRoot.add("QUAL", record.getBaseQualityString()); + + for ( SAMRecord.SAMTagAndValue xt : record.getAttributes() ) { + readRoot.add(xt.tag, xt.value); + } + + // add record to root + if ( ! root.hasElement(name) ) + // protect ourselves from malformed files + root.add(readRoot); + } + + reader.close(); + + return root.getBinding(); + } + + @Override + public boolean canRead(File file) { + final byte[] BAM_MAGIC = "BAM\1".getBytes(); + final byte[] buffer = new byte[BAM_MAGIC.length]; + try { + InputStream fstream = new BufferedInputStream(new FileInputStream(file)); + if ( !BlockCompressedInputStream.isValidFile(fstream) ) + return false; + new BlockCompressedInputStream(fstream).read(buffer, 0, BAM_MAGIC.length); + return Arrays.equals(buffer, BAM_MAGIC); + } catch ( IOException e ) { + return false; + } catch ( net.sf.samtools.FileTruncatedException e ) { + return false; + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java new file mode 100644 index 0000000000..eb8a71c2cb --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; +import com.google.java.contract.Requires; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: 7/4/11 + * Time: 12:55 PM + * + * An interface that must be implemented to allow us to calculate differences + * between structured objects + */ +@Invariant({ + "name != null", + "value != null", + "parent != null || name.equals(\"ROOT\")", + "value == null || value.getBinding() == this"}) +public class DiffElement { + public final static DiffElement ROOT = new DiffElement(); + + final private String name; + final private DiffElement parent; + final private DiffValue value; + + /** + * For ROOT only + */ + private DiffElement() { + this.name = "ROOT"; + this.parent = null; + this.value = new DiffValue(this, "ROOT"); + } + + @Requires({"name != null", "parent != null", "value != null"}) + public DiffElement(String name, DiffElement parent, DiffValue value) { + if ( name.equals("ROOT") ) throw new IllegalArgumentException("Cannot use reserved name ROOT"); + this.name = name; + this.parent = parent; + this.value = value; + this.value.setBinding(this); + } + + @Ensures({"result != null"}) + public String getName() { + return name; + } + + public DiffElement getParent() { + return parent; + } + + @Ensures({"result != null"}) + public DiffValue getValue() { + return value; + } + + public boolean isRoot() { return this == ROOT; } + + @Ensures({"result != null"}) + @Override + public String toString() { + return getName() + "=" + getValue().toString(); + } + + public String toString(int offset) { + return (offset > 0 ? Utils.dupString(' ', offset) : 0) + getName() + "=" + getValue().toString(offset); + } + + @Ensures({"result != null"}) + public final String fullyQualifiedName() { + if ( isRoot() ) + return ""; + else if ( parent.isRoot() ) + return name; + else + return parent.fullyQualifiedName() + "." + name; + } + + @Ensures({"result != null"}) + public String toOneLineString() { + return getName() + "=" + getValue().toOneLineString(); + } + + @Ensures({"result != null"}) + public DiffNode getValueAsNode() { + if ( getValue().isCompound() ) + return (DiffNode)getValue(); + else + throw new ReviewedStingException("Illegal request conversion of a DiffValue into a DiffNode: " + this); + } + + public int size() { + return 1 + getValue().size(); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java new file mode 100644 index 0000000000..89e20dad1f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.io.File; +import java.io.PrintStream; +import java.util.*; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: 7/4/11 + * Time: 12:51 PM + * A generic engine for comparing tree-structured objects + */ +public class DiffEngine { + final protected static Logger logger = Logger.getLogger(DiffEngine.class); + + private final Map readers = new HashMap(); + + public DiffEngine() { + loadDiffableReaders(); + } + + // -------------------------------------------------------------------------------- + // + // difference calculation + // + // -------------------------------------------------------------------------------- + + public List diff(DiffElement master, DiffElement test) { + DiffValue masterValue = master.getValue(); + DiffValue testValue = test.getValue(); + + if ( masterValue.isCompound() && masterValue.isCompound() ) { + return diff(master.getValueAsNode(), test.getValueAsNode()); + } else if ( masterValue.isAtomic() && testValue.isAtomic() ) { + return diff(masterValue, testValue); + } else { + // structural difference in types. one is node, other is leaf + return Arrays.asList(new Difference(master, test)); + } + } + + public List diff(DiffNode master, DiffNode test) { + Set allNames = new HashSet(master.getElementNames()); + allNames.addAll(test.getElementNames()); + List diffs = new ArrayList(); + + for ( String name : allNames ) { + DiffElement masterElt = master.getElement(name); + DiffElement testElt = test.getElement(name); + if ( masterElt == null && testElt == null ) { + throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name); + } else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value + // todo -- should one of these be a special MISSING item? + diffs.add(new Difference(masterElt, testElt)); + } else { + diffs.addAll(diff(masterElt, testElt)); + } + } + + return diffs; + } + + public List diff(DiffValue master, DiffValue test) { + if ( master.getValue().equals(test.getValue()) ) { + return Collections.emptyList(); + } else { + return Arrays.asList(new Difference(master.getBinding(), test.getBinding())); + } + } + + // -------------------------------------------------------------------------------- + // + // Summarizing differences + // + // -------------------------------------------------------------------------------- + + /** + * Emits a summary of the diffs to out. Suppose you have the following three differences: + * + * A.X.Z:1!=2 + * A.Y.Z:3!=4 + * B.X.Z:5!=6 + * + * The above is the itemized list of the differences. The summary looks for common differences + * in the name hierarchy, counts those shared elements, and emits the differences that occur + * in order of decreasing counts. + * + * So, in the above example, what are the shared elements? + * + * A.X.Z and B.X.Z share X.Z, so there's a *.X.Z with count 2 + * A.X.Z, A.Y.Z, and B.X.Z all share *.*.Z, with count 3 + * Each of A.X.Z, A.Y.Z, and B.X.Z are individually unique, with count 1 + * + * So we would emit the following summary: + * + * *.*.Z: 3 + * *.X.Z: 2 + * A.X.Z: 1 [specific difference: 1!=2] + * A.Y.Z: 1 [specific difference: 3!=4] + * B.X.Z: 1 [specific difference: 5!=6] + * + * The algorithm to accomplish this calculation is relatively simple. Start with all of the + * concrete differences. For each pair of differences A1.A2....AN and B1.B2....BN: + * + * find the longest common subsequence Si.Si+1...SN where Ai = Bi = Si + * If i == 0, then there's no shared substructure + * If i > 0, then generate the summarized value X = *.*...Si.Si+1...SN + * if X is a known summary, increment it's count, otherwise set its count to 1 + * + * Not that only pairs of the same length are considered as potentially equivalent + * + * @param params determines how we display the items + * @param diffs the list of differences to summarize + */ + public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) { + printSummaryReport(summarizeDifferences(diffs), params ); + } + + public List summarizeDifferences(List diffs) { + return summarizedDifferencesOfPaths(diffs); + } + + final protected static String[] diffNameToPath(String diffName) { + return diffName.split("\\."); + } + + protected List summarizedDifferencesOfPathsFromString(List singletonDiffs) { + List diffs = new ArrayList(); + + for ( String diff : singletonDiffs ) { + diffs.add(new Difference(diff)); + } + + return summarizedDifferencesOfPaths(diffs); + } + + protected List summarizedDifferencesOfPaths(List singletonDiffs) { + Map summaries = new HashMap(); + + // create the initial set of differences + for ( int i = 0; i < singletonDiffs.size(); i++ ) { + for ( int j = 0; j <= i; j++ ) { + Difference diffPath1 = singletonDiffs.get(i); + Difference diffPath2 = singletonDiffs.get(j); + if ( diffPath1.length() == diffPath2.length() ) { + int lcp = longestCommonPostfix(diffPath1.getParts(), diffPath2.getParts()); + String path = diffPath2.getPath(); + if ( lcp != 0 && lcp != diffPath1.length() ) + path = summarizedPath(diffPath2.getParts(), lcp); + Difference sumDiff = new Difference(path, diffPath2.getMaster(), diffPath2.getTest()); + sumDiff.setCount(0); + addSummaryIfMissing(summaries, sumDiff); + } + } + } + + // count differences + for ( Difference diffPath : singletonDiffs ) { + for ( Difference sumDiff : summaries.values() ) { + if ( sumDiff.matches(diffPath.getParts()) ) + sumDiff.incCount(); + } + } + + List sortedSummaries = new ArrayList(summaries.values()); + Collections.sort(sortedSummaries); + return sortedSummaries; + } + + protected void addSummaryIfMissing(Map summaries, Difference diff) { + if ( ! summaries.containsKey(diff.getPath()) ) { + summaries.put(diff.getPath(), diff); + } + } + + protected void printSummaryReport(List sortedSummaries, SummaryReportParams params ) { + List toShow = new ArrayList(); + int count = 0, count1 = 0; + for ( Difference diff : sortedSummaries ) { + if ( diff.getCount() < params.minSumDiffToShow ) + // in order, so break as soon as the count is too low + break; + + if ( params.maxItemsToDisplay != 0 && count++ > params.maxItemsToDisplay ) + break; + + if ( diff.getCount() == 1 ) { + count1++; + if ( params.maxCountOneItems != 0 && count1 > params.maxCountOneItems ) + break; + } + + toShow.add(diff); + } + + // if we want it in descending order, reverse the list + if ( ! params.descending ) { + Collections.reverse(toShow); + } + + // now that we have a specific list of values we want to show, display them + GATKReport report = new GATKReport(); + final String tableName = "diffences"; + report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false); + GATKReportTable table = report.getTable(tableName); + table.addPrimaryKey("Difference", true); + table.addColumn("NumberOfOccurrences", 0); + table.addColumn("ExampleDifference", 0); + for ( Difference diff : toShow ) { + table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount()); + table.set(diff.getPath(), "ExampleDifference", diff.valueDiffString()); + } + table.write(params.out); + } + + protected static int longestCommonPostfix(String[] diffPath1, String[] diffPath2) { + int i = 0; + for ( ; i < diffPath1.length; i++ ) { + int j = diffPath1.length - i - 1; + if ( ! diffPath1[j].equals(diffPath2[j]) ) + break; + } + return i; + } + + /** + * parts is [A B C D] + * commonPostfixLength: how many parts are shared at the end, suppose its 2 + * We want to create a string *.*.C.D + * + * @param parts the separated path values [above without .] + * @param commonPostfixLength + * @return + */ + protected static String summarizedPath(String[] parts, int commonPostfixLength) { + int stop = parts.length - commonPostfixLength; + if ( stop > 0 ) parts = parts.clone(); + for ( int i = 0; i < stop; i++ ) { + parts[i] = "*"; + } + return Utils.join(".", parts); + } + + // -------------------------------------------------------------------------------- + // + // plugin manager + // + // -------------------------------------------------------------------------------- + + public void loadDiffableReaders() { + List> drClasses = new PluginManager( DiffableReader.class ).getPlugins(); + + logger.info("Loading diffable modules:"); + for (Class drClass : drClasses ) { + logger.info("\t" + drClass.getSimpleName()); + + try { + DiffableReader dr = drClass.newInstance(); + readers.put(dr.getName(), dr); + } catch (InstantiationException e) { + throw new ReviewedStingException("Unable to instantiate module '" + drClass.getSimpleName() + "'"); + } catch (IllegalAccessException e) { + throw new ReviewedStingException("Illegal access error when trying to instantiate '" + drClass.getSimpleName() + "'"); + } + } + } + + protected Map getReaders() { + return readers; + } + + protected DiffableReader getReader(String name) { + return readers.get(name); + } + + /** + * Returns a reader appropriate for this file, or null if no such reader exists + * @param file + * @return + */ + public DiffableReader findReaderForFile(File file) { + for ( DiffableReader reader : readers.values() ) + if (reader.canRead(file) ) + return reader; + + return null; + } + + /** + * Returns true if reader appropriate for this file, or false if no such reader exists + * @param file + * @return + */ + public boolean canRead(File file) { + return findReaderForFile(file) != null; + } + + + public DiffElement createDiffableFromFile(File file) { + return createDiffableFromFile(file, -1); + } + + public DiffElement createDiffableFromFile(File file, int maxElementsToRead) { + DiffableReader reader = findReaderForFile(file); + if ( reader == null ) + throw new UserException("Unsupported file type: " + file); + else + return reader.readFromFile(file, maxElementsToRead); + } + + public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) { + DiffEngine diffEngine = new DiffEngine(); + + if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) { + DiffElement master = diffEngine.createDiffableFromFile(masterFile); + DiffElement test = diffEngine.createDiffableFromFile(testFile); + List diffs = diffEngine.diff(master, test); + diffEngine.reportSummarizedDifferences(diffs, params); + return true; + } else { + return false; + } + } + + public static class SummaryReportParams { + PrintStream out = System.out; + int maxItemsToDisplay = 0; + int maxCountOneItems = 0; + int minSumDiffToShow = 0; + boolean descending = true; + + public SummaryReportParams(PrintStream out, int maxItemsToDisplay, int maxCountOneItems, int minSumDiffToShow) { + this.out = out; + this.maxItemsToDisplay = maxItemsToDisplay; + this.maxCountOneItems = maxCountOneItems; + this.minSumDiffToShow = minSumDiffToShow; + } + + public void setDescending(boolean descending) { + this.descending = descending; + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java new file mode 100644 index 0000000000..2f48de2d36 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import com.google.java.contract.Requires; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.*; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: 7/4/11 + * Time: 12:55 PM + * + * An interface that must be implemented to allow us to calculate differences + * between structured objects + */ +public class DiffNode extends DiffValue { + private Map getElementMap() { + return (Map)super.getValue(); + } + private static Map emptyElements() { return new HashMap(); } + + private DiffNode(Map elements) { + super(elements); + } + + private DiffNode(DiffElement binding, Map elements) { + super(binding, elements); + } + + // --------------------------------------------------------------------------- + // + // constructors + // + // --------------------------------------------------------------------------- + + public static DiffNode rooted(String name) { + return empty(name, DiffElement.ROOT); + } + + public static DiffNode empty(String name, DiffElement parent) { + DiffNode df = new DiffNode(emptyElements()); + DiffElement elt = new DiffElement(name, parent, df); + df.setBinding(elt); + return df; + } + + public static DiffNode empty(String name, DiffValue parent) { + return empty(name, parent.getBinding()); + } + + // --------------------------------------------------------------------------- + // + // accessors + // + // --------------------------------------------------------------------------- + + @Override + public boolean isAtomic() { return false; } + + public Collection getElementNames() { + return getElementMap().keySet(); + } + + public Collection getElements() { + return getElementMap().values(); + } + + private Collection getElements(boolean atomicOnly) { + List elts = new ArrayList(); + for ( DiffElement elt : getElements() ) + if ( (atomicOnly && elt.getValue().isAtomic()) || (! atomicOnly && elt.getValue().isCompound())) + elts.add(elt); + return elts; + } + + public Collection getAtomicElements() { + return getElements(true); + } + + public Collection getCompoundElements() { + return getElements(false); + } + + /** + * Returns the element bound to name, or null if no such binding exists + * @param name + * @return + */ + public DiffElement getElement(String name) { + return getElementMap().get(name); + } + + /** + * Returns true if name is bound in this node + * @param name + * @return + */ + public boolean hasElement(String name) { + return getElement(name) != null; + } + + // --------------------------------------------------------------------------- + // + // add + // + // --------------------------------------------------------------------------- + + @Requires("elt != null") + public void add(DiffElement elt) { + if ( getElementMap().containsKey(elt.getName()) ) + throw new IllegalArgumentException("Attempting to rebind already existing binding: " + elt + " node=" + this); + getElementMap().put(elt.getName(), elt); + } + + @Requires("elt != null") + public void add(DiffValue elt) { + add(elt.getBinding()); + } + + @Requires("elts != null") + public void add(Collection elts) { + for ( DiffElement e : elts ) + add(e); + } + + public void add(String name, Object value) { + add(new DiffElement(name, this.getBinding(), new DiffValue(value))); + } + + public int size() { + int count = 0; + for ( DiffElement value : getElements() ) + count += value.size(); + return count; + } + + // --------------------------------------------------------------------------- + // + // toString + // + // --------------------------------------------------------------------------- + + @Override + public String toString() { + return toString(0); + } + + @Override + public String toString(int offset) { + String off = offset > 0 ? Utils.dupString(' ', offset) : ""; + StringBuilder b = new StringBuilder(); + + b.append("(").append("\n"); + Collection atomicElts = getAtomicElements(); + for ( DiffElement elt : atomicElts ) { + b.append(elt.toString(offset + 2)).append('\n'); + } + + for ( DiffElement elt : getCompoundElements() ) { + b.append(elt.toString(offset + 4)).append('\n'); + } + b.append(off).append(")").append("\n"); + + return b.toString(); + } + + @Override + public String toOneLineString() { + StringBuilder b = new StringBuilder(); + + b.append('('); + List parts = new ArrayList(); + for ( DiffElement elt : getElements() ) + parts.add(elt.toOneLineString()); + b.append(Utils.join(" ", parts)); + b.append(')'); + + return b.toString(); + } + + // -------------------------------------------------------------------------------- + // + // fromString and toOneLineString + // + // -------------------------------------------------------------------------------- + + public static DiffElement fromString(String tree) { + return fromString(tree, DiffElement.ROOT); + } + + /** + * Doesn't support full tree structure parsing + * @param tree + * @param parent + * @return + */ + private static DiffElement fromString(String tree, DiffElement parent) { + // X=(A=A B=B C=(D=D)) + String[] parts = tree.split("=", 2); + if ( parts.length != 2 ) + throw new ReviewedStingException("Unexpected tree structure: " + tree + " parts=" + parts); + String name = parts[0]; + String value = parts[1]; + + if ( value.length() == 0 ) + throw new ReviewedStingException("Illegal tree structure: " + value + " at " + tree); + + if ( value.charAt(0) == '(' ) { + if ( ! value.endsWith(")") ) + throw new ReviewedStingException("Illegal tree structure. Missing ): " + value + " at " + tree); + String subtree = value.substring(1, value.length()-1); + DiffNode rec = DiffNode.empty(name, parent); + String[] subParts = subtree.split(" "); + for ( String subPart : subParts ) { + rec.add(fromString(subPart, rec.getBinding())); + } + return rec.getBinding(); + } else { + return new DiffValue(name, parent, value).getBinding(); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java new file mode 100644 index 0000000000..fba6549fb7 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; + +import java.io.File; +import java.io.PrintStream; +import java.util.List; + +/** + * Compares two record-oriented files, itemizing specific difference between equivalent + * records in the two files. Reports both itemized and summarized differences. + * @author Mark DePristo + * @version 0.1 + */ +@Requires(value={}) +public class DiffObjectsWalker extends RodWalker { + @Output(doc="File to which results should be written",required=true) + protected PrintStream out; + + @Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false) + int MAX_OBJECTS_TO_READ = -1; + + @Argument(fullName="maxDiffs", shortName="M", doc="Max. number of diffs to process", required=false) + int MAX_DIFFS = 0; + + @Argument(fullName="maxCount1Diffs", shortName="M1", doc="Max. number of diffs occuring exactly once in the file to process", required=false) + int MAX_COUNT1_DIFFS = 0; + + @Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false) + int minCountForDiff = 1; + + @Argument(fullName="showItemizedDifferences", shortName="SID", doc="Should we enumerate all differences between the files?", required=false) + boolean showItemizedDifferences = false; + + @Argument(fullName="master", shortName="m", doc="Master file: expected results", required=true) + File masterFile; + + @Argument(fullName="test", shortName="t", doc="Test file: new results to compare to the master file", required=true) + File testFile; + + final DiffEngine diffEngine = new DiffEngine(); + + @Override + public void initialize() { + + } + + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + return 0; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer counter, Integer sum) { + return counter + sum; + } + + @Override + public void onTraversalDone(Integer sum) { + out.printf("Reading master file %s%n", masterFile); + DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ); + out.printf(" Read %d objects%n", master.size()); + out.printf("Reading test file %s%n", testFile); + DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ); + out.printf(" Read %d objects%n", test.size()); + +// out.printf("Master diff objects%n"); +// out.println(master.toString()); +// out.printf("Test diff objects%n"); +// out.println(test.toString()); + + List diffs = diffEngine.diff(master, test); + if ( showItemizedDifferences ) { + out.printf("Itemized results%n"); + for ( Difference diff : diffs ) + out.printf("DIFF: %s%n", diff.toString()); + } + + DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff); + params.setDescending(false); + diffEngine.reportSummarizedDifferences(diffs, params); + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java new file mode 100644 index 0000000000..9631914461 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: 7/4/11 + * Time: 12:55 PM + * + * An interface that must be implemented to allow us to calculate differences + * between structured objects + */ +public class DiffValue { + private DiffElement binding = null; + final private Object value; + + public DiffValue(Object value) { + this.value = value; + } + + public DiffValue(DiffElement binding, Object value) { + this.binding = binding; + this.value = value; + } + + public DiffValue(DiffValue parent, Object value) { + this(parent.getBinding(), value); + } + + public DiffValue(String name, DiffElement parent, Object value) { + this.binding = new DiffElement(name, parent, this); + this.value = value; + } + + public DiffValue(String name, DiffValue parent, Object value) { + this(name, parent.getBinding(), value); + } + + public DiffElement getBinding() { + return binding; + } + + protected void setBinding(DiffElement binding) { + this.binding = binding; + } + + public Object getValue() { + return value; + } + + public String toString() { + return getValue().toString(); + } + + public String toString(int offset) { + return toString(); + } + + public String toOneLineString() { + return getValue().toString(); + } + + public boolean isAtomic() { return true; } + public boolean isCompound() { return ! isAtomic(); } + public int size() { return 1; } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java new file mode 100644 index 0000000000..a117206f1d --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; + +import java.io.File; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: 7/4/11 + * Time: 1:09 PM + * + * Interface for readers creating diffable objects from a file + */ +public interface DiffableReader { + @Ensures("result != null") + /** + * Return the name of this DiffableReader type. For example, the VCF reader returns 'VCF' and the + * bam reader 'BAM' + */ + public String getName(); + + @Ensures("result != null") + @Requires("file != null") + /** + * Read up to maxElementsToRead DiffElements from file, and return them. + */ + public DiffElement readFromFile(File file, int maxElementsToRead); + + /** + * Return true if the file can be read into DiffElement objects with this reader. This should + * be uniquely true/false for all readers, as the system will use the first reader that can read the + * file. This routine should never throw an exception. The VCF reader, for example, looks at the + * first line of the file for the ##format=VCF4.1 header, and the BAM reader for the BAM_MAGIC value + * @param file + * @return + */ + @Requires("file != null") + public boolean canRead(File file); +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java new file mode 100644 index 0000000000..81b6f7e0e9 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +public class Difference implements Comparable { + final String path; // X.Y.Z + final String[] parts; + int count = 1; + DiffElement master = null , test = null; + + public Difference(String path) { + this.path = path; + this.parts = DiffEngine.diffNameToPath(path); + } + + public Difference(DiffElement master, DiffElement test) { + this(createPath(master, test), master, test); + } + + public Difference(String path, DiffElement master, DiffElement test) { + this(path); + this.master = master; + this.test = test; + } + + public String[] getParts() { + return parts; + } + + public void incCount() { count++; } + + public int getCount() { + return count; + } + + public void setCount(int count) { + this.count = count; + } + + /** + * The fully qualified path object A.B.C etc + * @return + */ + public String getPath() { + return path; + } + + /** + * @return the length of the parts of this summary + */ + public int length() { + return this.parts.length; + } + + /** + * Returns true if the string parts matches this summary. Matches are + * must be equal() everywhere where this summary isn't *. + * @param otherParts + * @return + */ + public boolean matches(String[] otherParts) { + if ( otherParts.length != length() ) + return false; + + // TODO optimization: can start at right most non-star element + for ( int i = 0; i < length(); i++ ) { + String part = parts[i]; + if ( ! part.equals("*") && ! part.equals(otherParts[i]) ) + return false; + } + + return true; + } + + @Override + public String toString() { + return String.format("%s:%d:%s", getPath(), getCount(), valueDiffString()); + } + + @Override + public int compareTo(Difference other) { + // sort first highest to lowest count, then by lowest to highest path + int countCmp = Integer.valueOf(count).compareTo(other.count); + return countCmp != 0 ? -1 * countCmp : path.compareTo(other.path); + } + + public String valueDiffString() { + if ( hasSpecificDifference() ) { + return String.format("%s!=%s", getOneLineString(master), getOneLineString(test)); + } else { + return "N/A"; + } + } + + private static String createPath(DiffElement master, DiffElement test) { + return (master == null ? test : master).fullyQualifiedName(); + } + + private static String getOneLineString(DiffElement elt) { + return elt == null ? "MISSING" : elt.getValue().toOneLineString(); + } + + public boolean hasSpecificDifference() { + return master != null || test != null; + } + + public DiffElement getMaster() { + return master; + } + + public DiffElement getTest() { + return test; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java new file mode 100644 index 0000000000..77a992ce0b --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import org.broad.tribble.readers.AsciiLineReader; +import org.broad.tribble.readers.LineReader; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.IOException; +import java.util.Map; + + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: 7/4/11 + * Time: 1:09 PM + * + * Class implementing diffnode reader for VCF + */ +public class VCFDiffableReader implements DiffableReader { + @Override + public String getName() { return "VCF"; } + + @Override + public DiffElement readFromFile(File file, int maxElementsToRead) { + DiffNode root = DiffNode.rooted(file.getName()); + try { + // read the version line from the file + LineReader lineReader = new AsciiLineReader(new FileInputStream(file)); + final String version = lineReader.readLine(); + root.add("VERSION", version); + lineReader.close(); + + lineReader = new AsciiLineReader(new FileInputStream(file)); + VCFCodec vcfCodec = new VCFCodec(); + + // must be read as state is stored in reader itself + VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader); + for ( VCFHeaderLine headerLine : header.getMetaData() ) { + String key = headerLine.getKey(); + if ( headerLine instanceof VCFNamedHeaderLine ) + key += "_" + ((VCFNamedHeaderLine) headerLine).getName(); + root.add(key, headerLine.toString()); + } + + String line = lineReader.readLine(); + int count = 0, nRecordsAtPos = 1; + String prevName = ""; + while ( line != null ) { + if ( count++ > maxElementsToRead && maxElementsToRead != -1) + break; + + VariantContext vc = (VariantContext)vcfCodec.decode(line); + String name = vc.getChr() + ":" + vc.getStart(); + if ( name.equals(prevName) ) { + name += "_" + ++nRecordsAtPos; + } else { + prevName = name; + } + DiffNode vcRoot = DiffNode.empty(name, root); + + // add fields + vcRoot.add("CHROM", vc.getChr()); + vcRoot.add("POS", vc.getStart()); + vcRoot.add("ID", vc.hasID() ? vc.getID() : VCFConstants.MISSING_VALUE_v4); + vcRoot.add("REF", vc.getReference()); + vcRoot.add("ALT", vc.getAlternateAlleles()); + vcRoot.add("QUAL", vc.hasNegLog10PError() ? vc.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4); + vcRoot.add("FILTER", vc.getFilters()); + + // add info fields + for (Map.Entry attribute : vc.getAttributes().entrySet()) { + if ( ! attribute.getKey().startsWith("_") && ! attribute.getKey().equals(VariantContext.ID_KEY)) + vcRoot.add(attribute.getKey(), attribute.getValue()); + } + + for (Genotype g : vc.getGenotypes().values() ) { + DiffNode gRoot = DiffNode.empty(g.getSampleName(), vcRoot); + gRoot.add("GT", g.getGenotypeString()); + gRoot.add("GQ", g.hasNegLog10PError() ? g.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4 ); + + for (Map.Entry attribute : g.getAttributes().entrySet()) { + if ( ! attribute.getKey().startsWith("_") ) + gRoot.add(attribute.getKey(), attribute.getValue()); + } + + vcRoot.add(gRoot); + } + + root.add(vcRoot); + line = lineReader.readLine(); + } + + lineReader.close(); + } catch ( IOException e ) { + return null; + } + + return root.getBinding(); + } + + @Override + public boolean canRead(File file) { + try { + final String VCF4_HEADER = "##fileformat=VCFv4"; + char[] buff = new char[VCF4_HEADER.length()]; + new FileReader(file).read(buff, 0, VCF4_HEADER.length()); + String firstLine = new String(buff); + return firstLine.startsWith(VCF4_HEADER); + } catch ( IOException e ) { + return false; + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index 828d397179..efc101618f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -25,13 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.fasta; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Collection; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java index 6be2336c01..2dbfc76ff0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java @@ -25,16 +25,15 @@ package org.broadinstitute.sting.gatk.walkers.fasta; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java index b315269872..2c009f7f26 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.filters; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; public class ClusteredSnps { private GenomeLocParser genomeLocParser; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java index c3849e2403..ede19746a2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java @@ -25,8 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.filters; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; public class FiltrationContext { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java index 225cdecc32..d7c0dd4d51 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java @@ -27,7 +27,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; +import java.util.LinkedList; +import java.util.List; +import java.util.ListIterator; /** * A window of variants surrounding the current variant being investigated diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 884d0ac240..6c023573ab 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -25,21 +25,20 @@ package org.broadinstitute.sting.gatk.walkers.filters; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 47be7e6fe6..83a8ce7d7e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -26,14 +26,15 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; -import java.util.*; +import java.util.Map; +import java.util.Set; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java index 22c9dcf919..696a74de8a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java @@ -4,8 +4,6 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import java.util.Arrays; - /** * Created by IntelliJ IDEA. * User: delangel diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java index ab075eaf25..2014801e48 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java @@ -26,13 +26,14 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMUtils; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.pileup.FragmentPileup; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import static java.lang.Math.log10; import static java.lang.Math.pow; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 89504b371a..5c27bc9434 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -26,17 +26,22 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.SimpleTimer; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import sun.reflect.generics.reflectiveObjects.NotImplementedException; -import java.util.*; import java.io.PrintStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index 3902a0b7f4..8261cd5881 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -30,11 +30,11 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java index c4e315f687..10b646d637 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java @@ -26,18 +26,18 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; import java.io.PrintStream; +import java.util.*; public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 46d20b31ef..60ea601d5e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -25,16 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; -import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.genotype.Haplotype; @@ -42,11 +40,10 @@ import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java index 2cf149fd08..3652763dee 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.ArrayList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 9f4d4182f4..3e3cd128b7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -25,23 +25,25 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java index cf1c57a054..22c3081a34 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java @@ -25,8 +25,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.DownsampleType; @@ -36,8 +34,12 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.HashSet; +import java.util.Set; +import java.util.TreeSet; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java index 1533e87772..68d8f9b544 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java @@ -24,19 +24,19 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 055eb0b978..2b25df4aac 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -27,7 +27,6 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.Input; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 7a765c602d..2a0338bcab 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -25,22 +25,25 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.gatk.contexts.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.DownsampleType; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.filters.BadMateFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; -import org.broadinstitute.sting.gatk.DownsampleType; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; -import java.util.*; import java.io.PrintStream; +import java.util.*; /** @@ -48,7 +51,7 @@ * multi-sample data. The user can choose from several different incorporated calculation models. */ @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) -@ReadFilters( {BadMateFilter.class} ) +@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableReadFilter.class} ) @Reference(window=@Window(start=-200,stop=200)) @By(DataSource.REFERENCE) @Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250) @@ -158,7 +161,7 @@ else if ( source.getName().startsWith(VariantAnnotatorEngine.dbPrefix) ) { } // FORMAT and INFO fields - headerInfo.addAll(VCFUtils.getSupportedHeaderStrings()); + headerInfo.addAll(getSupportedHeaderStrings()); // FILTER fields if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING ) @@ -167,6 +170,20 @@ else if ( source.getName().startsWith(VariantAnnotatorEngine.dbPrefix) ) { return headerInfo; } + /** + * return a set of supported format lines; what we currently support for output in the genotype fields of a VCF + * @return a set of VCF format lines + */ + private static Set getSupportedHeaderStrings() { + Set result = new HashSet(); + result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); + result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality")); + result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)")); + result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification")); + + return result; + } + /** * Compute at a given locus. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 6fc972b5dc..a10897172c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -25,22 +25,24 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import com.google.java.contract.Requires; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import com.google.java.contract.*; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java index df1f4f9089..adb7c4c386 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java @@ -1,11 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.indels; import net.sf.picard.sam.SamPairUtil; -import net.sf.samtools.*; +import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMRecordComparator; +import net.sf.samtools.SAMRecordCoordinateComparator; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java index 7617aa9de5..e68aa31e06 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java @@ -25,21 +25,16 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import net.sf.samtools.AlignmentBlock; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.walkers.genotyper.ExactAFCalculationModel; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.genotype.Haplotype; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Arrays; import java.util.HashMap; -import java.util.List; public class HaplotypeIndelErrorModel { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index a53665d640..61f21c4886 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -25,39 +25,41 @@ package org.broadinstitute.sting.gatk.walkers.indels; +import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.*; import net.sf.samtools.util.RuntimeIOException; -import net.sf.samtools.util.StringUtil; import net.sf.samtools.util.SequenceUtil; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.commandline.*; +import net.sf.samtools.util.StringUtil; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.interval.IntervalMergingRule; -import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.BAQMode; -import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.interval.NwayIntervalMergingIterator; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.NWaySAMFileWriter; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java index 010e0cf6f2..af80513340 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java @@ -25,8 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import net.sf.samtools.*; -import org.broadinstitute.sting.commandline.*; +import net.sf.samtools.Cigar; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 034a667aad..55450486be 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -28,30 +28,25 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -/*import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Covariate; -import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDataManager; -import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDatum; -import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalibrationArgumentCollection; -*/import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.collections.NestedHashMap; -import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.genotype.Haplotype; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.text.XReadLines; +import org.broadinstitute.sting.utils.variantcontext.Allele; import java.io.File; -import java.io.FileNotFoundException; -import java.util.*; -import java.util.regex.Pattern; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashMap; + +/*import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Covariate; +import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDataManager; +import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDatum; +import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalibrationArgumentCollection; +*/ public class PairHMMIndelErrorModel { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignedReadCounter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignedReadCounter.java index fc196e712d..2c89b907bd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignedReadCounter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignedReadCounter.java @@ -25,19 +25,23 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import net.sf.samtools.*; -import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.filters.BadMateFilter; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.By; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.commandline.Argument; import java.io.File; -import java.util.*; +import java.util.Iterator; @By(DataSource.READS) // walker to count realigned reads diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 048dbd8cbb..488e37f260 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -25,32 +25,32 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.BadCigarFilter; -import org.broadinstitute.sting.gatk.filters.Platform454Filter; -import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.gatk.filters.BadMateFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; +import org.broadinstitute.sting.gatk.filters.Platform454Filter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.ArrayList; import java.io.PrintStream; +import java.util.ArrayList; /** * Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string. */ -@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class, BadCigarFilter.class}) +@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, BadCigarFilter.class}) @Reference(window=@Window(start=-1,stop=50)) @Allows(value={DataSource.READS, DataSource.REFERENCE}) @By(DataSource.REFERENCE) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SAMRecordCoordinateComparatorWithUnmappedReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SAMRecordCoordinateComparatorWithUnmappedReads.java index 1fe3fdd046..3854a4a8c5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SAMRecordCoordinateComparatorWithUnmappedReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SAMRecordCoordinateComparatorWithUnmappedReads.java @@ -23,7 +23,8 @@ */ package org.broadinstitute.sting.gatk.walkers.indels; -import net.sf.samtools.*; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMRecordCoordinateComparator; /** * Extends Picard's Comparator for sorting SAMRecords by coordinate. This one actually deals with unmapped reads diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index c2953d1d7a..443e6e9f2d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -25,15 +25,21 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import net.sf.samtools.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; -import org.broadinstitute.sting.gatk.filters.*; -import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; +import org.broadinstitute.sting.gatk.filters.Platform454Filter; +import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter; +import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.Transcript; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; @@ -42,22 +48,22 @@ import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.interval.IntervalUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.collections.CircularArray; +import org.broadinstitute.sting.utils.collections.PrimitivePair; +import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.interval.OverlappingIntervalIterator; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.utils.collections.CircularArray; -import org.broadinstitute.sting.utils.collections.PrimitivePair; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.Hidden; - -import net.sf.samtools.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.*; import java.util.*; @@ -72,7 +78,7 @@ * if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains * only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords. */ -@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class, PlatformUnitFilter.class}) +@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, PlatformUnitFilter.class}) public class SomaticIndelDetectorWalker extends ReadWalker { // @Output // PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java index 869edf7847..cb123c8683 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java @@ -23,9 +23,9 @@ */ package org.broadinstitute.sting.gatk.walkers.phasing; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java index 81d9b4ddba..9aa370d3f2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java @@ -24,14 +24,10 @@ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature; @@ -41,9 +37,12 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java new file mode 100644 index 0000000000..298d8d6c89 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java @@ -0,0 +1,110 @@ +package org.broadinstitute.sting.gatk.walkers.phasing; + +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; + +import java.util.*; + +/** + * Merges read-back-phased and phase-by-transmission files. + */ +public class MergeAndMatchHaplotypes extends RodWalker { + @Output + protected VCFWriter vcfWriter = null; + + private Map pbtCache = new HashMap(); + private Map rbpCache = new HashMap(); + + private final String SOURCE_NAME = "MergeReadBackedAndTransmissionPhasedVariants"; + + public void initialize() { + ArrayList rodNames = new ArrayList(); + rodNames.add("pbt"); + + Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); + Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); + Set headerLines = new HashSet(); + headerLines.addAll(VCFUtils.getHeaderFields(this.getToolkit())); + + vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples)); + } + + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (tracker != null) { + Collection pbts = tracker.getVariantContexts(ref, "pbt", null, ref.getLocus(), true, true); + Collection rbps = tracker.getVariantContexts(ref, "rbp", null, ref.getLocus(), true, true); + + VariantContext pbt = pbts.iterator().hasNext() ? pbts.iterator().next() : null; + VariantContext rbp = rbps.iterator().hasNext() ? rbps.iterator().next() : null; + + if (pbt != null && rbp != null) { + Map genotypes = pbt.getGenotypes(); + + if (!rbp.isFiltered()) { + for (String sample : rbp.getSampleNames()) { + Genotype rbpg = rbp.getGenotype(sample); + Genotype pbtg = pbt.getGenotype(sample); + + // Propagate read-backed phasing information to genotypes unphased by transmission + //if (!pbtg.isPhased() && rbpCache.containsKey(sample)) { + if (!pbtg.isPhased() && rbpg.isPhased() && rbpCache.containsKey(sample)) { + boolean orientationMatches = rbpCache.get(sample).sameGenotype(pbtCache.get(sample), false); + + if (orientationMatches) { + pbtg = rbpg; + } else { + List fwdAlleles = rbpg.getAlleles(); + List revAlleles = new ArrayList(); + + for (int i = fwdAlleles.size() - 1; i >= 0; i--) { + revAlleles.add(fwdAlleles.get(i)); + } + + pbtg = new Genotype(sample, revAlleles, rbpg.getNegLog10PError(), rbpg.getFilters(), rbpg.getAttributes(), rbpg.isPhased()); + } + } + + genotypes.put(sample, pbtg); + + // Update the cache + if (/*rbpg.isPhased() &&*/ rbpg.isHet()) { + rbpCache.put(sample, rbpg); + pbtCache.put(sample, pbtg); + } else if (!rbpg.isPhased()) { + rbpCache.remove(sample); + pbtCache.remove(sample); + } + } + } + + VariantContext newvc = new VariantContext(SOURCE_NAME, pbt.getChr(), pbt.getStart(), pbt.getStart(), pbt.getAlleles(), genotypes, pbt.getNegLog10PError(), pbt.getFilters(), pbt.getAttributes()); + vcfWriter.add(newvc, ref.getBase()); + } + } + + return null; + } + + @Override + public Integer reduceInit() { + return null; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return null; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java index 709bc44cec..5bd4386059 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java @@ -24,10 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -35,7 +31,11 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java index abced442ea..b0491a2818 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java @@ -24,20 +24,19 @@ package org.broadinstitute.sting.gatk.walkers.phasing; -import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.ReferenceSequenceFile; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java index ec6f5c6485..be15d45410 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java @@ -24,20 +24,22 @@ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java new file mode 100755 index 0000000000..b24437c4a4 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -0,0 +1,320 @@ +package org.broadinstitute.sting.gatk.walkers.phasing; + +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; + +import java.util.*; + +/** + * Phases a trio VCF (child phased by transmission, implied phase carried over to parents). Given genotypes for a trio, + * this walker modifies the genotypes (if necessary) to reflect the most likely configuration given the genotype + * likelihoods and inheritance constraints, phases child by transmission and carries over implied phase to the parents + * (their alleles in their genotypes are ordered as transmitted|untransmitted). Computes probability that the + * determined phase is correct given that the genotype configuration is correct (useful if you want to use this to + * compare phasing accuracy, but want to break that comparison down by phasing confidence in the truth set). Optionally + * filters out sites where the phasing is indeterminate (site has no-calls), ambiguous (everyone is heterozygous), or + * the genotypes exhibit a Mendelian violation. This walker assumes there are only three samples in the VCF file to + * begin. + */ +public class PhaseByTransmission extends RodWalker { + @Argument(shortName="f", fullName="familyPattern", required=true, doc="Pattern for the family structure (usage: mom+dad=child)") + public String familyStr = null; + + @Argument(shortName="nofilters", fullName="disableFilters", required=false, doc="Disable filters for sites where the phase can't be determined, where the parental origin of the alleles is ambiguous (i.e. everyone is heterozygous), or Mendelian violations") + public Boolean noFilters = false; + + @Output + protected VCFWriter vcfWriter = null; + + private String SAMPLE_NAME_MOM; + private String SAMPLE_NAME_DAD; + private String SAMPLE_NAME_CHILD; + + private final String ROD_NAME = "variant"; + private final String AMBIGUOUS_ALLELE_ORIGIN_FILTER_NAME = "AmbiguousAlleleOrigin"; + private final String INSUFFICIENT_DATA_FILTER_NAME = "InsufficientInformation"; + private final String MENDELIAN_VIOLATION_FILTER_NAME = "MendelianViolation"; + private final String TRANSMISSION_PROBABILITY_TAG_NAME = "TP"; + private final String SOURCE_NAME = "PhaseByTransmission"; + + private final Double MENDELIAN_VIOLATION_PRIOR = 1e-8; + + /** + * Parse the familial relationship specification, and initialize VCF writer + */ + public void initialize() { + String[] pieces = familyStr.split("[\\+\\=]"); + + SAMPLE_NAME_MOM = pieces[0]; + SAMPLE_NAME_DAD = pieces[1]; + SAMPLE_NAME_CHILD = pieces[2]; + + ArrayList rodNames = new ArrayList(); + rodNames.add(ROD_NAME); + + Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); + Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); + + if (vcfSamples.size() != 3) { + throw new UserException("File to phase by transmission contains more than three samples. This walker only" + + "accepts VCFs with three samples, so that the meaning of the applied filters is" + + "unambiguous."); + } + + if (!vcfSamples.contains(SAMPLE_NAME_MOM) || !vcfSamples.contains(SAMPLE_NAME_DAD) || !vcfSamples.contains(SAMPLE_NAME_CHILD)) { + throw new UserException("One or more of the samples specified in the familyPattern argument is not present" + + "in this file. Please supply a VCF file that contains only three samples: the" + + "mother, the father, and the child"); + } + + Set samples = new TreeSet(); + samples.add(SAMPLE_NAME_MOM); + samples.add(SAMPLE_NAME_DAD); + samples.add(SAMPLE_NAME_CHILD); + + Set headerLines = new HashSet(); + headerLines.addAll(VCFUtils.getHeaderFields(this.getToolkit())); + + if (!noFilters) { + headerLines.add(new VCFFilterHeaderLine(AMBIGUOUS_ALLELE_ORIGIN_FILTER_NAME, "The parental origin of each of the child's allele cannot be determined (ie everyone is heterozygous)")); + headerLines.add(new VCFFilterHeaderLine(INSUFFICIENT_DATA_FILTER_NAME, "The phase of the child's genotype cannot be determined (ie someone is a no-call)")); + headerLines.add(new VCFFilterHeaderLine(MENDELIAN_VIOLATION_FILTER_NAME, "No combination of the parents' alleles can yield the child's genotype (ie a possible Mendelian violation)")); + } + + headerLines.add(new VCFInfoHeaderLine(TRANSMISSION_PROBABILITY_TAG_NAME, 1, VCFHeaderLineType.Float, "Probability that the phase is correct given that the genotypes are correct")); + vcfWriter.writeHeader(new VCFHeader(headerLines, samples)); + } + + private double computeTransmissionLikelihoodOfGenotypeConfiguration(Genotype mom, Genotype dad, Genotype child) { + double[] momLikelihoods = MathUtils.normalizeFromLog10(mom.getLikelihoods().getAsVector()); + double[] dadLikelihoods = MathUtils.normalizeFromLog10(dad.getLikelihoods().getAsVector()); + double[] childLikelihoods = MathUtils.normalizeFromLog10(child.getLikelihoods().getAsVector()); + + int momIndex = mom.getType().ordinal() - 1; + int dadIndex = dad.getType().ordinal() - 1; + int childIndex = child.getType().ordinal() - 1; + + return momLikelihoods[momIndex]*dadLikelihoods[dadIndex]*childLikelihoods[childIndex]; + } + + private ArrayList createAllThreeGenotypes(Allele refAllele, Allele altAllele, Genotype g) { + List homRefAlleles = new ArrayList(); + homRefAlleles.add(refAllele); + homRefAlleles.add(refAllele); + Genotype homRef = new Genotype(g.getSampleName(), homRefAlleles, g.getNegLog10PError(), null, g.getAttributes(), false); + + List hetAlleles = new ArrayList(); + hetAlleles.add(refAllele); + hetAlleles.add(altAllele); + Genotype het = new Genotype(g.getSampleName(), hetAlleles, g.getNegLog10PError(), null, g.getAttributes(), false); + + List homVarAlleles = new ArrayList(); + homVarAlleles.add(altAllele); + homVarAlleles.add(altAllele); + Genotype homVar = new Genotype(g.getSampleName(), homVarAlleles, g.getNegLog10PError(), null, g.getAttributes(), false); + + ArrayList genotypes = new ArrayList(); + genotypes.add(homRef); + genotypes.add(het); + genotypes.add(homVar); + + return genotypes; + } + + private int getNumberOfMatchingAlleles(Allele alleleToMatch, Genotype g) { + List alleles = g.getAlleles(); + int matchingAlleles = 0; + + for (Allele a : alleles) { + if (!alleleToMatch.equals(a)) { + matchingAlleles++; + } + } + + return matchingAlleles; + } + + private boolean isMendelianViolation(Allele refAllele, Allele altAllele, Genotype mom, Genotype dad, Genotype child) { + int numMomRefAlleles = getNumberOfMatchingAlleles(refAllele, mom) > 0 ? 1 : 0; + int numMomAltAlleles = getNumberOfMatchingAlleles(altAllele, mom) > 0 ? 1 : 0; + + int numDadRefAlleles = getNumberOfMatchingAlleles(refAllele, dad) > 0 ? 1 : 0; + int numDadAltAlleles = getNumberOfMatchingAlleles(altAllele, dad) > 0 ? 1 : 0; + + int numChildRefAlleles = getNumberOfMatchingAlleles(refAllele, child); + int numChildAltAlleles = getNumberOfMatchingAlleles(altAllele, child); + + return (numMomRefAlleles + numDadRefAlleles < numChildRefAlleles || numMomAltAlleles + numDadAltAlleles < numChildAltAlleles); + } + + private ArrayList getPhasedGenotypes(Genotype mom, Genotype dad, Genotype child) { + Set possiblePhasedChildGenotypes = new HashSet(); + + for (Allele momAllele : mom.getAlleles()) { + for (Allele dadAllele : dad.getAlleles()) { + ArrayList possiblePhasedChildAlleles = new ArrayList(); + possiblePhasedChildAlleles.add(momAllele); + possiblePhasedChildAlleles.add(dadAllele); + + Genotype possiblePhasedChildGenotype = new Genotype(child.getSampleName(), possiblePhasedChildAlleles, child.getNegLog10PError(), child.getFilters(), child.getAttributes(), true); + + possiblePhasedChildGenotypes.add(possiblePhasedChildGenotype); + } + } + + ArrayList finalGenotypes = new ArrayList(); + + for (Genotype phasedChildGenotype : possiblePhasedChildGenotypes) { + if (child.sameGenotype(phasedChildGenotype, true)) { + Allele momTransmittedAllele = phasedChildGenotype.getAllele(0); + Allele momUntransmittedAllele = mom.getAllele(0) != momTransmittedAllele ? mom.getAllele(0) : mom.getAllele(1); + + ArrayList phasedMomAlleles = new ArrayList(); + phasedMomAlleles.add(momTransmittedAllele); + phasedMomAlleles.add(momUntransmittedAllele); + + Genotype phasedMomGenotype = new Genotype(mom.getSampleName(), phasedMomAlleles, mom.getNegLog10PError(), mom.getFilters(), mom.getAttributes(), true); + + Allele dadTransmittedAllele = phasedChildGenotype.getAllele(1); + Allele dadUntransmittedAllele = dad.getAllele(0) != dadTransmittedAllele ? dad.getAllele(0) : dad.getAllele(1); + + ArrayList phasedDadAlleles = new ArrayList(); + phasedDadAlleles.add(dadTransmittedAllele); + phasedDadAlleles.add(dadUntransmittedAllele); + + Genotype phasedDadGenotype = new Genotype(dad.getSampleName(), phasedDadAlleles, dad.getNegLog10PError(), dad.getFilters(), dad.getAttributes(), true); + + finalGenotypes.add(phasedMomGenotype); + finalGenotypes.add(phasedDadGenotype); + finalGenotypes.add(phasedChildGenotype); + + return finalGenotypes; + } + } + + finalGenotypes.add(mom); + finalGenotypes.add(dad); + finalGenotypes.add(child); + + return finalGenotypes; + } + + private VariantContext phaseTrioGenotypes(VariantContext vc) { + Genotype mom = vc.getGenotype(SAMPLE_NAME_MOM); + Genotype dad = vc.getGenotype(SAMPLE_NAME_DAD); + Genotype child = vc.getGenotype(SAMPLE_NAME_CHILD); + + Set filters = new HashSet(); + filters.addAll(vc.getFilters()); + + Map attributes = new HashMap(); + attributes.putAll(vc.getAttributes()); + attributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, 0.0); + + ArrayList finalGenotypes = new ArrayList(); + finalGenotypes.add(mom); + finalGenotypes.add(dad); + finalGenotypes.add(child); + + if (!mom.isCalled() || !dad.isCalled() || !child.isCalled()) { + filters.add(INSUFFICIENT_DATA_FILTER_NAME); + } else { + ArrayList possibleMomGenotypes = createAllThreeGenotypes(vc.getReference(), vc.getAlternateAllele(0), mom); + ArrayList possibleDadGenotypes = createAllThreeGenotypes(vc.getReference(), vc.getAlternateAllele(0), dad); + ArrayList possibleChildGenotypes = createAllThreeGenotypes(vc.getReference(), vc.getAlternateAllele(0), child); + + double bestConfigurationLikelihood = 0.0; + double bestPrior = 0.0; + Genotype bestMomGenotype = mom; + Genotype bestDadGenotype = dad; + Genotype bestChildGenotype = child; + + double norm = 0.0; + + for (Genotype momGenotype : possibleMomGenotypes) { + for (Genotype dadGenotype : possibleDadGenotypes) { + for (Genotype childGenotype : possibleChildGenotypes) { + double prior = isMendelianViolation(vc.getReference(), vc.getAlternateAllele(0), momGenotype, dadGenotype, childGenotype) ? MENDELIAN_VIOLATION_PRIOR : 1.0 - 12*MENDELIAN_VIOLATION_PRIOR; + double configurationLikelihood = computeTransmissionLikelihoodOfGenotypeConfiguration(momGenotype, dadGenotype, childGenotype); + norm += prior*configurationLikelihood; + + if (prior*configurationLikelihood > bestPrior*bestConfigurationLikelihood) { + bestConfigurationLikelihood = configurationLikelihood; + bestPrior = prior; + bestMomGenotype = momGenotype; + bestDadGenotype = dadGenotype; + bestChildGenotype = childGenotype; + } + } + } + } + + if (isMendelianViolation(vc.getReference(), vc.getAlternateAllele(0), bestMomGenotype, bestDadGenotype, bestChildGenotype)) { + filters.add(MENDELIAN_VIOLATION_FILTER_NAME); + } else if (bestMomGenotype.isHet() && bestDadGenotype.isHet() && bestChildGenotype.isHet()) { + filters.add(AMBIGUOUS_ALLELE_ORIGIN_FILTER_NAME); + } else { + finalGenotypes = getPhasedGenotypes(bestMomGenotype, bestDadGenotype, bestChildGenotype); + + attributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, bestPrior*bestConfigurationLikelihood / norm); + } + } + + return new VariantContext(SOURCE_NAME, vc.getChr(), vc.getStart(), vc.getStart(), vc.getAlleles(), finalGenotypes, vc.getNegLog10PError(), noFilters ? vc.getFilters() : filters, attributes); + } + + /** + * For each variant in the file, determine the phasing for the child and replace the child's genotype with the trio's genotype + * + * @param tracker the reference meta-data tracker + * @param ref the reference context + * @param context the alignment context + * @return null + */ + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (tracker != null) { + Collection vcs = tracker.getVariantContexts(ref, ROD_NAME, null, context.getLocation(), true, true); + + for (VariantContext vc : vcs) { + vcfWriter.add(phaseTrioGenotypes(vc), ref.getBase()); + } + } + + return null; + } + + /** + * Provide an initial value for reduce computations. + * + * @return Initial value of reduce. + */ + @Override + public Integer reduceInit() { + return null; + } + + /** + * Reduces a single map with the accumulator provided as the ReduceType. + * + * @param value result of the map. + * @param sum accumulator for the reduce. + * @return accumulator with result of the map taken into account. + */ + @Override + public Integer reduce(Integer value, Integer sum) { + return null; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index af24035c80..9702fd18cf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -23,27 +23,28 @@ */ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.DisjointSet; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.HasGenomeLocation; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.codecs.vcf.SortingVCFWriter; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.*; import java.util.*; @@ -58,7 +59,7 @@ @Requires(value = {DataSource.READS, DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class)) @By(DataSource.READS) -@ReadFilters({ZeroMappingQualityReadFilter.class}) +@ReadFilters({MappingQualityZeroReadFilter.class}) // Filter out all reads with zero mapping quality public class ReadBackedPhasingWalker extends RodWalker { @@ -242,7 +243,7 @@ else if (context.hasExtendedEventPileup()) { return new PhasingStatsAndOutput(phaseStats, completedList); } - private static final Set KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet(Arrays.asList("PQ")); + private static final Set KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet(Arrays.asList(PQ_KEY)); private VariantContext reduceVCToSamples(VariantContext vc, List samplesToPhase) { // for ( String sample : samplesToPhase ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java index db1f888a1d..153c4a23f0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/SNPallelePair.java @@ -23,10 +23,10 @@ */ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; public class SNPallelePair extends AllelePair { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java index f82e48abd5..2851ace0d1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java @@ -24,9 +24,9 @@ package org.broadinstitute.sting.gatk.walkers.phasing; import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; public class WriteVCF { public static void writeVCF(VariantContext vc, VCFWriter writer, Logger logger) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java index feb5f62af5..2bdd4558f2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java @@ -6,14 +6,12 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.RefWalker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; -import java.util.List; import java.io.PrintStream; +import java.util.List; /** * Counts the number of contiguous regions the walker traverses over. Slower than it needs to be, but diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java index ef6ff04f2f..0d68c8493f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import net.sf.samtools.SAMFileWriter; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java index cece04fcf2..df89efe6d6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java @@ -25,14 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.walkers.ReadPairWalker; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; -import org.broadinstitute.sting.commandline.Output; -import net.sf.samtools.SAMRecord; +import java.io.PrintStream; import java.util.Collection; import java.util.List; -import java.io.PrintStream; /** * Counts the number of read pairs encountered in a file sorted in diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java index 74f63aa2f9..87c0409b93 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.qc; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.walkers.Requires; /** * Walks over the input data set, calculating the number of reads seen for diagnostic purposes. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodByRefWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodByRefWalker.java index a4e80138f6..d1545f1592 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodByRefWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodByRefWalker.java @@ -25,13 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Argument; /** * Prints out counts of the number of reference ordered data objects are diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java index 92867e1a91..8a03dea44b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java @@ -27,23 +27,24 @@ import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import java.io.PrintStream; import java.util.ArrayList; -import java.util.List; import java.util.Collection; import java.util.LinkedList; -import java.io.PrintStream; +import java.util.List; /** * Prints out counts of the number of reference ordered data objects are diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java index 5ae35416a9..b5f5442cd6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java @@ -25,22 +25,22 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import org.broadinstitute.sting.gatk.walkers.Requires; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.utils.collections.PrimitivePair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMReadGroupRecord; -import java.util.*; import java.io.*; +import java.util.*; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java index 39b69ba29f..d3b992cb5f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; -import java.util.List; -import java.util.Arrays; import java.io.PrintStream; - -import net.sf.samtools.SAMRecord; +import java.util.Arrays; +import java.util.List; /** * At each locus in the input data set, prints the reference base, genomic location, and diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java index 2f1773d014..908e389a88 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java @@ -22,21 +22,24 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import org.broadinstitute.sting.gatk.walkers.Requires; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import net.sf.samtools.*; -import java.util.*; -import java.io.*; +import java.io.PrintStream; +import java.util.Arrays; /** * User: depristo diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java index 6ad0340a40..fa1bb4d557 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileWriter; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.List; import java.util.ArrayList; +import java.util.List; /* diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java index 9cb7155071..170630b773 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -8,8 +7,11 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.walkers.Reference; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.*; import java.math.BigInteger; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java index bc68be592e..e1e6c4b69e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java @@ -25,20 +25,20 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.features.sampileup.SAMPileupFeature; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import java.util.Arrays; import java.io.PrintStream; +import java.util.Arrays; /** * At every locus in the input set, compares the pileup data (reference base, aligned base from diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesGatherer.java index 568e1b638a..fc6b3daee6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesGatherer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesGatherer.java @@ -7,7 +7,6 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.PrintStream; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.regex.Pattern; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index ee504b6e7a..8c6539f8d3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -27,23 +27,25 @@ import org.broad.tribble.bed.BEDCodec; import org.broad.tribble.dbsnp.DbSNPCodec; -import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Gather; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; +import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.collections.NestedHashMap; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -75,7 +77,7 @@ @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) @By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file -@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality +@ReadFilters( {MappingQualityZeroReadFilter.class, MappingQualityUnavailableReadFilter.class} ) // Filter out all reads with zero or unavailable mapping quality @Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta @PartitionBy(PartitionType.LOCUS) public class CountCovariatesWalker extends LocusWalker implements TreeReducible { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index 64e0864c0e..945d02837f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import net.sf.samtools.SAMRecord; - import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java index 0de6897d0f..a7717161a7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; -import java.util.HashMap; - import net.sf.samtools.SAMRecord; - import org.broadinstitute.sting.utils.BaseUtils; +import java.util.HashMap; + /* * Copyright (c) 2009 The Broad Institute * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index fdbeb6a31e..e6d0b306c1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -25,19 +25,21 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMUtils; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.collections.NestedHashMap; - -import java.util.*; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMReadGroupRecord; -import net.sf.samtools.SAMUtils; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java index 7ba441cccb..f04989fa57 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java @@ -3,7 +3,7 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.QualityUtils; -import java.util.*; +import java.util.List; /* * Copyright (c) 2010 The Broad Institute diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 0eaa1245e7..0277fda0d9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -25,31 +25,33 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; -import java.io.File; -import java.io.FileNotFoundException; -import java.util.*; -import java.util.regex.Pattern; - import net.sf.samtools.*; import net.sf.samtools.util.SequenceUtil; - +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.collections.NestedHashMap; -import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.text.TextFormattingUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.ArrayList; +import java.util.List; +import java.util.MissingResourceException; +import java.util.ResourceBundle; +import java.util.regex.Pattern; /** * This walker is designed to work as the second pass in a two-pass processing step, doing a by-read traversal. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/CreateSequenomMask.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/CreateSequenomMask.java deleted file mode 100755 index c1c17bda52..0000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/CreateSequenomMask.java +++ /dev/null @@ -1,50 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.sequenom; - -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.utils.GenomeLoc; - -import java.io.PrintStream; - -/** - * Create a mask for use with the PickSequenomProbes walker. - */ -public class CreateSequenomMask extends RodWalker { - @Output - PrintStream out; - - public void initialize() {} - - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) - return 0; - - int result = 0; - for ( VariantContext vc : tracker.getAllVariantContexts(ref) ) { - if ( vc.isSNP() ) { - GenomeLoc loc = context.getLocation(); - out.println(loc.getContig() + "\t" + (loc.getStart()-1) + "\t" + loc.getStop()); - result = 1; - break; - } - } - - return result; - } - - public Integer reduceInit() { - return 0; - } - - public Integer reduce(Integer value, Integer sum) { - return value + sum; - } - - public void onTraversalDone(Integer sum) { - logger.info("Found " + sum + " masking sites."); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java deleted file mode 100755 index fde233b5de..0000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.sequenom; - -import net.sf.samtools.util.CloseableIterator; -import org.broad.tribble.bed.BEDCodec; -import org.broad.tribble.dbsnp.DbSNPCodec; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; -import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; - -import java.io.File; -import java.util.*; -import java.io.PrintStream; - - -/** - * Generates Sequenom probe information given a single variant track. Emitted is the variant - * along with the 200 reference bases on each side of the variant. - */ -@WalkerName("PickSequenomProbes") -@Requires(value={DataSource.REFERENCE}) -@Reference(window=@Window(start=-200,stop=200)) -public class PickSequenomProbes extends RodWalker { - @Output - PrintStream out; - - @Argument(required=false, shortName="snp_mask", doc="positions to be masked with N's") - protected String SNP_MASK = null; - @Argument(required=false, shortName="project_id",doc="If specified, all probenames will be prepended with 'project_id|'") - String project_id = null; - @Argument(required = false, shortName="omitWindow", doc = "If specified, the window appender will be omitted from the design files (e.g. \"_chr:start-stop\")") - boolean omitWindow = false; - @Argument(required = false, fullName="usePlinkRODNamingConvention", shortName="nameConvention",doc="Use the naming convention defined in PLINKROD") - boolean useNamingConvention = false; - @Argument(required = false, fullName="noMaskWindow",shortName="nmw",doc="Do not mask bases within X bases of an event when designing probes") - int noMaskWindow = 0; - @Argument(required = false, shortName="counter", doc = "If specified, unique count id (ordinal number) is added to the end of each assay name") - boolean addCounter = false; - - private byte [] maskFlags = new byte[401]; - - private LocationAwareSeekableRODIterator snpMaskIterator=null; - - private GenomeLoc positionOfLastVariant = null; - - private int cnt = 0; - private int discarded = 0; - - VariantCollection VCs ; // will keep a set of distinct variants at a given site - private List processedVariantsInScope = new LinkedList(); - - public void initialize() { - if ( SNP_MASK != null ) { - logger.info("Loading SNP mask... "); - ReferenceOrderedData snp_mask; - //if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) { - RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe); - RMDTrack track = builder.createInstanceOfTrack(BEDCodec.class, new File(SNP_MASK)); - snpMaskIterator = new SeekableRODIterator(track.getHeader(), - track.getSequenceDictionary(), - getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), - getToolkit().getGenomeLocParser(), - track.getIterator()); - //} else { - // // TODO: fix me when Plink is back - // throw new IllegalArgumentException("We currently do not support other snp_mask tracks (like Plink)"); - //} - - } - VCs = new VariantCollection(); - } - - - public String map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) - return ""; - - logger.debug("Probing " + ref.getLocus() + " " + ref.getWindow()); - - VCs.clear(); - VCs.addAll( tracker.getAllVariantContexts(ref), ref.getLocus() ); - - discarded += VCs.discarded(); - - if ( VCs.size() == 0 ) { - logger.debug(" Context empty"); - return ""; - } - - if ( VCs.size() > 1 ) { - logger.debug(" "+VCs.size()+ " variants at the locus"); - } - -// System.out.print("At locus "+ref.getLocus()+": "); -// for ( VariantContext vc : VCs ) { -// System.out.println(vc.toString()); -// } - - // little optimization: since we may have few events at the current site on the reference, - // we are going to make sure we compute the mask and ref bases only once for each location and only if we need to - boolean haveMaskForWindow = false; - boolean haveBasesForWindow = false; - String leading_bases = null; - String trailing_bases = null; - - StringBuilder assaysForLocus = new StringBuilder(""); // all assays for current locus will be collected here (will be multi-line if multiple events are assayed) - - // get all variant contexts!!!! - for ( VariantContext vc : VCs ) { - - // we can only deal with biallelic sites for now - if ( !vc.isBiallelic() ) { - logger.debug(" Not biallelic; skipped"); - continue; - } - - // we don't want to see the same multi-base event (deletion, DNP etc) multiple times. - // All the vcs we are currently seeing are clearly on the same contig as the current reference - // poisiton (or we would not see them at all!). All we need to check is if the vc starts at the - // current reference position (i.e. it is the first time we see it) or not (i.e. we saw it already). - if ( ref.getLocus().getStart() != vc.getStart() ) - continue; - - if ( ! haveMaskForWindow ) { - String contig = context.getLocation().getContig(); - int offset = context.getLocation().getStart(); - int true_offset = offset - 200; - - // we have variant; let's load all the snps falling into the current window and prepare the mask array. - // we need to do it only once per window, regardless of how many vcs we may have at this location! - if ( snpMaskIterator != null ) { - // clear the mask - for ( int i = 0 ; i < 401; i++ ) - maskFlags[i] = 0; - - RODRecordList snpList = snpMaskIterator.seekForward(getToolkit().getGenomeLocParser().createGenomeLoc(contig,offset-200,offset+200)); - if ( snpList != null && snpList.size() != 0 ) { - Iterator snpsInWindow = snpList.iterator(); - int i = 0; - while ( snpsInWindow.hasNext() ) { - GenomeLoc snp = snpsInWindow.next().getLocation(); - // we don't really want to mask out multi-base indels - if ( snp.size() > 1 ) - continue; - logger.debug(" SNP at "+snp.getStart()); - int offsetInWindow = (int)(snp.getStart() - true_offset); - maskFlags[offsetInWindow] = 1; - } - } - } - haveMaskForWindow = true; // if we use masking, we will probably need to recompute the window... - } - - if ( ! haveBasesForWindow ) { - byte[] context_bases = ref.getBases(); - for (int i = 0; i < 401; i++) { - if ( maskFlags[i] == 1 && ( i < 200 - noMaskWindow || i > 200 + getNoMaskWindowRightEnd(vc,noMaskWindow) ) ) { - context_bases[i] = 'N'; - } - } - leading_bases = new String(Arrays.copyOfRange(context_bases, 0, 200)); - trailing_bases = new String(Arrays.copyOfRange(context_bases, 201, 401)); - // masked bases are not gonna change for the current window, unless we use windowed masking; - // in the latter case the bases (N's) will depend on the event we are currently looking at, - // so we better recompute.. - if ( noMaskWindow == 0 ) haveBasesForWindow = true; - } - - - // below, build single assay line for the current VC: - - String assay_sequence; - if ( vc.isSNP() ) - assay_sequence = leading_bases + "[" + (char)ref.getBase() + "/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases; - else if ( vc.isMNP() ) - assay_sequence = leading_bases + "[" + new String(vc.getReference().getBases()) + "/" + new String(vc.getAlternateAllele(0).getBases())+"]"+trailing_bases.substring(vc.getReference().length()-1); - else if ( vc.isInsertion() ) - assay_sequence = leading_bases + (char)ref.getBase() + "[-/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases; - else if ( vc.isDeletion() ) - assay_sequence = leading_bases + (char)ref.getBase() + "[" + new String(vc.getReference().getBases()) + "/-]" + trailing_bases.substring(vc.getReference().length()); - else - continue; - - StringBuilder assay_id = new StringBuilder(); - if ( project_id != null ) { - assay_id.append(project_id); - assay_id.append('|'); - } - if ( useNamingConvention ) { - assay_id.append('c'); - assay_id.append(context.getLocation().toString().replace(":","_p")); - } else { - assay_id.append(context.getLocation().toString().replace(':','_')); - } - if ( vc.isInsertion() ) assay_id.append("_gI"); - else if ( vc.isDeletion()) assay_id.append("_gD"); - - if ( ! omitWindow ) { - assay_id.append("_"); - assay_id.append(ref.getWindow().toString().replace(':', '_')); - } - ++cnt; - if ( addCounter ) assay_id.append("_"+cnt); - - assaysForLocus.append(assay_id); - assaysForLocus.append('\t'); - assaysForLocus.append(assay_sequence); - assaysForLocus.append('\n'); - } - return assaysForLocus.toString(); - } - - public String reduceInit() { - return ""; - } - - public String reduce(String data, String sum) { - out.print(data); - return ""; - } - - private int getNoMaskWindowRightEnd(VariantContext vc, int window) { - if ( window == 0 ) { - return 0; - } - - if ( vc.isInsertion() ) { - return window-1; - } - - int max = 0; - for (Allele a : vc.getAlleles() ) { - if ( vc.isInsertion() ) { - logger.debug("Getting length of allele "+a.toString()+" it is "+a.getBases().length+" (ref allele is "+vc.getReference().toString()+")"); - } - if ( a.getBases().length > max ) { - max = a.getBases().length; - } - } - return max+window-1; - } - - public void onTraversalDone(String sum) { - logger.info(cnt+" assay seqences generated"); - logger.info(discarded+" events were found to be duplicates and discarded (no redundant assays generated)"); - } - - static class EventComparator implements Comparator { - - public int compare(VariantContext o1, VariantContext o2) { - // if variants start at different positions, they are different. All we actually - // care about is detecting the variants that are strictly the same; the actual ordering of distinct variants - // (which one we deem less and which one greater) is utterly unimportant. We just need to be consistent. - if ( o1.getStart() < o2.getStart() ) return -1; - if ( o1.getStart() > o2.getStart() ) return 1; - - if ( o1.getType() != o2.getType() ) return o1.getType().compareTo(o2.getType()); - - int refComp = o1.getReference().compareTo(o2.getReference()); - if ( refComp != 0 ) return refComp; - - return o1.getAlternateAllele(0).compareTo(o2.getAlternateAllele(0)); - - } - } - - static class VariantCollection implements Iterable { - TreeSet variants = new TreeSet(new EventComparator()); - int discarded = 0; - - public void add(VariantContext vc, GenomeLoc current) { - if ( vc.getStart() != current.getStart() ) return; // we add only variants that start at current locus - // note that we do not check chr here, since the way this class is used, the mathod is always called with - // VCs coming from the same metadata tracker, so they simply can not be on different chrs! - if ( !vc.isBiallelic() ) { - logger.info(" Non-biallelic variant encountered; skipped"); - return; - } - if ( variants.add(vc) == false ) discarded++; - } - - public void addAll(Collection c, GenomeLoc current) { - for ( VariantContext vc : c ) add(vc,current); - } - - public void clear() { - variants.clear(); - discarded = 0; - } - - public int discarded() { return discarded; } - - public int size() { return variants.size(); } - - public Iterator iterator() { return variants.iterator(); } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java new file mode 100755 index 0000000000..cb03d4c618 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -0,0 +1,414 @@ +package org.broadinstitute.sting.gatk.walkers.validation; + +import net.sf.picard.reference.ReferenceSequenceFileFactory; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMSequenceDictionary; +import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.RMD; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.File; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 6/13/11 + * Time: 2:12 PM + * To change this template use File | Settings | File Templates. + */ +@Requires(value={DataSource.REFERENCE}, referenceMetaData={@RMD(name="ProbeIntervals",type=TableFeature.class), +@RMD(name="ValidateAlleles",type=VariantContext.class),@RMD(name="MaskAlleles",type=VariantContext.class)}) +public class ValidationAmplicons extends RodWalker { + + @Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false) + boolean lowerCaseSNPs = false; + + @Argument(doc="Size of the virtual primer to use for lower-casing regions with low specificity",fullName="virtualPrimerSize",required=false) + int virtualPrimerSize = 20; + + @Argument(doc="Monomorphic sites in the mask file will be treated as filtered",fullName="filterMonomorphic",required=false) + boolean filterMonomorphic = false; + + @Argument(doc="Do not use BWA, lower-case repeats only",fullName="doNotUseBWA",required=false) + boolean doNotUseBWA = false; + + GenomeLoc prevInterval; + GenomeLoc allelePos; + String probeName; + StringBuilder sequence; + StringBuilder rawSequence; + boolean sequenceInvalid; + List invReason; + int indelCounter; + + @Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " + + "generated by bwa index -d bwtsw. If unspecified, will default " + + "to the reference specified via the -R argument.",required=false) + private File targetReferenceFile = null; + + @Output + PrintStream out; + + BWACAligner aligner = null; + + private SAMFileHeader header = null; + + public void initialize() { + if ( ! doNotUseBWA ) { + if(targetReferenceFile == null) + targetReferenceFile = getToolkit().getArguments().referenceFile; + BWTFiles bwtFiles = new BWTFiles(targetReferenceFile.getAbsolutePath()); + BWAConfiguration configuration = new BWAConfiguration(); + aligner = new BWACAligner(bwtFiles,configuration); + header = new SAMFileHeader(); + SAMSequenceDictionary referenceDictionary = + ReferenceSequenceFileFactory.getReferenceSequenceFile(targetReferenceFile).getSequenceDictionary(); + header.setSequenceDictionary(referenceDictionary); + header.setSortOrder(SAMFileHeader.SortOrder.unsorted); + } + } + + public Integer reduceInit() { + prevInterval = null; + sequence = null; + rawSequence = null; + sequenceInvalid = false; + probeName = null; + invReason = null; + indelCounter = 0; + return 0; + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null || ! tracker.hasROD("ProbeIntervals")) { return null; } + + GenomeLoc interval = ((TableFeature) tracker.getReferenceMetaData("ProbeIntervals",true).get(0)).getLocation(); + //logger.debug(interval); + if ( prevInterval == null || ! interval.equals(prevInterval) ) { + // we're in a new interval, we should: + // 1) print out previous data + // 2) reset internal data + // 3) instantiate traversal of this interval + + // step 1: + if ( prevInterval != null ) { + // there was a previous interval + validateSequence(); // ensure the sequence in the region is valid + // next line removed in favor of the one after + if ( doNotUseBWA ) { + lowerRepeats(); // change repeats in sequence to lower case + } else { + lowerNonUniqueSegments(); + } + print(); // print out the fasta sequence + } + + // step 2: + prevInterval = interval; + allelePos = null; + sequence = new StringBuilder(); + rawSequence = new StringBuilder(); + sequenceInvalid = false; + invReason = new LinkedList(); + logger.debug(Utils.join("\t",((TableFeature) tracker.getReferenceMetaData("ProbeIntervals",true).get(0)).getAllValues())); + probeName = ((TableFeature) tracker.getReferenceMetaData("ProbeIntervals",true).get(0)).getValue(1); + indelCounter = 0; + } + + // step 3 (or 1 if not new): + // build up the sequence + + VariantContext mask = tracker.getVariantContext(ref,"MaskAlleles",ref.getLocus()); + VariantContext validate = tracker.getVariantContext(ref,"ValidateAlleles",ref.getLocus()); + + if ( mask == null && validate == null ) { + if ( indelCounter > 0 ) { + sequence.append('N'); + indelCounter--; + } else { + sequence.append(Character.toUpperCase((char) ref.getBase())); + } + rawSequence.append(Character.toUpperCase((char) ref.getBase())); + } else if ( validate != null ) { + // doesn't matter if there's a mask here too -- this is what we want to validate + if ( validate.isFiltered() ) { + logger.warn("You are attempting to validate a filtered site. Why are you attempting to validate a filtered site? You should not be attempting to validate a filtered site."); + sequenceInvalid = true; + invReason.add("SITE_IS_FILTERED"); + } + if ( validate.isIndel() ) { + sequence.append(Character.toUpperCase((char)ref.getBase())); + rawSequence.append(Character.toUpperCase((char)ref.getBase())); + } + sequence.append('['); + sequence.append(validate.getAlternateAllele(0).toString()); + sequence.append('/'); + sequence.append(validate.getReference().toString()); + sequence.append(']'); + // do this to the raw sequence to -- the indeces will line up that way + rawSequence.append('['); + rawSequence.append(validate.getAlternateAllele(0).getBaseString()); + rawSequence.append('/'); + rawSequence.append(validate.getReference().getBaseString()); + rawSequence.append(']'); + allelePos = ref.getLocus(); + if ( indelCounter > 0 ) { + logger.warn("An indel event overlaps the event to be validated. This completely invalidates the probe."); + sequenceInvalid = true; + invReason.add("INDEL_OVERLAPS_VALIDATION_SITE"); + if ( validate.isSNP() ) { + indelCounter--; + } else { + indelCounter -= validate.getEnd()-validate.getStart(); + } + } + } else /* (mask != null && validate == null ) */ { + if ( ! mask.isSNP() && ! mask.isFiltered() && ( ! filterMonomorphic || ! mask.isMonomorphic() )) { + logger.warn("Mask Variant Context on the following warning line is not a SNP. Currently we can only mask out SNPs. This probe will not be designed."); + logger.warn(String.format("%s:%d-%d\t%s\t%s",mask.getChr(),mask.getStart(),mask.getEnd(),mask.isInsertion() ? "INS" : "DEL", Utils.join(",",mask.getAlleles()))); + sequenceInvalid = true; + invReason.add(mask.isInsertion() ? "INSERTION" : "DELETION"); + // note: indelCounter could be > 0 (could have small deletion within larger one). This always selects + // the larger event. + int indelCounterNew = mask.isInsertion() ? 2 : mask.getEnd()-mask.getStart(); + if ( indelCounterNew > indelCounter ) { + indelCounter = indelCounterNew; + } + //sequence.append((char) ref.getBase()); + //sequence.append(mask.isInsertion() ? 'I' : 'D'); + sequence.append("N"); + indelCounter--; + rawSequence.append(Character.toUpperCase((char) ref.getBase())); + } else if ( indelCounter > 0 ) { + // previous section resets the indel counter. Doesn't matter if there's a SNP underlying this, we just want to append an 'N' and move on. + sequence.append('N'); + indelCounter--; + rawSequence.append(Character.toUpperCase((char)ref.getBase())); + } else if ( ! mask.isFiltered() && ( ! filterMonomorphic || ! mask.isMonomorphic() )){ + logger.debug("SNP in mask found at " + ref.getLocus().toString()); + + if ( lowerCaseSNPs ) { + sequence.append(Character.toLowerCase((char) ref.getBase())); + } else { + sequence.append((char) BaseUtils.N); + } + + rawSequence.append(Character.toUpperCase((char) ref.getBase())); + } else if ( mask.isSNP() ) { + logger.debug("SNP in mask found at "+ref.getLocus().toString()+" but was either filtered or monomorphic"); + sequence.append((Character.toUpperCase((char) ref.getBase()))); + rawSequence.append(Character.toUpperCase((char) ref.getBase())); + } + } + + return 1; + } + + public Integer reduce(Integer i, Integer j) { + return 0; + } + + public void onTraversalDone(Integer fin ) { + validateSequence(); + if ( doNotUseBWA ) { + lowerRepeats(); + } else { + lowerNonUniqueSegments(); + aligner.close(); + } + print(); + } + + public void validateSequence() { + // code for ensuring primer sequence is valid goes here + + // validate that there are no masked sites near to the variant site + String seq = sequence.toString(); + int start = seq.indexOf('[') - 4; + int end = seq.indexOf(']') + 5; + + if ( start < 50 ) { + logger.warn("There is not enough sequence before the start position of the probed allele for adequate probe design. This site will not be designed."); + sequenceInvalid = true; + invReason.add("START_TOO_CLOSE"); + } else if ( end > seq.length() - 50 ) { + logger.warn("There is not enough sequence after the end position of the probed allele fore adequate probe design. This site will not be desinged. "); + sequenceInvalid = true; + invReason.add("END_TOO_CLOSE"); + } else { + boolean maskNearVariantSite = false; + for ( int i = start; i < end; i++ ) { + maskNearVariantSite |= (seq.charAt(i) == 'N' || Character.isLowerCase(seq.charAt(i))); + } + + if ( maskNearVariantSite ) { + logger.warn("There is one (or more) mask variants within 4 basepair of the variant given for validation. This site will not be designed."); + sequenceInvalid = true; + invReason.add("VARIANT_TOO_NEAR_PROBE"); + } + } + + if ( seq.indexOf("[") != seq.lastIndexOf("[") ) { + logger.warn("Multiple probe variants were found within this interval. Please fix the definitions of the intervals so they do not overlap."); + sequenceInvalid = true; + invReason.add("MULTIPLE_PROBES"); + } + + if ( seq.indexOf("[") < 0 ) { + logger.warn("No variants in region were found. This site will not be designed."); + sequenceInvalid = true; + invReason.add("NO_VARIANTS_FOUND"); + } + } + + public void lowerNonUniqueSegments() { + if ( ! invReason.contains("MULTIPLE_PROBES") && !invReason.contains("NO_VARIANTS_FOUND") ) { + String leftFlank = rawSequence.toString().split("\\[")[0]; + String rightFlank = rawSequence.toString().split("\\]")[1]; + List badLeft = getBadIndeces(leftFlank); + List badRight = getBadIndeces(rightFlank); + // propagate lowercases into the printed sequence + for ( int idx = 0; idx < leftFlank.length(); idx++ ) { + while ( badLeft.size() > 0 && idx > badLeft.get(0) + virtualPrimerSize ) { + badLeft.remove(0); + } + + if ( badLeft.size() > 0 && badLeft.get(0) <= idx && idx <= badLeft.get(0) + virtualPrimerSize ) { + sequence.setCharAt(idx,Character.toLowerCase(sequence.charAt(idx))); + } + } + + int offset = 1 + rawSequence.indexOf("]"); + for ( int i= 0; i < rightFlank.length(); i++ ) { + int idx = i + offset; + while ( badRight.size() > 0 && i > badRight.get(0) + virtualPrimerSize ) { + //logger.debug("Removing "+badRight.get(0)+" because "+(badRight.get(0)+virtualPrimerSize)+" < "+i); + badRight.remove(0); + } + + if ( badRight.size() > 0 && badRight.get(0) <= i && i <= badRight.get(0) + virtualPrimerSize ) { + //logger.debug("Resetting character on right flank: "+idx+" "+i+" offset="+offset); + //logger.debug(sequence); + sequence.setCharAt(idx,Character.toLowerCase(sequence.charAt(idx))); + //logger.debug(sequence); + } + } + } + } + + private List getBadIndeces(String sequence) { + + List badLeftIndeces = new ArrayList(sequence.length()-virtualPrimerSize); + for ( int i = 0; i < sequence.length()-virtualPrimerSize ; i++ ) { + String toAlign = sequence.substring(i,i+virtualPrimerSize); + Iterable allAlignments = aligner.getAllAlignments(toAlign.getBytes()); + for ( Alignment[] alignments : allAlignments ) { + if ( alignments.length > 1 ) { + if ( alignments[0].getMappingQuality() == 0 ) { + // this region is bad -- multiple MQ alignments + badLeftIndeces.add(i); + } + } + } + } + + return badLeftIndeces; + } + + + /** + * Note- this is an old function - a proxy for identifying regions with low specificity to genome. Saved in case the alignment-based version + * turns out to be worse than just doing a simple repeat-lowering method. + */ + public void lowerRepeats() { + // convert to lower case low-complexity repeats, e.g. tandem k-mers + final int K_LIM = 8; + String seq = sequence.toString(); + StringBuilder newSequence = new StringBuilder(); + int start_pos = 0; + while( start_pos < seq.length() ) { + boolean broke = false; + for ( int length = K_LIM; length > 1; length -- ) { + //logger.debug(String.format("start1: %d end1: %d start2: %d end2: %d str: %d",start_pos,start_pos+length,start_pos+length,start_pos+2*length,seq.length())); + if ( start_pos + 2*length> seq.length() ) { + continue; + } + if ( equalsIgnoreNs(seq.substring(start_pos,start_pos+length),seq.substring(start_pos+length,start_pos+2*length)) ) { + newSequence.append(seq.substring(start_pos,start_pos+length).toLowerCase()); + newSequence.append(seq.substring(start_pos+length,start_pos+2*length).toLowerCase()); + start_pos += 2*length; + broke = true; + break; + } + } + + if ( ! broke ) { + newSequence.append(seq.substring(start_pos,start_pos+1)); + start_pos++; + } + + } + + if ( seq.indexOf("[") != seq.lastIndexOf("[") ) { + return; + } + + sequence = newSequence; + } + + public boolean equalsIgnoreNs(String one, String two) { + if ( one.length() != two.length() ) { return false; } + for ( int idx = 0; idx < one.length(); idx++ ) { + if ( Character.toUpperCase(one.charAt(idx)) != Character.toUpperCase(two.charAt(idx)) ) { + if ( Character.toUpperCase(one.charAt(idx)) != 'N' && Character.toUpperCase(two.charAt(idx)) != 'N' ) { + return false; + } + } + } + + //logger.debug(String.format("one: %s two: %s",one,two)); + + return true; + } + + public void print() { + String valid; + if ( sequenceInvalid ) { + valid = ""; + while ( invReason.size() > 0 ) { + String reason = invReason.get(0); + invReason.remove(reason); + int num = 1; + while ( invReason.contains(reason) ) { + num++; + invReason.remove(reason); + } + valid += String.format("%s=%d,",reason,num); + } + } else { + valid = "Valid"; + } + + String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D'); + out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 15d808ebe9..fe31735066 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -1,14 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; @@ -20,23 +18,21 @@ import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.*; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import net.sf.picard.reference.FastaSequenceFile; -import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import net.sf.picard.reference.ReferenceSequence; -import java.io.FileNotFoundException; +import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; +import java.io.FileNotFoundException; import java.io.PrintStream; import java.lang.reflect.Field; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompEvalGenotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompEvalGenotypes.java index 787dbe9af6..925bff9c09 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompEvalGenotypes.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompEvalGenotypes.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.variantcontext.Genotype; class NewCompEvalGenotypes { private GenomeLoc loc; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java index 76db330edc..255a547373 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; /** * The Broad Institute diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java index c4277adc96..8c281b2f88 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; @Analysis(description = "Counts different classes of variants in the sample") public class CountVariants extends VariantEvaluator implements StandardEval { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java index 4b56cf130b..bbd3f5f548 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java @@ -1,18 +1,18 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java index 3d14dd0e58..a476a2680a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java @@ -1,20 +1,20 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.phasing.AllelePair; import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.HashMap; import java.util.HashSet; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java index 5daf33a9fd..77def0f308 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; /** * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java index eca6c51936..6e1b76acdf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java index 48b06d532b..d99196ecf7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java @@ -1,15 +1,15 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; import org.broadinstitute.sting.utils.IndelUtils; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java index 85e0b58894..a0cc393d92 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java @@ -1,21 +1,15 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.util.Arrays; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; /** * Mendelian violation detection and counting diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java index 7d54d0df84..b209ee13d5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java @@ -24,12 +24,12 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; @Analysis(name = "PrintMissingComp", description = "the overlap between eval and comp sites") public class PrintMissingComp extends VariantEvaluator { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java index 5f3e6b0fa7..751f61a976 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.variantcontext.Genotype; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java index deed05508c..d466645ea4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java @@ -1,18 +1,18 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.Degeneracy; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.Sample; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.ArrayList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java index 89c67cfe9a..ec43cbd55a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java index 8811dc0010..be957abd78 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @Analysis(description = "Ti/Tv Variant Evaluator") public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEval { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java index 405f356350..9c331b5771 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java @@ -1,17 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.Set; /** * The Broad Institute diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java index 6017ecca3d..e29e7ed50d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext; import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; public abstract class VariantEvaluator { public void initialize(VariantEvalWalker walker) {} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java index 4af14810b8..b6ad55b184 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java @@ -25,16 +25,16 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.ArrayList; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index ff59c9e291..411493d4f0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -1,12 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; @@ -43,9 +42,9 @@ public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTrac if (eval != null) { int AC = -1; - if ( eval.hasAttribute("AC") ) + if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) { AC = eval.getAttributeAsInt("AC"); - else if ( eval.isVariant() ) { + } else if ( eval.isVariant() ) { for (Allele allele : eval.getAlternateAlleles()) AC = Math.max(AC, eval.getChromosomeCount(allele)); } else diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java index 48b4ffa91e..2ffc7716ca 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java index 9942ba8d69..c6975808f7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java index 1a9d310851..c143550356 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java index 15b1d41c1f..3e8a6ed172 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java index 65af6090c5..155a661867 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.HashMap; -import java.util.Set; import java.util.HashSet; +import java.util.Set; public class Degeneracy extends VariantStratifier { private ArrayList states; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java index 7bd15a9747..40f952fd23 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java index 8d8782ab73..3b7a419f2b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java index fd724d6d15..c6c094f8e6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java index a7ccd31827..76efedbf4c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index 39d2e6b4d5..a0973a088d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Collection; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java index f909de4f38..a2a3eb3fbe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java index 218cb23cab..2c4b8bc466 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/Analysis.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java similarity index 80% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/Analysis.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java index 129d5a95d1..2b37ce2107 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/Analysis.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.tags; +package org.broadinstitute.sting.gatk.walkers.varianteval.util; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java index c8d917040d..db44e9e281 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java @@ -23,8 +23,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.lang.annotation.Annotation; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/DataPoint.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/DataPoint.java similarity index 77% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/DataPoint.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/DataPoint.java index 3ba4480490..396843252d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/tags/DataPoint.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/DataPoint.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.tags; +package org.broadinstitute.sting.gatk.walkers.varianteval.util; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java index 3208c26bb4..8112ae97f2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -9,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.HashMap; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java index 0281653afe..38f7a7f402 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; -import org.apache.commons.jexl2.*; +import org.apache.commons.jexl2.Expression; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; public class SortableJexlVCMatchExp extends VariantContextUtils.JexlVCMatchExp implements Comparable { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index b8e45e462e..0a915db371 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -1,24 +1,21 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; -import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.StandardEval; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.RequiredStratification; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.StandardStratification; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.lang.reflect.Field; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 7957d35cd8..403c67d3e8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -25,23 +25,23 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.io.FileNotFoundException; -import java.lang.Double; import java.util.*; /** @@ -55,7 +55,6 @@ public class ApplyRecalibration extends RodWalker { - ///////////////////////////// // Inputs ///////////////////////////// @@ -86,6 +85,7 @@ public class ApplyRecalibration extends RodWalker { final private List tranches = new ArrayList(); final private Set inputNames = new HashSet(); final private NestedHashMap lodMap = new NestedHashMap(); + final private NestedHashMap annotationMap = new NestedHashMap(); final private Set ignoreInputFilterSet = new TreeSet(); //--------------------------------------------------------------------------------------------------------------- @@ -124,6 +124,7 @@ public void initialize() { final Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "Log odds ratio of being a true variant versus being false under the trained gaussian mixture model")); + hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.CULPRIT_KEY, 1, VCFHeaderLineType.String, "The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out")); final TreeSet samples = new TreeSet(); samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)); @@ -149,6 +150,7 @@ public void initialize() { for ( final String line : new XReadLines( RECAL_FILE ) ) { final String[] vals = line.split(","); lodMap.put( Double.parseDouble(vals[3]), vals[0], Integer.parseInt(vals[1]), Integer.parseInt(vals[2]) ); // value comes before the keys + annotationMap.put( vals[4], vals[0], Integer.parseInt(vals[1]), Integer.parseInt(vals[2]) ); // value comes before the keys } } catch ( FileNotFoundException e ) { throw new UserException.CouldNotReadInputFile(RECAL_FILE, e); @@ -174,11 +176,15 @@ public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentC String filterString = null; final Map attrs = new HashMap(vc.getAttributes()); final Double lod = (Double) lodMap.get( vc.getChr(), vc.getStart(), vc.getEnd() ); + final String worstAnnotation = (String) annotationMap.get( vc.getChr(), vc.getStart(), vc.getEnd() ); if( lod == null ) { throw new UserException("Encountered input variant which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants. First seen at: " + vc ); } + // Annotate the new record with its VQSLOD and the worst performing annotation attrs.put(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod)); + attrs.put(VariantRecalibrator.CULPRIT_KEY, worstAnnotation); + for( int i = tranches.size() - 1; i >= 0; i-- ) { final Tranche tranche = tranches.get(i); if( lod >= tranche.minVQSLod ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java index 41fea08964..17461de2f2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import Jama.Matrix; +import cern.jet.random.Normal; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.MathUtils; @@ -214,6 +215,19 @@ public double evaluateDatum( final VariantDatum datum ) { return MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k)) } + public Double evaluateDatumInOneDimension( final VariantDatum datum, final int iii ) { + if(datum.isNull[iii]) { return null; } + + final Normal normal = new Normal(0.0, 1.0, null); + final double[] pVarInGaussianLog10 = new double[gaussians.size()]; + int gaussianIndex = 0; + for( final MultivariateGaussian gaussian : gaussians ) { + normal.setState( gaussian.mu[iii], gaussian.sigma.get(iii, iii) ); + pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + Math.log10( normal.pdf( datum.annotations[iii] ) ); + } + return MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k)) + } + public double evaluateDatumMarginalized( final VariantDatum datum ) { int numSamples = 0; double sumPVarInGaussian = 0.0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java index 67132b1330..6c1a7ddbcb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java @@ -39,6 +39,7 @@ public class TrainingSet { public String name; public boolean isKnown = false; public boolean isTraining = false; + public boolean isAntiTraining = false; public boolean isTruth = false; public boolean isConsensus = false; public double prior = 0.0; @@ -47,17 +48,24 @@ public class TrainingSet { public TrainingSet( final String name, final Tags tags ) { this.name = name; + + // Parse the tags to decide which tracks have which properties if( tags != null ) { isKnown = tags.containsKey("known") && tags.getValue("known").equals("true"); isTraining = tags.containsKey("training") && tags.getValue("training").equals("true"); + isAntiTraining = tags.containsKey("bad") && tags.getValue("bad").equals("true"); isTruth = tags.containsKey("truth") && tags.getValue("truth").equals("true"); isConsensus = tags.containsKey("consensus") && tags.getValue("consensus").equals("true"); prior = ( tags.containsKey("prior") ? Double.parseDouble(tags.getValue("prior")) : prior ); } - if( !isConsensus ) { + + // Report back to the user which tracks were found and the properties that were detected + if( !isConsensus && !isAntiTraining ) { logger.info( String.format( "Found %s track: \tKnown = %s \tTraining = %s \tTruth = %s \tPrior = Q%.1f", this.name, isKnown, isTraining, isTruth, prior) ); - } else { + } else if( isConsensus ) { logger.info( String.format( "Found consensus track: %s", this.name) ); + } else { + logger.info( String.format( "Found bad sites training track: %s", this.name) ); } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java index 64fe366376..15424f0f7e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java @@ -25,11 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; -import java.io.*; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; import java.util.*; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java index 5deb5d8c2a..bc7252ec20 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java @@ -26,10 +26,10 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index e1a076e763..67d54a408e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -34,9 +33,12 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; /** * Created by IntelliJ IDEA. @@ -82,19 +84,11 @@ public void normalizeData() { } foundZeroVarianceAnnotation = foundZeroVarianceAnnotation || (theSTD < 1E-6); - if( annotationKeys.get(iii).toLowerCase().contains("ranksum") ) { // BUGBUG: to clean up - for( final VariantDatum datum : data ) { - if( datum.annotations[iii] > 0.0 ) { datum.annotations[iii] /= 3.0; } - } - } meanVector[iii] = theMean; varianceVector[iii] = theSTD; for( final VariantDatum datum : data ) { + // Transform each data point via: (x - mean) / standard deviation datum.annotations[iii] = ( datum.isNull[iii] ? GenomeAnalysisEngine.getRandomGenerator().nextGaussian() : ( datum.annotations[iii] - theMean ) / theSTD ); - // Each data point is now [ (x - mean) / standard deviation ] - if( annotationKeys.get(iii).toLowerCase().contains("ranksum") && datum.isNull[iii] && datum.annotations[iii] > 0.0 ) { - datum.annotations[iii] /= 3.0; - } } } if( foundZeroVarianceAnnotation ) { @@ -108,7 +102,6 @@ public void normalizeData() { remove = remove || (Math.abs(val) > VRAC.STD_THRESHOLD); } datum.failingSTDThreshold = remove; - datum.usedForTraining = 0; } } @@ -142,38 +135,47 @@ public ExpandingArrayList getTrainingData() { for( final VariantDatum datum : data ) { if( datum.atTrainingSite && !datum.failingSTDThreshold && datum.originalQual > VRAC.QUAL_THRESHOLD ) { trainingData.add( datum ); - datum.usedForTraining = 1; } } logger.info( "Training with " + trainingData.size() + " variants after standard deviation thresholding." ); if( trainingData.size() < VRAC.MIN_NUM_BAD_VARIANTS ) { - logger.warn("WARNING: Training with very few variant sites! Please check the model reporting PDF to ensure the quality of the model is reliable."); + logger.warn( "WARNING: Training with very few variant sites! Please check the model reporting PDF to ensure the quality of the model is reliable." ); } return trainingData; } public ExpandingArrayList selectWorstVariants( double bottomPercentage, final int minimumNumber ) { - Collections.sort( data ); + // The return value is the list of training variants final ExpandingArrayList trainingData = new ExpandingArrayList(); - final int numToAdd = Math.max( minimumNumber, Math.round((float)bottomPercentage * data.size()) ); - if( numToAdd > data.size() ) { - throw new UserException.BadInput("Error during negative model training. Minimum number of variants to use in training is larger than the whole call set. One can attempt to lower the --minNumBadVariants arugment but this is unsafe."); + + // First add to the training list all sites overlapping any bad sites training tracks + for( final VariantDatum datum : data ) { + if( datum.atAntiTrainingSite && !datum.failingSTDThreshold && !Double.isInfinite(datum.lod) ) { + trainingData.add( datum ); + } } - if( numToAdd == minimumNumber ) { - logger.warn("WARNING: Training with very few variant sites! Please check the model reporting PDF to ensure the quality of the model is reliable."); + final int numBadSitesAdded = trainingData.size(); + logger.info( "Found " + numBadSitesAdded + " variants overlapping bad sites training tracks." ); + + // Next sort the variants by the LOD coming from the positive model and add to the list the bottom X percent of variants + Collections.sort( data ); + final int numToAdd = Math.max( minimumNumber - trainingData.size(), Math.round((float)bottomPercentage * data.size()) ); + if( numToAdd > data.size() ) { + throw new UserException.BadInput( "Error during negative model training. Minimum number of variants to use in training is larger than the whole call set. One can attempt to lower the --minNumBadVariants arugment but this is unsafe." ); + } else if( numToAdd == minimumNumber - trainingData.size() ) { + logger.warn( "WARNING: Training with very few variant sites! Please check the model reporting PDF to ensure the quality of the model is reliable." ); bottomPercentage = ((float) numToAdd) / ((float) data.size()); } - int index = 0; - int numAdded = 0; + int index = 0, numAdded = 0; while( numAdded < numToAdd ) { final VariantDatum datum = data.get(index++); - if( !datum.failingSTDThreshold && !Double.isInfinite(datum.lod) ) { + if( !datum.atAntiTrainingSite && !datum.failingSTDThreshold && !Double.isInfinite(datum.lod) ) { + datum.atAntiTrainingSite = true; trainingData.add( datum ); - datum.usedForTraining = -1; numAdded++; } } - logger.info("Training with worst " + (float) bottomPercentage * 100.0f + "% of passing data --> " + trainingData.size() + " variants with LOD <= " + String.format("%.4f", data.get(index).lod) + "."); + logger.info( "Additionally training with worst " + String.format("%.3f", (float) bottomPercentage * 100.0f) + "% of passing data --> " + (trainingData.size() - numBadSitesAdded) + " variants with LOD <= " + String.format("%.4f", data.get(index).lod) + "." ); return trainingData; } @@ -186,10 +188,11 @@ public ExpandingArrayList getRandomDataForPlotting( int numToAdd ) returnData.add(datum); } } - // add an extra 5% of points from bad training set, since that set is small but interesting + + // Add an extra 5% of points from bad training set, since that set is small but interesting for( int iii = 0; iii < Math.floor(0.05*numToAdd); iii++) { final VariantDatum datum = data.get(GenomeAnalysisEngine.getRandomGenerator().nextInt(data.size())); - if( datum.usedForTraining == -1 && !datum.failingSTDThreshold ) { returnData.add(datum); } + if( datum.atAntiTrainingSite && !datum.failingSTDThreshold ) { returnData.add(datum); } else { iii--; } } @@ -232,23 +235,15 @@ private static double decodeAnnotation( final String annotationKey, final Varian double value; try { - if( annotationKey.equalsIgnoreCase("QUAL") ) { - value = vc.getPhredScaledQual(); - } else if( annotationKey.equalsIgnoreCase("DP") ) { - value = Double.parseDouble( (String)vc.getAttribute( "DP" ) ) / Double.parseDouble( (String)vc.getAttribute( "AN" ) ); - } else { - value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) ); - if( Double.isInfinite(value) ) { value = Double.NaN; } - if( annotationKey.equalsIgnoreCase("InbreedingCoeff") && value > 0.05 ) { value = Double.NaN; } - if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM - value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); - } - if( annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } - if( annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.01) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } + value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) ); + if( Double.isInfinite(value) ) { value = Double.NaN; } + if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM + value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } - + if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } + if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } } catch( Exception e ) { - value = Double.NaN; // The VQSR works with missing data now by marginalizing over the missing dimension when evaluating Gaussians + value = Double.NaN; // The VQSR works with missing data by marginalizing over the missing dimension when evaluating the Gaussian mixture model } return value; @@ -258,8 +253,10 @@ public void parseTrainingSets( final RefMetaDataTracker tracker, final Reference datum.isKnown = false; datum.atTruthSite = false; datum.atTrainingSite = false; + datum.atAntiTrainingSite = false; datum.prior = 2.0; datum.consensusCount = 0; + for( final TrainingSet trainingSet : trainingSets ) { for( final VariantContext trainVC : tracker.getVariantContexts( ref, trainingSet.name, null, context.getLocation(), false, false ) ) { if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && @@ -272,13 +269,19 @@ public void parseTrainingSets( final RefMetaDataTracker tracker, final Reference datum.prior = Math.max( datum.prior, trainingSet.prior ); datum.consensusCount += ( trainingSet.isConsensus ? 1 : 0 ); } + if( trainVC != null ) { + datum.atAntiTrainingSite = datum.atAntiTrainingSite || trainingSet.isAntiTraining; + } + } } } public void writeOutRecalibrationTable( final PrintStream RECAL_FILE ) { for( final VariantDatum datum : data ) { - RECAL_FILE.println(String.format("%s,%d,%d,%.4f", datum.contig, datum.start, datum.stop, datum.lod)); + RECAL_FILE.println(String.format("%s,%d,%d,%.4f,%s", + datum.contig, datum.start, datum.stop, datum.lod, + (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"))); } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java index 8295ec2059..eb9e98fcb7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java @@ -39,16 +39,17 @@ public class VariantDatum implements Comparable { public double lod; public boolean atTruthSite; public boolean atTrainingSite; + public boolean atAntiTrainingSite; public boolean isTransition; public boolean isSNP; public boolean failingSTDThreshold; public double originalQual; public double prior; public int consensusCount; - public int usedForTraining; public String contig; public int start; public int stop; + public int worstAnnotation; public MultivariateGaussian assignment; // used in K-means implementation public int compareTo( final VariantDatum other ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index a77c5962ca..8179463eb1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -25,14 +25,12 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; @@ -42,6 +40,8 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; import java.io.FileNotFoundException; @@ -60,6 +60,7 @@ public class VariantRecalibrator extends RodWalker, ExpandingArrayList> implements TreeReducible> { public static final String VQS_LOD_KEY = "VQSLOD"; + public static final String CULPRIT_KEY = "culprit"; @ArgumentCollection private VariantRecalibratorArgumentCollection VRAC = new VariantRecalibratorArgumentCollection(); @@ -175,7 +176,6 @@ public ExpandingArrayList map( final RefMetaDataTracker tracker, f datum.originalQual = vc.getPhredScaledQual(); datum.isSNP = vc.isSNP() && vc.isBiallelic(); datum.isTransition = datum.isSNP && VariantContextUtils.isTransition(vc); - datum.usedForTraining = 0; // Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately dataManager.parseTrainingSets( tracker, ref, context, vc, datum, TRUST_ALL_POLYMORPHIC ); @@ -237,6 +237,7 @@ public void onTraversalDone( final ExpandingArrayList reduceSum ) // Generate the negative model using the worst performing data and evaluate each variant contrastively final GaussianMixtureModel badModel = engine.generateModel( dataManager.selectWorstVariants( VRAC.PERCENT_BAD_VARIANTS, VRAC.MIN_NUM_BAD_VARIANTS ) ); engine.evaluateData( dataManager.getData(), badModel, true ); + engine.calculateWorstPerformingAnnotation( dataManager.getData(), goodModel, badModel ); // Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user final int nCallsAtTruth = TrancheManager.countCallsAtTruth( dataManager.getData(), Double.NEGATIVE_INFINITY ); @@ -283,7 +284,7 @@ private void createVisualizationScript( final ExpandingArrayList r throw new UserException.CouldNotCreateOutputFile(RSCRIPT_FILE, "", e); } - // We make extensive use of the ggplot2 library: http://had.co.nz/ggplot2/ + // We make extensive use of the ggplot2 R library: http://had.co.nz/ggplot2/ stream.println("library(ggplot2)"); createArrangeFunction( stream ); @@ -333,7 +334,8 @@ private void createVisualizationScript( final ExpandingArrayList r stream.print("data <- c("); for( final VariantDatum datum : randomData ) { - stream.print(String.format("%.3f, %.3f, %.3f, %d, %d,", datum.annotations[iii], datum.annotations[jjj], (datum.lod < lodCutoff ? -1.0 : 1.0), datum.usedForTraining, (datum.isKnown ? 1 : -1))); + stream.print(String.format("%.3f, %.3f, %.3f, %d, %d,", datum.annotations[iii], datum.annotations[jjj], (datum.lod < lodCutoff ? -1.0 : 1.0), + (datum.atAntiTrainingSite ? -1 : (datum.atTrainingSite ? 1 : 0)), (datum.isKnown ? 1 : -1))); } stream.println("NA,NA,NA,NA,1)"); stream.println("d <- matrix(data,ncol=5,byrow=T)"); @@ -376,6 +378,7 @@ private void createVisualizationScript( final ExpandingArrayList r } } + // The Arrange function is how we place the 4 model plots on one page // from http://gettinggeneticsdone.blogspot.com/2010/03/arrange-multiple-ggplot2-plots-in-same.html private void createArrangeFunction( final PrintStream stream ) { stream.println("vp.layout <- function(x, y) viewport(layout.pos.row=x, layout.pos.col=y)"); @@ -400,5 +403,4 @@ private void createArrangeFunction( final PrintStream stream ) { stream.println("}"); stream.println("}"); } - - } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java index f597fb1285..e1a8159132 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java @@ -60,7 +60,7 @@ public enum Mode { @Argument(fullName="priorCounts", shortName="priorCounts", doc="The number of prior counts to use in variational Bayes algorithm.", required=false) public double PRIOR_COUNTS = 20.0; @Argument(fullName="percentBadVariants", shortName="percentBad", doc="What percentage of the worst scoring variants to use when building the Gaussian mixture model of bad variants. 0.07 means bottom 7 percent.", required=false) - public double PERCENT_BAD_VARIANTS = 0.015; + public double PERCENT_BAD_VARIANTS = 0.03; @Argument(fullName="minNumBadVariants", shortName="minNumBad", doc="The minimum amount of worst scoring variants to use when building the Gaussian mixture model of bad variants. Will override -percentBad arugment if necessary.", required=false) public int MIN_NUM_BAD_VARIANTS = 2000; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java index 4deb82f6bb..52b48ff08a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java @@ -82,6 +82,23 @@ public void evaluateData( final List data, final GaussianMixtureMo } } + public void calculateWorstPerformingAnnotation( final List data, final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel ) { + for( final VariantDatum datum : data ) { + int worstAnnotation = -1; + double minProb = Double.MAX_VALUE; + for( int iii = 0; iii < datum.annotations.length; iii++ ) { + final Double goodProbLog10 = goodModel.evaluateDatumInOneDimension(datum, iii); + final Double badProbLog10 = badModel.evaluateDatumInOneDimension(datum, iii); + if( goodProbLog10 != null && badProbLog10 != null ) { + final double prob = goodProbLog10 - badProbLog10; + if(prob < minProb) { minProb = prob; worstAnnotation = iii; } + } + } + datum.worstAnnotation = worstAnnotation; + } + } + + ///////////////////////////// // Private Methods used for generating a GaussianMixtureModel ///////////////////////////// diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 597a54dc1a..9c2a520ef7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -25,12 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.apache.poi.hpsf.Variant; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.Requires; @@ -38,11 +39,10 @@ import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; @@ -150,7 +150,7 @@ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentCo // get all of the vcf rods at this locus // Need to provide reference bases to simpleMerge starting at current locus - Collection vcs = tracker.getAllVariantContexts(ref, null,context.getLocation(), true, false); + Collection vcs = tracker.getAllVariantContexts(ref, null, context.getLocation(), true, false); if ( sitesOnlyVCF ) { vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs); @@ -173,17 +173,25 @@ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentCo if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN)) return 0; - VariantContext mergedVC; + List mergedVCs = new ArrayList(); if ( master ) { - mergedVC = VariantContextUtils.masterMerge(vcs, "master"); + mergedVCs.add(VariantContextUtils.masterMerge(vcs, "master")); } else { - mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, filteredRecordsMergeType, - genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC); + Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); + // iterate over the types so that it's deterministic + for ( VariantContext.Type type : VariantContext.Type.values() ) { + if ( VCsByType.containsKey(type) ) + mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), + priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, + ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); + } } - //out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC); + for ( VariantContext mergedVC : mergedVCs ) { + // only operate at the start of events + if ( mergedVC == null ) + continue; - if ( mergedVC != null ) { // only operate at the start of events HashMap attributes = new HashMap(mergedVC.getAttributes()); // re-compute chromosome counts VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index ba6f5e513e..b45ee1b674 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -24,18 +24,21 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.Arrays; +import java.util.Collection; +import java.util.Map; +import java.util.Set; /** * Filters a lifted-over VCF file for ref bases that have been changed. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index d9dd85f0ca..2ebd183f41 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -28,21 +28,17 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.SortingVCFWriter; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 7eda54387e..4f05c8aacb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -24,28 +24,28 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import net.sf.picard.PicardException; +import net.sf.picard.liftover.LiftOver; +import net.sf.picard.util.Interval; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RMD; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; import java.util.*; -import net.sf.picard.PicardException; -import net.sf.picard.liftover.LiftOver; -import net.sf.picard.util.Interval; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMFileReader; - /** * Lifts a VCF file over from one build to another. Note that the resulting VCF could be mis-sorted. */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java index 0c41a9728d..f0756d884b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java @@ -24,11 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; @@ -39,11 +34,15 @@ import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; /** * Takes a VCF file, randomly splits variants into two different sets, and outputs 2 new VCFs with the results. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 963aa0ce5f..ac67976093 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -24,27 +24,39 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; +import java.lang.annotation.AnnotationFormatError; import java.util.*; /** @@ -75,12 +87,31 @@ public class SelectVariants extends RodWalker { @Argument(fullName="excludeFiltered", shortName="ef", doc="Don't include filtered loci in the analysis.", required=false) private boolean EXCLUDE_FILTERED = false; + @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't include filtered loci.", required=false) + private boolean KEEP_ORIGINAL_CHR_COUNTS = false; + @Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called on a ROD comparison track. Use -disc ROD_NAME", required=false) private String discordanceRodName = ""; @Argument(fullName="concordance", shortName = "conc", doc="Output variants that were also called on a ROD comparison track. Use -conc ROD_NAME", required=false) private String concordanceRodName = ""; + @Hidden + @Argument(fullName="inputAF", shortName = "inputAF", doc="", required=false) + private String inputAFRodName = ""; + + @Hidden + @Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false) + private boolean KEEP_AF_SPECTRUM = false; + + @Hidden + @Argument(fullName="afFile", shortName="afFile", doc="The output recal file used by ApplyRecalibration", required=false) + private File AF_FILE = new File(""); + + @Hidden + @Argument(fullName="family_structure_file", shortName="familyFile", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) + private File FAMILY_STRUCTURE_FILE = null; + @Argument(fullName="family_structure", shortName="family", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) private String FAMILY_STRUCTURE = ""; @@ -102,6 +133,9 @@ public class SelectVariants extends RodWalker { @Argument(fullName="selectIndels", shortName="indels", doc="Select only Indels.", required=false) private boolean SELECT_INDELS = false; + @Hidden + @Argument(fullName="outMVFile", shortName="outMVFile", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) + private String outMVFile = null; /* Private class used to store the intermediate variants in the integer random selection process */ private class RandomVariantStructure { @@ -129,7 +163,7 @@ public void set (VariantContext vcP, byte refBaseP) { private boolean DISCORDANCE_ONLY = false; private boolean CONCORDANCE_ONLY = false; - private MendelianViolation mv; + private Set mvSet = new HashSet(); /* default name for the variant dataset (VCF) */ private final String variantRodName = "variant"; @@ -144,8 +178,14 @@ public void set (VariantContext vcP, byte refBaseP) { private RandomVariantStructure [] variantArray; + /* Variables used for random selection with AF boosting */ + private ArrayList afBreakpoints = null; + private ArrayList afBoosts = null; + double bkDelta = 0.0; + private PrintStream outMVFileStream = null; + /** * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher @@ -178,6 +218,12 @@ public void initialize() { // Initialize VCF header Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger); headerLines.add(new VCFHeaderLine("source", "SelectVariants")); + + if (KEEP_ORIGINAL_CHR_COUNTS) { + headerLines.add(new VCFFormatHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC")); + headerLines.add(new VCFFormatHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF")); + headerLines.add(new VCFFormatHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN")); + } vcfWriter.writeHeader(new VCFHeader(headerLines, samples)); for (int i = 0; i < SELECT_EXPRESSIONS.size(); i++) { @@ -195,10 +241,29 @@ public void initialize() { CONCORDANCE_ONLY = concordanceRodName.length() > 0; if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceRodName); - if (MENDELIAN_VIOLATIONS) - mv = new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD); + if (MENDELIAN_VIOLATIONS) { + if ( FAMILY_STRUCTURE_FILE != null) { + try { + for ( final String line : new XReadLines( FAMILY_STRUCTURE_FILE ) ) { + MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD); + if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom())) + mvSet.add(mv); + } + } catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(AF_FILE, e); + } + if (outMVFile != null) + try { + outMVFileStream = new PrintStream(outMVFile); + } + catch (FileNotFoundException e) { + throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); } + } + else + mvSet.add(new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD)); + } else if (!FAMILY_STRUCTURE.isEmpty()) { - mv = new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD); + mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); MENDELIAN_VIOLATIONS = true; } @@ -210,6 +275,33 @@ else if (!FAMILY_STRUCTURE.isEmpty()) { SELECT_RANDOM_FRACTION = fractionRandom > 0; if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + fractionRandom + "% of the variants at random from the variant track"); + + + if (KEEP_AF_SPECTRUM) { + try { + afBreakpoints = new ArrayList(); + afBoosts = new ArrayList(); + logger.info("Reading in AF boost table..."); + boolean firstLine = false; + for ( final String line : new XReadLines( AF_FILE ) ) { + if (!firstLine) { + firstLine = true; + continue; + } + final String[] vals = line.split(" "); + + double bkp = Double.valueOf(vals[0]); + double afb = Double.valueOf(vals[1]); + afBreakpoints.add(bkp); + afBoosts.add(afb); + + } + bkDelta = afBreakpoints.get(0); + } catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(AF_FILE, e); + } + + } } /** @@ -233,9 +325,24 @@ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentCo for (VariantContext vc : vcs) { if (MENDELIAN_VIOLATIONS) { - if (!mv.isViolation(vc)) { - break; + boolean foundMV = false; + for (MendelianViolation mv : mvSet) { + if (mv.isViolation(vc)) { + foundMV = true; + //System.out.println(vc.toString()); + if (outMVFile != null) + outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + + "childG=%s childGL=%s\n",vc.getChr(), vc.getStart(), + vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getChromosomeCount(vc.getAlternateAllele(0)), + mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(), + vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), + vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), + vc.getGenotype(mv.getSampleChild()).toBriefString(),vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString() ); + } } + + if (!foundMV) + break; } if (DISCORDANCE_ONLY) { Collection compVCs = tracker.getVariantContexts(ref, discordanceRodName, null, context.getLocation(), true, false); @@ -266,9 +373,61 @@ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentCo if (SELECT_RANDOM_NUMBER) { randomlyAddVariant(++variantNumber, sub, ref.getBase()); } - else if (!SELECT_RANDOM_FRACTION || GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom) { + else if (!SELECT_RANDOM_FRACTION || (!KEEP_AF_SPECTRUM && GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { vcfWriter.add(sub, ref.getBase()); } + else { + if (SELECT_RANDOM_FRACTION && KEEP_AF_SPECTRUM ) { + // ok we have a comp VC and we need to match the AF spectrum of inputAFRodName. + // We then pick a variant with probablity AF*desiredFraction + if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) { + String afo = sub.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY); + + double af; + double afBoost = 1.0; + if (afo.contains(",")) { + String[] afs = afo.split(","); + afs[0] = afs[0].substring(1,afs[0].length()); + afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1); + + double[] afd = new double[afs.length]; + + for (int k=0; k < afd.length; k++) + afd[k] = Double.valueOf(afs[k]); + + af = MathUtils.arrayMax(afd); + //af = Double.valueOf(afs[0]); + + } + else + af = Double.valueOf(afo); + + // now boost af by table read from file if desired + //double bkpt = 0.0; + int bkidx = 0; + if (!afBreakpoints.isEmpty()) { + for ( Double bkpt : afBreakpoints) { + if (af < bkpt + bkDelta) + break; + else bkidx++; + } + if (bkidx >=afBoosts.size()) + bkidx = afBoosts.size()-1; + afBoost = afBoosts.get(bkidx); + //System.out.formatPrin("af:%f bkidx:%d afboost:%f\n",af,bkidx,afBoost); + + + + } + + //System.out.format("%s .. %4.4f\n",afo.toString(), af); + if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * afBoost * afBoost) + vcfWriter.add(sub, ref.getBase()); + } + + + } + } } } @@ -350,8 +509,8 @@ private boolean sampleHasVariant(Genotype g) { private boolean haveSameGenotypes(Genotype g1, Genotype g2) { if ((g1.isCalled() && g2.isFiltered()) || - (g2.isCalled() && g1.isFiltered()) || - (g1.isFiltered() && g2.isFiltered() && EXCLUDE_FILTERED)) + (g2.isCalled() && g1.isFiltered()) || + (g1.isFiltered() && g2.isFiltered() && EXCLUDE_FILTERED)) return false; List a1s = g1.getAlleles(); @@ -384,7 +543,7 @@ public void onTraversalDone(Integer result) { * @param vc the VariantContext record to subset * @param samples the samples to extract * @return the subsetted VariantContext - */ + */ private VariantContext subsetRecord(VariantContext vc, Set samples) { if ( samples == null || samples.isEmpty() ) return vc; @@ -394,7 +553,7 @@ private VariantContext subsetRecord(VariantContext vc, Set samples) { if ( samples.contains(genotypePair.getKey()) ) genotypes.add(genotypePair.getValue()); } - + VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles()); HashMap attributes = new HashMap(sub.getAttributes()); @@ -404,7 +563,7 @@ private VariantContext subsetRecord(VariantContext vc, Set samples) { Genotype g = sub.getGenotype(sample); if (g.isNotFiltered() && g.isCalled()) { - + String dp = (String) g.getAttribute("DP"); if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) { depth += Integer.valueOf(dp); @@ -413,6 +572,16 @@ private VariantContext subsetRecord(VariantContext vc, Set samples) { } + if (KEEP_ORIGINAL_CHR_COUNTS) { + if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) ) + attributes.put("AC_Orig",attributes.get(VCFConstants.ALLELE_COUNT_KEY)); + if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) ) + attributes.put("AF_Orig",attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY)); + if ( attributes.containsKey(VCFConstants.ALLELE_NUMBER_KEY) ) + attributes.put("AN_Orig",attributes.get(VCFConstants.ALLELE_NUMBER_KEY)); + + } + VariantContextUtils.calculateChromosomeCounts(sub,attributes,false); attributes.put("DP", depth); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 1bd73414c9..0644c669b1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -25,22 +25,25 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.TribbleException; +import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; import java.io.File; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 4826795930..86bb3b0e86 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -25,20 +25,19 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 71dd5df3f5..39358dad5f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -24,18 +24,18 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.PrintStream; import java.util.*; @@ -75,17 +75,29 @@ public static abstract class Getter { public abstract String get(VariantContext // #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT getters.put("CHROM", new Getter() { public String get(VariantContext vc) { return vc.getChr(); } }); getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } }); - getters.put("REF", new Getter() { public String get(VariantContext vc) { return vc.getReference().toString(); } }); + getters.put("REF", new Getter() { + public String get(VariantContext vc) { + String x = ""; + if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) { + Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)); + x=x+new String(new byte[]{refByte}); + } + return x+vc.getReference().getDisplayString(); + } + }); getters.put("ALT", new Getter() { public String get(VariantContext vc) { StringBuilder x = new StringBuilder(); int n = vc.getAlternateAlleles().size(); - if ( n == 0 ) return "."; + if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) { + Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)); + x.append(new String(new byte[]{refByte})); + } for ( int i = 0; i < n; i++ ) { if ( i != 0 ) x.append(","); - x.append(vc.getAlternateAllele(i).toString()); + x.append(vc.getAlternateAllele(i).getDisplayString()); } return x.toString(); } @@ -169,6 +181,31 @@ public static List extractFields(VariantContext vc, List fields, throw new UserException(String.format("Missing field %s in vc %s at %s", field, vc.getSource(), vc)); } + if (field.equals("AF") || field.equals("AC")) { + String afo = val; + + double af=0; + if (afo.contains(",")) { + String[] afs = afo.split(","); + afs[0] = afs[0].substring(1,afs[0].length()); + afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1); + + double[] afd = new double[afs.length]; + + for (int k=0; k < afd.length; k++) + afd[k] = Double.valueOf(afs[k]); + + af = MathUtils.arrayMax(afd); + //af = Double.valueOf(afs[0]); + + } + else + if (!afo.equals("NA")) + af = Double.valueOf(afo); + + val = Double.toString(af); + + } vals.add(val); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 7eb49da345..aa0e5987f6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -28,26 +28,26 @@ import net.sf.samtools.util.CloseableIterator; import org.broad.tribble.dbsnp.DbSNPCodec; import org.broad.tribble.dbsnp.DbSNPFeature; -import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; @@ -199,8 +199,8 @@ private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, byte ref // setup the header fields Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); - hInfo.add(new VCFHeaderLine("source", "VariantsToVCF")); - hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); + //hInfo.add(new VCFHeaderLine("source", "VariantsToVCF")); + //hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY); for ( VCFHeaderLine field : hInfo ) { diff --git a/public/java/src/org/broadinstitute/sting/jna/clibrary/LibC.java b/public/java/src/org/broadinstitute/sting/jna/clibrary/LibC.java index b01533ee66..b5efcc153e 100644 --- a/public/java/src/org/broadinstitute/sting/jna/clibrary/LibC.java +++ b/public/java/src/org/broadinstitute/sting/jna/clibrary/LibC.java @@ -24,7 +24,10 @@ package org.broadinstitute.sting.jna.clibrary; -import com.sun.jna.*; +import com.sun.jna.LastErrorException; +import com.sun.jna.Native; +import com.sun.jna.NativeLong; +import com.sun.jna.Structure; import com.sun.jna.ptr.NativeLongByReference; /** diff --git a/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java b/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java index 0c0579d6fb..c7b3de6cf0 100644 --- a/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java +++ b/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java @@ -29,7 +29,7 @@ import com.sun.jna.ptr.IntByReference; import com.sun.jna.ptr.PointerByReference; import org.broadinstitute.sting.jna.clibrary.JNAUtils; -import org.broadinstitute.sting.jna.clibrary.LibC.*; +import org.broadinstitute.sting.jna.clibrary.LibC.timeval; /* NOTE: This library uses Pointer for some Struct.ByReference members going diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java index fe6758e76a..c09c4037e3 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java @@ -27,13 +27,16 @@ import net.sf.samtools.BAMIndex; import net.sf.samtools.SAMFileWriter; import org.broad.tribble.Tribble; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import java.io.File; import java.lang.annotation.Annotation; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; public abstract class ArgumentDefinitionField extends ArgumentField { diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java index 2da427d447..e90933504a 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java @@ -35,7 +35,10 @@ import java.io.InputStream; import java.io.OutputStream; import java.lang.annotation.Annotation; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; public abstract class ArgumentField { diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java index 21fb447332..5095bd6e5f 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -27,17 +27,20 @@ import org.apache.commons.io.FileUtils; import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; +import org.broadinstitute.sting.commandline.CommandLineProgram; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.ParsingEngine; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.ReadFilter; -import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; +import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionType; diff --git a/public/java/src/org/broadinstitute/sting/utils/DisjointSet.java b/public/java/src/org/broadinstitute/sting/utils/DisjointSet.java index 5f68d34144..52c18e6d65 100644 --- a/public/java/src/org/broadinstitute/sting/utils/DisjointSet.java +++ b/public/java/src/org/broadinstitute/sting/utils/DisjointSet.java @@ -23,7 +23,9 @@ */ package org.broadinstitute.sting.utils; -import java.util.*; +import java.util.Collection; +import java.util.Set; +import java.util.TreeSet; public class DisjointSet { private ItemNode[] nodes; diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java index 1f8800542b..b969235895 100644 --- a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java +++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java @@ -2,7 +2,6 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.Serializable; diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index 18a1e7ffdf..a5c6e05373 100644 --- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -25,12 +25,14 @@ package org.broadinstitute.sting.utils; -import com.google.java.contract.*; +import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; +import com.google.java.contract.Requires; +import com.google.java.contract.ThrowEnsures; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; - import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java b/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java index 30e1a3f5b9..af69ebca68 100755 --- a/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.utils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Arrays; diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 50aa0f7078..36ed506aaa 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -25,17 +25,14 @@ package org.broadinstitute.sting.utils; -import cern.jet.math.Arithmetic; - -import java.math.BigDecimal; -import java.util.*; - import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.collections.PrimitivePair; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.math.BigDecimal; +import java.util.*; + /** * MathUtils is a static class (no instantiation allowed!) with some useful math methods. * diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java index a8089ffe8e..c6a07b5ce0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java +++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java @@ -1,12 +1,13 @@ package org.broadinstitute.sting.utils; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.Collection; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; diff --git a/public/java/src/org/broadinstitute/sting/utils/PathUtils.java b/public/java/src/org/broadinstitute/sting/utils/PathUtils.java index 47466be20e..822d04dfd5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/PathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/PathUtils.java @@ -2,10 +2,10 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.List; -import java.util.ArrayList; import java.io.File; import java.io.FilenameFilter; +import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java index 23054e95fd..fad2320fcf 100755 --- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java @@ -9,9 +9,13 @@ * @author Kiran Garimella */ public class QualityUtils { + public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE; public final static double MIN_REASONABLE_ERROR = 0.0001; public final static byte MAX_REASONABLE_Q_SCORE = 40; + public final static byte MIN_USABLE_Q_SCORE = 6; + + public final static int MAPPING_QUALITY_UNAVAILABLE = 255; /** * Private constructor. No instantiating this class! diff --git a/public/java/src/org/broadinstitute/sting/utils/ReservoirDownsampler.java b/public/java/src/org/broadinstitute/sting/utils/ReservoirDownsampler.java index 30257d9670..a758df4315 100644 --- a/public/java/src/org/broadinstitute/sting/utils/ReservoirDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/utils/ReservoirDownsampler.java @@ -3,7 +3,9 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; /** * Randomly downsample from a stream of elements. This algorithm is a direct, diff --git a/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java b/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java index 4b7fa3e41a..92d73a5ced 100755 --- a/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java +++ b/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java @@ -24,15 +24,14 @@ package org.broadinstitute.sting.utils; +import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import net.sf.samtools.Cigar; - -import java.util.*; - import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.StingException; +import java.util.*; + /** * Created by IntelliJ IDEA. * User: asivache diff --git a/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java b/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java index c0370064d7..f9997bfd8f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java @@ -27,12 +27,12 @@ import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMReadGroupRecord; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java b/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java index a59c5134ab..342087b41d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java +++ b/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java @@ -1,7 +1,10 @@ package org.broadinstitute.sting.utils; +import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; +import com.google.java.contract.Requires; + import java.io.PrintStream; -import com.google.java.contract.*; /** * A useful simple system for timing code. This code is not thread safe! diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index 4c13d8b184..6a50badce9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -25,13 +25,12 @@ package org.broadinstitute.sting.utils; -import java.util.*; - import net.sf.samtools.util.StringUtil; - import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.collections.Pair; +import java.util.*; + /** * Created by IntelliJ IDEA. * User: depristo diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index 456f0fa0f9..ef7cf751ed 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.utils.baq; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.ReferenceSequence; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java index 24d4152a58..26356a4a4d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java @@ -2,11 +2,10 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import net.sf.samtools.SAMRecord; import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/bed/BedParser.java b/public/java/src/org/broadinstitute/sting/utils/bed/BedParser.java index abcae066f0..b95165841e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/bed/BedParser.java +++ b/public/java/src/org/broadinstitute/sting/utils/bed/BedParser.java @@ -1,11 +1,12 @@ package org.broadinstitute.sting.utils.bed; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.*; -import java.util.*; +import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java index 404bd80b6a..e65b8f9216 100755 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java @@ -29,9 +29,9 @@ import org.broadinstitute.sting.utils.exceptions.StingException; import org.reflections.util.ClasspathHelper; -import java.lang.reflect.*; import java.io.File; import java.io.IOException; +import java.lang.reflect.*; import java.net.URL; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 95f3e160cf..5449906b2b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -4,9 +4,6 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.walkers.ClipReadsWalker; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.Vector; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 031467ed9b..988d297f67 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.utils.clipreads; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.walkers.ClipReadsWalker; import java.util.ArrayList; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java index a286ce7898..fef6c4ea06 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java @@ -30,7 +30,6 @@ import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.regex.Matcher; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java index 618d9ce792..e169dbdfc9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java @@ -4,11 +4,11 @@ import org.broad.tribble.FeatureCodec; import org.broad.tribble.NameAwareCodec; import org.broad.tribble.TribbleException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.LineReader; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 01344a117f..710127f7a1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -7,6 +7,8 @@ import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; import org.broad.tribble.util.ParsingUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -96,6 +98,9 @@ protected Object createHeader(List headerStrings, String line) { for ( String str : headerStrings ) { if ( !str.startsWith(VCFHeader.METADATA_INDICATOR) ) { String[] strings = str.substring(1).split(VCFConstants.FIELD_SEPARATOR); + if ( strings.length < VCFHeader.HEADER_FIELDS.values().length ) + throw new TribbleException.InvalidHeader("there are not enough columns present in the header line: " + str); + int arrayIndex = 0; for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) { try { @@ -159,12 +164,11 @@ public Feature decode(String line) { } private Feature reallyDecode(String line) { - try { // the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null; // our header cannot be null, we need the genotype sample names and counts - if (header == null) throw new IllegalStateException("VCF Header cannot be null when decoding a record"); + if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record"); if (parts == null) parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)]; @@ -174,17 +178,18 @@ private Feature reallyDecode(String line) { // if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data) if (( (header == null || (header != null && !header.hasGenotypingData())) && nParts != NUM_STANDARD_FIELDS) || (header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) ) - throw new IllegalArgumentException("There aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) + - " tokens, and saw " + nParts + " )"); + throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) + + " tokens, and saw " + nParts + " )", lineNo); return parseVCFLine(parts); - } catch (TribbleException e) { - throw new TribbleException.InvalidDecodeLine(e.getMessage(), line); - } } protected void generateException(String message) { - throw new TribbleException.InvalidDecodeLine(message, lineNo); + throw new UserException.MalformedVCF(message, lineNo); + } + + private static void generateException(String message, int lineNo) { + throw new UserException.MalformedVCF(message, lineNo); } /** @@ -472,10 +477,6 @@ protected static boolean isSingleNucleotideEvent(List alleles) { return true; } - private static void generateException(String message, int lineNo) { - throw new TribbleException.InvalidDecodeLine(message, lineNo); - } - private static int computeForwardClipping(List unclippedAlleles, String ref) { boolean clipping = true; // Note that the computation of forward clipping here is meant only to see whether there is a common diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java index 31251c0897..b7f4be39a1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java @@ -32,13 +32,14 @@ import org.broad.tribble.util.LittleEndianOutputStream; import org.broad.tribble.util.ParsingUtils; import org.broad.tribble.util.PositionalStream; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.*; -import java.util.*; import java.lang.reflect.Array; +import java.util.*; /** * this class writes VCF files @@ -123,12 +124,10 @@ public void writeHeader(VCFHeader header) { try { // the file format field needs to be written first - mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n"); + mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_1.getFormatString() + "=" + VCFHeaderVersion.VCF4_1.getVersionString() + "\n"); for ( VCFHeaderLine line : mHeader.getMetaData() ) { - if ( line.getKey().equals(VCFHeaderVersion.VCF4_0.getFormatString()) || - line.getKey().equals(VCFHeaderVersion.VCF3_3.getFormatString()) || - line.getKey().equals(VCFHeaderVersion.VCF3_2.getFormatString()) ) + if ( VCFHeaderVersion.isFormatString(line.getKey()) ) continue; // are the records filtered (so we know what to put in the FILTER column of passing records) ? @@ -302,10 +301,7 @@ public void add(VariantContext vc, byte refBase, boolean refBaseShouldBeAppliedT } else { List genotypeAttributeKeys = new ArrayList(); if ( vc.hasGenotypes() ) { - genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY); - for ( String key : calcVCFGenotypeKeys(vc) ) { - genotypeAttributeKeys.add(key); - } + genotypeAttributeKeys.addAll(calcVCFGenotypeKeys(vc)); } else if ( mHeader.hasGenotypingData() ) { // this needs to be done in case all samples are no-calls genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY); @@ -358,16 +354,8 @@ private void writeInfoString(Map infoFields) throws IOException mWriter.write(key); if ( !entry.getValue().equals("") ) { - int numVals = 1; VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key); - if ( metaData != null ) - numVals = metaData.getCount(); - - // take care of unbounded encoding - if ( numVals == VCFInfoHeaderLine.UNBOUNDED ) - numVals = 1; - - if ( numVals > 0 ) { + if ( metaData == null || metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() != 0 ) { mWriter.write("="); mWriter.write(entry.getValue()); } @@ -397,16 +385,22 @@ private void addGenotypeData(VariantContext vc, Map alleleMap, L continue; } - writeAllele(g.getAllele(0), alleleMap); - for (int i = 1; i < g.getPloidy(); i++) { - mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED); - writeAllele(g.getAllele(i), alleleMap); - } - List attrs = new ArrayList(genotypeFormatKeys.size()); for ( String key : genotypeFormatKeys ) { - if ( key.equals(VCFConstants.GENOTYPE_KEY) ) + + if ( key.equals(VCFConstants.GENOTYPE_KEY) ) { + if ( !g.isAvailable() ) { + throw new ReviewedStingException("GTs cannot be missing for some samples if they are available for others in the record"); + } + + writeAllele(g.getAllele(0), alleleMap); + for (int i = 1; i < g.getPloidy(); i++) { + mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED); + writeAllele(g.getAllele(i), alleleMap); + } + continue; + } Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4; @@ -423,7 +417,7 @@ private void addGenotypeData(VariantContext vc, Map alleleMap, L VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key); if ( metaData != null ) { - int numInFormatField = metaData.getCount(); + int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size()); if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) { // If we have a missing field but multiple values are expected, we need to construct a new string with all fields. // For example, if Number=2, the string has to be ".,." @@ -450,9 +444,10 @@ private void addGenotypeData(VariantContext vc, Map alleleMap, L break; } - for (String s : attrs ) { - mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR); - mWriter.write(s); + for (int i = 0; i < attrs.size(); i++) { + if ( i > 0 || genotypeFormatKeys.contains(VCFConstants.GENOTYPE_KEY) ) + mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR); + mWriter.write(attrs.get(i)); } } } @@ -498,10 +493,13 @@ else if ( val instanceof List ) { private static List calcVCFGenotypeKeys(VariantContext vc) { Set keys = new HashSet(); + boolean sawGoodGT = false; boolean sawGoodQual = false; boolean sawGenotypeFilter = false; for ( Genotype g : vc.getGenotypes().values() ) { keys.addAll(g.getAttributes().keySet()); + if ( g.isAvailable() ) + sawGoodGT = true; if ( g.hasNegLog10PError() ) sawGoodQual = true; if (g.isFiltered() && g.isCalled()) @@ -514,7 +512,17 @@ private static List calcVCFGenotypeKeys(VariantContext vc) { if (sawGenotypeFilter) keys.add(VCFConstants.GENOTYPE_FILTER_KEY); - return ParsingUtils.sortList(new ArrayList(keys)); + List sortedList = ParsingUtils.sortList(new ArrayList(keys)); + + // make sure the GT is first + if ( sawGoodGT ) { + List newList = new ArrayList(sortedList.size()+1); + newList.add(VCFConstants.GENOTYPE_KEY); + newList.addAll(sortedList); + sortedList = newList; + } + + return sortedList; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java index f3c99e963d..c29f2ba8ba 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java @@ -141,8 +141,6 @@ public Map createGenotypeMap(String str, List alleles, boolean missing = i >= GTValueSplitSize; if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) { - if (i != 0) - generateException("Saw GT at position " + i + ", but it must be at the first position for genotypes"); genotypeAlleleLocation = i; } else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) { GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]); @@ -156,12 +154,13 @@ public Map createGenotypeMap(String str, List alleles, } } - // check to make sure we found a gentoype field - if (genotypeAlleleLocation < 0) generateException("Unable to find required field GT for the record; we don't yet support a missing GT field"); + // check to make sure we found a genotype field + if ( genotypeAlleleLocation < 0 ) + generateException("Unable to find the GT field for the record; the GT field is required"); + if ( genotypeAlleleLocation > 0 ) + generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes"); - // todo -- assuming allele list length in the single digits is bad. Fix me. - // Check for > 1 for haploid genotypes - boolean phased = GTValueArray[genotypeAlleleLocation].length() > 1 && GTValueArray[genotypeAlleleLocation].charAt(1) == '|'; + boolean phased = GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1; // add it to the list try { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java new file mode 100644 index 0000000000..a9de949d85 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAltHeaderLine.java @@ -0,0 +1,28 @@ +package org.broadinstitute.sting.utils.codecs.vcf; + +/** + * @author ebanks + * A class representing a key=value entry for ALT fields in the VCF header + */ +public class VCFAltHeaderLine extends VCFSimpleHeaderLine { + + /** + * create a VCF filter header line + * + * @param name the name for this header line + * @param description the description for this header line + */ + public VCFAltHeaderLine(String name, String description) { + super(name, description, SupportedHeaderLineType.ALT); + } + + /** + * create a VCF info header line + * + * @param line the header line + * @param version the vcf header version + */ + protected VCFAltHeaderLine(String line, VCFHeaderVersion version) { + super(line, version, SupportedHeaderLineType.ALT); + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java index 0fb2940bb1..05fff5d9e9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java @@ -145,8 +145,6 @@ public Map createGenotypeMap(String str, List alleles, // todo -- all of these on the fly parsing of the missing value should be static constants if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) { - if (i != 0) - generateException("Saw GT at position " + i + ", but it must be at the first position for genotypes"); genotypeAlleleLocation = i; } else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) { GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]); @@ -160,22 +158,24 @@ public Map createGenotypeMap(String str, List alleles, } } - // check to make sure we found a gentoype field - // TODO -- This is no longer required in v4.1 - if (genotypeAlleleLocation < 0) generateException("Unable to find required field GT for the record; we don't yet support a missing GT field"); + // check to make sure we found a genotype field if we are a VCF4.0 file + if ( version == VCFHeaderVersion.VCF4_0 && genotypeAlleleLocation == -1 ) + generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0"); + if ( genotypeAlleleLocation > 0 ) + generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present"); - // todo -- assuming allele list length in the single digits is bad. Fix me. - // Check for > 1 for haploid genotypes - boolean phased = GTValueArray[genotypeAlleleLocation].length() > 1 && GTValueArray[genotypeAlleleLocation].charAt(1) == '|'; + List GTalleles = (genotypeAlleleLocation == -1 ? null : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap)); + boolean phased = genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1; // add it to the list try { - genotypes.put(sampleName, new Genotype(sampleName, - parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap), - GTQual, - genotypeFilters, - gtAttributes, - phased)); + genotypes.put(sampleName, + new Genotype(sampleName, + GTalleles, + GTQual, + genotypeFilters, + gtAttributes, + phased)); } catch (TribbleException e) { throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java index a799161ad4..bb822f2edf 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java @@ -24,6 +24,8 @@ package org.broadinstitute.sting.utils.codecs.vcf; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + import java.util.Arrays; import java.util.LinkedHashMap; import java.util.Map; @@ -43,26 +45,43 @@ public enum SupportedHeaderLineType { // the field types private String name; - private int count; + private int count = -1; + private VCFHeaderLineCount countType; private String description; private VCFHeaderLineType type; // access methods public String getName() { return name; } - public int getCount() { return count; } public String getDescription() { return description; } public VCFHeaderLineType getType() { return type; } + public VCFHeaderLineCount getCountType() { return countType; } + public int getCount() { + if ( countType != VCFHeaderLineCount.INTEGER ) + throw new ReviewedStingException("Asking for header line count when type is not an integer"); + return count; + } - // - public void setNumberToUnbounded() { this.count = UNBOUNDED; } + // utility method + public int getCount(int numAltAlleles) { + int myCount; + switch ( countType ) { + case INTEGER: myCount = count; break; + case UNBOUNDED: myCount = -1; break; + case A: myCount = numAltAlleles; break; + case G: myCount = ((numAltAlleles + 1) * (numAltAlleles + 2) / 2); break; + default: throw new ReviewedStingException("Unknown count type: " + countType); + } + return myCount; + } + + public void setNumberToUnbounded() { + countType = VCFHeaderLineCount.UNBOUNDED; + count = -1; + } // our type of line, i.e. format, info, etc private final SupportedHeaderLineType lineType; - // line numerical values are allowed to be unbounded (or unknown), which is - // marked with a dot (.) - public static final int UNBOUNDED = -1; // the value we store internally for unbounded types - /** * create a VCF format header line * @@ -70,10 +89,12 @@ public enum SupportedHeaderLineType { * @param count the count for this header line * @param type the type for this header line * @param description the description for this header line + * @param lineType the header line type */ protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) { super(lineType.toString(), ""); this.name = name; + this.countType = VCFHeaderLineCount.INTEGER; this.count = count; this.type = type; this.description = description; @@ -81,20 +102,53 @@ protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, validate(); } + /** + * create a VCF format header line + * + * @param name the name for this header line + * @param count the count type for this header line + * @param type the type for this header line + * @param description the description for this header line + * @param lineType the header line type + */ + protected VCFCompoundHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) { + super(lineType.toString(), ""); + this.name = name; + this.countType = count; + this.type = type; + this.description = description; + this.lineType = lineType; + validate(); + } + /** * create a VCF format header line * * @param line the header line * @param version the VCF header version + * @param lineType the header line type * */ protected VCFCompoundHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) { super(lineType.toString(), ""); Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description")); name = mapping.get("ID"); - count = (version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) ? - mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) : - mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")); + count = -1; + final String numberStr = mapping.get("Number"); + if ( numberStr.equals(VCFConstants.PER_ALLELE_COUNT) ) { + countType = VCFHeaderLineCount.A; + } else if ( numberStr.equals(VCFConstants.PER_GENOTYPE_COUNT) ) { + countType = VCFHeaderLineCount.G; + } else if ( ((version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) && + numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v4)) || + ((version == VCFHeaderVersion.VCF3_2 || version == VCFHeaderVersion.VCF3_3) && + numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v3)) ) { + countType = VCFHeaderLineCount.UNBOUNDED; + } else { + countType = VCFHeaderLineCount.INTEGER; + count = Integer.valueOf(numberStr); + + } type = VCFHeaderLineType.valueOf(mapping.get("Type")); if (type == VCFHeaderLineType.Flag && !allowFlagValues()) throw new IllegalArgumentException("Flag is an unsupported type for this kind of field"); @@ -121,7 +175,15 @@ private void validate() { protected String toStringEncoding() { Map map = new LinkedHashMap(); map.put("ID", name); - map.put("Number", count == UNBOUNDED ? VCFConstants.UNBOUNDED_ENCODING_v4 : count); + Object number; + switch ( countType ) { + case A: number = VCFConstants.PER_ALLELE_COUNT; break; + case G: number = VCFConstants.PER_GENOTYPE_COUNT; break; + case UNBOUNDED: number = VCFConstants.UNBOUNDED_ENCODING_v4; break; + case INTEGER: + default: number = count; + } + map.put("Number", number); map.put("Type", type); map.put("Description", description); return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map); @@ -136,15 +198,13 @@ public boolean equals(Object o) { if ( !(o instanceof VCFCompoundHeaderLine) ) return false; VCFCompoundHeaderLine other = (VCFCompoundHeaderLine)o; - return name.equals(other.name) && - count == other.count && - description.equals(other.description) && - type == other.type && - lineType == other.lineType; + return equalsExcludingDescription(other) && + description.equals(other.description); } public boolean equalsExcludingDescription(VCFCompoundHeaderLine other) { return count == other.count && + countType == other.countType && type == other.type && lineType == other.lineType && name.equals(other.name); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java index 695c46c27c..91cf86c702 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java @@ -99,6 +99,8 @@ public final class VCFConstants { public static final String MISSING_DEPTH_v3 = "-1"; public static final String UNBOUNDED_ENCODING_v4 = "."; public static final String UNBOUNDED_ENCODING_v3 = "-1"; + public static final String PER_ALLELE_COUNT = "A"; + public static final String PER_GENOTYPE_COUNT = "G"; public static final String EMPTY_ALLELE = "."; public static final String EMPTY_GENOTYPE = "./."; public static final double MAX_GENOTYPE_QUAL = 99.0; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java index 9176fc16e6..418b800742 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java @@ -1,19 +1,10 @@ package org.broadinstitute.sting.utils.codecs.vcf; -import java.util.Arrays; -import java.util.LinkedHashMap; -import java.util.Map; - - /** * @author ebanks * A class representing a key=value entry for FILTER fields in the VCF header */ -public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine { - - private String name; - private String description; - +public class VCFFilterHeaderLine extends VCFSimpleHeaderLine { /** * create a VCF filter header line @@ -22,12 +13,7 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader * @param description the description for this header line */ public VCFFilterHeaderLine(String name, String description) { - super("FILTER", ""); - this.name = name; - this.description = description; - - if ( name == null || description == null ) - throw new IllegalArgumentException(String.format("Invalid VCFCompoundHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description )); + super(name, description, SupportedHeaderLineType.FILTER); } /** @@ -37,34 +23,6 @@ public VCFFilterHeaderLine(String name, String description) { * @param version the vcf header version */ protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) { - super("FILTER", ""); - Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description")); - name = mapping.get("ID"); - description = mapping.get("Description"); - if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided - description = UNBOUND_DESCRIPTION; - } - - protected String toStringEncoding() { - Map map = new LinkedHashMap(); - map.put("ID", name); - map.put("Description", description); - return "FILTER=" + VCFHeaderLine.toStringEncoding(map); - } - - public boolean equals(Object o) { - if ( !(o instanceof VCFFilterHeaderLine) ) - return false; - VCFFilterHeaderLine other = (VCFFilterHeaderLine)o; - return name.equals(other.name) && - description.equals(other.description); - } - - public String getName() { - return name; - } - - public String getDescription() { - return description; + super(line, version, SupportedHeaderLineType.FILTER); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java index 352be3e97a..474c8dd143 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java @@ -16,6 +16,10 @@ public VCFFormatHeaderLine(String name, int count, VCFHeaderLineType type, Strin throw new IllegalArgumentException("Flag is an unsupported type for format fields"); } + public VCFFormatHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) { + super(name, count, type, description, SupportedHeaderLineType.FORMAT); + } + protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) { super(line, version, SupportedHeaderLineType.FORMAT); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineCount.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineCount.java new file mode 100644 index 0000000000..d615c7c78d --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineCount.java @@ -0,0 +1,8 @@ +package org.broadinstitute.sting.utils.codecs.vcf; + +/** + * the count encodings we use for fields in VCF header lines + */ +public enum VCFHeaderLineCount { + INTEGER, A, G, UNBOUNDED; +} diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java index 135a5c1a16..9b20f38a15 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java @@ -13,6 +13,10 @@ public VCFInfoHeaderLine(String name, int count, VCFHeaderLineType type, String super(name, count, type, description, SupportedHeaderLineType.INFO); } + public VCFInfoHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) { + super(name, count, type, description, SupportedHeaderLineType.INFO); + } + protected VCFInfoHeaderLine(String line, VCFHeaderVersion version) { super(line, version, SupportedHeaderLineType.INFO); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFParser.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFParser.java index dac9964949..1dba351e2a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFParser.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFParser.java @@ -2,7 +2,9 @@ import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; -import java.util.*; + +import java.util.List; +import java.util.Map; /** diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java new file mode 100644 index 0000000000..152043f289 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java @@ -0,0 +1,81 @@ +package org.broadinstitute.sting.utils.codecs.vcf; + +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.Map; + + +/** + * @author ebanks + * A class representing a key=value entry for simple VCF header types + */ +public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine { + + public enum SupportedHeaderLineType { + FILTER, ALT; + } + + private String name; + private String description; + + // our type of line, i.e. filter, alt, etc + private final SupportedHeaderLineType lineType; + + + /** + * create a VCF filter header line + * + * @param name the name for this header line + * @param description the description for this header line + * @param lineType the header line type + */ + public VCFSimpleHeaderLine(String name, String description, SupportedHeaderLineType lineType) { + super(lineType.toString(), ""); + this.lineType = lineType; + this.name = name; + this.description = description; + + if ( name == null || description == null ) + throw new IllegalArgumentException(String.format("Invalid VCFSimpleHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description )); + } + + /** + * create a VCF info header line + * + * @param line the header line + * @param version the vcf header version + * @param lineType the header line type + */ + protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) { + super(lineType.toString(), ""); + this.lineType = lineType; + Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description")); + name = mapping.get("ID"); + description = mapping.get("Description"); + if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided + description = UNBOUND_DESCRIPTION; + } + + protected String toStringEncoding() { + Map map = new LinkedHashMap(); + map.put("ID", name); + map.put("Description", description); + return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map); + } + + public boolean equals(Object o) { + if ( !(o instanceof VCFSimpleHeaderLine) ) + return false; + VCFSimpleHeaderLine other = (VCFSimpleHeaderLine)o; + return name.equals(other.name) && + description.equals(other.description); + } + + public String getName() { + return name; + } + + public String getDescription() { + return description; + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index ecede068e4..f43891e77c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -25,10 +25,10 @@ package org.broadinstitute.sting.utils.codecs.vcf; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; @@ -180,19 +180,4 @@ else if ( line instanceof VCFFilterHeaderLine) { return new HashSet(map.values()); } - - /** - * return a set of supported format lines; what we currently support for output in the genotype fields of a VCF - * @return a set of VCF format lines - */ - public static Set getSupportedHeaderStrings() { - Set result = new HashSet(); - result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); - result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality")); - result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)")); - result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, -1, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; if site is not biallelic, number of likelihoods if n*(n+1)/2")); - - return result; - } - } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java b/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java index a36ed9ac63..d280ac8049 100755 --- a/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java @@ -25,7 +25,8 @@ package org.broadinstitute.sting.utils.collections; -import java.util.*; +import java.util.HashMap; +import java.util.Map; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/collections/RODMergingIterator.java b/public/java/src/org/broadinstitute/sting/utils/collections/RODMergingIterator.java index 3a731c2fd9..6d6cb8272a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/collections/RODMergingIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/collections/RODMergingIterator.java @@ -27,10 +27,13 @@ import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; +import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.PriorityQueue; public class RODMergingIterator implements Iterator, Iterable { PriorityQueue queue = new PriorityQueue(); diff --git a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java index 41de5ef96b..bba47c76cb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java @@ -26,15 +26,18 @@ package org.broadinstitute.sting.utils.duplicates; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; -import java.util.List; import java.util.Arrays; +import java.util.List; public class DupUtils { private static SAMRecord tmpCopyRead(SAMRecord read) { diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 0be4bec912..7eab6f6c97 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -28,9 +28,8 @@ import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.util.Arrays; @@ -154,6 +153,16 @@ public MalformedBAM(SAMRecord read, String message) { } } + public static class MalformedVCF extends UserException { + public MalformedVCF(String message, String line) { + super(String.format("The provided VCF file is malformed at line %s: %s", line, message)); + } + + public MalformedVCF(String message, int lineNo) { + super(String.format("The provided VCF file is malformed at line nmber %d: %s", lineNo, message)); + } + } + public static class ReadMissingReadGroup extends MalformedBAM { public ReadMissingReadGroup(SAMRecord read) { super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName())); diff --git a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java index 0c5085cc7c..43ef4aa741 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java +++ b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java @@ -25,15 +25,16 @@ package org.broadinstitute.sting.utils.fasta; import net.sf.picard.PicardException; -import net.sf.picard.reference.*; +import net.sf.picard.reference.FastaSequenceIndex; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.picard.reference.ReferenceSequence; import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.FileNotFoundException; import java.util.Arrays; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - /** * A caching version of the IndexedFastaSequenceFile that avoids going to disk as often as the raw indexer. * diff --git a/public/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java b/public/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java index cbfba848cc..1c2cfe2e1d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.utils.genotype; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.MathUtils; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java index 31791e8058..a17e814615 100755 --- a/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java @@ -24,14 +24,14 @@ package org.broadinstitute.sting.utils.genotype; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.Allele; -import java.util.*; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.List; public class Haplotype { protected byte[] bases = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/help/ApplicationDetails.java b/public/java/src/org/broadinstitute/sting/utils/help/ApplicationDetails.java index d9e74640fe..483c874dcd 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/ApplicationDetails.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/ApplicationDetails.java @@ -25,13 +25,13 @@ package org.broadinstitute.sting.utils.help; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.commandline.CommandLineProgram; +import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.List; -import java.util.Collections; import java.io.IOException; +import java.util.Collections; +import java.util.List; /** * Contains details additional details that the program can diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DescriptionTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/DescriptionTaglet.java index 68633a2b3a..65c3320485 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/DescriptionTaglet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DescriptionTaglet.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.utils.help; import com.sun.tools.doclets.Taglet; -import com.sun.javadoc.Tag; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DisplayNameTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/DisplayNameTaglet.java index be6f7f3eb9..6c6dad736d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/DisplayNameTaglet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DisplayNameTaglet.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.utils.help; import com.sun.tools.doclets.Taglet; -import com.sun.javadoc.Tag; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java index 493f26e763..a9d71ef986 100755 --- a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java @@ -25,22 +25,16 @@ package org.broadinstitute.sting.utils.help; +import org.apache.log4j.Logger; import org.broadinstitute.sting.commandline.ArgumentDefinition; import org.broadinstitute.sting.commandline.ArgumentDefinitionGroup; import org.broadinstitute.sting.commandline.ArgumentDefinitions; -import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.Utils; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; -import java.util.Formatter; -import java.util.List; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.Comparator; -import java.util.Collection; -import java.util.Collections; import java.text.DateFormat; import java.text.SimpleDateFormat; +import java.util.*; /** * Print out help for Sting command-line applications. */ diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpTaglet.java index b962664eb7..b350b1a29a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/HelpTaglet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpTaglet.java @@ -1,9 +1,7 @@ package org.broadinstitute.sting.utils.help; -import com.sun.tools.doclets.Taglet; import com.sun.javadoc.Tag; - -import java.util.Map; +import com.sun.tools.doclets.Taglet; /** * Basic functionality for the help taglet. diff --git a/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java index 4afac69c3e..6ee12d42ec 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java @@ -26,14 +26,16 @@ package org.broadinstitute.sting.utils.help; import com.sun.javadoc.*; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.*; -import java.util.*; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.classloader.JVMUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.gatk.walkers.Walker; +import java.util.HashSet; +import java.util.Properties; +import java.util.Scanner; +import java.util.Set; /** * Extracts certain types of javadoc (specifically package and class descriptions) and makes them available diff --git a/public/java/src/org/broadinstitute/sting/utils/help/SummaryTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/SummaryTaglet.java index 036bbec4fa..db8b559406 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/SummaryTaglet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/SummaryTaglet.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.utils.help; import com.sun.tools.doclets.Taglet; -import com.sun.javadoc.Tag; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/utils/instrumentation/Sizeof.java b/public/java/src/org/broadinstitute/sting/utils/instrumentation/Sizeof.java index 3420c98764..73a29ba4f7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/instrumentation/Sizeof.java +++ b/public/java/src/org/broadinstitute/sting/utils/instrumentation/Sizeof.java @@ -27,9 +27,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.lang.instrument.Instrumentation; +import java.lang.reflect.Array; import java.lang.reflect.Field; import java.lang.reflect.Modifier; -import java.lang.reflect.Array; import java.util.IdentityHashMap; /** diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java index e722ac196f..988240ef94 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java @@ -25,16 +25,16 @@ package org.broadinstitute.sting.utils.interval; +import org.broadinstitute.sting.gatk.iterators.PushbackIterator; +import org.broadinstitute.sting.gatk.refdata.utils.StringToGenomeLocIteratorAdapter; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.gatk.iterators.PushbackIterator; -import org.broadinstitute.sting.gatk.refdata.utils.StringToGenomeLocIteratorAdapter; -import java.util.Iterator; import java.io.File; import java.io.FileNotFoundException; +import java.util.Iterator; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index 80dc354559..f551e1368f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -5,17 +5,17 @@ import net.sf.samtools.SAMFileHeader; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.bed.BedParser; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; +import java.io.File; import java.io.IOException; import java.util.*; -import java.io.File; /** * Parse text representations of interval strings that diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIterator.java b/public/java/src/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIterator.java index 31a2f41fbf..7e87ce8b54 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIterator.java @@ -26,11 +26,10 @@ package org.broadinstitute.sting.utils.interval; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import java.util.PriorityQueue; import java.util.Iterator; +import java.util.PriorityQueue; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java b/public/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java index 0b63d582ec..29ffb13e4a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/OverlappingIntervalIterator.java @@ -25,9 +25,8 @@ package org.broadinstitute.sting.utils.interval; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; +import org.broadinstitute.sting.utils.GenomeLoc; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java index 5433024469..3821c9c8ab 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java @@ -24,19 +24,16 @@ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.utils.HasGenomeLocation; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.gatk.iterators.IterableIterator; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.StingException; import java.util.*; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.exceptions.StingException; - /** * A generic implementation of read-backed pileups. * diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java index b6f3e9f09a..26e66014c1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java @@ -1,8 +1,5 @@ package org.broadinstitute.sting.utils.pileup; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.BaseUtils; import net.sf.samtools.SAMRecord; import java.util.Arrays; diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java index 6c855c1c7f..f7d237401c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java @@ -1,6 +1,9 @@ package org.broadinstitute.sting.utils.pileup; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; /** * An easy to access fragment-based pileup, which contains two separate pileups. The first diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java b/public/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java index d8af2ea8fc..7005cf8699 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java @@ -27,9 +27,9 @@ import net.sf.picard.util.PeekableIterator; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import java.util.PriorityQueue; import java.util.Comparator; import java.util.Iterator; +import java.util.PriorityQueue; /** * Merges multiple pileups broken down by sample. diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 51e02bf74a..66e1afecb5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -1,9 +1,10 @@ package org.broadinstitute.sting.utils.pileup; -import org.broadinstitute.sting.utils.*; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.sam.ReadUtils; -import com.google.java.contract.*; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java index a32aa56458..8d43a368aa 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java @@ -24,15 +24,13 @@ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; -import java.util.Iterator; -import java.util.List; import java.util.Collection; - -import net.sf.samtools.SAMRecord; +import java.util.List; /** * A clean interface for working with extended event pileups. diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java index a1a08c95f0..31d29430a9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java @@ -23,15 +23,14 @@ */ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.*; -import net.sf.samtools.SAMRecord; - public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup implements ReadBackedExtendedEventPileup { private int nInsertions; private int maxDeletionLength; // cached value of the length of the longest deletion observed at the site diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java index c52cc0b527..36b8a8c654 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java @@ -24,14 +24,13 @@ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.iterators.IterableIterator; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.HasGenomeLocation; -import java.util.List; import java.util.Collection; +import java.util.List; /** * A data retrieval interface for accessing parts of the pileup. diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java index 70eba577cc..e5b0549612 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java @@ -23,9 +23,9 @@ */ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.GenomeLoc; -import net.sf.samtools.SAMRecord; import java.util.List; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index 4e4294b20e..344eccb83d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -25,15 +25,16 @@ package org.broadinstitute.sting.utils.sam; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMRecord; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.ArrayList; import java.util.Arrays; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java index 5812c9aec8..5f7db458a7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java @@ -1,15 +1,13 @@ package org.broadinstitute.sting.utils.sam; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.traversals.TraversalEngine; -import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.apache.log4j.Logger; - -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileHeader; +import org.broadinstitute.sting.gatk.walkers.Walker; /* diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java index ce6ca570ca..adf60b16ba 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java @@ -1,15 +1,17 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.*; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.io.InputStream; import java.io.ByteArrayInputStream; +import java.io.InputStream; import java.io.UnsupportedEncodingException; -import java.util.*; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; /** * User: hanna * Date: Jun 11, 2009 diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java index b683f52478..62e371bc03 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java @@ -1,9 +1,8 @@ package org.broadinstitute.sting.utils.sam; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.ReadProperties; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java index 4f5dcca619..2b359fe6a7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java @@ -1,13 +1,12 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.List; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - /* * Copyright (c) 2009 The Broad Institute diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java index 01f1dfe960..31deb7535b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java @@ -26,8 +26,6 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; public class ComparableSAMRecord implements Comparable { diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java index 2b8ac387c4..c7ffcab0c0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.utils.sam; -import net.sf.samtools.*; +import net.sf.samtools.SAMReadGroupRecord; /** * @author ebanks diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index f3d9edab9c..517f9f75d6 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -1,12 +1,14 @@ package org.broadinstitute.sting.utils.sam; -import java.lang.reflect.Method; -import java.util.*; - import net.sf.samtools.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + /** * @author ebanks * GATKSAMRecord diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 080762039f..6c15910b1b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -25,13 +25,14 @@ package org.broadinstitute.sting.utils.sam; -import com.google.java.contract.*; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import net.sf.samtools.*; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.*; import java.io.File; +import java.util.*; /** * A miscellaneous collection of utilities for working with SAM files, headers, etc. diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/SAMFileReaderBuilder.java b/public/java/src/org/broadinstitute/sting/utils/sam/SAMFileReaderBuilder.java index f8a0ca6a70..bb9db5d980 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/SAMFileReaderBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/SAMFileReaderBuilder.java @@ -26,11 +26,10 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.SAMFileReader; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - /** * Allows the user to steadily accumulate information about what * components go into a SAM file writer, ultimately using this diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/SimplifyingSAMFileWriter.java b/public/java/src/org/broadinstitute/sting/utils/sam/SimplifyingSAMFileWriter.java index df2010e8bb..60e1d99487 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/SimplifyingSAMFileWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/SimplifyingSAMFileWriter.java @@ -3,13 +3,6 @@ import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.util.Iterator; -import java.util.NoSuchElementException; /** * XXX diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index 9beb7895b3..f6aa882ad9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -26,7 +26,6 @@ import org.broadinstitute.sting.commandline.ParsingEngine; import org.broadinstitute.sting.commandline.Tags; -import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; diff --git a/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java index 803d6ac0f0..1d4251542c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java @@ -28,11 +28,11 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import java.io.IOException; +import java.io.StringReader; import java.util.*; -import java.util.regex.Pattern; import java.util.regex.Matcher; -import java.io.StringReader; -import java.io.IOException; +import java.util.regex.Pattern; /** * Common utilities for dealing with text formatting. diff --git a/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java b/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java index 0642563886..52b6f3b011 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java @@ -25,10 +25,10 @@ package org.broadinstitute.sting.utils.text; +import java.io.*; import java.util.Iterator; -import java.util.List; import java.util.LinkedList; -import java.io.*; +import java.util.List; /** * Support for Python-like xreadlines() function as a class. This is an iterator and iterable over diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/FileBackedGenomeLocProcessingTracker.java b/public/java/src/org/broadinstitute/sting/utils/threading/FileBackedGenomeLocProcessingTracker.java index cae099eeb6..3763ec67dd 100644 --- a/public/java/src/org/broadinstitute/sting/utils/threading/FileBackedGenomeLocProcessingTracker.java +++ b/public/java/src/org/broadinstitute/sting/utils/threading/FileBackedGenomeLocProcessingTracker.java @@ -1,18 +1,14 @@ package org.broadinstitute.sting.utils.threading; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.*; -import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.List; -import java.util.concurrent.locks.ReentrantLock; /** * Keeps a copy of the processing locks in a file diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/NoOpGenomeLocProcessingTracker.java b/public/java/src/org/broadinstitute/sting/utils/threading/NoOpGenomeLocProcessingTracker.java index 4e61ef9e19..ad2a6d31bb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/threading/NoOpGenomeLocProcessingTracker.java +++ b/public/java/src/org/broadinstitute/sting/utils/threading/NoOpGenomeLocProcessingTracker.java @@ -1,9 +1,5 @@ package org.broadinstitute.sting.utils.threading; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileLock.java b/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileLock.java index 3eb2be96b8..0f47da4138 100644 --- a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileLock.java +++ b/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileLock.java @@ -6,10 +6,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; -import java.io.FileNotFoundException; import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.channels.*; /** * User: depristo diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileThreadSafeLock.java b/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileThreadSafeLock.java index dec69f7c2d..d70879a0a5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileThreadSafeLock.java +++ b/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileThreadSafeLock.java @@ -2,13 +2,8 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.channels.*; /** * User: depristo diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java index a9ba461594..c3f437f11b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.utils.variantcontext; -import java.util.Arrays; -import java.util.List; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; +import java.util.List; /** * Immutable representation of an allele @@ -108,7 +108,7 @@ private Allele(byte[] bases, boolean isRef) { this.bases = bases; if ( ! acceptableAlleleBases(bases) ) - throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases)); + throw new IllegalArgumentException("Unexpected base in allele bases \'" + new String(bases)+"\'"); } private Allele(String bases, boolean isRef) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 3a87f11965..0b5976c3c9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -3,6 +3,7 @@ import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.*; @@ -19,12 +20,14 @@ public class Genotype { protected InferredGeneticContext commonInfo; public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR; protected List alleles = null; // new ArrayList(); + protected Type type = null; protected boolean isPhased = false; - private boolean filtersWereAppliedToContext; + protected boolean filtersWereAppliedToContext; public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased) { - this.alleles = Collections.unmodifiableList(alleles); + if ( alleles != null ) + this.alleles = Collections.unmodifiableList(alleles); commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes); filtersWereAppliedToContext = filters != null; this.isPhased = isPhased; @@ -66,6 +69,9 @@ public List getAlleles() { } public List getAlleles(Allele allele) { + if ( getType() == Type.UNAVAILABLE ) + throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype"); + List al = new ArrayList(); for ( Allele a : alleles ) if ( a.equals(allele) ) @@ -75,6 +81,8 @@ public List getAlleles(Allele allele) { } public Allele getAllele(int i) { + if ( getType() == Type.UNAVAILABLE ) + throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype"); return alleles.get(i); } @@ -89,10 +97,21 @@ public enum Type { NO_CALL, HOM_REF, HET, - HOM_VAR + HOM_VAR, + UNAVAILABLE } public Type getType() { + if ( type == null ) { + type = determineType(); + } + return type; + } + + protected Type determineType() { + if ( alleles == null ) + return Type.UNAVAILABLE; + Allele firstAllele = alleles.get(0); if ( firstAllele.isNoCall() ) { @@ -122,7 +141,8 @@ public Type getType() { * @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF) */ public boolean isNoCall() { return getType() == Type.NO_CALL; } - public boolean isCalled() { return getType() != Type.NO_CALL; } + public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; } + public boolean isAvailable() { return getType() != Type.UNAVAILABLE; } // // Useful methods for getting genotype likelihoods for a genotype object, if present @@ -157,8 +177,8 @@ private GenotypeLikelihoods getLikelihoods(String key, boolean asPL) { } public void validate() { - if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles"); - if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles"); + if ( alleles == null ) return; + if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0"); int nNoCalls = 0; for ( Allele allele : alleles ) { @@ -175,6 +195,9 @@ public String getGenotypeString() { } public String getGenotypeString(boolean ignoreRefState) { + if ( alleles == null ) + return null; + // Notes: // 1. Make sure to use the appropriate separator depending on whether the genotype is phased // 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java index 3feaf5e1c8..dba16cf86b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java @@ -24,8 +24,8 @@ package org.broadinstitute.sting.utils.variantcontext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broad.tribble.TribbleException; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; public class GenotypeLikelihoods { public static final boolean CAP_PLS = false; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java index 4efba88258..a191670a4a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java @@ -1,7 +1,10 @@ package org.broadinstitute.sting.utils.variantcontext; -import java.util.*; +import java.util.Collection; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; /** * Mutable version of VariantContext diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index da80a34311..eab392c4d9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -2,8 +2,8 @@ import org.broad.tribble.Feature; import org.broad.tribble.TribbleException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broad.tribble.util.ParsingUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFParser; import java.util.*; @@ -1206,9 +1206,11 @@ private void validateGenotypes() { if ( ! name.equals(g.getSampleName()) ) throw new IllegalStateException("Bound sample name " + name + " does not equal the name of the genotype " + g.getSampleName()); - for ( Allele gAllele : g.getAlleles() ) { - if ( ! hasAllele(gAllele) && gAllele.isCalled() ) - throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles); + if ( g.isAvailable() ) { + for ( Allele gAllele : g.getAlleles() ) { + if ( ! hasAllele(gAllele) && gAllele.isCalled() ) + throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles); + } } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 5d58954aac..2126003607 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -23,21 +23,26 @@ package org.broadinstitute.sting.utils.variantcontext; -import java.io.Serializable; -import java.util.*; - -import com.google.java.contract.*; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.util.StringUtil; -import org.apache.commons.jexl2.*; +import org.apache.commons.jexl2.Expression; +import org.apache.commons.jexl2.JexlEngine; import org.broad.tribble.util.popgen.HardyWeinbergCalculation; -import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec; import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.io.Serializable; +import java.util.*; + public class VariantContextUtils { final public static JexlEngine engine = new JexlEngine(); static { @@ -284,8 +289,8 @@ public static double computeHardyWeinbergPvalue(VariantContext vc) { /** * Returns a newly allocated VC that is the same as VC, but without genotypes - * @param vc - * @return + * @param vc variant context + * @return new VC without genotypes */ @Requires("vc != null") @Ensures("result != null") @@ -298,8 +303,8 @@ public static VariantContext sitesOnlyVariantContext(VariantContext vc) { /** * Returns a newly allocated list of VC, where each VC is the same as the input VCs, but without genotypes - * @param vcs - * @return + * @param vcs collection of VCs + * @return new VCs without genotypes */ @Requires("vcs != null") @Ensures("result != null") @@ -357,9 +362,9 @@ public enum FilteredRecordMergeType { * information per genotype. The master merge will add the PQ information from each genotype record, where * appropriate, to the master VC. * - * @param unsortedVCs - * @param masterName - * @return + * @param unsortedVCs collection of VCs + * @param masterName name of master VC + * @return master-merged VC */ public static VariantContext masterMerge(Collection unsortedVCs, String masterName) { VariantContext master = findMaster(unsortedVCs, masterName); @@ -430,11 +435,15 @@ public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collec * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with * the sample name * - * @param unsortedVCs - * @param priorityListOfVCs - * @param filteredRecordMergeType - * @param genotypeMergeOptions - * @return + * @param genomeLocParser loc parser + * @param unsortedVCs collection of unsorted VCs + * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs + * @param filteredRecordMergeType merge type for filtered records + * @param genotypeMergeOptions merge option for genotypes + * @param annotateOrigin should we annotate the set it came from? + * @param printMessages should we print messages? + * @param inputRefBase the ref base + * @return new VariantContext */ public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, @@ -443,6 +452,24 @@ public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collec return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false); } + /** + * Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided. + * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with + * the sample name + * + * @param genomeLocParser loc parser + * @param unsortedVCs collection of unsorted VCs + * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs + * @param filteredRecordMergeType merge type for filtered records + * @param genotypeMergeOptions merge option for genotypes + * @param annotateOrigin should we annotate the set it came from? + * @param printMessages should we print messages? + * @param inputRefBase the ref base + * @param setKey the key name of the set + * @param filteredAreUncalled are filtered records uncalled? + * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? + * @return new VariantContext + */ public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey, @@ -465,7 +492,7 @@ public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collec if ( ! filteredAreUncalled || vc.isNotFiltered() ) VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false)); } - if ( VCs.size() == 0 ) // everything is filtered out and we're filteredareUncalled + if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled return null; // establish the baseline info from the first VC @@ -610,6 +637,17 @@ else if ( nVariant == 0 ) // everyone was reference return merged; } + public static Map> separateVariantContextsByType(Collection VCs) { + HashMap> mappedVCs = new HashMap>(); + for ( VariantContext vc : VCs ) { + if ( !mappedVCs.containsKey(vc.getType()) ) + mappedVCs.put(vc.getType(), new ArrayList()); + mappedVCs.get(vc.getType()).add(vc); + } + + return mappedVCs; + } + private static class AlleleMapper { private VariantContext vc = null; private Map map = null; @@ -829,6 +867,7 @@ public static boolean isTransversion(VariantContext context) { /** * create a genome location, given a variant context + * @param genomeLocParser parser * @param vc the variant context * @return the genomeLoc */ diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java index c5a3b6f2ad..a59ed7abe3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java @@ -29,7 +29,10 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; -import java.util.*; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; /** * diff --git a/public/java/src/org/broadinstitute/sting/utils/wiggle/WiggleWriter.java b/public/java/src/org/broadinstitute/sting/utils/wiggle/WiggleWriter.java index ab91d0e2ed..5782c2704c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/wiggle/WiggleWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/wiggle/WiggleWriter.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.utils.wiggle; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.*; diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index 61bb8b34b4..ef46d4bff1 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -4,6 +4,7 @@ import org.apache.log4j.*; import org.apache.log4j.spi.LoggingEvent; import org.broadinstitute.sting.commandline.CommandLineUtils; +import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.testng.Assert; @@ -12,6 +13,7 @@ import java.math.BigInteger; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.*; /** * @@ -78,11 +80,6 @@ public abstract class BaseTest { public static final String networkTempDir = "/broad/shptmp/"; public static final File networkTempDirFile = new File(networkTempDir); - /** - * Subdirectory under the ant build directory where we store integration test md5 results - */ - public static final String MD5_FILE_DB_SUBDIR = "integrationtests"; - public static final String testDir = "public/testdata/"; /** before the class starts up */ @@ -107,6 +104,57 @@ public abstract class BaseTest { } } + /** + * Simple generic utility class to creating TestNG data providers: + * + * 1: inherit this class, as in + * + * private class SummarizeDifferenceTest extends TestDataProvider { + * public SummarizeDifferenceTest() { + * super(SummarizeDifferenceTest.class); + * } + * ... + * } + * + * Provide a reference to your class to the TestDataProvider constructor. + * + * 2: Create instances of your subclass. Return from it the call to getTests, providing + * the class type of your test + * + * @DataProvider(name = "summaries" + * public Object[][] createSummaries() { + * new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2"); + * new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1"); + * return SummarizeDifferenceTest.getTests(SummarizeDifferenceTest.class); + * } + * + * This class magically tracks created objects of this + */ + public static class TestDataProvider { + private static final Map> tests = new HashMap>(); + + /** + * Create a new TestDataProvider instance bound to the class variable C + * @param c + */ + public TestDataProvider(Class c) { + if ( ! tests.containsKey(c) ) + tests.put(c, new ArrayList()); + tests.get(c).add(this); + } + + /** + * Return all of the data providers in the form expected by TestNG of type class C + * @param c + * @return + */ + public static Object[][] getTests(Class c) { + List params2 = new ArrayList(); + for ( Object x : tests.get(c) ) params2.add(new Object[]{x}); + return params2.toArray(new Object[][]{}); + } + } + /** * test if the file exists * @@ -150,192 +198,6 @@ public boolean foundString() { } } - /** - * a little utility function for all tests to md5sum a file - * Shameless taken from: - * - * http://www.javalobby.org/java/forums/t84420.html - * - * @param file the file - * @return a string - */ - public static String md5SumFile(File file) { - MessageDigest digest; - try { - digest = MessageDigest.getInstance("MD5"); - } catch (NoSuchAlgorithmException e) { - throw new ReviewedStingException("Unable to find MD5 digest"); - } - InputStream is; - try { - is = new FileInputStream(file); - } catch (FileNotFoundException e) { - throw new ReviewedStingException("Unable to open file " + file); - } - byte[] buffer = new byte[8192]; - int read; - try { - while ((read = is.read(buffer)) > 0) { - digest.update(buffer, 0, read); - } - byte[] md5sum = digest.digest(); - BigInteger bigInt = new BigInteger(1, md5sum); - return bigInt.toString(16); - - } - catch (IOException e) { - throw new ReviewedStingException("Unable to process file for MD5", e); - } - finally { - try { - is.close(); - } - catch (IOException e) { - throw new ReviewedStingException("Unable to close input stream for MD5 calculation", e); - } - } - } - - protected static void ensureMd5DbDirectory() { - // todo -- make path - File dir = new File(MD5_FILE_DB_SUBDIR); - if ( ! dir.exists() ) { - System.out.printf("##### Creating MD5 db %s%n", MD5_FILE_DB_SUBDIR); - if ( ! dir.mkdir() ) { - throw new ReviewedStingException("Infrastructure failure: failed to create md5 directory " + MD5_FILE_DB_SUBDIR); - } - } - } - - protected static File getFileForMD5(final String md5) { - final String basename = String.format("%s.integrationtest", md5); - return new File(MD5_FILE_DB_SUBDIR + "/" + basename); - } - - private static void updateMD5Db(final String md5, final File resultsFile) { - // todo -- copy results file to DB dir if needed under filename for md5 - final File dbFile = getFileForMD5(md5); - if ( ! dbFile.exists() ) { - // the file isn't already in the db, copy it over - System.out.printf("##### Updating MD5 file: %s%n", dbFile.getPath()); - try { - FileUtils.copyFile(resultsFile, dbFile); - } catch ( IOException e ) { - throw new ReviewedStingException(e.getMessage()); - } - } else { - System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath()); - - } - } - - private static String getMD5Path(final String md5, final String valueIfNotFound) { - // todo -- look up the result in the directory and return the path if it exists - final File dbFile = getFileForMD5(md5); - return dbFile.exists() ? dbFile.getPath() : valueIfNotFound; - } - - public static byte[] getBytesFromFile(File file) throws IOException { - InputStream is = new FileInputStream(file); - - // Get the size of the file - long length = file.length(); - - if (length > Integer.MAX_VALUE) { - // File is too large - } - - // Create the byte array to hold the data - byte[] bytes = new byte[(int) length]; - - // Read in the bytes - int offset = 0; - int numRead = 0; - while (offset < bytes.length - && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) { - offset += numRead; - } - - // Ensure all the bytes have been read in - if (offset < bytes.length) { - throw new IOException("Could not completely read file " + file.getName()); - } - - // Close the input stream and return bytes - is.close(); - return bytes; - } - - /** - * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL throw an exception if the MD5s are different. - * @param name Name of the test. - * @param resultsFile File to MD5. - * @param expectedMD5 Expected MD5 value. - * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. - * @return The calculated MD5. - */ - public static String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { - String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize); - - if (parameterize || expectedMD5.equals("")) { - // Don't assert - } else { - Assert.assertEquals(filemd5sum, expectedMD5, name + " Mismatching MD5s"); - System.out.println(String.format(" => %s PASSED", name)); - } - - return filemd5sum; - } - - - /** - * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL NOT throw an exception if the MD5s are different. - * @param name Name of the test. - * @param resultsFile File to MD5. - * @param expectedMD5 Expected MD5 value. - * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. - * @return The calculated MD5. - */ - public static String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { - try { - byte[] bytesOfMessage = getBytesFromFile(resultsFile); - byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage); - BigInteger bigInt = new BigInteger(1, thedigest); - String filemd5sum = bigInt.toString(16); - while (filemd5sum.length() < 32) filemd5sum = "0" + filemd5sum; // pad to length 32 - - // - // copy md5 to integrationtests - // - updateMD5Db(filemd5sum, resultsFile); - - if (parameterize || expectedMD5.equals("")) { - System.out.println(String.format("PARAMETERIZATION[%s]: file %s has md5 = %s, stated expectation is %s, equal? = %b", - name, resultsFile, filemd5sum, expectedMD5, filemd5sum.equals(expectedMD5))); - } else { - System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5)); - System.out.flush(); - - if ( ! expectedMD5.equals(filemd5sum) ) { - // we are going to fail for real in assertEquals (so we are counted by the testing framework). - // prepare ourselves for the comparison - System.out.printf("##### Test %s is going fail #####%n", name); - String pathToExpectedMD5File = getMD5Path(expectedMD5, "[No DB file found]"); - String pathToFileMD5File = getMD5Path(filemd5sum, "[No DB file found]"); - System.out.printf("##### Path to expected file (MD5=%s): %s%n", expectedMD5, pathToExpectedMD5File); - System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File); - System.out.printf("##### Diff command: diff %s %s%n", pathToExpectedMD5File, pathToFileMD5File); - - // todo -- add support for simple inline display of the first N differences for text file - } - } - - return filemd5sum; - } catch (Exception e) { - throw new RuntimeException("Failed to read bytes from calls file: " + resultsFile, e); - } - } - /** * Creates a temp file that will be deleted on exit after tests are complete. * @param name Prefix of the file. diff --git a/public/java/test/org/broadinstitute/sting/MD5DB.java b/public/java/test/org/broadinstitute/sting/MD5DB.java new file mode 100644 index 0000000000..bea9eaec57 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/MD5DB.java @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting; + +import org.apache.commons.io.FileUtils; +import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.testng.Assert; + +import java.io.*; +import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: 7/18/11 + * Time: 9:10 AM + * + * Utilities for manipulating the MD5 database of previous results + */ +public class MD5DB { + /** + * Subdirectory under the ant build directory where we store integration test md5 results + */ + public static final String LOCAL_MD5_DB_DIR = "integrationtests"; + public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests"; + + // ---------------------------------------------------------------------- + // + // MD5 DB stuff + // + // ---------------------------------------------------------------------- + + /** + * Create the MD5 file directories if necessary + */ + protected static void ensureMd5DbDirectory() { + File dir = new File(LOCAL_MD5_DB_DIR); + if ( ! dir.exists() ) { + System.out.printf("##### Creating MD5 db %s%n", LOCAL_MD5_DB_DIR); + if ( ! dir.mkdir() ) { + throw new ReviewedStingException("Infrastructure failure: failed to create md5 directory " + LOCAL_MD5_DB_DIR); + } + } + } + + /** + * Returns the path to an already existing file with the md5 contents, or valueIfNotFound + * if no such file exists in the db. + * + * @param md5 + * @param valueIfNotFound + * @return + */ + public static String getMD5FilePath(final String md5, final String valueIfNotFound) { + // we prefer the local db to the global DB, so match it first + for ( String dir : Arrays.asList(LOCAL_MD5_DB_DIR, GLOBAL_MD5_DB_DIR)) { + File f = getFileForMD5(md5, dir); + if ( f.exists() && f.canRead() ) + return f.getPath(); + } + + return valueIfNotFound; + } + + /** + * Utility function that given a file's md5 value and the path to the md5 db, + * returns the canonical name of the file. For example, if md5 is XXX and db is YYY, + * this will return YYY/XXX.integrationtest + * + * @param md5 + * @param dbPath + * @return + */ + private static File getFileForMD5(final String md5, final String dbPath) { + final String basename = String.format("%s.integrationtest", md5); + return new File(dbPath + "/" + basename); + } + + /** + * Copies the results file with md5 value to its canonical file name and db places + * + * @param md5 + * @param resultsFile + */ + private static void updateMD5Db(final String md5, final File resultsFile) { + copyFileToDB(getFileForMD5(md5, LOCAL_MD5_DB_DIR), resultsFile); + copyFileToDB(getFileForMD5(md5, GLOBAL_MD5_DB_DIR), resultsFile); + } + + /** + * Low-level utility routine that copies resultsFile to dbFile + * @param dbFile + * @param resultsFile + */ + private static void copyFileToDB(File dbFile, final File resultsFile) { + if ( ! dbFile.exists() ) { + // the file isn't already in the db, copy it over + System.out.printf("##### Updating MD5 file: %s%n", dbFile.getPath()); + try { + FileUtils.copyFile(resultsFile, dbFile); + } catch ( IOException e ) { + System.out.printf("##### Skipping update, cannot write file %s%n", dbFile); + } + } else { + System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath()); + } + } + + /** + * Returns the byte[] of the entire contents of file, for md5 calculations + * @param file + * @return + * @throws IOException + */ + private static byte[] getBytesFromFile(File file) throws IOException { + InputStream is = new FileInputStream(file); + + // Get the size of the file + long length = file.length(); + + if (length > Integer.MAX_VALUE) { + // File is too large + } + + // Create the byte array to hold the data + byte[] bytes = new byte[(int) length]; + + // Read in the bytes + int offset = 0; + int numRead = 0; + while (offset < bytes.length + && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) { + offset += numRead; + } + + // Ensure all the bytes have been read in + if (offset < bytes.length) { + throw new IOException("Could not completely read file " + file.getName()); + } + + // Close the input stream and return bytes + is.close(); + return bytes; + } + + /** + * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL throw an exception if the MD5s are different. + * @param name Name of the test. + * @param resultsFile File to MD5. + * @param expectedMD5 Expected MD5 value. + * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. + * @return The calculated MD5. + */ + public static String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { + String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize); + + if (parameterize || expectedMD5.equals("")) { + // Don't assert + } else if ( filemd5sum.equals(expectedMD5) ) { + System.out.println(String.format(" => %s PASSED", name)); + } else { + Assert.fail(String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum)); + } + + return filemd5sum; + } + + + /** + * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL NOT throw an exception if the MD5s are different. + * @param name Name of the test. + * @param resultsFile File to MD5. + * @param expectedMD5 Expected MD5 value. + * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. + * @return The calculated MD5. + */ + public static String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { + try { + byte[] bytesOfMessage = getBytesFromFile(resultsFile); + byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage); + BigInteger bigInt = new BigInteger(1, thedigest); + String filemd5sum = bigInt.toString(16); + while (filemd5sum.length() < 32) filemd5sum = "0" + filemd5sum; // pad to length 32 + + // + // copy md5 to integrationtests + // + updateMD5Db(filemd5sum, resultsFile); + + if (parameterize || expectedMD5.equals("")) { + System.out.println(String.format("PARAMETERIZATION[%s]: file %s has md5 = %s, stated expectation is %s, equal? = %b", + name, resultsFile, filemd5sum, expectedMD5, filemd5sum.equals(expectedMD5))); + } else { + System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5)); + System.out.flush(); + + if ( ! expectedMD5.equals(filemd5sum) ) { + // we are going to fail for real in assertEquals (so we are counted by the testing framework). + // prepare ourselves for the comparison + System.out.printf("##### Test %s is going fail #####%n", name); + String pathToExpectedMD5File = getMD5FilePath(expectedMD5, "[No DB file found]"); + String pathToFileMD5File = getMD5FilePath(filemd5sum, "[No DB file found]"); + System.out.printf("##### Path to expected file (MD5=%s): %s%n", expectedMD5, pathToExpectedMD5File); + System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File); + System.out.printf("##### Diff command: diff %s %s%n", pathToExpectedMD5File, pathToFileMD5File); + + // inline differences + DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(System.out, 20, 10, 0); + boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), params); + if ( success ) + System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n", + pathToExpectedMD5File, pathToFileMD5File); + } + } + + return filemd5sum; + } catch (Exception e) { + throw new RuntimeException("Failed to read bytes from calls file: " + resultsFile, e); + } + } +} diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java index dacaf27381..386c17659c 100755 --- a/public/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java @@ -26,7 +26,9 @@ package org.broadinstitute.sting; import org.apache.commons.lang.StringUtils; +import org.broad.tribble.FeatureCodec; import org.broad.tribble.Tribble; +import org.broad.tribble.index.Index; import org.broad.tribble.index.IndexFactory; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.gatk.CommandLineExecutable; @@ -51,7 +53,7 @@ public void initializeRandomGenerator() { } public String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5) { - return assertMatchingMD5(name, resultsFile, expectedMD5, parameterize()); + return MD5DB.assertMatchingMD5(name, resultsFile, expectedMD5, parameterize()); } public void maybeValidateSupplementaryFile(final String name, final File resultFile) { @@ -63,8 +65,20 @@ public void maybeValidateSupplementaryFile(final String name, final File resultF throw new StingException("Found an index created for file " + resultFile + " but we can only validate VCF files. Extend this code!"); } - System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile); - Assert.assertTrue(IndexFactory.onDiskIndexEqualToNewlyCreatedIndex(resultFile, indexFile, new VCFCodec()), "Index on disk from indexing on the fly not equal to the index created after the run completed"); + assertOnDiskIndexEqualToNewlyCreatedIndex(indexFile, name, resultFile); + } + } + + + public static void assertOnDiskIndexEqualToNewlyCreatedIndex(final File indexFile, final String name, final File resultFile) { + System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile); + Index indexFromOutputFile = IndexFactory.createIndex(resultFile, new VCFCodec()); + Index dynamicIndex = IndexFactory.loadIndex(indexFile.getAbsolutePath()); + + if ( ! indexFromOutputFile.equalsIgnoreTimestamp(dynamicIndex) ) { + Assert.fail(String.format("Index on disk from indexing on the fly not equal to the index created after the run completed. FileIndex %s vs. on-the-fly %s%n", + indexFromOutputFile.getProperties(), + dynamicIndex.getProperties())); } } @@ -177,7 +191,7 @@ protected Pair, List> executeTest(final String name, WalkerTe } protected Pair, List> executeTest(final String name, WalkerTestSpec spec) { - ensureMd5DbDirectory(); // ensure the md5 directory exists + MD5DB.ensureMd5DbDirectory(); // ensure the md5 directory exists List tmpFiles = new ArrayList(); for (int i = 0; i < spec.nOutputFiles; i++) { diff --git a/public/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java new file mode 100644 index 0000000000..a6af034cb0 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java @@ -0,0 +1,27 @@ +package org.broadinstitute.sting.alignment; + +import org.testng.annotations.Test; +import org.broadinstitute.sting.WalkerTest; + +import java.util.Arrays; + +/** + * Integration tests for the aligner. + * + * @author mhanna + * @version 0.1 + */ +public class AlignerIntegrationTest extends WalkerTest { + @Test + public void testBasicAlignment() { + String md5 = "a2bdf907b18114a86ca47f9fc23791bf"; + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + GATKDataLocation + "bwa/human_b36_both.fasta" + + " -T Align" + + " -I " + validationDataLocation + "NA12878_Pilot1_20.trimmed.unmapped.bam" + + " -o %s", + 1, // just one output file + Arrays.asList(md5)); + executeTest("testBasicAlignment", spec); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 6ba6926c6d..e6300e6c9b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -15,7 +15,7 @@ public static String baseTestString() { public void testHasAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("4cc077eb3d343e6b7ba12bff86ebe347")); + Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c")); executeTest("test file has annotations, not asking for annotations, #1", spec); } @@ -23,7 +23,7 @@ public void testHasAnnotsNotAsking1() { public void testHasAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("1de8e943fbf55246ebd19efa32f22a58")); + Arrays.asList("964f1016ec9a3c55333f62dd834c14d6")); executeTest("test file has annotations, not asking for annotations, #2", spec); } @@ -31,7 +31,7 @@ public void testHasAnnotsNotAsking2() { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("93c110e45fd4aedb044a8a5501e23336")); + Arrays.asList("8e7de435105499cd71ffc099e268a83e")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -39,7 +39,7 @@ public void testHasAnnotsAsking1() { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("f5cb45910ed719f46159f9f71acaecf4")); + Arrays.asList("64b6804cb1e27826e3a47089349be581")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -47,7 +47,7 @@ public void testHasAnnotsAsking2() { public void testNoAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("4b48e7d095ef73e3151542ea976ecd89")); + Arrays.asList("42ccee09fa9f8c58f4a0d4f1139c094f")); executeTest("test file doesn't have annotations, not asking for annotations, #1", spec); } @@ -55,7 +55,7 @@ public void testNoAnnotsNotAsking1() { public void testNoAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("28dfbfd178aca071b948cd3dc2365357")); + Arrays.asList("f2ddfa8105c290b1f34b7a261a02a1ac")); executeTest("test file doesn't have annotations, not asking for annotations, #2", spec); } @@ -63,7 +63,7 @@ public void testNoAnnotsNotAsking2() { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("a330a5bc3ee72a51dbeb7e6c97a0db99")); + Arrays.asList("fd1ffb669800c2e07df1e2719aa38e49")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -71,7 +71,7 @@ public void testNoAnnotsAsking1() { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("3a31d1ef471acfb881a2dec7963fe3f4")); + Arrays.asList("09f8e840770a9411ff77508e0ed0837f")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } @@ -79,7 +79,7 @@ public void testNoAnnotsAsking2() { public void testOverwritingHeader() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1, - Arrays.asList("a63fd8ff7bafbd46b7f009144a7c2ad1")); + Arrays.asList("78d2c19f8107d865970dbaf3e12edd92")); executeTest("test overwriting header", spec); } @@ -87,7 +87,7 @@ public void testOverwritingHeader() { public void testNoReads() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, - Arrays.asList("36378f1245bb99d902fbfe147605bc42")); + Arrays.asList("16e3a1403fc376320d7c69492cad9345")); executeTest("not passing it any reads", spec); } @@ -95,7 +95,7 @@ public void testNoReads() { public void testDBTagWithDbsnp() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, - Arrays.asList("0257a1cc3c703535b2d3c5046bf88ab7")); + Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d")); executeTest("getting DB tag with dbSNP", spec); } @@ -103,7 +103,7 @@ public void testDBTagWithDbsnp() { public void testDBTagWithHapMap() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, - Arrays.asList("2d7c73489dcf0db433bebdf79a068764")); + Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688")); executeTest("getting DB tag with HM3", spec); } @@ -111,13 +111,13 @@ public void testDBTagWithHapMap() { public void testUsingExpression() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variant", 1, - Arrays.asList("2f6efd08d818faa1eb0631844437c64a")); + Arrays.asList("e9c0d832dc6b4ed06c955060f830c140")); executeTest("using expression", spec); } @Test public void testTabixAnnotations() { - final String MD5 = "6c7a6a1c0027bf82656542a9b2671a35"; + final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf"; for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -A HomopolymerRun -B:variant,VCF " + validationDataLocation + "/" + file + " -BTI variant -NO_HEADER", 1, diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java index c4f6d5ebc3..c75a5b2dc9 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java @@ -29,7 +29,7 @@ public void testGenomicAnnotatorOnDbSNP() { */ - String[] md5WithDashSArg = {"3d3b61a83c1189108eabb2df04218099"}; + String[] md5WithDashSArg = {"efba4ce1641cfa2ef88a64395f2ebce8"}; WalkerTestSpec specWithSArg = new WalkerTestSpec( "-T GenomicAnnotator -R " + b36KGReference + " -B:variant,vcf3 /humgen/gsa-hpprojects/GATK/data/Annotations/examples/CEU_hapmap_nogt_23_subset.vcf" + @@ -58,7 +58,7 @@ public void testGenomicAnnotatorOnIndels() { "-o %s" ), 1, - Arrays.asList("caa562160733aa638e1ba413ede209ae") + Arrays.asList("772fc3f43b70770ec6c6acbb8bbbd4c0") ); executeTest("testGenomicAnnotatorOnIndels", testOnIndels); } @@ -76,7 +76,7 @@ public void testGenomicAnnotatorOnSNPsAndIndels() { "-o %s" ), 1, - Arrays.asList("a4cf76f08fa90284b6988a464b6e0c17") + Arrays.asList("081ade7f3d2d3c5f19cb1e8651a626f3") ); executeTest("testGenomicAnnotatorOnSNPsAndIndels", testOnSNPsAndIndels); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java index 70c34e7291..fef1b6e649 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java @@ -41,7 +41,7 @@ public void testBeagleOutput() { "-B:beagleR2,BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " + "-B:beagleProbs,BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " + "-B:beaglePhased,BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " + - "-o %s -NO_HEADER", 1, Arrays.asList("6bccee48ad2f06ba5a8c774fed444478")); + "-o %s -NO_HEADER", 1, Arrays.asList("3531451e84208264104040993889aaf4")); executeTest("test BeagleOutputToVCF", spec); } @@ -60,7 +60,7 @@ public void testBeagleInput2() { "-T ProduceBeagleInput -B:variant,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+ "-B:validation,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+ "-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta -NO_HEADER ",2, - Arrays.asList("660986891b30cdc937e0f2a3a5743faa","223fb977e8db567dcaf632c6ee51f294")); + Arrays.asList("660986891b30cdc937e0f2a3a5743faa","e96ddd51da9f4a797b2aa8c20e404166")); executeTest("test BeagleInputWithBootstrap",spec); } @@ -72,7 +72,7 @@ public void testBeagleOutput2() { "-B:beagleR2,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+ "-B:beagleProbs,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+ "-B:beaglePhased,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+ - "-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("24b88ef8cdf6e347daab491f0256be5a")); + "-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("8dd6ec53994fb46c5c22af8535d22965")); executeTest("testBeagleChangesSitesToRef",spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java new file mode 100644 index 0000000000..2ae19264ee --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngineUnitTest.java @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.gatk.walkers.diffengine; + + +// the imports for unit testing. + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * Basic unit test for DifferableReaders in reduced reads + */ +public class DiffEngineUnitTest extends BaseTest { + DiffEngine engine; + + @BeforeClass(enabled = true) + public void createDiffEngine() { + engine = new DiffEngine(); + } + + // -------------------------------------------------------------------------------- + // + // Difference testing routines + // + // -------------------------------------------------------------------------------- + + private class DifferenceTest extends TestDataProvider { + public DiffElement tree1, tree2; + public List differences; + + private DifferenceTest(String tree1, String tree2) { + this(tree1, tree2, Collections.emptyList()); + } + + private DifferenceTest(String tree1, String tree2, String difference) { + this(tree1, tree2, Arrays.asList(difference)); + } + + private DifferenceTest(String tree1, String tree2, List differences) { + super(DifferenceTest.class); + this.tree1 = DiffNode.fromString(tree1); + this.tree2 = DiffNode.fromString(tree2); + this.differences = differences; + } + + public String toString() { + return String.format("tree1=%s tree2=%s diff=%s", + tree1.toOneLineString(), tree2.toOneLineString(), differences); + } + } + + @DataProvider(name = "trees") + public Object[][] createTrees() { + new DifferenceTest("A=X", "A=X"); + new DifferenceTest("A=X", "A=Y", "A:X!=Y"); + new DifferenceTest("A=X", "B=X", Arrays.asList("A:X!=MISSING", "B:MISSING!=X")); + new DifferenceTest("A=(X=1)", "B=(X=1)", Arrays.asList("A:(X=1)!=MISSING", "B:MISSING!=(X=1)")); + new DifferenceTest("A=(X=1)", "A=(X=1)"); + new DifferenceTest("A=(X=1 Y=2)", "A=(X=1 Y=2)"); + new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2 B=(Z=3))"); + new DifferenceTest("A=(X=1)", "A=(X=2)", "A.X:1!=2"); + new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2 B=(Z=4))", "A.B.Z:3!=4"); + new DifferenceTest("A=(X=1)", "A=(X=1 Y=2)", "A.Y:MISSING!=2"); + new DifferenceTest("A=(X=1 Y=2 B=(Z=3))", "A=(X=1 Y=2)", "A.B:(Z=3)!=MISSING"); + return DifferenceTest.getTests(DifferenceTest.class); + } + + @Test(enabled = true, dataProvider = "trees") + public void testDiffs(DifferenceTest test) { + logger.warn("Test tree1: " + test.tree1.toOneLineString()); + logger.warn("Test tree2: " + test.tree2.toOneLineString()); + + List diffs = engine.diff(test.tree1, test.tree2); + logger.warn("Test expected diff : " + test.differences); + logger.warn("Observed diffs : " + diffs); + } + + // -------------------------------------------------------------------------------- + // + // Low-level routines for summarizing differences + // + // -------------------------------------------------------------------------------- + + @Test(enabled = true) + public void testLongestCommonPostfix() { + testLongestCommonPostfixHelper("A", "A", 1); + testLongestCommonPostfixHelper("A", "B", 0); + testLongestCommonPostfixHelper("A.B", "A.B", 2); + testLongestCommonPostfixHelper("A.B.C", "A.B.C", 3); + testLongestCommonPostfixHelper("A.B.C", "X.B.C", 2); + testLongestCommonPostfixHelper("A.B.C", "X.Y.C", 1); + testLongestCommonPostfixHelper("A.B.C", "X.Y.Z", 0); + testLongestCommonPostfixHelper("A.B.C", "A.X.C", 1); + testLongestCommonPostfixHelper("A.B.C", "A.X.Z", 0); + testLongestCommonPostfixHelper("A.B.C", "A.B.Z", 0); + } + + public void testLongestCommonPostfixHelper(String p1, String p2, int expected) { + String[] parts1 = p1.split("\\."); + String[] parts2 = p2.split("\\."); + int obs = DiffEngine.longestCommonPostfix(parts1, parts2); + Assert.assertEquals(obs, expected, "p1=" + p1 + " p2=" + p2 + " failed"); + } + + @Test(enabled = true, dependsOnMethods = "testLongestCommonPostfix") + public void testSummarizePath() { + testSummarizePathHelper("A", "A", "A"); + testSummarizePathHelper("A", "B", "*"); + testSummarizePathHelper("A.B", "A.B", "A.B"); + testSummarizePathHelper("A.B", "X.B", "*.B"); + testSummarizePathHelper("A.B", "X.Y", "*.*"); + testSummarizePathHelper("A.B.C", "A.B.C", "A.B.C"); + testSummarizePathHelper("A.B.C", "X.B.C", "*.B.C"); + testSummarizePathHelper("A.B.C", "X.Y.C", "*.*.C"); + testSummarizePathHelper("A.B.C", "X.Y.Z", "*.*.*"); + testSummarizePathHelper("A.B.C", "A.X.C", "*.*.C"); + testSummarizePathHelper("A.B.C", "A.X.Z", "*.*.*"); + testSummarizePathHelper("A.B.C", "A.B.Z", "*.*.*"); + } + + public void testSummarizePathHelper(String p1, String p2, String expected) { + String[] parts1 = DiffEngine.diffNameToPath(p1); + String[] parts2 = DiffEngine.diffNameToPath(p2); + int obs = DiffEngine.longestCommonPostfix(parts1, parts2); + String path = DiffEngine.summarizedPath(parts2, obs); + Assert.assertEquals(path, expected, "p1=" + p1 + " p2=" + p2 + " failed"); + } + + // -------------------------------------------------------------------------------- + // + // High-level difference summary + // + // -------------------------------------------------------------------------------- + + private class SummarizeDifferenceTest extends TestDataProvider { + List diffs = new ArrayList(); + List expecteds = new ArrayList(); + + public SummarizeDifferenceTest() { super(SummarizeDifferenceTest.class); } + + public SummarizeDifferenceTest addDiff(String... diffsToAdd) { + diffs.addAll(Arrays.asList(diffsToAdd)); + return this; + } + + public SummarizeDifferenceTest addSummary(String... expectedSummary) { + expecteds.addAll(Arrays.asList(expectedSummary)); + return this; + } + + public String toString() { + return String.format("diffs=%s => expected=%s", diffs, expecteds); + } + + public void test() { + List diffPaths = new ArrayList(diffs.size()); + for ( String diff : diffs ) { diffPaths.add(DiffEngine.diffNameToPath(diff)); } + + List sumDiffs = engine.summarizedDifferencesOfPathsFromString(diffs); + + Assert.assertEquals(sumDiffs.size(), expecteds.size(), "Unexpected number of summarized differences: " + sumDiffs); + + for ( int i = 0; i < sumDiffs.size(); i++ ) { + Difference sumDiff = sumDiffs.get(i); + String expected = expecteds.get(i); + String[] pathCount = expected.split(":"); + String path = pathCount[0]; + int count = Integer.valueOf(pathCount[1]); + Assert.assertEquals(sumDiff.getPath(), path, "Unexpected path at: " + expected + " obs=" + sumDiff + " all=" + sumDiffs); + Assert.assertEquals(sumDiff.getCount(), count, "Unexpected counts at: " + expected + " obs=" + sumDiff + " all=" + sumDiffs); + } + } + } + + @DataProvider(name = "summaries") + public Object[][] createSummaries() { + new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2"); + new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1"); + new SummarizeDifferenceTest().addDiff("A", "A", "A").addSummary("A:3"); + new SummarizeDifferenceTest().addDiff("A", "A", "A", "B").addSummary("A:3", "B:1"); + new SummarizeDifferenceTest().addDiff("A", "A", "A", "B", "B").addSummary("A:3", "B:2"); + new SummarizeDifferenceTest().addDiff("A", "A", "A", "B", "B", "C").addSummary("A:3", "B:2", "C:1"); + new SummarizeDifferenceTest().addDiff("A.X", "A.X").addSummary("A.X:2"); + new SummarizeDifferenceTest().addDiff("A.X", "A.X", "B.X").addSummary("*.X:3", "A.X:2", "B.X:1"); + new SummarizeDifferenceTest().addDiff("A.X", "A.X", "B.X", "B.X").addSummary("*.X:4", "A.X:2", "B.X:2"); + new SummarizeDifferenceTest().addDiff("A.B.C", "X.B.C").addSummary("*.B.C:2", "A.B.C:1", "X.B.C:1"); + new SummarizeDifferenceTest().addDiff("A.B.C", "X.Y.C", "X.Y.C").addSummary("*.*.C:3", "X.Y.C:2", "A.B.C:1"); + new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "X.Y.C").addSummary("*.*.C:3", "A.B.C:1", "A.X.C:1", "X.Y.C:1"); + new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "B.X.C").addSummary("*.*.C:3", "*.X.C:2", "A.B.C:1", "A.X.C:1", "B.X.C:1"); + new SummarizeDifferenceTest().addDiff("A.B.C", "A.X.C", "B.X.C", "B.X.C").addSummary("*.*.C:4", "*.X.C:3", "B.X.C:2", "A.B.C:1", "A.X.C:1"); + + return SummarizeDifferenceTest.getTests(SummarizeDifferenceTest.class); + } + + + @Test(enabled = true, dependsOnMethods = "testSummarizePath", dataProvider = "summaries") + public void testSummarizeDifferences(SummarizeDifferenceTest test) { + test.test(); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNodeUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNodeUnitTest.java new file mode 100644 index 0000000000..534416d294 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNodeUnitTest.java @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.gatk.walkers.diffengine; + + +// the imports for unit testing. + + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * Basic unit test for DifferableReaders in reduced reads + */ +public class DiffNodeUnitTest extends BaseTest { + // Data is: + // MY_ROOT + // fields: A=A, B=B + // nodes: C, D + // C: fields: E=E, nodes: none + // D: fields: F=F, G=G, nodes: none + static DiffNode MY_ROOT = DiffNode.rooted("MY_ROOT"); + static DiffValue Value_A = new DiffValue("A", MY_ROOT, "A"); + static DiffValue Value_B = new DiffValue("B", MY_ROOT, "B"); + static DiffNode NODE_C = DiffNode.empty("C", MY_ROOT); + static DiffNode NODE_D = DiffNode.empty("D", MY_ROOT); + static DiffValue Value_E = new DiffValue("E", NODE_C, "E"); + static DiffValue Value_F = new DiffValue("F", NODE_D, "F"); + static DiffValue Value_G = new DiffValue("G", NODE_D, "G"); + + static { + MY_ROOT.add(Value_A); + MY_ROOT.add(Value_B); + MY_ROOT.add(NODE_C); + MY_ROOT.add(NODE_D); + NODE_C.add(Value_E); + NODE_D.add(Value_F); + NODE_D.add(Value_G); + } + + + // -------------------------------------------------------------------------------- + // + // Element testing routines + // + // -------------------------------------------------------------------------------- + + private class ElementTest extends TestDataProvider { + public DiffElement elt; + public String name; + public String fullName; + public DiffElement parent; + + private ElementTest(DiffValue elt, DiffValue parent, String name, String fullName) { + this(elt.getBinding(), parent.getBinding(), name, fullName); + } + + private ElementTest(DiffElement elt, DiffElement parent, String name, String fullName) { + super(ElementTest.class); + this.elt = elt; + this.name = name; + this.fullName = fullName; + this.parent = parent; + } + + public String toString() { + return String.format("ElementTest elt=%s name=%s fullName=%s parent=%s", + elt.toOneLineString(), name, fullName, parent.getName()); + } + } + + @DataProvider(name = "elementdata") + public Object[][] createElementData() { + new ElementTest(MY_ROOT.getBinding(), DiffElement.ROOT, "MY_ROOT", "MY_ROOT"); + new ElementTest(NODE_C, MY_ROOT, "C", "MY_ROOT.C"); + new ElementTest(NODE_D, MY_ROOT, "D", "MY_ROOT.D"); + new ElementTest(Value_A, MY_ROOT, "A", "MY_ROOT.A"); + new ElementTest(Value_B, MY_ROOT, "B", "MY_ROOT.B"); + new ElementTest(Value_E, NODE_C, "E", "MY_ROOT.C.E"); + new ElementTest(Value_F, NODE_D, "F", "MY_ROOT.D.F"); + new ElementTest(Value_G, NODE_D, "G", "MY_ROOT.D.G"); + return TestDataProvider.getTests(ElementTest.class); + } + + @Test(enabled = true, dataProvider = "elementdata") + public void testElementMethods(ElementTest test) { + Assert.assertNotNull(test.elt.getName()); + Assert.assertNotNull(test.elt.getParent()); + Assert.assertEquals(test.elt.getName(), test.name); + Assert.assertEquals(test.elt.getParent(), test.parent); + Assert.assertEquals(test.elt.fullyQualifiedName(), test.fullName); + } + + // -------------------------------------------------------------------------------- + // + // DiffValue testing routines + // + // -------------------------------------------------------------------------------- + + private class LeafTest extends TestDataProvider { + public DiffValue diffvalue; + public Object value; + + private LeafTest(DiffValue diffvalue, Object value) { + super(LeafTest.class); + this.diffvalue = diffvalue; + this.value = value; + } + + public String toString() { + return String.format("LeafTest diffvalue=%s value=%s", diffvalue.toOneLineString(), value); + } + } + + @DataProvider(name = "leafdata") + public Object[][] createLeafData() { + new LeafTest(Value_A, "A"); + new LeafTest(Value_B, "B"); + new LeafTest(Value_E, "E"); + new LeafTest(Value_F, "F"); + new LeafTest(Value_G, "G"); + return TestDataProvider.getTests(LeafTest.class); + } + + @Test(enabled = true, dataProvider = "leafdata") + public void testLeafMethods(LeafTest test) { + Assert.assertNotNull(test.diffvalue.getValue()); + Assert.assertEquals(test.diffvalue.getValue(), test.value); + } + + // -------------------------------------------------------------------------------- + // + // Node testing routines + // + // -------------------------------------------------------------------------------- + + private class NodeTest extends TestDataProvider { + public DiffNode node; + public Set fields; + public Set subnodes; + public Set allNames; + + private NodeTest(DiffNode node, List fields, List subnodes) { + super(NodeTest.class); + this.node = node; + this.fields = new HashSet(fields); + this.subnodes = new HashSet(subnodes); + this.allNames = new HashSet(fields); + allNames.addAll(subnodes); + } + + public String toString() { + return String.format("NodeTest node=%s fields=%s subnodes=%s", + node.toOneLineString(), fields, subnodes); + } + } + + @DataProvider(name = "nodedata") + public Object[][] createData1() { + new NodeTest(MY_ROOT, Arrays.asList("A", "B"), Arrays.asList("C", "D")); + new NodeTest(NODE_C, Arrays.asList("E"), Collections.emptyList()); + new NodeTest(NODE_D, Arrays.asList("F", "G"), Collections.emptyList()); + return TestDataProvider.getTests(NodeTest.class); + } + + @Test(enabled = true, dataProvider = "nodedata") + public void testNodeAccessors(NodeTest test) { + Assert.assertNotNull(test.node.getElements()); + + for ( String name : test.allNames ) { + DiffElement elt = test.node.getElement(name); + Assert.assertNotNull(elt, "Failed to find field " + elt + " in " + test.node); + Assert.assertEquals(elt.getName(), name); + Assert.assertEquals(elt.getValue().isAtomic(), test.fields.contains(name), "Failed atomic/compound expectation: " + test.node); + } + } + + // NOTE: add routines are being implicitly tested by the creation of the data structures + + @Test(enabled = true, dataProvider = "nodedata") + public void testCounts(NodeTest test) { + Assert.assertEquals(test.node.getElements().size(), test.allNames.size()); + Assert.assertEquals(test.node.getElementNames(), test.allNames); + } + + // -------------------------------------------------------------------------------- + // + // fromString testing routines + // + // -------------------------------------------------------------------------------- + + private class FromStringTest extends TestDataProvider { + public String string; + public DiffElement expected; + + private FromStringTest(String string, DiffElement expected) { + super(FromStringTest.class); + this.string = string; + this.expected = expected; + } + + public String toString() { + return String.format("FromStringTest string=%s expected=%s", string, expected.toOneLineString()); + } + } + + @DataProvider(name = "fromstringdata") + public Object[][] createFromData() { + new FromStringTest("A=A", Value_A.getBinding()); + new FromStringTest("B=B", Value_B.getBinding()); + new FromStringTest("C=(E=E)", NODE_C.getBinding()); + new FromStringTest("D=(F=F G=G)", NODE_D.getBinding()); + return TestDataProvider.getTests(FromStringTest.class); + } + + @Test(enabled = true, dataProvider = "fromstringdata") + public void parseFromString(FromStringTest test) { + logger.warn("Testing from string: " + test.string); + DiffElement elt = DiffNode.fromString(test.string); + Assert.assertEquals(elt.toOneLineString(), test.expected.toOneLineString()); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java new file mode 100644 index 0000000000..77159d9c24 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class DiffObjectsIntegrationTest extends WalkerTest { + private class TestParams extends TestDataProvider { + public File master, test; + public String MD5; + + private TestParams(String master, String test, String MD5) { + super(TestParams.class); + this.master = new File(master); + this.test = new File(test); + this.MD5 = MD5; + } + + public String toString() { + return String.format("master=%s,test=%s,md5=%s", master, test, MD5); + } + } + + @DataProvider(name = "data") + public Object[][] createData() { + new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "4d9f4636de05b93c354d05011264546e"); + new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "37e6efd833b5cd6d860a9df3df9713fc"); + return TestParams.getTests(TestParams.class); + } + + @Test(enabled = true, dataProvider = "data") + public void testDiffs(TestParams params) { + WalkerTestSpec spec = new WalkerTestSpec( + "-T DiffObjects -R public/testdata/exampleFASTA.fasta " + + " -m " + params.master + + " -t " + params.test + + " -o %s", + Arrays.asList(params.MD5)); + executeTest("testDiffObjects:"+params, spec).getFirst(); + } +} + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java new file mode 100644 index 0000000000..dee7bbd884 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.gatk.walkers.diffengine; + + +// the imports for unit testing. + + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.*; + +/** + * Basic unit test for DifferableReaders in reduced reads + */ +public class DiffableReaderUnitTest extends BaseTest { + DiffEngine engine; + + File vcfFile = new File(testDir + "diffTestMaster.vcf"); + File bamFile = new File(testDir + "exampleBAM.bam"); + + @BeforeClass(enabled = true) + public void createDiffEngine() { + engine = new DiffEngine(); + } + + @Test(enabled = true) + public void testPluggableDiffableReaders() { + logger.warn("testPluggableDiffableReaders"); + Map readers = engine.getReaders(); + Assert.assertNotNull(readers); + Assert.assertTrue(readers.size() > 0); + Assert.assertNotNull(readers.get("VCF")); + for ( Map.Entry e : engine.getReaders().entrySet() ) { + logger.warn("Found diffable reader: " + e.getKey()); + Assert.assertEquals(e.getValue().getName(), e.getKey()); + Assert.assertEquals(e.getValue(), engine.getReader(e.getKey())); + } + } + + private static void testLeaf(DiffNode rec, String field, Object expected) { + DiffElement value = rec.getElement(field); + Assert.assertNotNull(value, "Expected to see leaf named " + field + " in rec " + rec); + Assert.assertEquals(value.getValue().getValue(), expected, "Expected to leaf named " + field + " to have value " + expected + " in rec " + rec); + } + + @Test(enabled = true, dependsOnMethods = "testPluggableDiffableReaders") + public void testVCF1() { + logger.warn("testVCF1"); + DiffableReader vcfReader = engine.getReader("VCF"); + Assert.assertTrue(vcfReader.canRead(vcfFile)); + Assert.assertFalse(vcfReader.canRead(bamFile)); + + DiffElement diff = vcfReader.readFromFile(vcfFile, -1); + Assert.assertNotNull(diff); + + Assert.assertEquals(diff.getName(), vcfFile.getName()); + Assert.assertSame(diff.getParent(), DiffElement.ROOT); + + DiffNode node = diff.getValueAsNode(); + Assert.assertEquals(node.getElements().size(), 11); + + // chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03 + DiffNode rec1 = node.getElement("chr1:2646").getValueAsNode(); + testLeaf(rec1, "CHROM", "chr1"); + testLeaf(rec1, "POS", 2646); + testLeaf(rec1, "ID", "rs62635284"); + testLeaf(rec1, "REF", Allele.create("G", true)); + testLeaf(rec1, "ALT", new HashSet(Arrays.asList(Allele.create("A")))); + testLeaf(rec1, "QUAL", 0.15); + testLeaf(rec1, "FILTER", Collections.emptySet()); + testLeaf(rec1, "AC", "2"); + testLeaf(rec1, "AF", "1.00"); + testLeaf(rec1, "AN", "2"); + } + + @Test(enabled = true, dependsOnMethods = "testPluggableDiffableReaders") + public void testBAM() { + logger.warn("testBAM"); + DiffableReader bamReader = engine.getReader("BAM"); + Assert.assertTrue(bamReader.canRead(bamFile)); + Assert.assertFalse(bamReader.canRead(vcfFile)); + + DiffElement diff = bamReader.readFromFile(bamFile, -1); + Assert.assertNotNull(diff); + + Assert.assertEquals(diff.getName(), bamFile.getName()); + Assert.assertSame(diff.getParent(), DiffElement.ROOT); + + DiffNode node = diff.getValueAsNode(); + Assert.assertEquals(node.getElements().size(), 33); + + // 30PPJAAXX090125:1:42:512:1817#0 99 chr1 200 0 76M = + // 255 -130 ACCCTAACCCTAACCCTAACCCTAACCATAACCCTAAGACTAACCCTAAACCTAACCCTCATAATCGAAATACAAC + // BBBBC@C?AABCBB<63>=B@>+B9-9+)2B8,+@327B5A>90((>-+''3?(/'''A)(''19('7.,**%)3: + // PG:Z:0 RG:Z:exampleBAM.bam SM:Z:exampleBAM.bam + + DiffNode rec1 = node.getElement("30PPJAAXX090125:1:42:512:1817#0_1").getValueAsNode(); + testLeaf(rec1, "NAME", "30PPJAAXX090125:1:42:512:1817#0"); + testLeaf(rec1, "FLAGS", 99); + testLeaf(rec1, "RNAME", "chr1"); + testLeaf(rec1, "POS", 200); + testLeaf(rec1, "MAPQ", 0); + testLeaf(rec1, "CIGAR", "76M"); + testLeaf(rec1, "RNEXT", "chr1"); + testLeaf(rec1, "PNEXT", 255); + testLeaf(rec1, "TLEN", -130); + testLeaf(rec1, "SEQ", "ACCCTAACCCTAACCCTAACCCTAACCATAACCCTAAGACTAACCCTAAACCTAACCCTCATAATCGAAATACAAC"); + testLeaf(rec1, "QUAL", "BBBBC@C?AABCBB<63>=B@>+B9-9+)2B8,+@327B5A>90((>-+''3?(/'''A)(''19('7.,**%)3:"); + testLeaf(rec1, "PG", "0"); + testLeaf(rec1, "RG", "exampleBAM.bam"); + testLeaf(rec1, "SM", "exampleBAM.bam"); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java new file mode 100644 index 0000000000..4e4080bc7e --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DifferenceUnitTest.java @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.gatk.walkers.diffengine; + + +// the imports for unit testing. + + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Basic unit test for DifferableReaders in reduced reads + */ +public class DifferenceUnitTest extends BaseTest { + // -------------------------------------------------------------------------------- + // + // testing routines + // + // -------------------------------------------------------------------------------- + + private class DifferenceTest extends TestDataProvider { + public DiffElement tree1, tree2; + public String difference; + + private DifferenceTest(String tree1, String tree2, String difference) { + this(DiffNode.fromString(tree1), DiffNode.fromString(tree2), difference); + } + + private DifferenceTest(DiffElement tree1, DiffElement tree2, String difference) { + super(DifferenceTest.class); + this.tree1 = tree1; + this.tree2 = tree2; + this.difference = difference; + } + + public String toString() { + return String.format("tree1=%s tree2=%s diff=%s", + tree1 == null ? "null" : tree1.toOneLineString(), + tree2 == null ? "null" : tree2.toOneLineString(), + difference); + } + } + + @DataProvider(name = "data") + public Object[][] createTrees() { + new DifferenceTest("A=X", "A=Y", "A:1:X!=Y"); + new DifferenceTest("A=Y", "A=X", "A:1:Y!=X"); + new DifferenceTest(DiffNode.fromString("A=X"), null, "A:1:X!=MISSING"); + new DifferenceTest(null, DiffNode.fromString("A=X"), "A:1:MISSING!=X"); + return DifferenceTest.getTests(DifferenceTest.class); + } + + @Test(enabled = true, dataProvider = "data") + public void testDiffToString(DifferenceTest test) { + logger.warn("Test tree1: " + (test.tree1 == null ? "null" : test.tree1.toOneLineString())); + logger.warn("Test tree2: " + (test.tree2 == null ? "null" : test.tree2.toOneLineString())); + logger.warn("Test expected diff : " + test.difference); + Difference diff = new Difference(test.tree1, test.tree2); + logger.warn("Observed diffs : " + diff); + Assert.assertEquals(diff.toString(), test.difference, "Observed diff string " + diff + " not equal to expected difference string " + test.difference ); + + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 3d75fdc44f..7bec67d2ef 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -16,7 +16,7 @@ public static String baseTestString() { public void testNoAction() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("4cc077eb3d343e6b7ba12bff86ebe347")); + Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c")); executeTest("test no action", spec); } @@ -24,7 +24,7 @@ public void testNoAction() { public void testClusteredSnps() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -window 10 -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("ada5540bb3d9b6eb8f1337ba01e90a94")); + Arrays.asList("27b13f179bb4920615dff3a32730d845")); executeTest("test clustered SNPs", spec); } @@ -32,17 +32,17 @@ public void testClusteredSnps() { public void testMasks() { WalkerTestSpec spec1 = new WalkerTestSpec( baseTestString() + " -mask foo -B:mask,VCF3 " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("b0fcac4af3526e3b2a37602ab4c0e6ae")); + Arrays.asList("578f9e774784c25871678e6464fd212b")); executeTest("test mask all", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( baseTestString() + " -mask foo -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("b64baabe905a5d197cc1ab594147d3d5")); + Arrays.asList("bfa86a674aefca1b13d341cb14ab3c4f")); executeTest("test mask some", spec2); WalkerTestSpec spec3 = new WalkerTestSpec( baseTestString() + " -mask foo -maskExtend 10 -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("0eff92fe72024d535c44b98e1e9e1993")); + Arrays.asList("5939f80d14b32d88587373532d7b90e5")); executeTest("test mask extend", spec3); } @@ -50,7 +50,7 @@ public void testMasks() { public void testFilter1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("7a40795147cbfa92941489d7239aad92")); + Arrays.asList("45219dbcfb6f81bba2ea0c35f5bfd368")); executeTest("test filter #1", spec); } @@ -58,7 +58,7 @@ public void testFilter1() { public void testFilter2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("e9dd4991b1e325847c77d053dfe8ee54")); + Arrays.asList("c95845e817da7352b9b72bc9794f18fb")); executeTest("test filter #2", spec); } @@ -66,7 +66,7 @@ public void testFilter2() { public void testFilterWithSeparateNames() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("9ded2cce63b8d97550079047051d80a3")); + Arrays.asList("b8cdd7f44ff1a395e0a9b06a87e1e530")); executeTest("test filter with separate names #2", spec); } @@ -74,12 +74,12 @@ public void testFilterWithSeparateNames() { public void testGenotypeFilters() { WalkerTestSpec spec1 = new WalkerTestSpec( baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("6696e3f65a62ce912230d47cdb0c129b")); + Arrays.asList("96b61e4543a73fe725e433f007260039")); executeTest("test genotype filter #1", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("26e5b4ee954c9e0b5eb044afd4b88ee9")); + Arrays.asList("6c8112ab17ce39c8022c891ae73bf38e")); executeTest("test genotype filter #2", spec2); } @@ -87,7 +87,7 @@ public void testGenotypeFilters() { public void testDeletions() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo -B:variant,VCF " + validationDataLocation + "twoDeletions.vcf", 1, - Arrays.asList("e63b58be33c9126ad6cc55489aac539b")); + Arrays.asList("569546fd798afa0e65c5b61b440d07ac")); executeTest("test deletions", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 20fa7719f9..1f23d262e6 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("258e1954e6ae55c89abc6a716e19cbe0")); + Arrays.asList("c97829259463d04b0159591bb6fb44af")); executeTest("test MultiSample Pilot1", spec); } @@ -54,12 +54,12 @@ public void testMultiSamplePilot2AndRecallingWithAlleles() { public void testWithAllelesPassedIn() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("edeb1db288a24baff59575ceedd94243")); + Arrays.asList("2b69667f4770e8c0c894066b7f27e440")); executeTest("test MultiSample Pilot2 with alleles passed in", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("581990130d90071b084024f4cd7caf91")); + Arrays.asList("b77fe007c2a97fcd59dfd5eef94d8b95")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -67,7 +67,7 @@ public void testWithAllelesPassedIn() { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("d120db27d694a6da32367cc4fb5770fa")); + Arrays.asList("ee8a5e63ddd470726a749e69c0c20f60")); executeTest("test SingleSample Pilot2", spec); } @@ -77,7 +77,7 @@ public void testSingleSamplePilot2() { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "75e5c430ed39f79f24e375037a388dc4"; + private final static String COMPRESSED_OUTPUT_MD5 = "ef31654a2b85b9b2d3bba4f4a75a17b6"; @Test public void testCompressedOutput() { @@ -107,7 +107,7 @@ public void testParallelization() { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String md5 = "a29615dd37222a11b8dadd341b53e43c"; + String md5 = "46868a9c4134651c54535fb46b408aee"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -138,9 +138,9 @@ public void testParallelization() { @Test public void testCallingParameters() { HashMap e = new HashMap(); - e.put( "--min_base_quality_score 26", "93e6269e38db9bc1732555e9969e3648" ); - e.put( "--min_mapping_quality_score 26", "64be99183c100caed4aa5f8bad64c7e9" ); - e.put( "--p_nonref_model GRID_SEARCH", "0592fe33f705ad8e2f13619fcf157805" ); + e.put( "--min_base_quality_score 26", "5043c9a101e691602eb7a3f9704bdf20" ); + e.put( "--min_mapping_quality_score 26", "71a833eb8fd93ee62ae0d5a430f27940" ); + e.put( "--p_nonref_model GRID_SEARCH", "ddf443e9dcadef367476b26b4d52c134" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -153,9 +153,9 @@ public void testCallingParameters() { @Test public void testOutputParameter() { HashMap e = new HashMap(); - e.put( "-sites_only", "1483e637dc0279935a7f90d136d147bb" ); - e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "adcd91bc7dae8020df8caf1a30060e98" ); - e.put( "--output_mode EMIT_ALL_SITES", "b708acc2fa40f336bcd2d0c70091e07e" ); + e.put( "-sites_only", "eaad6ceb71ab94290650a70bea5ab951" ); + e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "05bf7db8a3d19ef4a3d14772c90b732f" ); + e.put( "--output_mode EMIT_ALL_SITES", "e4b86740468d7369f0156550855586c7" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -169,12 +169,12 @@ public void testOutputParameter() { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("64be99183c100caed4aa5f8bad64c7e9")); + Arrays.asList("71a833eb8fd93ee62ae0d5a430f27940")); executeTest("test confidence 1", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("e76ca54232d02f0d92730e1affeb804e")); + Arrays.asList("79968844dc3ddecb97748c1acf2984c7")); executeTest("test confidence 2", spec2); } @@ -186,8 +186,8 @@ public void testConfidence() { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "18d37f7f107853b5e32c757b4e143205" ); - e.put( 1.0 / 1850, "2bcb90ce2f7542bf590f7612018fae8e" ); + e.put( 0.01, "4e878664f61d2d800146d3762303fde1" ); + e.put( 1.0 / 1850, "9204caec095ff5e63ca21a10b6fab453" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -211,7 +211,7 @@ public void testMultiTechnologies() { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d")); + Arrays.asList("1a58ec52df545f946f80cc16c5736a91")); executeTest(String.format("test multiple technologies"), spec); } @@ -230,7 +230,7 @@ public void testCallingWithBAQ() { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("0919ab7e513c377610e23a67d33608fa")); + Arrays.asList("62d0f6d9de344ce68ce121c13b1e78b1")); executeTest(String.format("test calling with BAQ"), spec); } @@ -244,7 +244,7 @@ public void testCallingWithBAQOff() { " -L 1:10,000,000-10,100,000" + " -baq OFF", 1, - Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d")); + Arrays.asList("1a58ec52df545f946f80cc16c5736a91")); executeTest(String.format("test calling with BAQ OFF"), spec); } @@ -263,7 +263,7 @@ public void testSimpleIndels() { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("cb37348c41b8181be829912730f747e1")); + Arrays.asList("631ae1f1eb6bc4c1a4136b8495250536")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -278,7 +278,7 @@ public void testIndelsWithLowMinAlleleCnt() { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("ca5b6a5fb53ae401b146cc3044f454f2")); + Arrays.asList("fd556585c79e2b892a5976668f45aa43")); executeTest(String.format("test indel caller in SLX witn low min allele count"), spec); } @@ -291,7 +291,7 @@ public void testMultiTechnologyIndels() { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("ca4343a4ab6d3cce94ce61d7d1910f81")); + Arrays.asList("9cd56feedd2787919e571383889fde70")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -301,14 +301,14 @@ public void testWithIndelAllelesPassedIn() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("3f555b53e9dd14cf7cdf96c24e322364")); + Arrays.asList("315e1b78d7a403d7fcbcf0caa8c496b8")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("1b9764b783acf7822edc58e6822eef5b")); + Arrays.asList("cf89e0c54f14482a23c105b73a333d8a")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java new file mode 100644 index 0000000000..21435dd7dc --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java @@ -0,0 +1,28 @@ +package org.broadinstitute.sting.gatk.walkers.phasing; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; + +public class MergeAndMatchHaplotypesIntegrationTest extends WalkerTest { + private static String mergeAndMatchHaplotypesTestDataRoot = validationDataLocation + "/MergeAndMatchHaplotypes"; + private static String fundamentalTestPBTVCF = mergeAndMatchHaplotypesTestDataRoot + "/" + "FundamentalsTest.pbt.vcf"; + private static String fundamentalTestRBPVCF = mergeAndMatchHaplotypesTestDataRoot + "/" + "FundamentalsTest.pbt.rbp.vcf"; + + @Test + public void testBasicFunctionality() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T MergeAndMatchHaplotypes", + "-R " + b37KGReference, + "-B:pbt,VCF " + fundamentalTestPBTVCF, + "-B:rbp,VCF " + fundamentalTestRBPVCF, + "-o %s" + ), + 1, + Arrays.asList("") + ); + executeTest("testBasicMergeAndMatchHaplotypesFunctionality", spec); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java index 3f87fc1a24..c88eac1494 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java @@ -23,7 +23,7 @@ public void test1() { baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 1) + " -L chr20:556259-756570", 1, - Arrays.asList("e312b7d3854d5b2834a370659514a813")); + Arrays.asList("7f11f7f75d1526077f0173c7ed1fc6c4")); executeTest("Merge MNP sites within genomic distance of 1 [TEST ONE]", spec); } @@ -33,7 +33,7 @@ public void test2() { baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 10) + " -L chr20:556259-756570", 1, - Arrays.asList("681f50e45f1d697370d2c355df2e18bc")); + Arrays.asList("53dd312468296826bdd3c22387390c88")); executeTest("Merge MNP sites within genomic distance of 10 [TEST TWO]", spec); } @@ -43,7 +43,7 @@ public void test3() { baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 100) + " -L chr20:556259-756570", 1, - Arrays.asList("0bccb0ef928a108418246bec01098083")); + Arrays.asList("e26f92d2fb9f4eaeac7f9d8ee27410ee")); executeTest("Merge MNP sites within genomic distance of 100 [TEST THREE]", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java index 009048c102..f855c1dd3a 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java @@ -23,7 +23,7 @@ public void test1() { baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 1) + " -L chr20:556259-756570", 1, - Arrays.asList("e16f957d888054ae0518e25660295241")); + Arrays.asList("af5e1370822551c0c6f50f23447dc627")); executeTest("Merge sites within genomic distance of 1 [TEST ONE]", spec); } @@ -33,7 +33,7 @@ public void test2() { baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 10) + " -L chr20:556259-756570", 1, - Arrays.asList("122a482090677c7619c2105d44e00d11")); + Arrays.asList("dd8c44ae1ef059a7fe85399467e102eb")); executeTest("Merge sites within genomic distance of 10 [TEST TWO]", spec); } @@ -43,7 +43,7 @@ public void test3() { baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 100) + " -L chr20:556259-756570", 1, - Arrays.asList("bc6a8c8a42bb2601db98e88e9ad74748")); + Arrays.asList("f81fd72ecaa57b3215406fcea860bcc5")); executeTest("Merge sites within genomic distance of 100 [TEST THREE]", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java new file mode 100644 index 0000000000..9f59adeb66 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java @@ -0,0 +1,44 @@ +package org.broadinstitute.sting.gatk.walkers.phasing; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; + +public class PhaseByTransmissionIntegrationTest extends WalkerTest { + private static String phaseByTransmissionTestDataRoot = validationDataLocation + "/PhaseByTransmission"; + private static String fundamentalTestVCF = phaseByTransmissionTestDataRoot + "/" + "FundamentalsTest.unfiltered.vcf"; + + @Test + public void testBasicFunctionalityWithoutFilters() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T PhaseByTransmission", + "-R " + b37KGReference, + "-B:variant,VCF " + fundamentalTestVCF, + "-f NA12892+NA12891=NA12878", + "-nofilters", + "-o %s" + ), + 1, + Arrays.asList("416a483e87358cdcb0b09a496e3254c0") + ); + executeTest("testBasicFunctionalityWithoutFilters", spec); + } + + @Test + public void testBasicFunctionalityWithFilters() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T PhaseByTransmission", + "-R " + b37KGReference, + "-B:variant,VCF " + fundamentalTestVCF, + "-f NA12892+NA12891=NA12878", + "-o %s" + ), + 1, + Arrays.asList("8c5db343567e90e97993912c7e541d0d") + ); + executeTest("testBasicFunctionalityWithFilters", spec); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java index 0ed16967ac..1bf3e579ff 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java @@ -26,7 +26,7 @@ public void test1() { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:332341-382503", 1, - Arrays.asList("6020a68bbec97fcd87819c10cd4e2470")); + Arrays.asList("9568ba0b6624b97ac55a59bdee2d9150")); executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec); } @@ -36,7 +36,7 @@ public void test2() { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:1232503-1332503", 1, - Arrays.asList("712c2145df4756c9a15758865d8007b5")); + Arrays.asList("ce65194c24fe83b0ec90faa6c8e6109a")); executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec); } @@ -46,7 +46,7 @@ public void test3() { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30) + " -L chr20:332341-382503", 1, - Arrays.asList("297e0896e4761529d979f40f5ad694db")); + Arrays.asList("02d134fd544613b1e5dd7f7197fc3753")); executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec); } @@ -56,7 +56,7 @@ public void test4() { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100) + " -L chr20:332341-382503", 1, - Arrays.asList("52a17f14692d726d3b726cf0ae7f2a09")); + Arrays.asList("2f7ec9904fc054c2ba1a7db05eb29334")); executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec); } @@ -66,7 +66,7 @@ public void test5() { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10) + " -L chr20:332341-482503", 1, - Arrays.asList("af768f7958b8f4599c2374f1cc2fc613")); + Arrays.asList("da7a31725f229d1782dd3049848730aa")); executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec); } @@ -76,7 +76,7 @@ public void test6() { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:652810-681757", 1, - Arrays.asList("3dd886672f59a47908b94136d0427bb0")); + Arrays.asList("e9d35cb88089fb0e8ae6678bfaeeac8c")); executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index b0f76229b0..129161da30 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -19,9 +19,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { public void testCountCovariates1() { HashMap e = new HashMap(); e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "7b5832d4b2a23b8ef2bb639eb59bfa88" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f4f8a49bb5764d2a8f61e055f64dcce4"); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "9c006f8e9fb5752b1c139f5a8cc7ea88"); e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "e6f7b4ab9aa291022e0ba8b7dbe4c77e" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "570506533f079d738d70934dfe1c02cd" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "e6b98af01c5a08e4954b79ec42db6fc3" ); for ( String parallelism : Arrays.asList("", " -nt 4")) { for ( Map.Entry entry : e.entrySet() ) { @@ -53,9 +53,9 @@ public void testCountCovariates1() { public void testTableRecalibrator1() { HashMap e = new HashMap(); e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "344d4252143df8c2cce6b568747553a5"); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "6797d7ffa4ef6c48413719ba32696ccf"); e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "2bb3374dde131791d7638031ae3b3e10" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "064c4a7bdd23974c3a9c5f924540df76" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1f9d8944b73169b367cb83b0d22e5432" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -107,7 +107,7 @@ public void testCountCovariatesUseOriginalQuals() { @Test public void testTableRecalibratorMaxQ70() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "344d4252143df8c2cce6b568747553a5" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -133,12 +133,10 @@ public void testTableRecalibratorMaxQ70() { } } - - @Test public void testCountCovariatesSolidIndelsRemoveRefBias() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "0a6cdb9611e5880ea6611205080aa267" ); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "c9ea5f995e1e2b7a5688533e678dcedc" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -164,7 +162,7 @@ public void testCountCovariatesSolidIndelsRemoveRefBias() { @Test public void testTableRecalibratorSolidIndelsRemoveRefBias() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "9bc7e1ad223ba759fe5e8ddb4c07369c" ); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "993fae4270e7e1e15986f270acf247af" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -189,13 +187,10 @@ public void testTableRecalibratorSolidIndelsRemoveRefBias() { } } - - - @Test public void testCountCovariatesVCF() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "3700eaf567e4937f442fc777a226d6ad"); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "170f0c3cc4b8d72c539136effeec9a16"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -219,7 +214,7 @@ public void testCountCovariatesVCF() { @Test public void testCountCovariatesBED() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "6803891a3398821fc8a37e19ea8e5a00"); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "b460478d9683e827784e42bc352db8bb"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -243,7 +238,7 @@ public void testCountCovariatesBED() { @Test public void testCountCovariatesVCFPlusDBsnp() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f224c42fbc4026db973ccc91265ab5c7"); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "a3d892bd60d8f679affda3c1e3af96c1"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -268,69 +263,10 @@ public void testCountCovariatesVCFPlusDBsnp() { } } - @Test - public void testCountCovariatesNoReadGroups() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "c024e03f019aeceaf364fa58c8295ad8" ); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + - " -T CountCovariates" + - " -I " + bam + - " -L 1:10,000,000-10,200,000" + - " -cov ReadGroupCovariate" + - " -cov QualityScoreCovariate" + - " -cov CycleCovariate" + - " -cov DinucCovariate" + - " --default_read_group DefaultReadGroup" + - " --default_platform illumina" + - " --solid_recal_mode SET_Q_ZERO" + - " -recalFile %s", - 1, // just one output file - Arrays.asList(md5)); - List result = executeTest("testCountCovariatesNoReadGroups", spec).getFirst(); - paramsFilesNoReadGroupTest.put(bam, result.get(0).getAbsolutePath()); - } - } - - @Test - public void testTableRecalibratorNoReadGroups() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "1eefbe7ac0376fc1ed1392d85242171e" ); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - String paramsFile = paramsFilesNoReadGroupTest.get(bam); - System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile); - if ( paramsFile != null ) { - WalkerTestSpec spec = new WalkerTestSpec( - "-R " + b36KGReference + - " -T TableRecalibration" + - " -I " + bam + - " -L 1:10,100,000-10,300,000" + - " -o %s" + - " --no_pg_tag" + - " --solid_recal_mode SET_Q_ZERO" + - " --default_read_group DefaultReadGroup" + - " --default_platform illumina" + - " -recalFile " + paramsFile, - 1, // just one output file - Arrays.asList(md5)); - executeTest("testTableRecalibratorNoReadGroups", spec); - } - } - } - @Test public void testCountCovariatesNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "cfc31bb6f51436d1c3b34f62bb801dc8" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "284ccac1f8fe485e52c86333cac7c2d4" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -356,7 +292,7 @@ public void testCountCovariatesNoIndex() { @Test public void testTableRecalibratorNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "83b848a16034c2fb423d1bb0f5be7784" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "c167799c2d9cab815d7c9b23337f162e" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -380,11 +316,10 @@ public void testTableRecalibratorNoIndex() { } } - @Test public void testCountCovariatesFailWithoutDBSNP() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", ""); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", ""); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java deleted file mode 100755 index 850a3113e4..0000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.sequenom; - -import org.broadinstitute.sting.WalkerTest; -import org.testng.annotations.Test; - -import java.util.Arrays; - -public class PickSequenomProbesIntegrationTest extends WalkerTest { - @Test - public void testProbes() { - String testVCF = validationDataLocation + "complexExample.vcf4"; - String testArgs = "-R " + b36KGReference + " -T PickSequenomProbes -L 1:10,000,000-11,000,000 -B:input,VCF "+testVCF+" -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("6b5409cc78960f1be855536ed89ea9dd")); - executeTest("Test probes", spec); - } - - @Test - public void testProbesUsingDbSNPMask() { - - String md5 = "46d53491af1d3aa0ee1f1e13d68b732d"; - String testVCF = validationDataLocation + "pickSeqIntegrationTest.vcf"; - - String testArgs = "-snp_mask " + validationDataLocation + "pickSeqIntegrationTest.bed -R " - + b36KGReference + " -omitWindow -nameConvention " - + "-project_id 1kgp3_s4_lf -T PickSequenomProbes -B:input,VCF "+testVCF+" -o %s"; - WalkerTestSpec spec1 = new WalkerTestSpec(testArgs, 1, Arrays.asList(md5)); - executeTest("Test probes", spec1); - - testArgs += " -nmw 1"; - WalkerTestSpec spec2 = new WalkerTestSpec(testArgs, 1, Arrays.asList(md5)); - executeTest("Test probes", spec2); - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java new file mode 100755 index 0000000000..95f4ac0ae3 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java @@ -0,0 +1,56 @@ +package org.broadinstitute.sting.gatk.walkers.validation; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; + +/** + * Created by IntelliJ IDEA. + * User: Ghost + * Date: 7/19/11 + * Time: 7:39 PM + * To change this template use File | Settings | File Templates. + */ +public class ValidationAmpliconsIntegrationTest extends WalkerTest { + + @Test(enabled=true) + public void testWikiExample() { + String siteVCF = validationDataLocation + "sites_to_validate.vcf"; + String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf"; + String intervalTable = validationDataLocation + "amplicon_interval_table1.table"; + String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s"; + testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF; + testArgs += " --virtualPrimerSize 30"; + WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, + Arrays.asList("27f9450afa132888a8994167f0035fd7")); + executeTest("Test probes", spec); + } + + @Test(enabled=true) + public void testWikiExampleNoBWA() { + String siteVCF = validationDataLocation + "sites_to_validate.vcf"; + String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf"; + String intervalTable = validationDataLocation + "amplicon_interval_table1.table"; + String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s"; + testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF; + testArgs += " --virtualPrimerSize 30 --doNotUseBWA"; + WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, + Arrays.asList("f2611ff1d9cd5bedaad003251fed8bc1")); + executeTest("Test probes", spec); + } + + @Test(enabled=true) + public void testWikiExampleMonoFilter() { + String siteVCF = validationDataLocation + "sites_to_validate.vcf"; + String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf"; + String intervalTable = validationDataLocation + "amplicon_interval_table1.table"; + String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s"; + testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF; + testArgs += " --virtualPrimerSize 30 --filterMonomorphic"; + WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, + Arrays.asList("77b3f30e38fedad812125bdf6cf3255f")); + executeTest("Test probes", spec); + } + +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index eb6a1a4c62..057053a1cf 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; +import org.broadinstitute.sting.MD5DB; import org.broadinstitute.sting.WalkerTest; import org.testng.annotations.Test; import org.testng.annotations.DataProvider; @@ -26,8 +27,8 @@ public VRTest(String inVCF, String tranchesMD5, String recalMD5, String cutVCFMD VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf", "d33212a84368e821cbedecd4f59756d6", // tranches - "a35cd067f378442eee8cd5edeea92be0", // recal file - "126d52843f4a57199ee97750ffc16a07"); // cut VCF + "4652dca41222bebdf9d9fda343b2a835", // recal file + "243a397a33a935fcaccd5deb6d16f0c0"); // cut VCF @DataProvider(name = "VRTest") public Object[][] createData1() { @@ -65,8 +66,8 @@ public void testApplyRecalibration(VRTest params) { " -NO_HEADER" + " -B:input,VCF " + params.inVCF + " -o %s" + - " -tranchesFile " + getFileForMD5(params.tranchesMD5) + - " -recalFile " + getFileForMD5(params.recalMD5), + " -tranchesFile " + MD5DB.getMD5FilePath(params.tranchesMD5, null) + + " -recalFile " + MD5DB.getMD5FilePath(params.recalMD5, null), Arrays.asList(params.cutVCFMD5)); executeTest("testApplyRecalibration-"+params.inVCF, spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 33a20f7b52..904a5b29b4 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -34,76 +34,75 @@ * Tests CombineVariants */ public class CombineVariantsIntegrationTest extends WalkerTest { -// public static String baseTestString(String args) { -// return "-T CombineVariants -NO_HEADER -L 1:1-50,000,000 -o %s -R " + b36KGReference + args; -// } -// -// public void test1InOut(String file, String md5, boolean vcf3) { -// test1InOut(file, md5, "", vcf3); -// } -// -// public void test1InOut(String file, String md5, String args, boolean vcf3) { -// WalkerTestSpec spec = new WalkerTestSpec( -// baseTestString(" -priority v1 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args), -// 1, -// Arrays.asList(md5)); -// executeTest("testInOut1--" + file, spec); -// } -// -// public void combine2(String file1, String file2, String args, String md5, boolean vcf3) { -// WalkerTestSpec spec = new WalkerTestSpec( -// baseTestString(" -priority v1,v2 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -B:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args), -// 1, -// Arrays.asList(md5)); -// executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); -// } -// -// public void combineSites(String args, String md5) { -// String file1 = "1000G_omni2.5.b37.sites.vcf"; -// String file2 = "hapmap_3.3.b37.sites.vcf"; -// WalkerTestSpec spec = new WalkerTestSpec( -// "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference -// + " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1 -// + " -B:hm3,VCF " + validationDataLocation + file2 + args, -// 1, -// Arrays.asList(md5)); -// executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); -// } -// -// -// @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2117fff6e0d182cd20be508e9661829c", true); } -// @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2cfaf7af3dd119df08b8a9c1f72e2f93", " -setKey foo", true); } -// @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "1474ac0fde2ce42a3c24f1c97eab333e", " -setKey null", true); } -// @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "7fc66df048a0ab08cf507906e1d4a308", false); } // official project VCF files in tabix format -// -// @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ec9715f53dbf4531570557c212822f12", false); } -// @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f1072be5f5c6ee810276d9ca6537224d", false); } -// -// @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "b77a1eec725201d9d8e74ee0c45638d3", false); } // official project VCF files in tabix format -// @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "802977fdfd2f4905b501bb06800f60af", false); } // official project VCF files in tabix format -// @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a67157287dd2b24b5cdf7ebf8fcbbe9a", false); } -// -// @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e1f4718a179f1196538a33863da04f53", false); } -// -// @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "b3783384b7c8e877b971033e90beba48", true); } -// -// @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "902e541c87caa72134db6293fc46f0ad"); } -// @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "f339ad4bb5863b58b9c919ce7d040bb9"); } -// -// @Test public void threeWayWithRefs() { -// WalkerTestSpec spec = new WalkerTestSpec( -// baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" + -// " -B:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" + -// " -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" + -// " -B:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" + -// " -setKey centerSet" + -// " -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" + -// " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + -// " -genotypeMergeOptions UNIQUIFY -L 1"), -// 1, -// Arrays.asList("a07995587b855f3214fb71940bf23c0f")); -// executeTest("threeWayWithRefs", spec); -// } + public static String baseTestString(String args) { + return "-T CombineVariants -NO_HEADER -L 1:1-50,000,000 -o %s -R " + b36KGReference + args; + } + + public void test1InOut(String file, String md5, boolean vcf3) { + test1InOut(file, md5, "", vcf3); + } + + public void test1InOut(String file, String md5, String args, boolean vcf3) { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(" -priority v1 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args), + 1, + Arrays.asList(md5)); + executeTest("testInOut1--" + file, spec); + } + + public void combine2(String file1, String file2, String args, String md5, boolean vcf3) { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(" -priority v1,v2 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -B:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args), + 1, + Arrays.asList(md5)); + executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); + } + + public void combineSites(String args, String md5) { + String file1 = "1000G_omni2.5.b37.sites.vcf"; + String file2 = "hapmap_3.3.b37.sites.vcf"; + WalkerTestSpec spec = new WalkerTestSpec( + "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference + + " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1 + + " -B:hm3,VCF " + validationDataLocation + file2 + args, + 1, + Arrays.asList(md5)); + executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); + } + + @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0", true); } + @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo", true); } + @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null", true); } + @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "c9c901ff9ef2a982624b203a8086dff0", false); } // official project VCF files in tabix format + + @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b", false); } + @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e", false); } + + @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format + @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a", false); } + + @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083", false); } + + @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); } + + @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "4836086891f6cbdd40eebef3076d215a"); } + @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "6a34b5d743efda8b2f3b639f3a2f5de8"); } + + @Test public void threeWayWithRefs() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" + + " -B:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" + + " -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" + + " -B:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" + + " -setKey centerSet" + + " -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" + + " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + + " -genotypeMergeOptions UNIQUIFY -L 1"), + 1, + Arrays.asList("1de95f91ca15d2a8856de35dee0ce33e")); + executeTest("threeWayWithRefs", spec); + } // complex examples with filtering, indels, and multiple alleles @@ -119,8 +118,8 @@ public void combineComplexSites(String args, String md5) { executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); } - @Test public void complexTestFull() { combineComplexSites("", "64b991fd3850f83614518f7d71f0532f"); } - @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "0db9ef50fe54b60426474273d7c7fa99"); } - @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "d20acb3d53ba0a02ce92d540ebeda2a9"); } - @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "8d1b3d120515f8b56b5a0d10bc5da713"); } + @Test public void complexTestFull() { combineComplexSites("", "b5a53ee92bdaacd2bb3327e9004ae058"); } + @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); } + @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f72a178137e25dbe0b931934cdc0079d"); } + @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java index d32ab6282c..82c894c6f7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java @@ -40,7 +40,7 @@ public void testb36Tohg19() { WalkerTestSpec spec = new WalkerTestSpec( "-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, - Arrays.asList("37e23efd7d6471fc0f807b31ccafe0eb")); + Arrays.asList("70aeaca5b74cc7ba8e2da7b71ff0fbfd")); executeTest("test b36 to hg19", spec); } @@ -49,7 +49,7 @@ public void testb36Tohg19UnsortedSamples() { WalkerTestSpec spec = new WalkerTestSpec( "-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, - Arrays.asList("b6ef4a2f026fd3843aeb9ed764a66921")); + Arrays.asList("3fd7ec2dc4064ef410786276b0dc9d08")); executeTest("test b36 to hg19, unsorted samples", spec); } @@ -58,7 +58,7 @@ public void testhg18Tohg19Unsorted() { WalkerTestSpec spec = new WalkerTestSpec( "-T LiftoverVariants -o %s -R " + hg18Reference + " -B:variant,vcf " + validationDataLocation + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, - Arrays.asList("3275373b3c44ad14a270b50664b3f8a3")); + Arrays.asList("ab2c6254225d7e2ecf52eee604d5673b")); executeTest("test hg18 to hg19, unsorted", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index e18287a215..b5f41542ef 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -18,7 +18,7 @@ public void testComplexSelection() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' -B:variant,VCF3 " + testfile + " -NO_HEADER"), 1, - Arrays.asList("1b9d551298dc048c7d36b60440ff4d50") + Arrays.asList("d18516c1963802e92cb9e425c0b75fd6") ); executeTest("testComplexSelection--" + testfile, spec); @@ -31,7 +31,7 @@ public void testRepeatedLineSelection() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -sn A -sn B -sn C -B:variant,VCF3 " + testfile + " -NO_HEADER"), 1, - Arrays.asList("5ba7536a0819421b330350a160e4261a") + Arrays.asList("b74038779fe6485dbb8734ae48178356") ); executeTest("testRepeatedLineSelection--" + testfile, spec); @@ -44,7 +44,7 @@ public void testDiscordance() { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -disc myvar -L 20:1012700-1020000 -B:variant,VCF " + b37hapmapGenotypes + " -B:myvar,VCF " + testFile + " -o %s -NO_HEADER", 1, - Arrays.asList("97621ae8f29955eedfc4e0be3515fcb9") + Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e") ); executeTest("testDiscordance--" + testFile, spec); @@ -57,7 +57,7 @@ public void testConcordance() { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -conc hapmap -L 20:1012700-1020000 -B:hapmap,VCF " + b37hapmapGenotypes + " -B:variant,VCF " + testFile + " -o %s -NO_HEADER", 1, - Arrays.asList("a0ae016fdffcbe7bfb99fd3dbc311407") + Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a") ); executeTest("testConcordance--" + testFile, spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java index cf0673ee69..d7efe42120 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java @@ -60,7 +60,7 @@ public void testSimpleVCFStreaming() throws IOException { " --NO_HEADER" + " -o %s", 1, - Arrays.asList("debbbf3e661b6857cc8d99ff7635bb1d") + Arrays.asList("658f580f7a294fd334bd897102616fed") ); executeTest("testSimpleVCFStreaming", spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java old mode 100644 new mode 100755 index 72647c8e15..1db7123537 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java @@ -44,7 +44,7 @@ private String variantsToTableCmd(String moreArgs) { @Test(enabled = true) public void testComplexVariantsToTable() { WalkerTestSpec spec = new WalkerTestSpec(variantsToTableCmd(" -AMD"), - Arrays.asList("b2a3712c1bfad8f1383ffada8b5017ba")); + Arrays.asList("e8f771995127b727fb433da91dd4ee98")); executeTest("testComplexVariantsToTable", spec).getFirst(); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java index 64d0db14b5..8c96c1e116 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java @@ -20,7 +20,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testVariantsToVCFUsingGeliInput() { List md5 = new ArrayList(); - md5.add("bd15d98adc76b5798e3bbeff3f936feb"); + md5.add("4accae035d271b35ee2ec58f403c68c6"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + @@ -38,7 +38,7 @@ public void testVariantsToVCFUsingGeliInput() { @Test public void testGenotypesToVCFUsingGeliInput() { List md5 = new ArrayList(); - md5.add("acd15d3f85bff5b545bc353e0e23cc6e"); + md5.add("71e8c98d7c3a73b6287ecc339086fe03"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + @@ -56,7 +56,7 @@ public void testGenotypesToVCFUsingGeliInput() { @Test public void testGenotypesToVCFUsingHapMapInput() { List md5 = new ArrayList(); - md5.add("6f34528569f8cf5941cb365fa77288c1"); + md5.add("f343085305e80c7a2493422e4eaad983"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + @@ -73,7 +73,7 @@ public void testGenotypesToVCFUsingHapMapInput() { @Test public void testGenotypesToVCFUsingVCFInput() { List md5 = new ArrayList(); - md5.add("d8316fc1b9d8e954a58940354119a32e"); + md5.add("86f02e2e764ba35854cff2aa05a1fdd8"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + diff --git a/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java b/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java index aa6303a6fd..77db34cbcf 100644 --- a/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java @@ -34,7 +34,6 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.jna.lsf.v7_0_6.LibBat.*; -import javax.jws.soap.SOAPBinding; import java.io.File; /** @@ -55,25 +54,25 @@ public void testClusterName() { @Test public void testReadConfEnv() { - LibLsf.config_param[] unitsParam = (LibLsf.config_param[]) new LibLsf.config_param().toArray(4); + LibLsf.config_param[] configParams = (LibLsf.config_param[]) new LibLsf.config_param().toArray(4); - unitsParam[0].paramName = "LSF_UNIT_FOR_LIMITS"; - unitsParam[1].paramName = "LSF_CONFDIR"; - unitsParam[2].paramName = "MADE_UP_PARAMETER"; + configParams[0].paramName = "LSF_UNIT_FOR_LIMITS"; + configParams[1].paramName = "LSF_CONFDIR"; + configParams[2].paramName = "MADE_UP_PARAMETER"; - Structure.autoWrite(unitsParam); + Structure.autoWrite(configParams); - if (LibLsf.ls_readconfenv(unitsParam[0], null) != 0) { + if (LibLsf.ls_readconfenv(configParams[0], null) != 0) { Assert.fail(LibLsf.ls_sysmsg()); } - Structure.autoRead(unitsParam); + Structure.autoRead(configParams); - System.out.println("LSF_UNIT_FOR_LIMITS: " + unitsParam[0].paramValue); - Assert.assertNotNull(unitsParam[1].paramValue); - Assert.assertNull(unitsParam[2].paramValue); - Assert.assertNull(unitsParam[3].paramName); - Assert.assertNull(unitsParam[3].paramValue); + System.out.println("LSF_UNIT_FOR_LIMITS: " + configParams[0].paramValue); + Assert.assertNotNull(configParams[1].paramValue); + Assert.assertNull(configParams[2].paramValue); + Assert.assertNull(configParams[3].paramName); + Assert.assertNull(configParams[3].paramValue); } @Test diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java index 2f6b589f46..68a2ecf8d9 100755 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java @@ -4,6 +4,7 @@ import org.broad.tribble.index.*; import org.broad.tribble.iterators.CloseableTribbleIterator; import org.broad.tribble.source.BasicFeatureSource; +import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.testng.Assert; import org.testng.annotations.Test; @@ -75,7 +76,7 @@ public void testOnTheFlyIndexing1() throws IOException { // test that the input index is the same as the one created from the identical input file // test that the dynamic index is the same as the output index, which is equal to the input index - Assert.assertTrue(IndexFactory.onDiskIndexEqualToNewlyCreatedIndex(outputFile, outputFileIndex, new VCFCodec())); + WalkerTest.assertOnDiskIndexEqualToNewlyCreatedIndex(outputFileIndex, "unittest", outputFile); } } } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java new file mode 100644 index 0000000000..32ff25c7bb --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -0,0 +1,28 @@ +package org.broadinstitute.sting.utils.codecs.vcf; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.Arrays; +import java.util.List; + +public class VCFIntegrationTest extends WalkerTest { + + @Test + public void testReadingAndWritingWitHNoChanges() { + + String md5ofInputVCF = "a990ba187a69ca44cb9bc2bb44d00447"; + String testVCF = validationDataLocation + "vcf4.1.example.vcf"; + + String baseCommand = "-R " + b37KGReference + " -NO_HEADER -o %s "; + + String test1 = baseCommand + "-T VariantAnnotator -BTI variant -B:variant,vcf " + testVCF; + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(md5ofInputVCF)); + List result = executeTest("Test Variant Annotator with no changes", spec1).getFirst(); + + String test2 = baseCommand + "-T VariantsToVCF -B:variant,vcf " + result.get(0).getAbsolutePath(); + WalkerTestSpec spec2 = new WalkerTestSpec(test2, 1, Arrays.asList(md5ofInputVCF)); + executeTest("Test Variants To VCF from new output", spec2); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java index c4ca6a551e..14e63191dd 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java @@ -2,15 +2,16 @@ import org.broad.tribble.readers.AsciiLineReader; import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.testng.annotations.Test; -import java.io.File; -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringBufferInputStream; +import java.io.*; +import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; /** * Created by IntelliJ IDEA. @@ -40,6 +41,52 @@ public void testVCF4ToVCF4_alternate() { checkMD5ofHeaderFile(header, "ad8c4cf85e868b0261ab49ee2c613088"); } + /** + * a little utility function for all tests to md5sum a file + * Shameless taken from: + * + * http://www.javalobby.org/java/forums/t84420.html + * + * @param file the file + * @return a string + */ + private static String md5SumFile(File file) { + MessageDigest digest; + try { + digest = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + throw new ReviewedStingException("Unable to find MD5 digest"); + } + InputStream is; + try { + is = new FileInputStream(file); + } catch (FileNotFoundException e) { + throw new ReviewedStingException("Unable to open file " + file); + } + byte[] buffer = new byte[8192]; + int read; + try { + while ((read = is.read(buffer)) > 0) { + digest.update(buffer, 0, read); + } + byte[] md5sum = digest.digest(); + BigInteger bigInt = new BigInteger(1, md5sum); + return bigInt.toString(16); + + } + catch (IOException e) { + throw new ReviewedStingException("Unable to process file for MD5", e); + } + finally { + try { + is.close(); + } + catch (IOException e) { + throw new ReviewedStingException("Unable to close input stream for MD5 calculation", e); + } + } + } + private void checkMD5ofHeaderFile(VCFHeader header, String md5sum) { File myTempFile = null; PrintWriter pw = null; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index 5d42f8d0c9..a344817a0f 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -49,7 +49,7 @@ public void testToVCF() { WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -NO_HEADER -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s", 2, // just one output file - Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "e6673737acbb6bfabfcd92c4b2268241")); + Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "ff91731213fd0bbdc200ab6fd1c93e63")); executeTest("testToVCF", spec); } diff --git a/public/packages/PicardPrivate.xml b/public/packages/PicardPrivate.xml index 110b41d3f2..581c479792 100644 --- a/public/packages/PicardPrivate.xml +++ b/public/packages/PicardPrivate.xml @@ -7,6 +7,8 @@ + + diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index b64ba8952e..1f4f799939 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -201,9 +201,6 @@ class DataProcessingPipeline extends QScript { } - - - /**************************************************************************** * Main script ****************************************************************************/ diff --git a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala index 71970a36bf..05c1a1775a 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala @@ -45,7 +45,7 @@ class QSettings { var jobPriority: Option[Int] = None @Argument(fullName="default_memory_limit", shortName="memLimit", doc="Default memory limit for jobs, in gigabytes.", required=false) - var memoryLimit: Option[Int] = None + var memoryLimit: Option[Double] = None @Argument(fullName="run_directory", shortName="runDir", doc="Root directory to run functions from.", required=false) var runDirectory = new File(".") diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/CommandLineJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/CommandLineJobRunner.scala index 2fbfab5ec4..2e31081368 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/CommandLineJobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/CommandLineJobRunner.scala @@ -33,12 +33,29 @@ import org.broadinstitute.sting.queue.util.{Logging, IOUtils} */ trait CommandLineJobRunner extends JobRunner[CommandLineFunction] with Logging { + /** The string representation of the identifier of the running job. */ + def jobIdString: String = null + /** A generated exec shell script. */ protected var jobScript: File = _ /** Which directory to use for the job status files. */ protected def jobStatusDir = function.jobTempDir + /** Amount of time a job can go without status before giving up. */ + private val unknownStatusMaxSeconds = 5 * 60 + + /** Last known status */ + protected var lastStatus: RunnerStatus.Value = _ + + /** The last time the status was updated */ + protected var lastStatusUpdate: Long = _ + + final override def status = this.lastStatus + + def residentRequestMB: Option[Double] = function.memoryLimit.map(_ * 1024) + def residentLimitMB: Option[Double] = residentRequestMB.map( _ * 1.2 ) + override def init() { super.init() var exec = new StringBuilder @@ -53,7 +70,21 @@ trait CommandLineJobRunner extends JobRunner[CommandLineFunction] with Logging { } exec.append(function.commandLine) - this.jobScript = IOUtils.writeTempFile(exec.toString, ".exec", "", jobStatusDir) + this.jobScript = IOUtils.writeTempFile(exec.toString(), ".exec", "", jobStatusDir) + } + + protected def updateStatus(updatedStatus: RunnerStatus.Value) { + this.lastStatus = updatedStatus + this.lastStatusUpdate = System.currentTimeMillis + } + + override def checkUnknownStatus() { + val unknownStatusMillis = (System.currentTimeMillis - lastStatusUpdate) + if (unknownStatusMillis > (unknownStatusMaxSeconds * 1000L)) { + // Unknown status has been returned for a while now. + updateStatus(RunnerStatus.FAILED) + logger.error("Unable to read status for %0.2f minutes: job id %d: %s".format(unknownStatusMillis/(60 * 1000D), jobIdString, function.description)) + } } override def cleanup() { diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/JobManager.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/JobManager.scala index d2be4939a8..30187f7e23 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/JobManager.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/JobManager.scala @@ -44,9 +44,9 @@ trait JobManager[TFunction <: QFunction, TRunner <: JobRunner[TFunction]] { /** * Updates the status on a list of functions. * @param runners Runners to update. + * @return runners which were updated. */ - def updateStatus(runners: Set[TRunner]) { - } + def updateStatus(runners: Set[TRunner]): Set[TRunner] = Set.empty /** * Stops a list of functions. diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala index 4b4d449886..de5fbde054 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/JobRunner.scala @@ -52,6 +52,11 @@ trait JobRunner[TFunction <: QFunction] { */ def status: RunnerStatus.Value + /** + * Checks if the status has been unknown for an extended period of time. + */ + def checkUnknownStatus() {} + /** * Returns the function to be run. */ diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala index bfcc4d48c9..a52e9c5618 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala @@ -138,30 +138,32 @@ class QGraph extends Logging { validate() if (running && numMissingValues == 0) { - logger.info("Generating scatter gather jobs.") val scatterGathers = jobGraph.edgeSet.filter(edge => scatterGatherable(edge)) + if (!scatterGathers.isEmpty) { + logger.info("Generating scatter gather jobs.") + + var addedFunctions = List.empty[QFunction] + for (scatterGather <- scatterGathers) { + val functions = scatterGather.asInstanceOf[FunctionEdge] + .function.asInstanceOf[ScatterGatherableFunction] + .generateFunctions() + addedFunctions ++= functions + } - var addedFunctions = List.empty[QFunction] - for (scatterGather <- scatterGathers) { - val functions = scatterGather.asInstanceOf[FunctionEdge] - .function.asInstanceOf[ScatterGatherableFunction] - .generateFunctions() - addedFunctions ++= functions - } - - logger.info("Removing original jobs.") - this.jobGraph.removeAllEdges(scatterGathers) - prune() + logger.info("Removing original jobs.") + this.jobGraph.removeAllEdges(scatterGathers) + prune() - logger.info("Adding scatter gather jobs.") - addedFunctions.foreach(function => if (running) this.add(function)) + logger.info("Adding scatter gather jobs.") + addedFunctions.foreach(function => if (running) this.add(function)) - logger.info("Regenerating graph.") - fill - val scatterGatherDotFile = if (settings.expandedDotFile != null) settings.expandedDotFile else settings.dotFile - if (scatterGatherDotFile != null) - renderToDot(scatterGatherDotFile) - validate() + logger.info("Regenerating graph.") + fill + val scatterGatherDotFile = if (settings.expandedDotFile != null) settings.expandedDotFile else settings.dotFile + if (scatterGatherDotFile != null) + renderToDot(scatterGatherDotFile) + validate() + } } } @@ -1003,7 +1005,10 @@ class QGraph extends Logging { .asInstanceOf[Set[JobRunner[QFunction]]] if (managerRunners.size > 0) try { - manager.updateStatus(managerRunners) + val updatedRunners = manager.updateStatus(managerRunners) + for (runner <- managerRunners.diff(updatedRunners)) { + runner.checkUnknownStatus() + } } catch { case e => /* ignore */ } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala index 82edf6221c..8c639b5bb0 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala @@ -40,12 +40,7 @@ class GridEngineJobRunner(val function: CommandLineFunction) extends CommandLine /** Job Id of the currently executing job. */ private var jobId: String = _ - - /** Last known status */ - private var lastStatus: RunnerStatus.Value = _ - - /** The last time the status was updated */ - protected var lastStatusUpdate: Long = _ + override def jobIdString = jobId def start() { GridEngineJobRunner.gridEngineSession.synchronized { @@ -82,11 +77,14 @@ class GridEngineJobRunner(val function: CommandLineFunction) extends CommandLine nativeSpecString += " -q " + function.jobQueue } - // If the memory limit is set (GB) specify the memory limit - if (function.memoryLimit.isDefined) { - val memAvl: String = function.memoryLimit.get + "G" - val memMax: String = (function.memoryLimit.get * 1.2 * 1024).ceil.toInt + "M" - nativeSpecString += " -l mem_free=" + memAvl + ",h_rss=" + memMax + // If the resident set size is requested pass on the memory request + if (residentRequestMB.isDefined) { + nativeSpecString += " -l mem_free=%dM".format(residentRequestMB.get.ceil.toInt) + } + + // If the resident set size limit is defined specify the memory limit + if (residentLimitMB.isDefined) { + nativeSpecString += " -l h_rss=%dM".format(residentLimitMB.get.ceil.toInt) } // If the priority is set (user specified Int) specify the priority @@ -121,21 +119,11 @@ class GridEngineJobRunner(val function: CommandLineFunction) extends CommandLine logger.info("Submitted Grid Engine job id: " + jobId) } } - - def status = this.lastStatus - - private def updateStatus(updatedStatus: RunnerStatus.Value) { - this.lastStatus = updatedStatus - this.lastStatusUpdate = System.currentTimeMillis - } } object GridEngineJobRunner extends Logging { private val gridEngineSession = SessionFactory.getFactory.getSession - /** Amount of time a job can go without status before giving up. */ - private val unknownStatusMaxSeconds = 5 * 60 - initGridEngine() /** @@ -156,16 +144,14 @@ object GridEngineJobRunner extends Logging { /** * Updates the status of a list of jobs. * @param runners Runners to update. + * @return runners which were updated. */ - def updateStatus(runners: Set[GridEngineJobRunner]) { + def updateStatus(runners: Set[GridEngineJobRunner]) = { var updatedRunners = Set.empty[GridEngineJobRunner] gridEngineSession.synchronized { runners.foreach(runner => if (updateRunnerStatus(runner)) {updatedRunners += runner}) } - - for (runner <- runners.diff(updatedRunners)) { - checkUnknownStatus(runner) - } + updatedRunners } /** @@ -219,20 +205,11 @@ object GridEngineJobRunner extends Logging { logger.warn("Unable to determine status of Grid Engine job id " + runner.jobId, de) } - Option(returnStatus) match { - case Some(returnStatus) => - runner.updateStatus(returnStatus) - return true - case None => return false - } - } - - private def checkUnknownStatus(runner: GridEngineJobRunner) { - val unknownStatusSeconds = (System.currentTimeMillis - runner.lastStatusUpdate) - if (unknownStatusSeconds > (unknownStatusMaxSeconds * 1000L)) { - // Unknown status has been returned for a while now. - runner.updateStatus(RunnerStatus.FAILED) - logger.error("Unable to read Grid Engine status for %d minutes: job id %d: %s".format(unknownStatusSeconds/60, runner.jobId, runner.function.description)) + if (returnStatus != null) { + runner.updateStatus(returnStatus) + true + } else { + false } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobManager.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobManager.scala index c0fff91255..23ddab619a 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobManager.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobManager.scala @@ -34,6 +34,6 @@ class Lsf706JobManager extends CommandLineJobManager[Lsf706JobRunner] { def runnerType = classOf[Lsf706JobRunner] def create(function: CommandLineFunction) = new Lsf706JobRunner(function) - override def updateStatus(runners: Set[Lsf706JobRunner]) { Lsf706JobRunner.updateStatus(runners) } + override def updateStatus(runners: Set[Lsf706JobRunner]) = { Lsf706JobRunner.updateStatus(runners) } override def tryStop(runners: Set[Lsf706JobRunner]) { Lsf706JobRunner.tryStop(runners) } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala index 57d133dfe5..46dd083325 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala @@ -32,8 +32,8 @@ import org.broadinstitute.sting.utils.Utils import org.broadinstitute.sting.jna.clibrary.LibC import org.broadinstitute.sting.jna.lsf.v7_0_6.LibBat.{submitReply, submit} import com.sun.jna.ptr.IntByReference -import com.sun.jna.{StringArray, NativeLong} import org.broadinstitute.sting.queue.engine.{RunnerStatus, CommandLineJobRunner} +import com.sun.jna.{Structure, StringArray, NativeLong} /** * Runs jobs on an LSF compute cluster. @@ -45,12 +45,7 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR /** Job Id of the currently executing job. */ private var jobId = -1L - - /** Last known status */ - private var lastStatus: RunnerStatus.Value = _ - - /** The last time the status was updated */ - protected var lastStatusUpdate: Long = _ + override def jobIdString = jobId.toString /** * Dispatches the function on the LSF cluster. @@ -85,12 +80,19 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR request.options |= LibBat.SUB_QUEUE } - // If the memory limit is set (GB) specify the memory limit - if (function.memoryLimit.isDefined) { - request.resReq = "rusage[mem=" + function.memoryLimit.get + "]" + // If the resident set size is requested pass on the memory request + if (residentRequestMB.isDefined) { + val memInUnits = Lsf706JobRunner.convertUnits(residentRequestMB.get) + request.resReq = "select[mem>%1$d] rusage[mem=%1$d]".format(memInUnits) request.options |= LibBat.SUB_RES_REQ } + // If the resident set size limit is defined specify the memory limit + if (residentLimitMB.isDefined) { + val memInUnits = Lsf706JobRunner.convertUnits(residentLimitMB.get) + request.rLimits(LibLsf.LSF_RLIMIT_RSS) = memInUnits + } + // If the priority is set (user specified Int) specify the priority if (function.jobPriority.isDefined) { request.userPriority = function.jobPriority.get @@ -122,11 +124,13 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR } } - def status = this.lastStatus - - private def updateStatus(updatedStatus: RunnerStatus.Value) { - this.lastStatus = updatedStatus - this.lastStatusUpdate = System.currentTimeMillis + override def checkUnknownStatus() { + // TODO: Need a second pass through either of the two archive logs using lsb_geteventrecbyline() for disappeared jobs. + // Can also tell if we wake up and the last time we saw status was greater than lsb_parameterinfo().cleanPeriod + // LSB_SHAREDIR/cluster_name/logdir/lsb.acct (man bacct) + // LSB_SHAREDIR/cluster_name/logdir/lsb.events (man bhist) + logger.debug("Job Id %s status / exitStatus / exitInfo: ??? / ??? / ???".format(jobId)) + super.checkUnknownStatus() } } @@ -137,17 +141,8 @@ object Lsf706JobRunner extends Logging { /** Number of seconds for a non-normal exit status before we give up on expecting LSF to retry the function. */ private val retryExpiredSeconds = 5 * 60 - /** Amount of time a job can go without status before giving up. */ - private val unknownStatusMaxSeconds = 5 * 60 - initLsf() - /** The name of the default queue. */ - private var defaultQueue: String = _ - - /** The run limits for each queue. */ - private var queueRlimitRun = Map.empty[String,Int] - /** * Initialize the Lsf library. */ @@ -161,8 +156,9 @@ object Lsf706JobRunner extends Logging { /** * Bulk updates job statuses. * @param runners Runners to update. + * @return runners which were updated. */ - def updateStatus(runners: Set[Lsf706JobRunner]) { + def updateStatus(runners: Set[Lsf706JobRunner]) = { var updatedRunners = Set.empty[Lsf706JobRunner] Lsf706JobRunner.lsfLibLock.synchronized { @@ -192,8 +188,44 @@ object Lsf706JobRunner extends Logging { } } - for (runner <- runners.diff(updatedRunners)) { - checkUnknownStatus(runner) + updatedRunners + } + + private def updateRunnerStatus(runner: Lsf706JobRunner, jobInfo: LibBat.jobInfoEnt) { + val jobStatus = jobInfo.status + val exitStatus = jobInfo.exitStatus + val exitInfo = jobInfo.exitInfo + val endTime = jobInfo.endTime + + logger.debug("Job Id %s status / exitStatus / exitInfo: 0x%02x / 0x%02x / 0x%02x".format(runner.jobId, jobStatus, exitStatus, exitInfo)) + + runner.updateStatus( + if (Utils.isFlagSet(jobStatus, LibBat.JOB_STAT_DONE)) { + // Done successfully. + RunnerStatus.DONE + } else if (Utils.isFlagSet(jobStatus, LibBat.JOB_STAT_EXIT) && !willRetry(exitInfo, endTime)) { + // Exited function that (probably) won't be retried. + RunnerStatus.FAILED + } else { + // Note that we still saw the job in the system. + RunnerStatus.RUNNING + } + ) + } + + /** + * Returns true if LSF is expected to retry running the function. + * @param exitInfo The reason the job exited. + * @param endTime THe time the job exited. + * @return true if LSF is expected to retry running the function. + */ + private def willRetry(exitInfo: Int, endTime: NativeLong) = { + exitInfo match { + case LibBat.EXIT_NORMAL => false + case _ => { + val seconds = LibC.difftime(LibC.time(null), endTime) + (seconds <= retryExpiredSeconds) + } } } @@ -217,6 +249,19 @@ object Lsf706JobRunner extends Logging { } } + /** The name of the default queue. */ + private lazy val defaultQueue: String = { + lsfLibLock.synchronized { + val numQueues = new IntByReference(1) + val queueInfo = LibBat.lsb_queueinfo(null, numQueues, null, null, 0) + if (queueInfo == null) + throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for the default queue")) + queueInfo.queue + } + } + + /** The run limits for each queue. */ + private var queueRlimitRun = Map.empty[String,Int] /** * Returns the run limit in seconds for the queue. @@ -224,89 +269,45 @@ object Lsf706JobRunner extends Logging { * @param queue Name of the queue or null for the default queue. * @return the run limit in seconds for the queue. */ - private def getRlimitRun(queue: String) = { + private def getRlimitRun(queueName: String) = { lsfLibLock.synchronized { - if (queue == null) { - if (defaultQueue != null) { - queueRlimitRun(defaultQueue) - } else { - // Get the info on the default queue. + val queue = if (queueName == null) defaultQueue else queueName + queueRlimitRun.get(queue) match { + case Some(limit) => limit + case None => + // Cache miss. Go get the run limits from LSF. + val queues = new StringArray(Array(queue)) val numQueues = new IntByReference(1) - val queueInfo = LibBat.lsb_queueinfo(null, numQueues, null, null, 0) + val queueInfo = LibBat.lsb_queueinfo(queues, numQueues, null, null, 0) if (queueInfo == null) - throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for the default queue")) - defaultQueue = queueInfo.queue + throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for queue: " + queue)) val limit = queueInfo.rLimits(LibLsf.LSF_RLIMIT_RUN) - queueRlimitRun += defaultQueue -> limit + queueRlimitRun += queue -> limit limit - } - } else { - queueRlimitRun.get(queue) match { - case Some(limit) => limit - case None => - // Cache miss. Go get the run limits from LSF. - val queues = new StringArray(Array[String](queue)) - val numQueues = new IntByReference(1) - val queueInfo = LibBat.lsb_queueinfo(queues, numQueues, null, null, 0) - if (queueInfo == null) - throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for queue: " + queue)) - val limit = queueInfo.rLimits(LibLsf.LSF_RLIMIT_RUN) - queueRlimitRun += queue -> limit - limit - } } } } - private def updateRunnerStatus(runner: Lsf706JobRunner, jobInfo: LibBat.jobInfoEnt) { - val jobStatus = jobInfo.status - val exitStatus = jobInfo.exitStatus - val exitInfo = jobInfo.exitInfo - val endTime = jobInfo.endTime - - logger.debug("Job Id %s status / exitStatus / exitInfo: 0x%02x / 0x%02x / 0x%02x".format(runner.jobId, jobStatus, exitStatus, exitInfo)) - - runner.updateStatus( - if (Utils.isFlagSet(jobStatus, LibBat.JOB_STAT_DONE)) { - // Done successfully. - RunnerStatus.DONE - } else if (Utils.isFlagSet(jobStatus, LibBat.JOB_STAT_EXIT) && !willRetry(exitInfo, endTime)) { - // Exited function that (probably) won't be retried. - RunnerStatus.FAILED - } else { - // Note that we still saw the job in the system. - RunnerStatus.RUNNING + private lazy val unitDivisor: Double = { + lsfLibLock.synchronized { + val unitsParam: Array[LibLsf.config_param] = new LibLsf.config_param().toArray(2).asInstanceOf[Array[LibLsf.config_param]] + unitsParam(0).paramName = "LSF_UNIT_FOR_LIMITS" + + Structure.autoWrite(unitsParam.asInstanceOf[Array[Structure]]) + if (LibLsf.ls_readconfenv(unitsParam(0), null) != 0) + throw new QException(LibBat.lsb_sperror("ls_readconfenv() failed")) + Structure.autoRead(unitsParam.asInstanceOf[Array[Structure]]) + + unitsParam(0).paramValue match { + case "MB" => 1D + case "GB" => 1024D + case "TB" => 1024D * 1024 + case "PB" => 1024D * 1024 * 1024 + case "EB" => 1024D * 1024 * 1024 * 1024 + case null => 1D } - ) - } - - private def checkUnknownStatus(runner: Lsf706JobRunner) { - // TODO: Need a second pass through either of the two archive logs using lsb_geteventrecbyline() for disappeared jobs. - // Can also tell if we wake up and the last time we saw status was greater than lsb_parameterinfo().cleanPeriod - // LSB_SHAREDIR/cluster_name/logdir/lsb.acct (man bacct) - // LSB_SHAREDIR/cluster_name/logdir/lsb.events (man bhist) - logger.debug("Job Id %s status / exitStatus / exitInfo: ??? / ??? / ???".format(runner.jobId)) - val unknownStatusSeconds = (System.currentTimeMillis - runner.lastStatusUpdate) - if (unknownStatusSeconds > (unknownStatusMaxSeconds * 1000L)) { - // Unknown status has been returned for a while now. - runner.updateStatus(RunnerStatus.FAILED) - logger.error("Unable to read LSF status for %d minutes: job id %d: %s".format(unknownStatusSeconds/60, runner.jobId, runner.function.description)) } } - /** - * Returns true if LSF is expected to retry running the function. - * @param exitInfo The reason the job exited. - * @param endTime THe time the job exited. - * @return true if LSF is expected to retry running the function. - */ - private def willRetry(exitInfo: Int, endTime: NativeLong) = { - exitInfo match { - case LibBat.EXIT_NORMAL => false - case _ => { - val seconds = LibC.difftime(LibC.time(null), endTime) - (seconds <= retryExpiredSeconds) - } - } - } + private def convertUnits(mb: Double) = (mb / unitDivisor).ceil.toInt } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala index 603511a309..128d8773c6 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala @@ -50,10 +50,10 @@ class ShellJobRunner(val function: CommandLineFunction) extends CommandLineJobRu // Allow advanced users to update the job. updateJobRun(job) - runStatus = RunnerStatus.RUNNING + updateStatus(RunnerStatus.RUNNING) job.run() - runStatus = RunnerStatus.DONE + updateStatus(RunnerStatus.FAILED) } - def status = runStatus + override def checkUnknownStatus() {} } diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala index 2b1abb2d03..c62fdcd7ce 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala @@ -9,7 +9,7 @@ trait CommandLineFunction extends QFunction with Logging { def commandLine: String /** Upper memory limit */ - var memoryLimit: Option[Int] = None + var memoryLimit: Option[Double] = None /** Job project to run the command */ var jobProject: String = _ @@ -56,7 +56,7 @@ trait CommandLineFunction extends QFunction with Logging { if (memoryLimit.isEmpty) memoryLimit = qSettings.memoryLimit - super.freezeFieldValues + super.freezeFieldValues() } /** diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala index 72445442ec..e8279f62bc 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala @@ -47,7 +47,7 @@ trait JavaCommandLineFunction extends CommandLineFunction { /** * Memory limit for the java executable, or if None will use the default memoryLimit. */ - var javaMemoryLimit: Option[Int] = None + var javaMemoryLimit: Option[Double] = None /** * Returns the java executable to run. @@ -61,8 +61,8 @@ trait JavaCommandLineFunction extends CommandLineFunction { null } - override def freezeFieldValues = { - super.freezeFieldValues + override def freezeFieldValues() { + super.freezeFieldValues() if (javaMemoryLimit.isEmpty && memoryLimit.isDefined) javaMemoryLimit = memoryLimit @@ -72,7 +72,7 @@ trait JavaCommandLineFunction extends CommandLineFunction { } def javaOpts = "%s -Djava.io.tmpdir=%s" - .format(optional(" -Xmx", javaMemoryLimit, "g"), jobTempDir) + .format(optional(" -Xmx", javaMemoryLimit.map(gb => (gb * 1024).ceil.toInt), "m"), jobTempDir) def commandLine = "java%s %s" .format(javaOpts, javaExecutable) diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala index dc3cfd9d4c..c2c9561182 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala @@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.CommandLineProgram import java.util.Date import java.text.SimpleDateFormat import org.broadinstitute.sting.BaseTest +import org.broadinstitute.sting.MD5DB import org.broadinstitute.sting.queue.QCommandLine import org.broadinstitute.sting.queue.util.{Logging, ProcessController} import java.io.{FileNotFoundException, File} @@ -105,7 +106,7 @@ object PipelineTest extends BaseTest with Logging { private def assertMatchingMD5s(name: String, fileMD5s: Traversable[(File, String)], parameterize: Boolean) { var failed = 0 for ((file, expectedMD5) <- fileMD5s) { - val calculatedMD5 = BaseTest.testFileMD5(name, file, expectedMD5, parameterize) + val calculatedMD5 = MD5DB.testFileMD5(name, file, expectedMD5, parameterize) if (!parameterize && expectedMD5 != "" && expectedMD5 != calculatedMD5) failed += 1 } diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala index 0871e769bd..7c76823da5 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala @@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} class HelloWorldPipelineTest { @Test - def testHelloWorld { + def testHelloWorld() { val spec = new PipelineTestSpec spec.name = "HelloWorld" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" @@ -37,15 +37,23 @@ class HelloWorldPipelineTest { } @Test - def testHelloWorldWithPrefix { + def testHelloWorldWithPrefix() { val spec = new PipelineTestSpec spec.name = "HelloWorldWithPrefix" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala -jobPrefix HelloWorld" PipelineTest.executeTest(spec) } + @Test + def testHelloWorldWithMemoryLimit() { + val spec = new PipelineTestSpec + spec.name = "HelloWorldWithPrefix" + spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala -memLimit 1.25" + PipelineTest.executeTest(spec) + } + @Test(enabled=false) - def testHelloWorldWithPriority { + def testHelloWorldWithPriority() { val spec = new PipelineTestSpec spec.name = "HelloWorldWithPriority" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala -jobPriority 100" diff --git a/public/testdata/diffTestMaster.vcf b/public/testdata/diffTestMaster.vcf new file mode 100644 index 0000000000..549f543457 --- /dev/null +++ b/public/testdata/diffTestMaster.vcf @@ -0,0 +1,11 @@ +##fileformat=VCFv4.0 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03 +chr1 2979 rs62635286 T G 83.67 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:31,32:9:-33.61,-2.71,-0.00:27.09 +chr1 2981 rs62028691 A G 14.69 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:31,33:9:-32.12,-2.71,-0.00:27.08 +chr1 4536 rs11582131 G C 0.18 PASS AC=1;AF=0.50;AN=2 GT:AD:DP:GL:GQ 0/1:42,33:16:-41.67,-4.82,-26.29:99 +chr1 4562 rs11490464 C G 0.14 PASS AC=1;AF=0.50;AN=2 GT:AD:DP:GL:GQ 0/1:26,30:9:-19.64,-2.72,-14.87:99 +chr1 4770 rs6682375 A G 0.32 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:9,111:84:-306.27,-28.58,-3.46:99 +chr1 4793 rs6682385 A G 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:4,115:109:-350.74,-32.88,-0.10:99 +chr1 5074 rs11586607 T G 0.01 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:29,97:39:-130.41,-11.75,-3.82:79.31 +chr1 5137 rs62636497 A T 140.49 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:0,74:39:-148.99,-11.75,-0.01:99 diff --git a/public/testdata/diffTestTest.vcf b/public/testdata/diffTestTest.vcf new file mode 100644 index 0000000000..8699ab253b --- /dev/null +++ b/public/testdata/diffTestTest.vcf @@ -0,0 +1,11 @@ +##fileformat=VCFv4.0 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 +chr1 2646 rs62635284 G A 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:53,75:3:-12.40,-0.90,-0.00:9.03 +chr1 2979 rs62635286 T G 83.67 CHANGED_FILTER AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:31,32:9:-33.61,-2.71,-0.00:27.09 +chr1 2981 rs62028691 A G 14.69 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:31,33:9:-32.12,-2.71,-0.00:27.08 +chr1 4536 rs11582131 G C 0.18 PASS AC=2;AF=0.50;AN=2 GT:AD:DP:GL:GQ 0/1:42,33:16:-41.67,-4.82,-26.29:99 +chr1 4562 rs11490464 C G 0.14 PASS AC=1;AF=0.50;AN=2 GT:AD:DP:GL:GQ 1/1:26,30:9:-19.64,-2.72,-14.87:99 +chr1 4770 rs6682375 A G 0.32 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 0/1:9,111:84:-306.27,-28.58,-3.46:99 +chr1 4793 rs6682385 A G 0.15 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:4,114:109:-350.74,-32.88,-0.10:99 +chr1 5074 rs11586607 T G 0.01 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:29,97:39:-130.41,-11.74,-3.82:79.31 +chr1 5137 rs62636497 A T 140.49 PASS AC=2;AF=1.00;AN=2 GT:AD:DP:GL:GQ 1/1:0,74:39:-148.99,-11.75,-0.01:9 diff --git a/public/testdata/exampleBAM.simple.bai b/public/testdata/exampleBAM.simple.bai new file mode 100644 index 0000000000..2d8268b1d9 Binary files /dev/null and b/public/testdata/exampleBAM.simple.bai differ diff --git a/public/testdata/exampleBAM.simple.bam b/public/testdata/exampleBAM.simple.bam new file mode 100644 index 0000000000..c3eb7ae7bf Binary files /dev/null and b/public/testdata/exampleBAM.simple.bam differ diff --git a/settings/ivysettings.xml b/settings/ivysettings.xml index b77414df93..2b4a081d4b 100644 --- a/settings/ivysettings.xml +++ b/settings/ivysettings.xml @@ -25,6 +25,7 @@ + diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1941.jar b/settings/repository/edu.mit.broad/picard-private-parts-1941.jar deleted file mode 100644 index 760db5cb8e..0000000000 Binary files a/settings/repository/edu.mit.broad/picard-private-parts-1941.jar and /dev/null differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1959.jar b/settings/repository/edu.mit.broad/picard-private-parts-1959.jar new file mode 100644 index 0000000000..ae11e636b1 Binary files /dev/null and b/settings/repository/edu.mit.broad/picard-private-parts-1959.jar differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1941.xml b/settings/repository/edu.mit.broad/picard-private-parts-1959.xml similarity index 58% rename from settings/repository/edu.mit.broad/picard-private-parts-1941.xml rename to settings/repository/edu.mit.broad/picard-private-parts-1959.xml index 07d51ae536..e7c7e3a21c 100644 --- a/settings/repository/edu.mit.broad/picard-private-parts-1941.xml +++ b/settings/repository/edu.mit.broad/picard-private-parts-1959.xml @@ -1,3 +1,3 @@ - + diff --git a/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2-sources.jar b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2-sources.jar new file mode 100644 index 0000000000..dc77c7d333 Binary files /dev/null and b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2-sources.jar differ diff --git a/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.jar b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.jar new file mode 100644 index 0000000000..f267be4b58 Binary files /dev/null and b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.jar differ diff --git a/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.xml b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.xml new file mode 100644 index 0000000000..c6a8da0528 --- /dev/null +++ b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/picard-1.47.869.xml b/settings/repository/net.sf/picard-1.47.869.xml deleted file mode 100644 index 86d07d4fa1..0000000000 --- a/settings/repository/net.sf/picard-1.47.869.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/picard-1.47.869.jar b/settings/repository/net.sf/picard-1.49.895.jar similarity index 87% rename from settings/repository/net.sf/picard-1.47.869.jar rename to settings/repository/net.sf/picard-1.49.895.jar index d277fd2177..3ee1f20905 100644 Binary files a/settings/repository/net.sf/picard-1.47.869.jar and b/settings/repository/net.sf/picard-1.49.895.jar differ diff --git a/settings/repository/net.sf/picard-1.49.895.xml b/settings/repository/net.sf/picard-1.49.895.xml new file mode 100644 index 0000000000..52d4900c5b --- /dev/null +++ b/settings/repository/net.sf/picard-1.49.895.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/sam-1.47.869.xml b/settings/repository/net.sf/sam-1.47.869.xml deleted file mode 100644 index 1b76fe2f9d..0000000000 --- a/settings/repository/net.sf/sam-1.47.869.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/sam-1.47.869.jar b/settings/repository/net.sf/sam-1.49.895.jar similarity index 91% rename from settings/repository/net.sf/sam-1.47.869.jar rename to settings/repository/net.sf/sam-1.49.895.jar index 933b9cfd6f..c55ab0b727 100644 Binary files a/settings/repository/net.sf/sam-1.47.869.jar and b/settings/repository/net.sf/sam-1.49.895.jar differ diff --git a/settings/repository/net.sf/sam-1.49.895.xml b/settings/repository/net.sf/sam-1.49.895.xml new file mode 100644 index 0000000000..0436ce8812 --- /dev/null +++ b/settings/repository/net.sf/sam-1.49.895.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/org.broad/tribble-3.jar b/settings/repository/org.broad/tribble-16.jar similarity index 67% rename from settings/repository/org.broad/tribble-3.jar rename to settings/repository/org.broad/tribble-16.jar index f0ab44a05f..331f28ec34 100644 Binary files a/settings/repository/org.broad/tribble-3.jar and b/settings/repository/org.broad/tribble-16.jar differ diff --git a/settings/repository/org.broad/tribble-3.xml b/settings/repository/org.broad/tribble-16.xml similarity index 57% rename from settings/repository/org.broad/tribble-3.xml rename to settings/repository/org.broad/tribble-16.xml index c353583313..e23eec339a 100644 --- a/settings/repository/org.broad/tribble-3.xml +++ b/settings/repository/org.broad/tribble-16.xml @@ -1,4 +1,4 @@ -