diff --git a/README.md b/README.md index 1378f46..dde4989 100644 --- a/README.md +++ b/README.md @@ -270,7 +270,7 @@ Please note that the trimming for `--max_len` limitation will be applied at the 4, quality pruning at 5' (--cut_front) 5, quality pruning by sliding window (--cut_right) 6, quality pruning at 3' (--cut_tail) -7, trim polyG (--trim_poly_g, enabled by default for NovaSeq/NextSeq data) +7, trim polyG (--trim_poly_g, enabled by default for NovaSeq/NextSeq/iSeq data) 8, trim adapter by overlap analysis (enabled by default for PE data) 9, trim adapter by adapter sequence (--adapter_sequence, --adapter_sequence_r2. For PE data, this step is skipped if last step succeeded) 10, trim polyX (--trim_poly_x) @@ -278,7 +278,7 @@ Please note that the trimming for `--max_len` limitation will be applied at the ``` # polyG tail trimming -For Illumina NextSeq/NovaSeq data, `polyG` can happen in read tails since `G` means no signal in the Illumina two-color systems. `fastp` can detect the polyG in read tails and trim them. This feature is enabled for NextSeq/NovaSeq data by default, and you can specify `-g` or `--trim_poly_g` to enable it for any data, or specify `-G` or `--disable_trim_poly_g` to disable it. NextSeq/NovaSeq data is detected by the machine ID in the FASTQ records.   +For Illumina NextSeq/NovaSeq/iSeq data, `polyG` can happen in read tails since `G` means no signal in the Illumina one-color and two-color systems. `fastp` can detect the polyG in read tails and trim them. This feature is enabled for NextSeq/NovaSeq/iSeq data by default, and you can specify `-g` or `--trim_poly_g` to enable it for any data, or specify `-G` or `--disable_trim_poly_g` to disable it. NextSeq/NovaSeq/iSeq data is detected by the machine ID in the FASTQ records.   A minimum length can be set with `<poly_g_min_len>` for `fastp` to detect polyG. This value is 10 by default. 
diff --git a/src/evaluator.cpp b/src/evaluator.cpp index 48bca94..063931d 100644 --- a/src/evaluator.cpp +++ b/src/evaluator.cpp @@ -13,7 +13,7 @@ Evaluator::Evaluator(Options* opt){ Evaluator::~Evaluator(){ } -bool Evaluator::isTwoColorSystem() { +bool Evaluator::isOneOrTwoColorSystem() { FastqReader reader(mOptions->in1); Read* r = reader.read(); @@ -21,8 +21,9 @@ bool Evaluator::isTwoColorSystem() { if(!r) return false; - // NEXTSEQ500, NEXTSEQ 550/550DX, NOVASEQ - if(starts_with(r->mName, "@NS") || starts_with(r->mName, "@NB") || starts_with(r->mName, "@NDX") || starts_with(r->mName, "@A0")) { + // NEXTSEQ500, NEXTSEQ 550/550DX, NOVASEQ [2-color-system] + // iSeq 100 [1-color-system] + if(starts_with(r->mName, "@NS") || starts_with(r->mName, "@NB") || starts_with(r->mName, "@NDX") || starts_with(r->mName, "@A0") || starts_with(r->mName, "@FS")) { delete r; return true; } diff --git a/src/evaluator.h b/src/evaluator.h index 540489d..f80969a 100644 --- a/src/evaluator.h +++ b/src/evaluator.h @@ -18,7 +18,7 @@ class Evaluator{ void evaluateReadNum(long& readNum); string evalAdapterAndReadNumDepreciated(long& readNum); string evalAdapterAndReadNum(long& readNum, bool isR2); - bool isTwoColorSystem(); + bool isOneOrTwoColorSystem(); void evaluateSeqLen(); void evaluateOverRepSeqs(); void computeOverRepSeq(string filename, map& hotseqs, int seqLen); diff --git a/src/main.cpp b/src/main.cpp index e852bb8..4f67875 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -73,9 +73,9 @@ int main(int argc, char* argv[]){ cmd.add("dont_eval_duplication", 0, "don't evaluate duplication rate to save time and use less memory."); // polyG tail trimming - cmd.add("trim_poly_g", 'g', "force polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data"); + cmd.add("trim_poly_g", 'g', "force polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq/iSeq data"); cmd.add("poly_g_min_len", 0, "the minimum length to 
detect polyG in the read tail. 10 by default.", false, 10); - cmd.add("disable_trim_poly_g", 'G', "disable polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data"); + cmd.add("disable_trim_poly_g", 'G', "disable polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq/iSeq data"); // polyX tail trimming cmd.add("trim_poly_x", 'x', "enable polyX trimming in 3' ends."); @@ -498,8 +498,8 @@ int main(int argc, char* argv[]){ // using evaluator to check if it's two color system if(!cmd.exist("trim_poly_g") && !cmd.exist("disable_trim_poly_g") && supportEvaluation) { - bool twoColorSystem = eva.isTwoColorSystem(); - if(twoColorSystem){ + bool oneOrTwoColorSystem = eva.isOneOrTwoColorSystem(); + if(oneOrTwoColorSystem){ opt.polyGTrim.enabled = true; } }