18
18
import java .util .Arrays ;
19
19
import java .util .List ;
20
20
21
+ import static htsjdk .samtools .util .SequenceUtil .getSamReadNameFromFastqHeader ;
22
+
21
23
/**
22
24
* Converts a FASTQ file to an unaligned BAM or SAM file.
23
25
* <p>
@@ -205,36 +207,6 @@ public static void main(final String[] argv) {
205
207
206
208
private static final SolexaQualityConverter solexaQualityConverter = SolexaQualityConverter .getSingleton ();
207
209
208
- /**
209
- * Looks at fastq input(s) and attempts to determine the proper quality format
210
- *
211
- * Closes the reader(s) by side effect
212
- *
213
- * @param reader1 The first fastq input
214
- * @param reader2 The second fastq input, if necessary. To not use this input, set it to null
215
- * @param expectedQuality If provided, will be used for sanity checking. If left null, autodetection will occur
216
- */
217
- public static FastqQualityFormat determineQualityFormat (final FastqReader reader1 , final FastqReader reader2 , final FastqQualityFormat expectedQuality ) {
218
- final QualityEncodingDetector detector = new QualityEncodingDetector ();
219
-
220
- if (reader2 == null ) {
221
- detector .add (QualityEncodingDetector .DEFAULT_MAX_RECORDS_TO_ITERATE , reader1 );
222
- } else {
223
- detector .add (QualityEncodingDetector .DEFAULT_MAX_RECORDS_TO_ITERATE , reader1 , reader2 );
224
- reader2 .close ();
225
- }
226
-
227
- reader1 .close ();
228
-
229
- final FastqQualityFormat qualityFormat = detector .generateBestGuess (QualityEncodingDetector .FileContext .FASTQ , expectedQuality );
230
- if (detector .isDeterminationAmbiguous ()) {
231
- LOG .warn ("Making ambiguous determination about fastq's quality encoding; more than one format possible based on observed qualities." );
232
- }
233
- LOG .info (String .format ("Auto-detected quality format as: %s." , qualityFormat ));
234
-
235
- return qualityFormat ;
236
- }
237
-
238
210
239
211
/**
240
212
* Get a list of FASTQs that are sequentially numbered based on the first (base) fastq.
@@ -362,7 +334,7 @@ protected int doUnpaired(final FastqReader freader, final SAMFileWriter writer)
362
334
final ProgressLogger progress = new ProgressLogger (LOG );
363
335
for ( ; freader .hasNext () ; readCount ++) {
364
336
final FastqRecord frec = freader .next ();
365
- final String frecName = SequenceUtil . getSamReadNameFromFastqHeader (frec .getReadName ());
337
+ final String frecName = getSamReadNameFromFastqHeader (frec .getReadName ());
366
338
final SAMRecord srec = createSamRecord (writer .getFileHeader (), frecName , frec , false ) ;
367
339
srec .setReadPairedFlag (false );
368
340
writer .addAlignment (srec );
@@ -380,8 +352,8 @@ protected int doPaired(final FastqReader freader1, final FastqReader freader2, f
380
352
final FastqRecord frec1 = freader1 .next ();
381
353
final FastqRecord frec2 = freader2 .next ();
382
354
383
- final String frec1Name = SequenceUtil . getSamReadNameFromFastqHeader (frec1 .getReadName ());
384
- final String frec2Name = SequenceUtil . getSamReadNameFromFastqHeader (frec2 .getReadName ());
355
+ final String frec1Name = getSamReadNameFromFastqHeader (frec1 .getReadName ());
356
+ final String frec2Name = getSamReadNameFromFastqHeader (frec2 .getReadName ());
385
357
final String baseName = getBaseName (frec1Name , frec2Name , freader1 , freader2 );
386
358
387
359
final SAMRecord srec1 = createSamRecord (writer .getFileHeader (), baseName , frec1 , true ) ;
@@ -415,8 +387,9 @@ public static SAMRecord createSamRecord(final SAMFileHeader header, final String
415
387
srec .setReadString (frec .getReadString ());
416
388
srec .setReadUnmappedFlag (true );
417
389
srec .setAttribute (ReservedTagConstants .READ_GROUP_ID , readGroupName );
418
- String additionalHeader = frec .getReadName ().replace (baseName , "" );
419
- if (additionalHeader .length () > 0 ) {
390
+ // Optimize additional header handling
391
+ String additionalHeader = frec .getReadName ().substring (baseName .length ());
392
+ if ( ! additionalHeader .isEmpty ()) {
420
393
/*
421
394
If this contains the trimmed bases flag (TB:) then put that in a separate tag
422
395
*/
@@ -430,13 +403,18 @@ If this contains the trimmed bases flag (TB:) then put that in a separate tag
430
403
}
431
404
}
432
405
}
406
+
407
+ // Convert and validate quality scores
433
408
final byte [] quals = StringUtil .stringToBytes (frec .getBaseQualityString ());
434
409
convertQuality (quals , fqFormat );
435
- for (final byte qual : quals ) {
436
- final int uQual = qual & 0xff ;
437
- if (uQual < minQ || uQual > maxQ ) {
438
- throw new PicardException ("Base quality " + uQual + " is not in the range " + minQ + ".." +
439
- maxQ + " for read " + frec .getReadName ());
410
+
411
+ if (fqFormat != FastqQualityFormat .Standard ) {
412
+ for (final byte qual : quals ) {
413
+ final int uQual = qual & 0xff ;
414
+ if (uQual < minQ || uQual > maxQ ) {
415
+ throw new PicardException ("Base quality " + uQual + " is not in the range " + minQ + ".." +
416
+ maxQ + " for read " + frec .getReadName ());
417
+ }
440
418
}
441
419
}
442
420
srec .setBaseQualities (quals );
@@ -485,7 +463,8 @@ static void convertQuality(final byte[] quals, final FastqQualityFormat version)
485
463
}
486
464
}
487
465
488
- /** Returns read baseName and asserts correct pair read name format:
466
+ /**
467
+ * Returns the read baseName and asserts correct pair read name format:
489
468
* <ul>
490
469
* <li> Paired reads must either have the exact same read names or they must contain at least one "/"
491
470
* <li> and the First pair read name must end with "/1" and second pair read name ends with "/2"
@@ -494,54 +473,64 @@ static void convertQuality(final byte[] quals, final FastqQualityFormat version)
494
473
* </ul>
495
474
*/
496
475
String getBaseName (final String readName1 , final String readName2 , final FastqReader freader1 , final FastqReader freader2 ) {
497
- String [] toks = getReadNameTokens (readName1 , 1 , freader1 );
498
- final String baseName1 = toks [0 ] ;
499
- final String num1 = toks [1 ] ;
500
476
501
- toks = getReadNameTokens (readName2 , 2 , freader2 );
502
- final String baseName2 = toks [0 ] ;
503
- final String num2 = toks [1 ];
504
-
505
- if (!baseName1 .equals (baseName2 )) {
506
- throw new PicardException (String .format ("In paired mode, read name 1 (%s) does not match read name 2 (%s)" , baseName1 ,baseName2 ));
507
- }
508
-
509
- final boolean num1Blank = StringUtil .isBlank (num1 );
510
- final boolean num2Blank = StringUtil .isBlank (num2 );
511
- if (num1Blank || num2Blank ) {
512
- if (!num1Blank ) throw new PicardException (error (freader1 ,"Pair 1 number is missing (" + readName1 + "). Both pair numbers must be present or neither." )); //num1 != blank and num2 == blank
513
- else if (!num2Blank ) throw new PicardException (error (freader2 , "Pair 2 number is missing (" + readName2 + "). Both pair numbers must be present or neither." )); //num1 == blank and num =2 != blank
514
- } else {
515
- if (!num1 .equals ("1" )) throw new PicardException (error (freader1 ,"Pair 1 number must be 1 (" + readName1 + ")" ));
516
- if (!num2 .equals ("2" )) throw new PicardException (error (freader2 ,"Pair 2 number must be 2 (" + readName2 + ")" ));
477
+ /*
478
+ readName1 and readName2 will not end in /1 or /2 as those have been trimmed by the time we are here.
479
+ if they are the same, return, otherwise, do the check
480
+ */
481
+ if ( ! readName1 .isEmpty () && readName1 .equals (readName2 )) {
482
+ return readName1 ;
517
483
}
518
484
519
- return baseName1 ;
520
- }
485
+ // Retrieve base name and pair number in one operation to avoid multiple traversals
486
+ String [] toks1 = breakReadName (readName1 , freader1 );
487
+ String [] toks2 = breakReadName (readName2 , freader2 );
521
488
522
- /** Breaks up read name into baseName and number separated by the last / */
523
- private String [] getReadNameTokens ( final String readName , final int pairNum , final FastqReader freader ) {
524
- if ( readName . equals ( "" )) throw new PicardException ( error ( freader , "Pair read name " + pairNum + " cannot be empty: " + readName ));
489
+ if (! toks1 [ 0 ]. equals ( toks2 [ 0 ])) {
490
+ throw new PicardException ( String . format ( "In paired mode, read name 1 (%s) does not match read name 2 (%s)" , readName1 , readName2 ));
491
+ }
525
492
526
- final int idx = readName .lastIndexOf ('/' );
527
- final String [] result = new String [2 ];
493
+ // Check for the most common scenario and exit early if matched
494
+ if ("1" .equals (toks1 [1 ]) && "2" .equals (toks2 [1 ])) {
495
+ return toks1 [0 ];
496
+ }
528
497
529
- if (idx == -1 ) {
530
- result [0 ] = readName ;
531
- result [1 ] = null ;
532
- } else {
533
- result [1 ] = readName .substring (idx + 1 ); // should be a 1 or 2
498
+ // Handle the case where either both are null or one is null and the other is not
499
+ final String num1 = toks1 [1 ];
500
+ final String num2 = toks2 [1 ];
534
501
535
- if (!result [1 ].equals ("1" ) && !result [1 ].equals ("2" )) { //if not a 1 or 2 then names must be identical
536
- result [0 ] = readName ;
537
- result [1 ] = null ;
502
+ if ((num1 == null ) != (num2 == null )) {
503
+ if (num1 != null ) {
504
+ throw new PicardException (error (freader1 , "Pair 1 number is missing (" + readName1 + "). Both pair numbers must be present or neither." ));
505
+ } else {
506
+ throw new PicardException (error (freader2 , "Pair 2 number is missing (" + readName2 + "). Both pair numbers must be present or neither." ));
538
507
}
539
- else {
540
- result [0 ] = readName .substring (0 , idx ); // baseName
508
+ } else if (num1 != null ) {
509
+ if (!"1" .equals (num1 )) {
510
+ throw new PicardException (error (freader1 , "Pair 1 number must be 1 (" + readName1 + ")" ));
541
511
}
512
+ throw new PicardException (error (freader2 , "Pair 2 number must be 2 (" + readName2 + ")" ));
542
513
}
543
514
544
- return result ;
515
+ return toks1 [0 ];
516
+ }
517
+
518
+ /**
519
+ * Breaks read name into baseName and number in one traversal.
520
+ * Returns an array containing the baseName (index 0) and number (index 1).
521
+ */
522
+ private String [] breakReadName (final String readName , final FastqReader freader ) {
523
+ if (readName .isEmpty ()) {
524
+ throw new PicardException (error (freader , "Pair read name cannot be empty: " + readName ));
525
+ }
526
+ final int idx = readName .lastIndexOf ('/' );
527
+ final String [] result = { readName , null };
528
+ if (idx != -1 ) {
529
+ final String num = readName .substring (idx + 1 );
530
+ result [1 ] = ("1" .equals (num ) || "2" .equals (num )) ? num : null ;
531
+ result [0 ] = result [1 ] != null ? readName .substring (0 , idx ) : readName ;
532
+ }
533
+ return result ;
545
534
}
546
535
547
536
/** Little utility to give error messages corresponding to line numbers in the input files. */
0 commit comments