88
99import static java .util .Comparator .comparing ;
1010
11+ import java .io .BufferedReader ;
1112import java .io .File ;
1213import java .io .IOException ;
1314import java .nio .charset .StandardCharsets ;
3940import org .apache .commons .math3 .util .Pair ;
4041import org .qcmg .common .log .QLogger ;
4142import org .qcmg .common .log .QLoggerFactory ;
43+ import org .qcmg .common .model .PositionRange ;
4244import org .qcmg .common .string .StringUtils ;
4345import org .qcmg .common .util .BaseUtils ;
4446import org .qcmg .common .util .ChrPositionCache ;
@@ -336,11 +338,14 @@ public static float getVAF(int[] counts, String ref) {
336338};
337339 }
338340
339- public static Pair <SigMeta , TMap <String , TIntByteHashMap >> loadSignatureGenotype (File file , int minCoverage , int minRGCoverage ) throws IOException {
340- return loadSignatureGenotype (file , minCoverage , minRGCoverage , HOM_CUTOFF , HET_UPPER_CUTOFF , HET_LOWER_CUTOFF );
341+ public static Pair <SigMeta , TMap <String , TIntByteHashMap >> loadSignatureGenotype (File file , int minCoverage , int minRGCoverage ) throws IOException {
342+ return loadSignatureGenotype (file , minCoverage , minRGCoverage , HOM_CUTOFF , HET_UPPER_CUTOFF , HET_LOWER_CUTOFF , null );
343+ }
344+ public static Pair <SigMeta , TMap <String , TIntByteHashMap >> loadSignatureGenotype (File file , int minCoverage , int minRGCoverage , Map <String , List <PositionRange >> blockedPositions ) throws IOException {
345+ return loadSignatureGenotype (file , minCoverage , minRGCoverage , HOM_CUTOFF , HET_UPPER_CUTOFF , HET_LOWER_CUTOFF , blockedPositions );
341346 }
342347
343- public static Pair <SigMeta , TMap <String , TIntByteHashMap >> loadSignatureGenotype (File file , int minCoverage , int minRGCoverage , float homCutoff , float upperHetCutoff , float lowerHetCutoff ) throws IOException {
348+ public static Pair <SigMeta , TMap <String , TIntByteHashMap >> loadSignatureGenotype (File file , int minCoverage , int minRGCoverage , float homCutoff , float upperHetCutoff , float lowerHetCutoff , Map < String , List < PositionRange >> blockedPositions ) throws IOException {
344349 if (null == file ) {
345350 throw new IllegalArgumentException ("Null file object passed to loadSignatureGenotype" );
346351 }
@@ -361,16 +366,55 @@ public static Pair<SigMeta, TMap<String, TIntByteHashMap>> loadSignatureGenotype
361366 }
362367
363368 if (null != sm && sm .isValid ()) {
364- getDataFromBespokeLayout (file , minCoverage , minRGCoverage , ratios , rgRatios , rgIds , reader , homCutoff , upperHetCutoff , lowerHetCutoff );
369+ getDataFromBespokeLayout (file , minCoverage , minRGCoverage , ratios , rgRatios , rgIds , reader , homCutoff , upperHetCutoff , lowerHetCutoff , blockedPositions );
365370 } else {
366371 rgRatios .put ("all" , loadSignatureRatiosFloatGenotypeNew (file , MINIMUM_COVERAGE , homCutoff , upperHetCutoff , lowerHetCutoff ));
367372 }
368373 }
369374 return new Pair <>(sm , rgRatios );
370375 }
376+
377+ public static void loadBlockListIntoMap (String blocklistFile , Map <String , List <PositionRange >> map ) {
378+ try {
379+ // Use buffered reading with larger buffer for better I/O performance
380+ try (BufferedReader reader = Files .newBufferedReader (Paths .get (blocklistFile ), StandardCharsets .UTF_8 )) {
381+
382+ String line ;
383+ while ((line = reader .readLine ()) != null ) {
384+ // Skip comments and empty lines early
385+ if (line .isEmpty () || line .charAt (0 ) == '#' ) continue ;
386+
387+ // Use indexOf instead of split for better performance
388+ int firstTab = line .indexOf ('\t' );
389+ if (firstTab == -1 ) continue ;
390+
391+ int secondTab = line .indexOf ('\t' , firstTab + 1 );
392+ if (secondTab == -1 ) continue ;
393+
394+ // Check if there's a third tab (tokens.length >= 3 equivalent)
395+ int thirdTab = line .indexOf ('\t' , secondTab + 1 );
396+ if (thirdTab == -1 && secondTab == line .length () - 1 ) continue ;
397+
398+ try {
399+ String contig = line .substring (0 , firstTab );
400+ int start = Integer .parseInt (line , firstTab + 1 , secondTab , 10 );
401+ int stop = Integer .parseInt (line , secondTab + 1 ,
402+ thirdTab == -1 ? line .length () : thirdTab , 10 );
403+
404+ map .computeIfAbsent (contig , v -> new ArrayList <>()).add (new PositionRange (start , stop ));
405+ } catch (NumberFormatException e ) {
406+ // Skip malformed lines silently or log if needed
407+ logger .debug ("Skipping malformed line: " + line );
408+ }
409+ }
410+ }
411+ } catch (IOException e ) {
412+ logger .error ("Error reading blocklist file: " + blocklistFile , e );
413+ }
414+ }
371415
372416 public static void getDataFromBespokeLayout (File file , int minCoverage , int minRGCoverage , TIntByteHashMap ratios ,
373- TMap <String , TIntByteHashMap > rgRatios , Map <String , String > rgIds , StringFileReader reader , float homCutoff , float upperHetCutoff , float lowerHetCutoff ) {
417+ TMap <String , TIntByteHashMap > rgRatios , Map <String , String > rgIds , StringFileReader reader , float homCutoff , float upperHetCutoff , float lowerHetCutoff , Map < String , List < PositionRange >> blockedPositions ) {
374418 int noOfRGs = rgIds .size ();
375419 logger .debug ("Number of rgs for " + file .getAbsolutePath () + " is " + noOfRGs );
376420
@@ -386,9 +430,26 @@ public static void getDataFromBespokeLayout(File file, int minCoverage, int minR
386430
387431 String coverage = line .substring (line .lastIndexOf (Constants .TAB_STRING ));
388432 String chrPosString = line .substring (0 , line .indexOf (Constants .TAB_STRING , line .indexOf (Constants .TAB_STRING ) + 1 ));
389-
390-
391- /*
433+
434+ if (null != blockedPositions ) {
435+ /*
436+ get chr and position
437+ */
438+ int tabIndex = chrPosString .indexOf (Constants .TAB );
439+ String chr = chrPosString .substring (0 , tabIndex );
440+ List <PositionRange > list = blockedPositions .get (chr );
441+ if (null != list ) {
442+ int pos = Integer .parseInt (chrPosString , tabIndex + 1 , chrPosString .length (), 10 );
443+ boolean blocked = list .stream ().anyMatch (r -> r .containsPosition (pos ));
444+ if (blocked ) {
445+ logger .debug ("Found blocked position for " + chrPosString );
446+ continue ;
447+ }
448+ }
449+ }
450+
451+
452+ /*
392453 * This should be in the QAF=t:5-0-0-0,rg4:2-0-0-0,rg1:1-0-0-0,rg2:2-0-0-0 format
393454 * Need to tease out the pertinent bits
394455 */
@@ -402,7 +463,6 @@ public static void getDataFromBespokeLayout(File file, int minCoverage, int minR
402463
403464 if (isCodedGenotypeValid (genotype1 )) {
404465 cachePosition .set (ChrPositionCache .getStringIndex (chrPosString ));
405-
406466 ratios .put (cachePosition .get (), genotype1 );
407467 /*
408468 * Get rg data if we have more than 1 rg
0 commit comments