Skip to content

Commit 34e388d

Browse files
authored
Add warning to CheckIlluminaDir if we detect cycles without data. (broadinstitute#874)
* Add warning to CheckIlluminaDir if we detect cycles without data. CBCLReader will ignore and log a warning for tiles without data.
* Review changes.
1 parent 46d8617 commit 34e388d

File tree

7 files changed

+90
-44
lines changed

7 files changed

+90
-44
lines changed

src/main/java/picard/illumina/CheckIlluminaDirectory.java

+15-3
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
import java.util.TreeSet;
3232
import java.util.regex.Matcher;
3333
import java.util.regex.Pattern;
34+
import java.util.stream.Collectors;
3435

3536
import static picard.illumina.BasecallsConverter.TILE_NUMBER_COMPARATOR;
3637
import static picard.illumina.NewIlluminaBasecallsConverter.getTiledFiles;
@@ -152,7 +153,7 @@ protected int doWork() {
152153
tiles.sort(TILE_NUMBER_COMPARATOR);
153154

154155
//check s.locs
155-
final File locsFile = new File(BASECALLS_DIR.getParentFile(), "s.locs");
156+
final File locsFile = new File(BASECALLS_DIR.getParentFile(), AbstractIlluminaPositionFileReader.S_LOCS_FILE);
156157
final LocsFileReader locsFileReader = new LocsFileReader(locsFile);
157158
final List<AbstractIlluminaPositionFileReader.PositionInfo> locs = new ArrayList<>();
158159
while (locsFileReader.hasNext()) {
@@ -167,8 +168,19 @@ protected int doWork() {
167168
final OutputMapping outputMapping = new OutputMapping(readStructure);
168169

169170
final CbclReader reader = new CbclReader(cbcls, filterFileMap, readStructure.readLengths, tiles.get(0), locs, outputMapping.getOutputCycles(), true);
170-
171171
reader.getAllTiles().forEach((key, value) -> {
172+
//we are looking for cycles whose compressed block size is at most 2 bytes, i.e. tiles with no data for the cycle (2 bytes is the size of the gzip magic number; a full gzip header is 10 bytes)
173+
String emptyCycleString = value.stream()
174+
.filter(cycle -> cycle.getCompressedBlockSize() <= 2)
175+
.map(BaseBclReader.TileData::getTileNum)
176+
.map(Object::toString)
177+
.collect(Collectors.joining(", "));
178+
179+
if (emptyCycleString.length() > 0) {
180+
log.warn("The following tiles have no data for cycle " + key);
181+
log.warn(emptyCycleString);
182+
}
183+
172184
final List<File> fileForCycle = reader.getFilesForCycle(key);
173185
final long totalFilesSize = fileForCycle.stream().mapToLong(file -> file.length() - reader.getHeaderSize()).sum();
174186
final long expectedFileSize = value.stream().mapToLong(BaseBclReader.TileData::getCompressedBlockSize).sum();
@@ -221,7 +233,7 @@ protected int doWork() {
221233
}
222234

223235
private void createLocFileSymlinks(final IlluminaFileUtil fileUtil, final int lane) {
224-
final File baseFile = new File(BASECALLS_DIR.getParentFile().getAbsolutePath() + File.separator + "s.locs");
236+
final File baseFile = new File(BASECALLS_DIR.getParentFile().getAbsolutePath() + File.separator + AbstractIlluminaPositionFileReader.S_LOCS_FILE);
225237
final File newFileBase = new File(baseFile.getParent() + File.separator + IlluminaFileUtil
226238
.longLaneStr(lane) + File.separator);
227239
if (baseFile.exists()) {

src/main/java/picard/illumina/NewIlluminaBasecallsConverter.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ outputRecordClass, numProcessors, new IlluminaDataProviderFactory(basecallsDir,
9797
IOUtil.assertFilesAreReadable(cbcls);
9898

9999
//locs
100-
final File locsFile = new File(basecallsDir.getParentFile(), "s.locs");
100+
final File locsFile = new File(basecallsDir.getParentFile(), AbstractIlluminaPositionFileReader.S_LOCS_FILE);
101101
try (LocsFileReader locsFileReader = new LocsFileReader(locsFile)) {
102102
while (locsFileReader.hasNext()) {
103103
locs.add(locsFileReader.next());

src/main/java/picard/illumina/parser/readers/BaseBclReader.java

+6-1
Original file line number | Diff line number | Diff line change
@@ -156,12 +156,17 @@ public class TileData {
156156
this.filePosition = filePosition;
157157
}
158158

159+
public int getTileNum() {
160+
return tileNum;
161+
}
162+
159163
public int getCompressedBlockSize() {
160164
return compressedBlockSize;
161165
}
162166

163-
public int getNumClustersInTile() {
167+
int getNumClustersInTile() {
164168
return numClustersInTile;
165169
}
170+
166171
}
167172
}

src/main/java/picard/illumina/parser/readers/CbclReader.java

+52-35
Original file line number | Diff line number | Diff line change
@@ -314,10 +314,6 @@ private void cacheFilterAndLocs(final TileData currentTileData, final List<Abstr
314314

315315
private void cacheTile(final int totalCycleCount, final TileData tileData, final CycleData currentCycleData) throws IOException {
316316
final byte[] tileByteArray = new byte[tileData.compressedBlockSize];
317-
//we are going to explode the nibbles into bytes to make PF filtering easier
318-
final byte[] uncompressedByteArray = new byte[tileData.uncompressedBlockSize];
319-
// ByteBuffer uncompressedByteArray = ByteBuffer.allocate(tileData.uncompressedBlockSize);
320-
final byte[] unNibbledByteArray = new byte[tileData.uncompressedBlockSize * 2];
321317

322318
// Read the whole compressed block into a buffer, then sanity check the length
323319
final InputStream stream = this.streams[totalCycleCount];
@@ -332,38 +328,18 @@ private void cacheTile(final int totalCycleCount, final TileData tileData, final
332328
(totalCycleCount + 1), this.streamFiles[totalCycleCount].getAbsolutePath()));
333329
}
334330

335-
// Uncompress the data from the buffer we just wrote - use gzip input stream to write to uncompressed buffer
331+
// Decompress the data from the buffer we just wrote - use gzip input stream to write to uncompressed buffer
336332
final ByteArrayInputStream byteInputStream = new ByteArrayInputStream(Arrays.copyOfRange(tileByteArray, 0, readBytes));
337-
338-
final GZIPInputStream gzipInputStream = new GZIPInputStream(byteInputStream, uncompressedByteArray.length);
339-
int read;
340-
int totalRead = 0;
341-
try {
342-
while ((read = gzipInputStream.read(uncompressedByteArray, totalRead, uncompressedByteArray.length - totalRead)) != -1) {
343-
if (read == 0) break;
344-
totalRead += read;
345-
}
346-
} catch (final EOFException eofException) {
347-
throw new PicardException("Unexpected end of file " + this.streamFiles[totalCycleCount].getAbsolutePath()
348-
+ " this file is likely corrupt or truncated. We have read "
349-
+ totalRead + " and were expecting to read "
350-
+ uncompressedByteArray.length);
351-
}
352-
if (totalRead != tileData.uncompressedBlockSize) {
353-
throw new PicardException(String.format("Error while decompressing from BCL file for cycle %d. Offending file on disk is %s",
354-
(totalCycleCount + 1), this.streamFiles[totalCycleCount].getAbsolutePath()));
355-
}
333+
byte[] decompressedByteArray = decompressTile(totalCycleCount, tileData, byteInputStream);
356334

357335
// Read uncompressed data from the buffer and expand each nibble into a full byte for ease of use
358-
int index = 0;
359-
for (final byte singleByte : uncompressedByteArray) {
360-
unNibbledByteArray[index] = (byte) (singleByte & 0x0f);
361-
index++;
362-
unNibbledByteArray[index] = (byte) ((singleByte >> 4) & 0x0f);
363-
index++;
364-
}
365-
gzipInputStream.close();
336+
byte[] unNibbledByteArray = promoteNibblesToBytes(decompressedByteArray);
337+
cachedTile[totalCycleCount] = filterNonPfReads(tileData, currentCycleData, unNibbledByteArray);
366338

339+
cachedTilePosition[totalCycleCount] = 0;
340+
}
341+
342+
private byte[] filterNonPfReads(TileData tileData, CycleData currentCycleData, byte[] unNibbledByteArray) {
367343
// Write buffer contents to cached tile array
368344
// if nonPF reads are included we need to strip them out
369345
if (!currentCycleData.pfExcluded) {
@@ -383,11 +359,52 @@ private void cacheTile(final int totalCycleCount, final TileData tileData, final
383359
}
384360
filterIndex++;
385361
}
386-
cachedTile[totalCycleCount] = filteredByteArray;
362+
return filteredByteArray;
387363
} else {
388-
cachedTile[totalCycleCount] = unNibbledByteArray;
364+
return unNibbledByteArray;
389365
}
390-
cachedTilePosition[totalCycleCount] = 0;
366+
}
367+
368+
private byte[] promoteNibblesToBytes(byte[] decompressedByteArray) {
369+
//we are going to explode the nibbles into bytes to make PF filtering easier
370+
final byte[] unNibbledByteArray = new byte[decompressedByteArray.length * 2];
371+
int index = 0;
372+
for (final byte singleByte : decompressedByteArray) {
373+
unNibbledByteArray[index] = (byte) (singleByte & 0x0f);
374+
index++;
375+
unNibbledByteArray[index] = (byte) ((singleByte >> 4) & 0x0f);
376+
index++;
377+
}
378+
return unNibbledByteArray;
379+
}
380+
381+
private byte[] decompressTile(int totalCycleCount, TileData tileData, ByteArrayInputStream byteInputStream) throws IOException {
382+
final byte[] decompressedByteArray = new byte[tileData.uncompressedBlockSize];
383+
//only decompress the data if we are expecting data.
384+
if (decompressedByteArray.length == 0) {
385+
log.warn("Ignoring tile " + tileData.tileNum + " there are no PF reads.");
386+
} else {
387+
int read;
388+
int totalRead = 0;
389+
try (GZIPInputStream gzipInputStream = new GZIPInputStream(byteInputStream, decompressedByteArray.length)) {
390+
while ((read = gzipInputStream.read(decompressedByteArray, totalRead, decompressedByteArray.length - totalRead)) != -1) {
391+
if (read == 0) {
392+
break;
393+
}
394+
totalRead += read;
395+
}
396+
} catch (final EOFException eofException) {
397+
throw new PicardException("Unexpected end of file " + this.streamFiles[totalCycleCount].getAbsolutePath()
398+
+ " this file is likely corrupt or truncated. We have read "
399+
+ totalRead + " and were expecting to read "
400+
+ decompressedByteArray.length);
401+
}
402+
if (totalRead != tileData.uncompressedBlockSize) {
403+
throw new PicardException(String.format("Error while decompressing from BCL file for cycle %d. Offending file on disk is %s",
404+
(totalCycleCount + 1), this.streamFiles[totalCycleCount].getAbsolutePath()));
405+
}
406+
}
407+
return decompressedByteArray;
391408
}
392409

393410
public CycleData[] getCycleData() {

src/test/java/picard/illumina/CheckIlluminaDirectoryTest.java

+3-3
Original file line number | Diff line number | Diff line change
@@ -8,12 +8,13 @@
88
import org.testng.annotations.BeforeMethod;
99
import org.testng.annotations.DataProvider;
1010
import org.testng.annotations.Test;
11-
import picard.cmdline.CommandLineProgramTest;
1211
import picard.PicardException;
12+
import picard.cmdline.CommandLineProgramTest;
1313
import picard.cmdline.StandardOptionDefinitions;
1414
import picard.illumina.parser.IlluminaDataType;
1515
import picard.illumina.parser.IlluminaFileUtil;
1616
import picard.illumina.parser.IlluminaFileUtilTest;
17+
import picard.illumina.parser.readers.AbstractIlluminaPositionFileReader;
1718

1819
import java.io.BufferedWriter;
1920
import java.io.File;
@@ -373,13 +374,12 @@ public void symlinkLocsTest() {
373374

374375
private void createSingleLocsFile() {
375376
try {
376-
final File singleLocsFile = new File(intensityDir, "s.locs");
377+
final File singleLocsFile = new File(intensityDir, AbstractIlluminaPositionFileReader.S_LOCS_FILE);
377378
final FileWriter writer = new FileWriter(singleLocsFile);
378379
writer.write("This is a test string.");
379380
writer.close();
380381
} catch (final IOException e) {
381382
e.printStackTrace();
382383
}
383-
384384
}
385385
}

src/test/java/picard/illumina/parser/readers/CbclReaderTest.java

+13-1
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77

88
import java.io.File;
99
import java.util.Arrays;
10+
import java.util.Collections;
1011
import java.util.HashMap;
1112
import java.util.List;
1213
import java.util.Map;
@@ -16,8 +17,8 @@ public class CbclReaderTest {
1617
private static final File TestDataDir = new File("testdata/picard/illumina/readerTests/cbcls");
1718
private static final File PASSING_CBCL_C1_1 = new File(TestDataDir + "/C1.1", "L001_1.cbcl");
1819
private static final File PASSING_CBCL_C2_1 = new File(TestDataDir + "/C2.1", "L001_1.cbcl");
20+
private static final File CBCL_WITH_EMPTY_TILE = new File(TestDataDir + "/C3.1", "L001_1.cbcl");
1921
private static final File TILE_1101_FILTER = new File(TestDataDir, "tile_1101.filter");
20-
private static final File TILE_1102_FILTER = new File(TestDataDir, "tile_1102.filter");
2122

2223
private static final char[] expectedBases = new char[]{
2324
'G', 'G', 'C', 'C', 'G', 'A', 'A', 'G'
@@ -63,4 +64,15 @@ public void testMissingTile() {
6364
filters, new int[]{2}, 1102, locs, new int[]{1, 2}, false);
6465

6566
}
67+
68+
@Test
69+
public void testEmptyTile() {
70+
final Map<Integer, File> filters = new HashMap<>();
71+
filters.put(1101, TILE_1101_FILTER);
72+
final LocsFileReader locsFileReader = new LocsFileReader(new File("testdata/picard/illumina/readerTests/s_1_6.locs"));
73+
List<AbstractIlluminaPositionFileReader.PositionInfo> locs = locsFileReader.toList();
74+
CbclReader reader = new CbclReader(Collections.singletonList(CBCL_WITH_EMPTY_TILE),
75+
filters, new int[]{1}, 1101, locs, new int[]{3}, false);
76+
Assert.assertFalse(reader.hasNext());
77+
}
6678
}
Binary file not shown.

0 commit comments

Comments
 (0)