Skip to content

Commit b6ddd40

Browse files
committed
fix(qprofiler2): add ordering of readgroups in BamSummaryReport
To ensure the XML output lists read groups in the same order on every run, the read groups in BamSummaryReport are now sorted before being written to XML.
1 parent 4631e9f commit b6ddd40

File tree

2 files changed

+94
-39
lines changed

2 files changed

+94
-39
lines changed

qprofiler2/src/org/qcmg/qprofiler2/bam/BamSummaryReport.java

+41-32
Original file line numberDiff line numberDiff line change
@@ -297,33 +297,34 @@ private void createQual(Element parent, boolean isLongReadBam) {
297297

298298
private void createTLen(Element parent) {
299299
//ISIZE
300-
parent = XmlElementUtils.createSubElement(parent, XmlUtils.READGROUPS);
301-
for (Entry<String, ReadGroupSummary> entry : rgSummaries.entrySet()) {
302-
// output tLen inside pairSummary, eg. inward, f3f5
303-
entry.getValue().pairTlen2Xml(XmlUtils.createReadGroupNode(parent, entry.getKey()));
304-
}
300+
parent = XmlElementUtils.createSubElement(parent, XmlUtils.READGROUPS);
301+
List<String> sortedKeys = rgSummaries.keySet().stream().sorted().toList();
302+
for (String key : sortedKeys) {
303+
rgSummaries.get(key).pairTlen2Xml(XmlUtils.createReadGroupNode(parent, key));
304+
}
305305
}
306306

307307
private void createRLENGTH(Element parent) {
308308
//Read length
309309
parent = XmlElementUtils.createSubElement(parent, XmlUtils.READGROUPS);
310-
for (Entry<String, ReadGroupSummary> entry : rgSummaries.entrySet()) {
311-
// output ReadLength
312-
entry.getValue().readLength2Xml(XmlUtils.createReadGroupNode(parent, entry.getKey()));
310+
List<String> sortedKeys = rgSummaries.keySet().stream().sorted().toList();
311+
for (String key : sortedKeys) {
312+
rgSummaries.get(key).readLength2Xml(XmlUtils.createReadGroupNode(parent, key));
313313
}
314314
}
315315

316316
private void createCigar(Element parent) {
317-
parent = XmlElementUtils.createSubElement(parent, XmlUtils.READGROUPS);
318-
for (Entry<String, ReadGroupSummary> entry : rgSummaries.entrySet()) {
319-
Element ele = XmlUtils.createMetricsNode(XmlUtils.createReadGroupNode(parent, entry.getKey()), null,
320-
new Pair<String, Number>(ReadGroupSummary.READ_COUNT,entry.getValue().getCigarReadCount()));
321-
322-
// cigar string from reads including duplicateReads, notProperPairs and unmappedReads but excluding discardedReads (failed, secondary and supplementary).
323-
Map<String, AtomicLong> tallys = new TreeMap<>(new CigarStringComparator());
324-
tallys.putAll( entry.getValue().getCigarCount());
325-
XmlUtils.outputTallyGroup(ele ,XmlUtils.CIGAR , tallys, true, false);
326-
}
317+
parent = XmlElementUtils.createSubElement(parent, XmlUtils.READGROUPS);
318+
List<String> sortedKeys = rgSummaries.keySet().stream().sorted().toList();
319+
for (String key : sortedKeys) {
320+
Element ele = XmlUtils.createMetricsNode(XmlUtils.createReadGroupNode(parent, key), null,
321+
new Pair<String, Number>(ReadGroupSummary.READ_COUNT,rgSummaries.get(key).getCigarReadCount()));
322+
323+
// cigar string from reads including duplicateReads, notProperPairs and unmappedReads but excluding discardedReads (failed, secondary and supplementary).
324+
Map<String, AtomicLong> tallys = new TreeMap<>(new CigarStringComparator());
325+
tallys.putAll( rgSummaries.get(key).getCigarCount());
326+
XmlUtils.outputTallyGroup(ele ,XmlUtils.CIGAR , tallys, true, false);
327+
}
327328
}
328329

329330
private void createRNAME(Element parent) {
@@ -371,12 +372,13 @@ private void createMAPQ(Element parent) {
371372

372373
private void createPOS(Element parent) {
373374
parent = XmlElementUtils.createSubElement(parent, XmlUtils.READGROUPS);
374-
375-
for (String rg : rgSummaries.keySet()) {
375+
376+
List<String> sortedKeys = rgSummaries.keySet().stream().sorted().toList();
377+
for (String rg : sortedKeys) {
376378
long readCount = rNamePosition.values().stream().mapToLong(x -> x.getTotalCountByRg(rg)).sum();
377-
Element ele = XmlUtils.createMetricsNode(XmlUtils.createReadGroupNode(parent, rg) , null, new Pair<String, Number>(ReadGroupSummary.READ_COUNT, readCount));
378-
rNamePosition.keySet().stream().sorted(new ReferenceNameComparator()).forEach(ref ->
379-
XmlUtils.outputBins(ele, ref, rNamePosition.get(ref).getCoverageByRg(rg), PositionSummary.BUCKET_SIZE));
379+
Element ele = XmlUtils.createMetricsNode(XmlUtils.createReadGroupNode(parent, rg) , null, new Pair<String, Number>(ReadGroupSummary.READ_COUNT, readCount));
380+
rNamePosition.keySet().stream().sorted(new ReferenceNameComparator()).forEach(ref ->
381+
XmlUtils.outputBins(ele, ref, rNamePosition.get(ref).getCoverageByRg(rg), PositionSummary.BUCKET_SIZE));
380382
}
381383
}
382384

@@ -463,7 +465,7 @@ public void parseRecord(final SAMRecord record) {
463465
}
464466
}
465467

466-
private void summaryToXml(Element parent) {
468+
public void summaryToXml(Element parent) {
467469
Element summaryElement = XmlElementUtils.createSubElement(parent, XmlUtils.BAM_SUMMARY);
468470

469471
long discardReads = 0;
@@ -473,29 +475,30 @@ private void summaryToXml(Element parent) {
473475
long noncanonicalBase = 0;
474476
long trimBases = 0,overlappedBase = 0, softClippedBase = 0, hardClippedBase = 0;
475477
long readCount = 0, lostBase = 0; // baseCount = 0,
476-
Element rgsElement = XmlElementUtils.createSubElement(summaryElement, XmlUtils.READGROUPS);
477-
for (ReadGroupSummary summary: rgSummaries.values()) {
478+
Element rgsElement = XmlElementUtils.createSubElement(summaryElement, XmlUtils.READGROUPS);
479+
List<String> sortedKeys = rgSummaries.keySet().stream().sorted().toList();
480+
for (String key : sortedKeys) {
478481
try {
479-
482+
ReadGroupSummary summary = rgSummaries.get(key);
480483
Element rgEle = XmlUtils.createReadGroupNode(rgsElement, summary.getReadGroupId());
481484
summary.readSummary2Xml(rgEle);
482-
summary.pairSummary2Xml(rgEle);
485+
summary.pairSummary2Xml(rgEle);
483486
// presummary
484487
lostBase += summary.getDuplicateBase() + summary.getUnmappedBase() + summary.getNotProperPairedBase()
485-
+ summary.getTrimmedBase() + summary.getOverlappedBase() + summary.getSoftClippedBase() + summary.getHardClippedBase();
486-
maxBases += summary.getReadCount() * summary.getMaxReadLength();
488+
+ summary.getTrimmedBase() + summary.getOverlappedBase() + summary.getSoftClippedBase() + summary.getHardClippedBase();
489+
maxBases += summary.getReadCount() * summary.getMaxReadLength();
487490
duplicateBase += summary.getDuplicateBase();
488491
unmappedBase += summary.getUnmappedBase();
489492
noncanonicalBase += summary.getNotProperPairedBase();
490493
trimBases += summary.getTrimmedBase();
491494
overlappedBase += summary.getOverlappedBase();
492495
softClippedBase += summary.getSoftClippedBase();
493496
hardClippedBase += summary.getHardClippedBase();
494-
497+
495498
discardReads += summary.getDiscardreads();
496499
readCount += summary.getReadCount();
497500
} catch (Exception e) {
498-
logger.warn(e.getMessage());
501+
logger.warn(e.getMessage());
499502
}
500503
}
501504

@@ -578,6 +581,12 @@ public void setSamSequenceDictionary(SAMSequenceDictionary samSeqDictionary) {
578581
public void setReadGroups(List<String> ids) {
579582
readGroupIds = Stream.concat(ids.stream(), readGroupIds.stream()).collect(Collectors.toList());
580583
}
584+
585+
public void setReadGroupSummaries(List<String> ids) {
586+
for (String readGroupId : ids) {
587+
rgSummaries.put(readGroupId, new ReadGroupSummary(readGroupId, isLongReadBam));
588+
}
589+
}
581590

582591
ConcurrentMap<String, PositionSummary> getRNamePosition() {
583592
return rNamePosition;

qprofiler2/test/org/qcmg/qprofiler2/bam/BamSummaryReportTest.java

+53-7
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,16 @@
22

33
import static org.junit.Assert.*;
44

5-
import java.io.BufferedWriter;
5+
66
import java.io.File;
7-
import java.io.FileWriter;
8-
import java.io.IOException;
97
import java.util.ArrayList;
108
import java.util.Arrays;
119
import java.util.List;
1210
import java.util.stream.Collectors;
1311
import javax.xml.parsers.ParserConfigurationException;
1412

13+
14+
1515
import org.qcmg.qprofiler2.summarise.*;
1616
import org.w3c.dom.Element;
1717
import htsjdk.samtools.SAMRecord;
@@ -22,14 +22,15 @@
2222
import org.qcmg.common.string.StringUtils;
2323
import org.qcmg.common.util.XmlElementUtils;
2424
import org.qcmg.picard.BwaPair.Pair;
25-
import org.qcmg.qprofiler2.bam.BamSummaryReport;
2625
import org.qcmg.qprofiler2.util.XmlUtils;
26+
import org.w3c.dom.NodeList;
2727

2828

2929
public class BamSummaryReportTest {
30-
30+
3131
@Rule
32-
public TemporaryFolder testFolder = new TemporaryFolder();
32+
public TemporaryFolder testFolder = new TemporaryFolder();
33+
3334

3435
@Test
3536
public void testParseRNameAndPos() throws Exception {
@@ -79,6 +80,8 @@ public void testParseRNameAndPos() throws Exception {
7980
assertEquals(1, returnedSummary.getCoverageByRg(rg).size());
8081

8182
}
83+
84+
8285

8386
@Test
8487
public void testCompareWithSAMUtils() {
@@ -358,13 +361,56 @@ public static Element createLongReadRoot(File input) throws Exception {
358361
return root;
359362
}
360363

364+
@Test
365+
public void testReadGroupOrder() throws Exception {
366+
BamSummaryReport report = new BamSummaryReport(3, false, false);
367+
String[] rg = new String[] {"cd90dd75-8a1f-4fd0-a352-0364d8dd5300","69f81d0d-c430-4a6f-9ccd-05ea88b22c1d","374ed445-b8ee-4a1d-9337-f3fdd661f408"};
368+
List list= Arrays.asList(rg);
369+
report.setReadGroups(Arrays.asList(rg));
370+
371+
//Before sort
372+
assertEquals(list.get(0),"cd90dd75-8a1f-4fd0-a352-0364d8dd5300");
373+
assertEquals(list.get(1),"69f81d0d-c430-4a6f-9ccd-05ea88b22c1d");
374+
assertEquals(list.get(2),"374ed445-b8ee-4a1d-9337-f3fdd661f408");
375+
376+
Element root = XmlElementUtils.createRootElement("root", null);
377+
report.setReadGroupSummaries(list);
378+
report.summaryToXml(root);
379+
380+
List<String> readGroupNames = getAllReadGroupNames(root);
381+
assertTrue(readGroupNames.size() == 3);
382+
assertEquals(readGroupNames.get(0),"374ed445-b8ee-4a1d-9337-f3fdd661f408");
383+
assertEquals(readGroupNames.get(1),"69f81d0d-c430-4a6f-9ccd-05ea88b22c1d");
384+
assertEquals(readGroupNames.get(2),"cd90dd75-8a1f-4fd0-a352-0364d8dd5300");
385+
386+
}
387+
388+
public List<String> getAllReadGroupNames(Element root) {
389+
List<String> readGroupNames = new ArrayList<>();
390+
NodeList bamSummaryList = root.getElementsByTagName("bamSummary");
391+
for (int i = 0; i < bamSummaryList.getLength(); i++) {
392+
Element bamSummary = (Element) bamSummaryList.item(i);
393+
NodeList readGroupsList = bamSummary.getElementsByTagName("readGroups");
394+
for (int j = 0; j < readGroupsList.getLength(); j++) {
395+
Element readGroups = (Element) readGroupsList.item(j);
396+
NodeList readGroupList = readGroups.getElementsByTagName("readGroup");
397+
for (int k = 0; k < readGroupList.getLength(); k++) {
398+
Element readGroup = (Element) readGroupList.item(k);
399+
readGroupNames.add(readGroup.getAttribute("name"));
400+
}
401+
}
402+
}
403+
return readGroupNames;
404+
}
405+
361406
@Test
362407
public void unpairedTest() throws ParserConfigurationException {
363408
BamSummaryReport report = new BamSummaryReport(3, false, false);
364409

365410
SAMRecord record = new SAMRecord(null);
411+
366412
record.setReadName("TESTDATA");
367-
413+
368414
// first read
369415
record.setReadBases("ACCCT AACCC CAACC CTAAC CNTAA CCCTA ACCCA AC".replace(" ","").getBytes());
370416
report.parseRecord(record);// unapired

0 commit comments

Comments
 (0)