Skip to content

Commit b59be0f

Browse files
authored
Merge pull request #356 from AdamaJava/getReadGroup
perf(adamajava): ensure code that calls SAMRecord.getReadGroup() does so sparingly
2 parents 44ab78a + edc29fd commit b59be0f

File tree

10 files changed

+40 additions, -69 deletions

qbammerge/src/org/qcmg/bammerge/FileMerger.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -780,8 +780,9 @@ private void mergeAlignments() throws BamMergeException {
780780

781781
while (iter.hasNext() && !hasReachedNumberRecords()) {
782782
SAMRecord record = iter.next();
783+
SAMReadGroupRecord srgr = record.getReadGroup();
783784

784-
if (null == record.getReadGroup()) {
785+
if (null == srgr) {
785786
logger.warn(record.getSAMString());
786787
logger.warn(record.getAttribute(RG_TAG).toString());
787788
logger.warn(record.getHeader().toString());
@@ -790,7 +791,7 @@ private void mergeAlignments() throws BamMergeException {
790791

791792
SamReader fileReader = iter.getCurrentSAMFileReader();
792793
if ( ! replacementMap.isEmpty()) {
793-
String oldGroup = record.getReadGroup().getReadGroupId();
794+
String oldGroup = srgr.getReadGroupId();
794795
File file = inputReader.getFile(fileReader);
795796
String newGroup = getReplacementGroup(file, oldGroup);
796797
if (null != newGroup) {
@@ -800,7 +801,6 @@ private void mergeAlignments() throws BamMergeException {
800801
Integer oldZc = record.getIntegerAttribute(ZC);
801802
if (null == oldZc) {
802803
Integer zc = inputReader.getDefaultZc(fileReader);
803-
// assert null != zc;
804804
record.setAttribute(ZC, zc);
805805
} else {
806806
Set<Integer> permissibleZcs = inputReader.getOldZcs(fileReader);

qpicard/src/org/qcmg/picard/MultiSAMFileReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ public MultiSAMFileReader(final Set<File> files, ValidationStringency validation
4848
if (SAMFileHeader.SortOrder.coordinate != header.getSortOrder()) {
4949
throw new Exception("Input files must be coordinate sorted");
5050
}
51-
final Set<Integer> zcs = new HashSet<Integer>(8);
51+
final Set<Integer> zcs = new HashSet<>(8);
5252
for (SAMReadGroupRecord record : header.getReadGroups()) {
5353
final String attribute = getAttributeZc( record);
5454
if (null != attribute ) {

qprofiler/src/org/qcmg/qprofiler/bam/BamSummarizer.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,7 @@
1616
import java.io.File;
1717
import java.util.List;
1818

19-
import htsjdk.samtools.SamReader;
20-
import htsjdk.samtools.ValidationStringency;
21-
import htsjdk.samtools.SAMProgramRecord;
22-
import htsjdk.samtools.SAMRecord;
23-
import htsjdk.samtools.SAMSequenceDictionary;
19+
import htsjdk.samtools.*;
2420

2521
import org.qcmg.common.date.DateUtils;
2622
import org.qcmg.common.log.QLevel;
@@ -68,7 +64,7 @@ public SummaryReport summarize(String input, String index, String[] regions) thr
6864
bamSummaryReport.setStartTime(DateUtils.getCurrentDateAsString());
6965

7066
try(SamReader reader = SAMFileReaderFactory.createSAMFileReaderAsStream(input, index, vs);) {
71-
readGroupIds = reader.getFileHeader().getReadGroups().stream().map( it -> it.getId() ).collect(toList());
67+
readGroupIds = reader.getFileHeader().getReadGroups().stream().map(SAMReadGroupRecord::getId).collect(toList());
7268
bamSummaryReport.setReadGroups(readGroupIds);
7369

7470
boolean logLevelEnabled = logger.isLevelEnabled(QLevel.DEBUG);

qprofiler/src/org/qcmg/qprofiler/bam/BamSummarizerMT.java

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,7 @@
2525
import java.util.concurrent.Executors;
2626
import java.util.concurrent.TimeUnit;
2727

28-
import htsjdk.samtools.SAMFileHeader;
29-
import htsjdk.samtools.SamReader;
30-
import htsjdk.samtools.ValidationStringency;
31-
import htsjdk.samtools.SAMProgramRecord;
32-
import htsjdk.samtools.SAMRecord;
33-
import htsjdk.samtools.SAMRecordIterator;
34-
import htsjdk.samtools.SAMSequenceDictionary;
35-
import htsjdk.samtools.SAMSequenceRecord;
28+
import htsjdk.samtools.*;
3629

3730
import org.qcmg.common.date.DateUtils;
3831
import org.qcmg.common.log.QLogger;
@@ -120,7 +113,7 @@ public int compare(SAMSequenceRecord o1, SAMSequenceRecord o2) {
120113

121114
samSeqDict = reader.getFileHeader().getSequenceDictionary();
122115
bamHeader = HeaderUtils.getHeaderStringFromHeader(header);
123-
readGroupIds = header.getReadGroups().stream().map( it -> it.getId() ).collect(toList());
116+
readGroupIds = header.getReadGroups().stream().map(SAMReadGroupRecord::getId).collect(toList());
124117

125118
List<SAMProgramRecord> pgLines = header.getProgramRecords();
126119
for (SAMProgramRecord pgLine : pgLines) {

qprofiler/src/org/qcmg/qprofiler/bam/BamSummaryReport.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -484,9 +484,8 @@ public void parseRecord(final SAMRecord record) throws Exception {
484484
updateRecordsParsed();
485485
MAPQMatrix matrix = null;
486486

487-
String readGroup = SummaryReportUtils.UNKNOWN_READGROUP;
488-
if (record.getReadGroup() != null && record.getReadGroup().getId() != null)
489-
readGroup = record.getReadGroup().getReadGroupId();
487+
SAMReadGroupRecord srgr = record.getReadGroup();
488+
String readGroup = srgr == null ? SummaryReportUtils.UNKNOWN_READGROUP : srgr.getReadGroupId();
490489

491490
// Xu code: check if record has its fail or duplicate flag set. if so, miss out some of the summaries
492491
//anyway, add to summary and then add to its readgroup

qprofiler/src/org/qcmg/qprofiler/summarise/ReadGroupSummary.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,10 @@ public class ReadGroupSummary {
4646
QCMGAtomicLongArray readLength = new QCMGAtomicLongArray(128);
4747
QCMGAtomicLongArray overlapBase = new QCMGAtomicLongArray(128);
4848

49-
//QCMGAtomicLongArray.get(arrayTlenLimit) for tlen=[bigTlenValue, ~)
50-
QCMGAtomicLongArray isize = new QCMGAtomicLongArray(bigTlenValue + 1);
49+
QCMGAtomicLongArray isize = new QCMGAtomicLongArray(bigTlenValue + 1);
5150
AtomicInteger maxIsize = new AtomicInteger();
5251

53-
//bad reads inforamtion
52+
//bad reads information
5453
AtomicLong duplicate = new AtomicLong();
5554
AtomicLong secondary = new AtomicLong();
5655
AtomicLong supplementary = new AtomicLong();
@@ -77,9 +76,8 @@ public class ReadGroupSummary {
7776

7877
private final String readGroupId;
7978
public ReadGroupSummary(String rgId){ this.readGroupId = rgId; }
80-
public String getReadGroupId(){return readGroupId; }
81-
82-
private class Pair {
79+
80+
private class Pair {
8381
private final String name;
8482
Pair(String name){this.name = name;}
8583

qprofiler2/src/org/qcmg/qprofiler2/bam/BamSummarizer.java

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,7 @@
1818
import java.util.Comparator;
1919
import java.util.List;
2020

21-
import htsjdk.samtools.SamReader;
22-
import htsjdk.samtools.ValidationStringency;
23-
import htsjdk.samtools.SAMFileHeader;
24-
import htsjdk.samtools.SAMRecord;
25-
import htsjdk.samtools.SAMSequenceDictionary;
21+
import htsjdk.samtools.*;
2622

2723
import org.qcmg.common.date.DateUtils;
2824
import org.qcmg.common.log.QLevel;
@@ -52,10 +48,10 @@ public static BamSummaryReport createReport(SAMFileHeader header, String file, i
5248
// create the SummaryReport
5349

5450
SAMSequenceDictionary samSeqDict = header.getSequenceDictionary();
55-
List<String> readGroupIds = header.getReadGroups().stream().map( it -> it.getId() ).collect(toList());
56-
readGroupIds.sort(Comparator.comparing( String::toString ) ); // Natural order
57-
58-
BamSummaryReport bamSummaryReport = new BamSummaryReport( maxRecords, isFullBamHeader );
51+
// Natural order
52+
List<String> readGroupIds = header.getReadGroups().stream().map(SAMReadGroupRecord::getId).sorted(Comparator.comparing(String::toString)).collect(toList());
53+
54+
BamSummaryReport bamSummaryReport = new BamSummaryReport( maxRecords, isFullBamHeader );
5955
bamSummaryReport.setBamHeader(header, isFullBamHeader);
6056
bamSummaryReport.setSamSequenceDictionary(samSeqDict);
6157
bamSummaryReport.setReadGroups(readGroupIds);

qprofiler2/src/org/qcmg/qprofiler2/bam/BamSummaryReport.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ public void parseRecord(final SAMRecord record) {
363363
}
364364

365365
// check if record has its fail or duplicate flag set. if so, miss out some of the summaries
366-
ReadGroupSummary rgSumm = rgSummaries.computeIfAbsent(readGroup, k -> new ReadGroupSummary(k));
366+
ReadGroupSummary rgSumm = rgSummaries.computeIfAbsent(readGroup, ReadGroupSummary::new);
367367
if (rgSumm.parseRecord(record)) {
368368

369369
// SEQ
@@ -419,12 +419,12 @@ private void summaryToXml(Element parent) {
419419
summary.readSummary2Xml(rgEle);
420420
summary.pairSummary2Xml(rgEle);
421421
// presummary
422-
lostBase += summary.getDuplicateBase() + summary.getUnmappedBase() + summary.getnotPoperPairedBase()
422+
lostBase += summary.getDuplicateBase() + summary.getUnmappedBase() + summary.getNotProperPairedBase()
423423
+ summary.getTrimmedBase() + summary.getOverlappedBase() + summary.getSoftClippedBase() + summary.getHardClippedBase();
424424
maxBases += summary.getReadCount() * summary.getMaxReadLength();
425425
duplicateBase += summary.getDuplicateBase();
426426
unmappedBase += summary.getUnmappedBase();
427-
noncanonicalBase += summary.getnotPoperPairedBase();
427+
noncanonicalBase += summary.getNotProperPairedBase();
428428
trimBases += summary.getTrimmedBase();
429429
overlappedBase += summary.getOverlappedBase();
430430
softClippedBase += summary.getSoftClippedBase();

qprofiler2/src/org/qcmg/qprofiler2/summarise/ReadGroupSummary.java

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,16 @@
1818

1919
public class ReadGroupSummary {
2020

21-
public static final int ERR_READ_LIMIT = 10;
22-
// xml node name
23-
public static final String NODE_READGROUP = "readGroup";
2421
public static final String NODE_SOFTCLIP = "softClippedBases";
2522
public static final String NODE_TRIM = "trimmedBases";
2623
public static final String NODE_HARDCLIP = "hardClippedBases";
2724
public static final String NODE_READ_LENGTH = "readLength" ;
2825
public static final String NODE_PAIR_TLEN = "tLen" ;
2926
public static final String NODE_OVERLAP = "overlappedBases";
3027
public static final String NODE_DUPLICATE = "duplicateReads";
31-
public static final String NODE_SECONDARY = "secondary";
32-
public static final String NODE_SUPPLEMENTARY = "supplementary";
3328
public static final String NODE_UNMAPPED = "unmappedReads";
3429
public static final String NODE_NOT_PROPER_PAIR = "notProperPairs";
35-
public static final String NODE_FAILED_VENDOR_QUALITY = "failedVendorQuality";
36-
30+
3731
public static final String MIN = "min";
3832
public static final String MAX = "max";
3933
public static final String MEAN = "mean";
@@ -54,11 +48,11 @@ public class ReadGroupSummary {
5448
// record read length excluding the discard reads but includes duplicate,unmapped and nonCanonicalReads
5549
QCMGAtomicLongArray readLength = new QCMGAtomicLongArray(128);
5650
QCMGAtomicLongArray forTrimLength = new QCMGAtomicLongArray(128);
57-
private final ConcurrentMap<String, AtomicLong> cigarValuesCount = new ConcurrentHashMap<String, AtomicLong>();
51+
private final ConcurrentMap<String, AtomicLong> cigarValuesCount = new ConcurrentHashMap<>();
5852
// must be concurrent set for multi threads
5953
private final ConcurrentMap<Integer, PairSummary> pairCategory = new ConcurrentHashMap<>();
6054

61-
// bad reads inforamtion
55+
// bad reads information
6256
AtomicLong duplicate = new AtomicLong();
6357
AtomicLong secondary = new AtomicLong();
6458
AtomicLong supplementary = new AtomicLong();
@@ -122,7 +116,7 @@ public long getUnmappedBase() {
122116
return this.unmapped.get() * getMaxReadLength();
123117
}
124118

125-
public long getnotPoperPairedBase() {
119+
public long getNotProperPairedBase() {
126120
return notProperPairedReads.get() * getMaxReadLength();
127121
}
128122

@@ -149,7 +143,7 @@ public boolean parseRecord( final SAMRecord record ) {
149143
return false;
150144
}
151145

152-
// parseing cigar
146+
// parsing cigar
153147
// cigar string from reads including duplicateReads, nonCanonicalPairs and unmappedReads but excluding discardedReads (failed, secondary and supplementary).
154148
int lHard = 0;
155149
int lSoft = 0;
@@ -181,7 +175,7 @@ public boolean parseRecord( final SAMRecord record ) {
181175
return false;
182176
}
183177

184-
// check pair orientaiton, tLen, mate
178+
// check pair orientation, tLen, mate
185179
if (record.getReadPairedFlag()) {
186180
BwaPair.Pair pairType = BwaPair.getPairType(record);
187181
boolean isProper = record.getProperPairFlag();
@@ -204,7 +198,7 @@ public boolean parseRecord( final SAMRecord record ) {
204198
softClip.increment(lSoft);
205199
}
206200
// record read length excluding the discard reads duplicate.get() + unmapped.get() + getnonCanonicalReadsCount();
207-
// due to it for trimmed base caculation as well
201+
// due to it for trimmed base calculation as well
208202
forTrimLength.increment(record.getReadLength() + lHard);
209203

210204
return true;
@@ -221,8 +215,8 @@ public long getDiscardreads() {
221215

222216

223217
/**
224-
* check all globle value and assign the sumamry value
225-
* eg. private long trimedBase = 0;
218+
* check all global value and assign the summary value
219+
* eg. private long trimmedBase = 0;
226220
*/
227221
public void preSummary() {
228222
// check overlap and tLen from pairSummary
@@ -247,19 +241,18 @@ public void preSummary() {
247241
this.hardclipStats = new SummaryReportUtils.TallyStats( hardClip);
248242
this.readlengthStats = new SummaryReportUtils.TallyStats( readLength );
249243

250-
int maxLenght = (int)readlengthStats.getMax();
251-
QCMGAtomicLongArray trimedBase = new QCMGAtomicLongArray(maxLenght + 1);
244+
int maxLength = (int)readlengthStats.getMax();
245+
QCMGAtomicLongArray trimmedBase = new QCMGAtomicLongArray(maxLength + 1);
252246
for (int i = 0 ; i < forTrimLength.length() ; i ++) {
253-
if (forTrimLength.get(i) == 0 || maxLenght == i ) {
247+
if (forTrimLength.get(i) == 0 || maxLength == i ) {
254248
continue;
255249
}
256-
trimedBase.increment( maxLenght - i, forTrimLength.get(i));
250+
trimmedBase.increment( maxLength - i, forTrimLength.get(i));
257251
}
258-
this.trimBaseStats = new SummaryReportUtils.TallyStats( trimedBase );
252+
this.trimBaseStats = new SummaryReportUtils.TallyStats( trimmedBase );
259253
}
260254

261-
@SuppressWarnings("unchecked")
262-
public void readSummary2Xml(Element parent ) throws Exception {
255+
public void readSummary2Xml(Element parent ) {
263256

264257
preSummary();
265258

@@ -275,7 +268,7 @@ public void readSummary2Xml(Element parent ) throws Exception {
275268
lostBaseStats( rgElement, NODE_OVERLAP, overlapStats );
276269

277270
// create node for overall
278-
rgElement = XmlUtils.createMetricsNode(parent,"reads", new Pair<String, Number>(READ_COUNT, inputReadCounts.get()));
271+
rgElement = XmlUtils.createMetricsNode(parent,"reads", new Pair<>(READ_COUNT, inputReadCounts.get()));
279272
Element ele = XmlUtils.createGroupNode(rgElement, XmlUtils.DISCARD_READS );
280273
XmlUtils.outputValueNode(ele, "supplementaryAlignmentCount", supplementary.get());
281274
XmlUtils.outputValueNode(ele, "secondaryAlignmentCount", secondary.get());
@@ -326,7 +319,7 @@ public void pairSummary2Xml( Element parent ) {
326319
sum += p.getFirstOfPairCounts();
327320
}
328321
}
329-
// can't really count he pair number due to RAM limits, just pickup number of firstOfPair
322+
// can't really count the pair number due to RAM limits, just pickup number of firstOfPair
330323
ele.setAttribute( PAIR_COUNT, sum + "");
331324
}
332325
}

qsv/src/org/qcmg/qsv/softclip/SoftClipStaticMethods.java

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,7 @@
2020
import htsjdk.samtools.SAMRecord;
2121

2222
public class SoftClipStaticMethods {
23-
24-
public static void writeSoftClipRecord(BufferedWriter writer, SAMRecord record, int start, int end, String chromosome) throws IOException {
25-
SAMReadGroupRecord rg = record.getReadGroup();
26-
writeSoftClipRecord( writer, record, (null != rg ? rg.getId() : Constants.EMPTY_STRING), start, end, chromosome);
27-
}
23+
2824
public static void writeSoftClipRecord(BufferedWriter writer, SAMRecord record, String rgId, int start, int end, String chromosome) throws IOException {
2925

3026
String clipRecordString = createSoftClipRecordString(record, rgId, start, end, chromosome);

0 commit comments

Comments
 (0)