Skip to content

Commit

Permalink
Merge pull request #356 from AdamaJava/getReadGroup
Browse files Browse the repository at this point in the history
perf(adamajava): ensure code that calls SAMRecord.getReadGroup() does so sparingly
  • Loading branch information
holmeso authored Apr 26, 2024
2 parents 44ab78a + edc29fd commit b59be0f
Show file tree
Hide file tree
Showing 10 changed files with 40 additions and 69 deletions.
6 changes: 3 additions & 3 deletions qbammerge/src/org/qcmg/bammerge/FileMerger.java
Original file line number Diff line number Diff line change
Expand Up @@ -780,8 +780,9 @@ private void mergeAlignments() throws BamMergeException {

while (iter.hasNext() && !hasReachedNumberRecords()) {
SAMRecord record = iter.next();
SAMReadGroupRecord srgr = record.getReadGroup();

if (null == record.getReadGroup()) {
if (null == srgr) {
logger.warn(record.getSAMString());
logger.warn(record.getAttribute(RG_TAG).toString());
logger.warn(record.getHeader().toString());
Expand All @@ -790,7 +791,7 @@ private void mergeAlignments() throws BamMergeException {

SamReader fileReader = iter.getCurrentSAMFileReader();
if ( ! replacementMap.isEmpty()) {
String oldGroup = record.getReadGroup().getReadGroupId();
String oldGroup = srgr.getReadGroupId();
File file = inputReader.getFile(fileReader);
String newGroup = getReplacementGroup(file, oldGroup);
if (null != newGroup) {
Expand All @@ -800,7 +801,6 @@ private void mergeAlignments() throws BamMergeException {
Integer oldZc = record.getIntegerAttribute(ZC);
if (null == oldZc) {
Integer zc = inputReader.getDefaultZc(fileReader);
// assert null != zc;
record.setAttribute(ZC, zc);
} else {
Set<Integer> permissibleZcs = inputReader.getOldZcs(fileReader);
Expand Down
2 changes: 1 addition & 1 deletion qpicard/src/org/qcmg/picard/MultiSAMFileReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public MultiSAMFileReader(final Set<File> files, ValidationStringency validation
if (SAMFileHeader.SortOrder.coordinate != header.getSortOrder()) {
throw new Exception("Input files must be coordinate sorted");
}
final Set<Integer> zcs = new HashSet<Integer>(8);
final Set<Integer> zcs = new HashSet<>(8);
for (SAMReadGroupRecord record : header.getReadGroups()) {
final String attribute = getAttributeZc( record);
if (null != attribute ) {
Expand Down
8 changes: 2 additions & 6 deletions qprofiler/src/org/qcmg/qprofiler/bam/BamSummarizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,7 @@
import java.io.File;
import java.util.List;

import htsjdk.samtools.SamReader;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.SAMProgramRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.*;

import org.qcmg.common.date.DateUtils;
import org.qcmg.common.log.QLevel;
Expand Down Expand Up @@ -68,7 +64,7 @@ public SummaryReport summarize(String input, String index, String[] regions) thr
bamSummaryReport.setStartTime(DateUtils.getCurrentDateAsString());

try(SamReader reader = SAMFileReaderFactory.createSAMFileReaderAsStream(input, index, vs);) {
readGroupIds = reader.getFileHeader().getReadGroups().stream().map( it -> it.getId() ).collect(toList());
readGroupIds = reader.getFileHeader().getReadGroups().stream().map(SAMReadGroupRecord::getId).collect(toList());
bamSummaryReport.setReadGroups(readGroupIds);

boolean logLevelEnabled = logger.isLevelEnabled(QLevel.DEBUG);
Expand Down
11 changes: 2 additions & 9 deletions qprofiler/src/org/qcmg/qprofiler/bam/BamSummarizerMT.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,7 @@
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.SAMProgramRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.*;

import org.qcmg.common.date.DateUtils;
import org.qcmg.common.log.QLogger;
Expand Down Expand Up @@ -120,7 +113,7 @@ public int compare(SAMSequenceRecord o1, SAMSequenceRecord o2) {

samSeqDict = reader.getFileHeader().getSequenceDictionary();
bamHeader = HeaderUtils.getHeaderStringFromHeader(header);
readGroupIds = header.getReadGroups().stream().map( it -> it.getId() ).collect(toList());
readGroupIds = header.getReadGroups().stream().map(SAMReadGroupRecord::getId).collect(toList());

List<SAMProgramRecord> pgLines = header.getProgramRecords();
for (SAMProgramRecord pgLine : pgLines) {
Expand Down
5 changes: 2 additions & 3 deletions qprofiler/src/org/qcmg/qprofiler/bam/BamSummaryReport.java
Original file line number Diff line number Diff line change
Expand Up @@ -484,9 +484,8 @@ public void parseRecord(final SAMRecord record) throws Exception {
updateRecordsParsed();
MAPQMatrix matrix = null;

String readGroup = SummaryReportUtils.UNKNOWN_READGROUP;
if (record.getReadGroup() != null && record.getReadGroup().getId() != null)
readGroup = record.getReadGroup().getReadGroupId();
SAMReadGroupRecord srgr = record.getReadGroup();
String readGroup = srgr == null ? SummaryReportUtils.UNKNOWN_READGROUP : srgr.getReadGroupId();

// Xu code: check if record has its fail or duplicate flag set. if so, miss out some of the summaries
//anyway, add to summary and then add to its readgroup
Expand Down
10 changes: 4 additions & 6 deletions qprofiler/src/org/qcmg/qprofiler/summarise/ReadGroupSummary.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,10 @@ public class ReadGroupSummary {
QCMGAtomicLongArray readLength = new QCMGAtomicLongArray(128);
QCMGAtomicLongArray overlapBase = new QCMGAtomicLongArray(128);

//QCMGAtomicLongArray.get(arrayTlenLimit) for tlen=[bigTlenValue, ~)
QCMGAtomicLongArray isize = new QCMGAtomicLongArray(bigTlenValue + 1);
QCMGAtomicLongArray isize = new QCMGAtomicLongArray(bigTlenValue + 1);
AtomicInteger maxIsize = new AtomicInteger();

//bad reads inforamtion
//bad reads information
AtomicLong duplicate = new AtomicLong();
AtomicLong secondary = new AtomicLong();
AtomicLong supplementary = new AtomicLong();
Expand All @@ -77,9 +76,8 @@ public class ReadGroupSummary {

private final String readGroupId;
public ReadGroupSummary(String rgId){ this.readGroupId = rgId; }
public String getReadGroupId(){return readGroupId; }

private class Pair {

private class Pair {
private final String name;
Pair(String name){this.name = name;}

Expand Down
14 changes: 5 additions & 9 deletions qprofiler2/src/org/qcmg/qprofiler2/bam/BamSummarizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,7 @@
import java.util.Comparator;
import java.util.List;

import htsjdk.samtools.SamReader;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.*;

import org.qcmg.common.date.DateUtils;
import org.qcmg.common.log.QLevel;
Expand Down Expand Up @@ -52,10 +48,10 @@ public static BamSummaryReport createReport(SAMFileHeader header, String file, i
// create the SummaryReport

SAMSequenceDictionary samSeqDict = header.getSequenceDictionary();
List<String> readGroupIds = header.getReadGroups().stream().map( it -> it.getId() ).collect(toList());
readGroupIds.sort(Comparator.comparing( String::toString ) ); // Natural order
BamSummaryReport bamSummaryReport = new BamSummaryReport( maxRecords, isFullBamHeader );
// Natural order
List<String> readGroupIds = header.getReadGroups().stream().map(SAMReadGroupRecord::getId).sorted(Comparator.comparing(String::toString)).collect(toList());

BamSummaryReport bamSummaryReport = new BamSummaryReport( maxRecords, isFullBamHeader );
bamSummaryReport.setBamHeader(header, isFullBamHeader);
bamSummaryReport.setSamSequenceDictionary(samSeqDict);
bamSummaryReport.setReadGroups(readGroupIds);
Expand Down
6 changes: 3 additions & 3 deletions qprofiler2/src/org/qcmg/qprofiler2/bam/BamSummaryReport.java
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ public void parseRecord(final SAMRecord record) {
}

// check if record has its fail or duplicate flag set. if so, miss out some of the summaries
ReadGroupSummary rgSumm = rgSummaries.computeIfAbsent(readGroup, k -> new ReadGroupSummary(k));
ReadGroupSummary rgSumm = rgSummaries.computeIfAbsent(readGroup, ReadGroupSummary::new);
if (rgSumm.parseRecord(record)) {

// SEQ
Expand Down Expand Up @@ -419,12 +419,12 @@ private void summaryToXml(Element parent) {
summary.readSummary2Xml(rgEle);
summary.pairSummary2Xml(rgEle);
// presummary
lostBase += summary.getDuplicateBase() + summary.getUnmappedBase() + summary.getnotPoperPairedBase()
lostBase += summary.getDuplicateBase() + summary.getUnmappedBase() + summary.getNotProperPairedBase()
+ summary.getTrimmedBase() + summary.getOverlappedBase() + summary.getSoftClippedBase() + summary.getHardClippedBase();
maxBases += summary.getReadCount() * summary.getMaxReadLength();
duplicateBase += summary.getDuplicateBase();
unmappedBase += summary.getUnmappedBase();
noncanonicalBase += summary.getnotPoperPairedBase();
noncanonicalBase += summary.getNotProperPairedBase();
trimBases += summary.getTrimmedBase();
overlappedBase += summary.getOverlappedBase();
softClippedBase += summary.getSoftClippedBase();
Expand Down
41 changes: 17 additions & 24 deletions qprofiler2/src/org/qcmg/qprofiler2/summarise/ReadGroupSummary.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,16 @@

public class ReadGroupSummary {

public static final int ERR_READ_LIMIT = 10;
// xml node name
public static final String NODE_READGROUP = "readGroup";
public static final String NODE_SOFTCLIP = "softClippedBases";
public static final String NODE_TRIM = "trimmedBases";
public static final String NODE_HARDCLIP = "hardClippedBases";
public static final String NODE_READ_LENGTH = "readLength" ;
public static final String NODE_PAIR_TLEN = "tLen" ;
public static final String NODE_OVERLAP = "overlappedBases";
public static final String NODE_DUPLICATE = "duplicateReads";
public static final String NODE_SECONDARY = "secondary";
public static final String NODE_SUPPLEMENTARY = "supplementary";
public static final String NODE_UNMAPPED = "unmappedReads";
public static final String NODE_NOT_PROPER_PAIR = "notProperPairs";
public static final String NODE_FAILED_VENDOR_QUALITY = "failedVendorQuality";


public static final String MIN = "min";
public static final String MAX = "max";
public static final String MEAN = "mean";
Expand All @@ -54,11 +48,11 @@ public class ReadGroupSummary {
// record read length excluding the discard reads but includes duplicate,unmapped and nonCanonicalReads
QCMGAtomicLongArray readLength = new QCMGAtomicLongArray(128);
QCMGAtomicLongArray forTrimLength = new QCMGAtomicLongArray(128);
private final ConcurrentMap<String, AtomicLong> cigarValuesCount = new ConcurrentHashMap<String, AtomicLong>();
private final ConcurrentMap<String, AtomicLong> cigarValuesCount = new ConcurrentHashMap<>();
// must be a concurrent map for multi-threaded access
private final ConcurrentMap<Integer, PairSummary> pairCategory = new ConcurrentHashMap<>();

// bad reads inforamtion
// bad reads information
AtomicLong duplicate = new AtomicLong();
AtomicLong secondary = new AtomicLong();
AtomicLong supplementary = new AtomicLong();
Expand Down Expand Up @@ -122,7 +116,7 @@ public long getUnmappedBase() {
return this.unmapped.get() * getMaxReadLength();
}

public long getnotPoperPairedBase() {
public long getNotProperPairedBase() {
return notProperPairedReads.get() * getMaxReadLength();
}

Expand All @@ -149,7 +143,7 @@ public boolean parseRecord( final SAMRecord record ) {
return false;
}

// parseing cigar
// parsing cigar
// cigar string from reads including duplicateReads, nonCanonicalPairs and unmappedReads but excluding discardedReads (failed, secondary and supplementary).
int lHard = 0;
int lSoft = 0;
Expand Down Expand Up @@ -181,7 +175,7 @@ public boolean parseRecord( final SAMRecord record ) {
return false;
}

// check pair orientaiton, tLen, mate
// check pair orientation, tLen, mate
if (record.getReadPairedFlag()) {
BwaPair.Pair pairType = BwaPair.getPairType(record);
boolean isProper = record.getProperPairFlag();
Expand All @@ -204,7 +198,7 @@ public boolean parseRecord( final SAMRecord record ) {
softClip.increment(lSoft);
}
// record read length excluding the discard reads duplicate.get() + unmapped.get() + getnonCanonicalReadsCount();
// due to it for trimmed base caculation as well
// due to it for trimmed base calculation as well
forTrimLength.increment(record.getReadLength() + lHard);

return true;
Expand All @@ -221,8 +215,8 @@ public long getDiscardreads() {


/**
* check all globle value and assign the sumamry value
* eg. private long trimedBase = 0;
* check all global value and assign the summary value
* eg. private long trimmedBase = 0;
*/
public void preSummary() {
// check overlap and tLen from pairSummary
Expand All @@ -247,19 +241,18 @@ public void preSummary() {
this.hardclipStats = new SummaryReportUtils.TallyStats( hardClip);
this.readlengthStats = new SummaryReportUtils.TallyStats( readLength );

int maxLenght = (int)readlengthStats.getMax();
QCMGAtomicLongArray trimedBase = new QCMGAtomicLongArray(maxLenght + 1);
int maxLength = (int)readlengthStats.getMax();
QCMGAtomicLongArray trimmedBase = new QCMGAtomicLongArray(maxLength + 1);
for (int i = 0 ; i < forTrimLength.length() ; i ++) {
if (forTrimLength.get(i) == 0 || maxLenght == i ) {
if (forTrimLength.get(i) == 0 || maxLength == i ) {
continue;
}
trimedBase.increment( maxLenght - i, forTrimLength.get(i));
trimmedBase.increment( maxLength - i, forTrimLength.get(i));
}
this.trimBaseStats = new SummaryReportUtils.TallyStats( trimedBase );
this.trimBaseStats = new SummaryReportUtils.TallyStats( trimmedBase );
}

@SuppressWarnings("unchecked")
public void readSummary2Xml(Element parent ) throws Exception {
public void readSummary2Xml(Element parent ) {

preSummary();

Expand All @@ -275,7 +268,7 @@ public void readSummary2Xml(Element parent ) throws Exception {
lostBaseStats( rgElement, NODE_OVERLAP, overlapStats );

// create node for overall
rgElement = XmlUtils.createMetricsNode(parent,"reads", new Pair<String, Number>(READ_COUNT, inputReadCounts.get()));
rgElement = XmlUtils.createMetricsNode(parent,"reads", new Pair<>(READ_COUNT, inputReadCounts.get()));
Element ele = XmlUtils.createGroupNode(rgElement, XmlUtils.DISCARD_READS );
XmlUtils.outputValueNode(ele, "supplementaryAlignmentCount", supplementary.get());
XmlUtils.outputValueNode(ele, "secondaryAlignmentCount", secondary.get());
Expand Down Expand Up @@ -326,7 +319,7 @@ public void pairSummary2Xml( Element parent ) {
sum += p.getFirstOfPairCounts();
}
}
// can't really count he pair number due to RAM limits, just pickup number of firstOfPair
// can't really count the pair number due to RAM limits, just pickup number of firstOfPair
ele.setAttribute( PAIR_COUNT, sum + "");
}
}
Expand Down
6 changes: 1 addition & 5 deletions qsv/src/org/qcmg/qsv/softclip/SoftClipStaticMethods.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,7 @@
import htsjdk.samtools.SAMRecord;

public class SoftClipStaticMethods {

public static void writeSoftClipRecord(BufferedWriter writer, SAMRecord record, int start, int end, String chromosome) throws IOException {
SAMReadGroupRecord rg = record.getReadGroup();
writeSoftClipRecord( writer, record, (null != rg ? rg.getId() : Constants.EMPTY_STRING), start, end, chromosome);
}

public static void writeSoftClipRecord(BufferedWriter writer, SAMRecord record, String rgId, int start, int end, String chromosome) throws IOException {

String clipRecordString = createSoftClipRecordString(record, rgId, start, end, chromosome);
Expand Down

0 comments on commit b59be0f

Please sign in to comment.