Skip to content

Commit

Permalink
Merge pull request #383 from AdamaJava/qcoverage_refactor
Browse files Browse the repository at this point in the history
refactor(qcoverage): minor java21 refactoring based on IDEA suggestions
  • Loading branch information
newellf authored Oct 23, 2024
2 parents d65fc66 + fc06edf commit a4c2d23
Show file tree
Hide file tree
Showing 18 changed files with 129 additions and 165 deletions.
10 changes: 4 additions & 6 deletions qcoverage/src/org/qcmg/coverage/Algorithm.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,8 @@
import htsjdk.samtools.SAMRecord;

interface Algorithm {
public String getName();
public CoverageType getCoverageType();
public void applyTo(final SAMRecord read, Object coverageCounter);
public void applyTo(final SAMRecord read, Object coverageCounter, boolean fullyPopulated);
// public void applyTo(final SAMRecord read, final int[] perBaseCoverages);
// public void applyTo(final SAMRecord read, final int[] perBaseCoverages, boolean fullyPopulated);
String getName();
CoverageType getCoverageType();
void applyTo(final SAMRecord read, Object coverageCounter);
void applyTo(final SAMRecord read, Object coverageCounter, boolean fullyPopulated);
}
33 changes: 18 additions & 15 deletions qcoverage/src/org/qcmg/coverage/Configuration.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public final class Configuration {
private final File outputFile;
private File[] inputBAMFiles;
private File[] inputBAIFiles;
private final HashSet<Pair<File, File>> filePairs = new HashSet<Pair<File, File>>();
private final HashSet<Pair<File, File>> filePairs = new HashSet<>();
private final Options options;
private final CoverageType coverageType;
private final QueryExecutor filter;
Expand All @@ -33,26 +33,29 @@ public final class Configuration {
private final String validation;
private final ReadsNumberCounter countReadFromInput;
private final ReadsNumberCounter countReadToCoverage;

private final QLogger logger;

public Configuration(final Options options) throws Exception {
options.detectBadOptions();
this.options = options;

type = options.getTypes()[0];
if (type.equals("sequence") || type.equals("seq")) {
coverageType = CoverageType.SEQUENCE;
algorithm = new SequenceCoverageAlgorithm();
} else if (type.equals("physical") || type.equals("phys")) {
coverageType = CoverageType.PHYSICAL;
algorithm = new PhysicalCoverageAlgorithm();
} else if (type.equals("low_readdepth")) {
coverageType = CoverageType.LOW_READDEPTH;
algorithm = new LowReadDepthAlgorithm(options.getLowReadDepthCutoff());
} else {
throw new Exception("Unknown coverage type: '" + type + "'");
}
switch (type) {
case "sequence", "seq" -> {
coverageType = CoverageType.SEQUENCE;
algorithm = new SequenceCoverageAlgorithm();
}
case "physical", "phys" -> {
coverageType = CoverageType.PHYSICAL;
algorithm = new PhysicalCoverageAlgorithm();
}
case "low_readdepth" -> {
coverageType = CoverageType.LOW_READDEPTH;
algorithm = new LowReadDepthAlgorithm(options.getLowReadDepthCutoff());
}
default -> throw new Exception("Unknown coverage type: '" + type + "'");
}

loggerInfo = new LoggerInfo(options);
logger = QLoggerFactory.getLogger(Configuration.class);
Expand Down Expand Up @@ -188,7 +191,7 @@ private void inferMissingBaiFileNames() throws Exception {
}

private void inferAllBaiFileNames() {
Vector<String> baiFileNameList = new Vector<String>();
Vector<String> baiFileNameList = new Vector<>();
for (String bamFileName : bamFileNames) {
baiFileNameList.add(bamFileName + ".bai");
}
Expand Down
23 changes: 11 additions & 12 deletions qcoverage/src/org/qcmg/coverage/CoverageJob.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@


import java.io.File;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;

Expand All @@ -27,7 +26,7 @@ class CoverageJob implements Job {
private final String refName;
private final HashSet<Gff3Record> features;
private int[] perBaseCoverages; // Uses 0-based coordinate indexing
private final HashMap<String, HashMap<Integer, AtomicLong>> idToCoverageToBaseCountMap = new HashMap<String, HashMap<Integer, AtomicLong>>();
private final HashMap<String, HashMap<Integer, AtomicLong>> idToCoverageToBaseCountMap = new HashMap<>();
private final HashMap<String, List<LowReadDepthRegion>> lowReadDepthMap = new HashMap<>();
private final QLogger logger;
private final QueryExecutor filter;
Expand Down Expand Up @@ -67,8 +66,8 @@ class CoverageJob implements Job {
SamReader reader = SAMFileReaderFactory.createSAMFileReader(bamFile, validation);
fileReaders.add(reader);
}
logger.debug("length of sequence to be processed by job '" + toString() + "':" + refLength);
logger.debug("number of features to be processed by job '" + toString() + "':" + features.size());
logger.debug("length of sequence to be processed by job '" + this + "':" + refLength);
logger.debug("number of features to be processed by job '" + this + "':" + features.size());
}

@Override
Expand Down Expand Up @@ -122,17 +121,17 @@ void constructCoverageMap() {
Arrays.fill(perBaseCoverages, start-1, feature.getEnd(), 0);
logger.debug("filled in from : " + (start-1) + " to " + feature.getEnd());
}
for (int i = 0 , len = perBaseCoverages.length ; i < len ; i++) {
if (perBaseCoverages[i] < 0) {
isArrayFull = false;
break;
}
}
for (int perBaseCoverage : perBaseCoverages) {
if (perBaseCoverage < 0) {
isArrayFull = false;
break;
}
}
this.fullyPopulated = isArrayFull;
logger.info("fully populated: " + isArrayFull);
}

private void assembleResultsByAlgorithm() throws IOException {
private void assembleResultsByAlgorithm() {
if (alg.getCoverageType().equals(CoverageType.LOW_READDEPTH)) {
assembleLowReadDepthResults();
} else {
Expand Down Expand Up @@ -240,7 +239,7 @@ private int addLowReadDepthRegionIfNeeded(int cov, int pos, int coverageLimit, i
return(startPos);
}

private void assembleLowReadDepthResults() throws IOException {
private void assembleLowReadDepthResults() {
for (Gff3Record feature : features) {
//If low read depth flag is being requested, then we need to find regions with <=8 and <=12 coverage
LowReadDepthAlgorithm lowRdepthAlg = (LowReadDepthAlgorithm) alg;
Expand Down
8 changes: 4 additions & 4 deletions qcoverage/src/org/qcmg/coverage/Feature.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ public Feature(String featureString, int priority) {
String[] values = featureString.split(",");
this.name = values[0];
this.priority = priority;
this.preList = new ArrayList<Integer>();
this.postList = new ArrayList<Integer>();
this.preList = new ArrayList<>();
this.postList = new ArrayList<>();

for (int i=1; i<values.length; i++) {
String current = values[i];
if (current.startsWith("+")) {
postList.add(Integer.valueOf(current.substring(1, current.length())));
postList.add(Integer.valueOf(current.substring(1)));
} else if (current.startsWith("-")) {
preList.add(Integer.valueOf(current.substring(1, current.length())));
preList.add(Integer.valueOf(current.substring(1)));
} else {
Integer currentInt = Integer.valueOf(current);
preList.add(currentInt);
Expand Down
8 changes: 4 additions & 4 deletions qcoverage/src/org/qcmg/coverage/Job.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
import java.util.concurrent.atomic.AtomicLong;

interface Job {
public HashMap<String, HashMap<Integer, AtomicLong>> getResults();
public HashMap<String, List<LowReadDepthRegion>> getLowReadDepthResults();
public void run() throws Exception;
public String toString();
HashMap<String, HashMap<Integer, AtomicLong>> getResults();
HashMap<String, List<LowReadDepthRegion>> getLowReadDepthResults();
void run() throws Exception;
String toString();
}
77 changes: 31 additions & 46 deletions qcoverage/src/org/qcmg/coverage/JobQueue.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,23 @@
import org.qcmg.qio.gff3.Gff3Record;

public final class JobQueue {
private final HashMap<String, HashMap<Integer, AtomicLong>> perIdPerCoverageBaseCounts = new HashMap<String, HashMap<Integer, AtomicLong>>();
private final HashMap<String, List<LowReadDepthRegion>> lowReadDepthResultsFinalMap = new HashMap<String, List<LowReadDepthRegion>>();
private final HashMap<String, HashMap<Integer, AtomicLong>> perIdPerCoverageBaseCounts = new HashMap<>();
private final HashMap<String, List<LowReadDepthRegion>> lowReadDepthResultsFinalMap = new HashMap<>();
private final boolean perFeatureFlag;
private final int numberThreads;
private int numberFeatures = 0;
private final File gff3File;
private final HashSet<String> refNames = new HashSet<String>();
private final LinkedHashSet<String> refNamesOrdered = new LinkedHashSet<String>();
private final HashMap<String, HashSet<Gff3Record>> perRefnameFeatures = new HashMap<String, HashSet<Gff3Record>>();
private final HashMap<String, Integer> perRefnameLengths = new HashMap<String, Integer>();
private final HashMap<Integer, HashSet<String>> perLengthRefnames = new HashMap<Integer, HashSet<String>>();
private final HashSet<String> refNames = new HashSet<>();
private final LinkedHashSet<String> refNamesOrdered = new LinkedHashSet<>();
private final HashMap<String, HashSet<Gff3Record>> perRefnameFeatures = new HashMap<>();
private final HashMap<String, Integer> perRefnameLengths = new HashMap<>();
private final HashMap<Integer, HashSet<String>> perLengthRefnames = new HashMap<>();
private final HashSet<Pair<File, File>> filePairs;
private final HashMap<String, HashSet<Pair<File, File>>> refnameFilePairs = new HashMap<String, HashSet<Pair<File, File>>>();
private final Vector<String> refnameExecutionOrder = new Vector<String>();
private final HashSet<HashMap<String, TreeMap<Integer, AtomicLong>>> perRefnameResults = new HashSet<HashMap<String, TreeMap<Integer, AtomicLong>>>();
private final HashSet<HashMap<String, List<LowReadDepthRegion>>> lowReadDepthResultsSet = new HashSet<HashMap<String, List<LowReadDepthRegion>>>();
private final BlockingQueue<Job> jobQueue = new LinkedBlockingQueue<Job>();
private final HashMap<String, HashSet<Pair<File, File>>> refnameFilePairs = new HashMap<>();
private final Vector<String> refnameExecutionOrder = new Vector<>();
private final HashSet<HashMap<String, TreeMap<Integer, AtomicLong>>> perRefnameResults = new HashSet<>();
private final HashSet<HashMap<String, List<LowReadDepthRegion>>> lowReadDepthResultsSet = new HashSet<>();
private final BlockingQueue<Job> jobQueue = new LinkedBlockingQueue<>();
private final LoggerInfo loggerInfo;
private final QLogger logger;
private final QueryExecutor filter;
Expand All @@ -55,7 +55,7 @@ public final class JobQueue {
private final ReadsNumberCounter countIn;
private final ReadsNumberCounter countOut;
private final Options options;

public JobQueue(final Configuration invariants) throws Exception {
perFeatureFlag = invariants.isPerFeatureFlag();
gff3File = invariants.getInputGFF3File();
Expand All @@ -74,7 +74,7 @@ public JobQueue(final Configuration invariants) throws Exception {
execute();
}

private void execute() throws Exception, IOException {
private void execute() throws Exception {
logger.info("Loading features from GFF file");
loadFeatures();
logger.info("Queueing jobs");
Expand Down Expand Up @@ -122,22 +122,14 @@ private void queueCoverageJobs() throws Exception {
logger.info("Queued jobs are: " + jobQueue);
}

private void reduceResults() throws Exception {
private void reduceResults() {
for (HashMap<String, TreeMap<Integer, AtomicLong>> mappedResult : perRefnameResults) {
for (String id : mappedResult.keySet()) {
HashMap<Integer, AtomicLong> covToBaseCountMap = perIdPerCoverageBaseCounts
.get(id);
if (null == covToBaseCountMap) {
covToBaseCountMap = new HashMap<Integer, AtomicLong>();
perIdPerCoverageBaseCounts.put(id, covToBaseCountMap);
}
for (Integer cov : mappedResult.get(id).keySet()) {
AtomicLong reducedBaseCount = covToBaseCountMap.get(cov);
if (null == reducedBaseCount) {
reducedBaseCount = new AtomicLong();
covToBaseCountMap.put(cov, reducedBaseCount);
}
AtomicLong mappedBaseCount = mappedResult.get(id).get(cov);
HashMap<Integer, AtomicLong> covToBaseCountMap = perIdPerCoverageBaseCounts
.computeIfAbsent(id, k -> new HashMap<>());
for (Integer cov : mappedResult.get(id).keySet()) {
AtomicLong reducedBaseCount = covToBaseCountMap.computeIfAbsent(cov, k -> new AtomicLong());
AtomicLong mappedBaseCount = mappedResult.get(id).get(cov);
assert (null != mappedBaseCount); // Implicit to above logic
reducedBaseCount.addAndGet(mappedBaseCount.get());
}
Expand All @@ -156,18 +148,14 @@ private void reduceResults() throws Exception {

}

private void loadFeatures() throws Exception, IOException {
private void loadFeatures() throws Exception {
identifyRefNames();
Gff3FileReader featureReader = new Gff3FileReader(gff3File);
for (final Gff3Record feature : featureReader) {
String ref = feature.getSeqId();
if (refNames.contains(ref)) {
HashSet<Gff3Record> features = perRefnameFeatures.get(ref);
if (null == features) {
features = new HashSet<Gff3Record>();
perRefnameFeatures.put(ref, features);
}
features.add(feature);
HashSet<Gff3Record> features = perRefnameFeatures.computeIfAbsent(ref, k -> new HashSet<>());
features.add(feature);
}
}
featureReader.close();
Expand All @@ -190,11 +178,11 @@ private void identifyRefNames() throws Exception {
}
}

private Collection<String> identifyGff3RefNames() throws Exception, IOException {
private Collection<String> identifyGff3RefNames() throws Exception {

LinkedHashMap<String, Integer> gff3RefNames = new LinkedHashMap<>();
final StringBuilder gffErrors = new StringBuilder();
try (Gff3FileReader gff3Reader = new Gff3FileReader(gff3File);) {
try (Gff3FileReader gff3Reader = new Gff3FileReader(gff3File)) {
for (Gff3Record record : gff3Reader) {
if (isGff3RecordValid(record)) {
numberFeatures++;
Expand Down Expand Up @@ -229,7 +217,7 @@ private Collection<String> identifyGff3RefNames() throws Exception, IOException
}
}

if (gffErrors.length() > 0) {
if (!gffErrors.isEmpty()) {
logger.error(gffErrors.toString());
throw new IllegalArgumentException("Errors in gff file: " + gff3File);
}
Expand Down Expand Up @@ -261,7 +249,7 @@ private LinkedHashSet<String> identifyBamRefNames() throws IOException {
bamRefNames.add(seqName);
Integer seqLength = seqRecord.getSequenceLength();
perRefnameLengths.put(seqName, seqLength);
HashSet<Pair<File, File>> filePairs = refnameFilePairs.computeIfAbsent(seqName, k -> new HashSet<Pair<File, File>>());
HashSet<Pair<File, File>> filePairs = refnameFilePairs.computeIfAbsent(seqName, k -> new HashSet<>());
filePairs.add(pair);
}
}
Expand All @@ -278,8 +266,7 @@ private LinkedHashSet<String> identifyBamRefNames() throws IOException {
}

private void processJobs() throws Exception {
assert (null != refNames);
HashSet<WorkerThread> workerThreads = new HashSet<WorkerThread>();
HashSet<WorkerThread> workerThreads = new HashSet<>();
for (int j = 0; j < numberThreads; j++) {
WorkerThread thread = new WorkerThread(jobQueue, loggerInfo, Thread.currentThread());
workerThreads.add(thread);
Expand All @@ -304,7 +291,7 @@ private void processJobs() throws Exception {
// Prioritise thread execution based on decreasing sequence length
private void identifyRefNameExecutionOrder() {
for (Integer length : perRefnameLengths.values()) {
perLengthRefnames.put(length, new HashSet<String>());
perLengthRefnames.put(length, new HashSet<>());
}
// Identify refnames for each length
for (String refName : refNames) {
Expand All @@ -319,16 +306,14 @@ private void identifyRefNameExecutionOrder() {
// Determine refName execution order from largest-to-smallest length
for (int i = lengths.length - 1; i >= 0; i--) {
assert (perLengthRefnames.containsKey(lengths[i]));
for (String refName : perLengthRefnames.get(lengths[i])) {
refnameExecutionOrder.add(refName);
}
refnameExecutionOrder.addAll(perLengthRefnames.get(lengths[i]));
}
logger.debug("Refname execution order (first-to-last): "
+ refnameExecutionOrder);
}

public List<CoverageReport> getCoverageReport() {
List<CoverageReport> results = new Vector<CoverageReport>();
List<CoverageReport> results = new Vector<>();
for (final String type : perIdPerCoverageBaseCounts.keySet()) {
HashMap<Integer, AtomicLong> value = perIdPerCoverageBaseCounts
.get(type);
Expand Down
4 changes: 1 addition & 3 deletions qcoverage/src/org/qcmg/coverage/LowReadDepthRegion.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,11 @@ public class LowReadDepthRegion {
private final String refName;
private final int start;
private final int end;
private final int lowReadDepthLimit;

public LowReadDepthRegion(String name, int start, int end, int minCoverage) {
public LowReadDepthRegion(String name, int start, int end, int minCoverage) {
this.refName = name;
this.start = start;
this.end = end;
this.lowReadDepthLimit = minCoverage;
}

public String getRefName() {
Expand Down
5 changes: 2 additions & 3 deletions qcoverage/src/org/qcmg/coverage/Messages.java
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,8 @@ static String getProgramVersion() {
* Gets the version message.
*
* @return the version message
* @throws Exception the exception
*/
static String getVersionMessage() throws Exception {
*/
static String getVersionMessage() {
return getProgramName() + ", version " + getProgramVersion();
}

Expand Down
Loading

0 comments on commit a4c2d23

Please sign in to comment.