diff --git a/misc/sync_dependency.sh b/misc/sync_dependency.sh
new file mode 100755
index 00000000000..316bbc39d3b
--- /dev/null
+++ b/misc/sync_dependency.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+
+## Print the first argument in yellow, then restore the terminal's default attributes.
+function yellow (){
+    ## 'sgr0' resets to the terminal default; 'setaf 7' would force a white foreground instead.
+    ## tput errors (e.g. no usable TERM) are silenced so the plain text is still printed.
+    printf '%s%s%s\n' "$(tput setaf 3 2>/dev/null)" "$1" "$(tput sgr0 2>/dev/null)"
+}
+## Print the first argument in green, then restore the terminal's default attributes.
+function green (){
+    ## 'sgr0' resets to the terminal default; 'setaf 7' would force a white foreground instead.
+    ## tput errors (e.g. no usable TERM) are silenced so the plain text is still printed.
+    printf '%s%s%s\n' "$(tput setaf 2 2>/dev/null)" "$1" "$(tput sgr0 2>/dev/null)"
+}
+## Print the first argument in cyan, then restore the terminal's default attributes.
+function cyan (){
+    ## 'sgr0' resets to the terminal default; 'setaf 7' would force a white foreground instead.
+    ## tput errors (e.g. no usable TERM) are silenced so the plain text is still printed.
+    printf '%s%s%s\n' "$(tput setaf 6 2>/dev/null)" "$1" "$(tput sgr0 2>/dev/null)"
+}
+
+## Print the command-line help of this script to stdout.
+function printUsage(){
+    echo ""
+    ## The headline describes this script (the previous text belonged to a release script).
+    yellow "Synchronize the version of an OpenCB dependency with the current TASK branch."
+    echo ""
+    ## Quote "$0" so a script path containing spaces is not word-split by basename.
+    echo "Usage:   $(basename -- "$0") --biodata|-b|--java-common-libs|-j"
+    echo ""
+    cyan "Options:"
+    green "    -j     --java-common-libs     STRING     Update java-common-libs dependency"
+    green "    -b     --biodata              STRING     Update biodata dependency"
+    echo ""
+}
+
+## Abort the script unless the git working tree of the current directory is clean.
+## Globals: GIT_STATUS (written) - output of 'git status --short'.
+## Exits with status 2 if 'git status' itself fails and with status 1 if the
+## repository has pending changes.
+function check_repo_clean() {
+    ## A failing 'git status' prints nothing on stdout and must not be mistaken for "clean".
+    if ! GIT_STATUS=$(git status --short); then
+        yellow "Unable to read the git status of '$PWD'"
+        exit 2
+    fi
+    if [ -n "$GIT_STATUS" ]; then
+        yellow "Repository is not clean:"
+        yellow "$GIT_STATUS"
+        ## A bare 'exit' would reuse the status of the previous command (0) and report success.
+        exit 1
+    fi
+}
+
+## This function removes TASK-XXX- if exists, otherwise it adds it.
+## Arguments: $1 - branch name, e.g. TASK-1234
+## Globals:   POM_DEPENDENCY_VERSION (read)
+##            NEW_VERSION (written)
+##            CLEAN_RELEASE_VERSION, TAG_VERSION (written only when the branch is added)
+## Exits with status 1 if POM_DEPENDENCY_VERSION is empty.
+function toggle_version() {
+    local BRANCH=$1
+    if [ -z "$POM_DEPENDENCY_VERSION" ]; then
+        echo "Cannot toggle the branch in an empty dependency version" >&2
+        exit 1
+    fi
+    if [[ "$POM_DEPENDENCY_VERSION" == *"$BRANCH"* ]]; then
+        ## Remove TASK-XXX- from the current version
+        ## Example: remove 'TASK-1234-' from 2.6.0-TASK-1234-SNAPSHOT
+        NEW_VERSION=${POM_DEPENDENCY_VERSION/"$BRANCH-"}
+    else
+        ## Add 'TASK-XXX-' to the current version
+        ## Example: 2.6.0-SNAPSHOT --> 2.6.0-TASK-1234-SNAPSHOT
+        ## Everything before the first '-' is the release number.
+        CLEAN_RELEASE_VERSION=${POM_DEPENDENCY_VERSION%%-*}
+        if [[ "$POM_DEPENDENCY_VERSION" == *-* ]]; then
+            ## Keep the whole qualifier (it may contain '-') so toggling twice restores the input.
+            TAG_VERSION=${POM_DEPENDENCY_VERSION#*-}
+            NEW_VERSION="$CLEAN_RELEASE_VERSION-$BRANCH-$TAG_VERSION"
+        else
+            ## No qualifier at all, e.g. 2.6.0 --> 2.6.0-TASK-1234
+            TAG_VERSION=""
+            NEW_VERSION="$CLEAN_RELEASE_VERSION-$BRANCH"
+        fi
+    fi
+}
+
+## Change version in the dependency.
+## Usage: update_dependency "$DEPENDENCY_REPO" "$NEW_VERSION" "$BRANCH_NAME"
+## Arguments: $1 - path to the dependency repository
+##            $2 - version to set with 'mvn versions:set'
+##            $3 - branch to check out in the dependency repository
+## Exits with status 2 if a directory cannot be entered and with status 1 if the
+## branch cannot be checked out or Maven fails; check_repo_clean may also exit.
+function update_dependency() {
+    ## Save current directory
+    local pwd=$PWD
+    cd "$1" || exit 2
+    check_repo_clean
+    git checkout "$3"
+    ## Check branch exists: after a failed checkout the current branch is not "$3"
+    ## Declaration and assignment are split so 'local' does not mask the git status.
+    local BRANCH
+    BRANCH=$(git branch --show-current)
+    if [ "$BRANCH" != "$3" ]; then
+        yellow "Branch '$3' does not exist"
+        ## A bare 'exit' would return the status of 'yellow' (0) and report success.
+        exit 1
+    fi
+    ## Rename and commit new version; never commit when Maven did not succeed
+    mvn versions:set -DnewVersion="$2" -DgenerateBackupPoms=false || exit 1
+    git commit -am "Update version to $2"
+    ## Restore directory
+    cd "$pwd" || exit 2
+}
+
+## At least one parameter is required.
+if [ -z "$1" ]; then
+    printUsage
+    exit 1
+fi
+
+## Parse options. Each library option accepts an optional path to the dependency
+## repository. Without a path, a DEPENDENCY_REPO value that was set before parsing
+## is kept; otherwise a directory next to this repository is used.
+PRESET_DEPENDENCY_REPO="${DEPENDENCY_REPO:-}"
+while [[ $# -gt 0 ]]; do
+    key="$1"
+    ## The next argument is a path only if it exists and is not itself an option,
+    ## so '-j -h' no longer swallows '-h' as a repository path.
+    REPO_ARG=""
+    if [[ -n "${2:-}" && "$2" != -* ]]; then
+        REPO_ARG="$2"
+    fi
+    case $key in
+    -h | --help)
+        printUsage
+        exit 0
+        ;;
+    -j | --java-common-libs)
+        LIB="JAVA_COMMONS_LIB"
+        DEPENDENCY_REPO="${REPO_ARG:-${PRESET_DEPENDENCY_REPO:-../java-common-libs}}"
+        ;;
+    -b | --biodata)
+        LIB="BIODATA"
+        DEPENDENCY_REPO="${REPO_ARG:-${PRESET_DEPENDENCY_REPO:-../biodata}}"
+        ;;
+    *) # unknown option
+        echo "Unknown option $key"
+        printUsage
+        exit 1
+        ;;
+    esac
+    shift # past argument
+    if [ -n "$REPO_ARG" ]; then
+        shift # past the repository path
+    fi
+done
+
+## Work from the parent of the directory that contains this script, remembering
+## where the caller was so the directory can be restored at the end.
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+CURRENT_DIR=$PWD
+cd "$SCRIPT_DIR" || exit 2
+cd .. || exit 2
+BRANCH_NAME=$(git branch --show-current)
+## Only TASK-* branches are accepted.
+if [[ "$BRANCH_NAME" != "TASK-"* ]]; then
+    yellow "[$BRANCH_NAME] The branch name must start with TASK-"
+    ## Fail with a non-zero status; a bare 'exit' would report success here.
+    exit 1
+fi
+check_repo_clean
+
+## Toggle the task branch in the version of one dependency, both in the dependency
+## repository and in the matching property of the local pom.xml, committing each change.
+## Arguments: $1 - name of the pom.xml property holding the dependency version
+##                 (e.g. java-common-libs.version or biodata.version)
+## Globals:   BRANCH_NAME, DEPENDENCY_REPO (read)
+##            POM_DEPENDENCY_VERSION (written); NEW_VERSION (written by toggle_version)
+## Exits with status 1 if the property is not found in pom.xml or Maven fails.
+function update_library(){
+    local LIBRARY="$1"
+    ## -F: the property name contains '.', which must match literally
+    POM_DEPENDENCY_VERSION=$(grep -m 1 -F -- "$LIBRARY" pom.xml | cut -d ">" -f 2 | cut -d "<" -f 1)
+    if [ -z "$POM_DEPENDENCY_VERSION" ]; then
+        yellow "Property '$LIBRARY' not found in pom.xml"
+        exit 1
+    fi
+    toggle_version "$BRANCH_NAME"
+    update_dependency "$DEPENDENCY_REPO" "$NEW_VERSION" "$BRANCH_NAME"
+    ## Update the property that was requested, not always java-common-libs.version
+    mvn versions:set-property -Dproperty="$LIBRARY" -DnewVersion="$NEW_VERSION" -DgenerateBackupPoms=false || exit 1
+    git commit -am "Update '$LIBRARY' dependency to $NEW_VERSION"
+}
+
+
+## Dispatch on the library selected while parsing the options; any other value
+## of LIB updates nothing.
+case "$LIB" in
+JAVA_COMMONS_LIB)
+    update_library java-common-libs.version
+    ;;
+BIODATA)
+    update_library biodata.version
+    ;;
+esac
+
+## Report the result and go back to the directory the script was started from.
+yellow "The new dependency version is $NEW_VERSION"
+cd "$CURRENT_DIR" || exit 2
diff --git a/opencga b/opencga
deleted file mode 100644
index 97288a06c67..00000000000
--- a/opencga
+++ /dev/null
@@ -1,33 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-OpenCGA server
-
-
-
-
\ No newline at end of file
diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml
index 90cb0d4375e..5d6e9c1dbd7 100644
--- a/opencga-analysis/pom.xml
+++ b/opencga-analysis/pom.xml
@@ -22,7 +22,7 @@
org.opencb.opencga
opencga
- 2.4.13-SNAPSHOT
+ 2.6.0-SNAPSHOT
../pom.xml
diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/AnalysisUtils.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/AnalysisUtils.java
index 88d38950f49..628bc63b2ff 100644
--- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/AnalysisUtils.java
+++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/AnalysisUtils.java
@@ -6,8 +6,11 @@
import org.opencb.opencga.catalog.db.api.FileDBAdaptor;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.managers.FileManager;
+import org.opencb.opencga.catalog.managers.JobManager;
import org.opencb.opencga.core.exceptions.ToolException;
+import org.opencb.opencga.core.models.common.Enums;
import org.opencb.opencga.core.models.file.File;
+import org.opencb.opencga.core.models.job.Job;
import org.opencb.opencga.core.response.OpenCGAResult;
import java.io.*;
@@ -121,4 +124,54 @@ public static Map> parseRelatednessThresholds(Path th
}
return thresholds;
}
+
+ /**
+  * Polls the catalog every 30 seconds until the job is no longer in PENDING, RUNNING, QUEUED, READY or
+  * REGISTERING status.
+  *
+  * @param jobId      ID of the job to wait for.
+  * @param study      Study passed to the job search.
+  * @param jobManager Manager used to search for the job.
+  * @param token      Token passed to the job search.
+  * @return true if the last status read is DONE, false otherwise.
+  * @throws ToolException    if the job is not found, a poll fails or the wait is interrupted.
+  * @throws CatalogException if the initial job search fails.
+  */
+ public static boolean waitFor(String jobId, String study, JobManager jobManager, String token) throws ToolException, CatalogException {
+     Query query = new Query("id", jobId);
+     Job job = jobManager.search(study, query, QueryOptions.empty(), token).first();
+     if (job == null) {
+         throw new ToolException("Error waiting for job '" + jobId + "': job not found in study '" + study + "'.");
+     }
+     String status = job.getInternal().getStatus().getId();
+
+     while (status.equals(Enums.ExecutionStatus.PENDING) || status.equals(Enums.ExecutionStatus.RUNNING)
+             || status.equals(Enums.ExecutionStatus.QUEUED) || status.equals(Enums.ExecutionStatus.READY)
+             || status.equals(Enums.ExecutionStatus.REGISTERING)) {
+         try {
+             // Sleep for 30 seconds
+             Thread.sleep(30000);
+             job = jobManager.search(study, query, QueryOptions.empty(), token).first();
+         } catch (InterruptedException e) {
+             // Keep the interrupt flag for the caller and stop waiting instead of looping again
+             Thread.currentThread().interrupt();
+             throw new ToolException("Error waiting for job '" + jobId + "': " + e.getMessage(), e);
+         } catch (CatalogException e) {
+             // The exception must be thrown: merely constructing it would silently retry with stale data
+             throw new ToolException("Error waiting for job '" + jobId + "': " + e.getMessage(), e);
+         }
+         if (job == null) {
+             throw new ToolException("Error waiting for job '" + jobId + "': job not found in study '" + study + "'.");
+         }
+         status = job.getInternal().getStatus().getId();
+     }
+     return status.equals(Enums.ExecutionStatus.DONE);
+ }
+
+ /**
+  * Searches the given study for the job with the given ID.
+  *
+  * @param jobId      ID of the job to fetch.
+  * @param study      Study passed to the job search.
+  * @param jobManager Manager used to search for the job.
+  * @param token      Token passed to the job search.
+  * @return the first job returned by the search; never null.
+  * @throws ToolException    if the search returns no job.
+  * @throws CatalogException if the job search fails.
+  */
+ public static Job getJob(String jobId, String study, JobManager jobManager, String token) throws ToolException, CatalogException {
+     Query query = new Query("id", jobId);
+     Job job = jobManager.search(study, query, QueryOptions.empty(), token).first();
+     if (job == null) {
+         // The exception was previously constructed but not thrown, so callers received null
+         throw new ToolException("Error getting job '" + jobId + "' from study '" + study + "'.");
+     }
+     return job;
+ }
+
+ /** Marker searched for in a path by {@link #getJobBaseDir(String)} and {@link #getJobFileRelativePath(String)}. */
+ public static final String JOBS_IN_JOBDIR = "JOBS";
+
+ /**
+  * Returns the leading part of the path up to and including the first occurrence of {@link #JOBS_IN_JOBDIR}
+  * and the single character that follows it.
+  *
+  * @param path Path to inspect.
+  * @return the prefix described above (the whole path if it ends right after the marker), or null if the path
+  *         is null or does not contain the marker.
+  */
+ public static String getJobBaseDir(String path) {
+     if (path == null) {
+         return null;
+     }
+     int index = path.indexOf(JOBS_IN_JOBDIR);
+     if (index == -1) {
+         return null;
+     }
+     // Marker length plus the character after it (previously a hard-coded 5); bounded so that a path
+     // ending with the marker does not throw StringIndexOutOfBoundsException
+     int end = Math.min(path.length(), index + JOBS_IN_JOBDIR.length() + 1);
+     return path.substring(0, end);
+ }
+
+ /**
+  * Returns the part of the path that follows the first occurrence of {@link #JOBS_IN_JOBDIR} and the single
+  * character after it.
+  *
+  * @param path Path to inspect.
+  * @return the remainder described above (empty if nothing follows the marker), or null if the path is null
+  *         or does not contain the marker.
+  */
+ public static String getJobFileRelativePath(String path) {
+     if (path == null) {
+         return null;
+     }
+     int index = path.indexOf(JOBS_IN_JOBDIR);
+     if (index == -1) {
+         return null;
+     }
+     // Marker length plus the character after it (previously a hard-coded 5); a path that ends at the
+     // marker yields an empty string instead of StringIndexOutOfBoundsException
+     int start = index + JOBS_IN_JOBDIR.length() + 1;
+     return start >= path.length() ? "" : path.substring(start);
+ }
}
diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/AlignmentStorageManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/AlignmentStorageManager.java
index 5285e538a26..47b8b3d0939 100644
--- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/AlignmentStorageManager.java
+++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/AlignmentStorageManager.java
@@ -228,6 +228,7 @@ public OpenCGAResult coverageStats(String studyIdStr, String
}
String species = projectQueryResult.first().getOrganism().getScientificName();
String assembly = projectQueryResult.first().getOrganism().getAssembly();
+ String dataRelease = projectQueryResult.first().getCellbase().getDataRelease();
for (String geneName : geneNames) {
@@ -248,9 +249,9 @@ public OpenCGAResult coverageStats(String studyIdStr, String
// Query CellBase to get gene coordinates and then apply the offset (up and downstream) to create a gene region
- CellBaseClient cellBaseClient = new CellBaseClient(storageEngineFactory.getVariantStorageEngine().getConfiguration().getCellbase()
+ CellBaseClient cellBaseClient = new CellBaseClient(species, assembly, dataRelease, projectQueryResult.first().getCellbase()
.toClientConfiguration());
- GeneClient geneClient = new GeneClient(species, assembly, cellBaseClient.getClientConfiguration());
+ GeneClient geneClient = cellBaseClient.getGeneClient();
Gene gene = geneClient.get(Collections.singletonList(geneName), QueryOptions.empty()).firstResult();
if (gene != null) {
List transcriptCoverageStatsList = new ArrayList<>();
@@ -445,9 +446,10 @@ public List mergeRegions(List regions, List genes, boole
// Query CellBase to get gene coordinates and then apply the offset (up and downstream) to create a gene region
String species = projectQueryResult.first().getOrganism().getScientificName();
String assembly = projectQueryResult.first().getOrganism().getAssembly();
- CellBaseClient cellBaseClient = new CellBaseClient(storageEngineFactory.getVariantStorageEngine().getConfiguration().getCellbase()
+ String dataRelease = projectQueryResult.first().getCellbase().getDataRelease();
+ CellBaseClient cellBaseClient = new CellBaseClient(species, assembly, dataRelease, projectQueryResult.first().getCellbase()
.toClientConfiguration());
- GeneClient geneClient = new GeneClient(species, assembly, cellBaseClient.getClientConfiguration());
+ GeneClient geneClient = cellBaseClient.getGeneClient();
List response = geneClient.get(genes, QueryOptions.empty()).allResults();
if (CollectionUtils.isNotEmpty(response)) {
for (Gene gene : response) {
@@ -500,15 +502,13 @@ private void updateRegionMap(Region region, Map map) {
// PRIVATE METHODS
//-------------------------------------------------------------------------
- public Map> getExonRegionsPerTranscript(String geneName, String species, String assembly)
+ public Map> getExonRegionsPerTranscript(String geneName, CellBaseClient cellBaseClient)
throws StorageEngineException, IOException {
// Init region map, where key = transcript and value = list of exon regions
Map> regionMap = new HashMap<>();
// Query CellBase to get gene coordinates and then apply the offset (up and downstream) to create a gene region
- CellBaseClient cellBaseClient = new CellBaseClient(storageEngineFactory.getVariantStorageEngine().getConfiguration().getCellbase()
- .toClientConfiguration());
- GeneClient geneClient = new GeneClient(species, assembly, cellBaseClient.getClientConfiguration());
+ GeneClient geneClient = cellBaseClient.getGeneClient();
Gene gene = geneClient.get(Collections.singletonList(geneName), QueryOptions.empty()).firstResult();
if (gene != null) {
// Create region from gene coordinates
diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/qc/AlignmentQcAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/qc/AlignmentQcAnalysis.java
index 43af49f934c..d4d08bba37f 100644
--- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/qc/AlignmentQcAnalysis.java
+++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/qc/AlignmentQcAnalysis.java
@@ -61,7 +61,6 @@ public class AlignmentQcAnalysis extends OpenCgaToolScopeStudy {
private boolean runStats = true;
private boolean runFlagStats = true;
private boolean runFastqc = true;
- private boolean runHsmetrics = true;
@Override
protected void check() throws Exception {
@@ -99,33 +98,10 @@ protected void check() throws Exception {
(!analysisParams.isOverwrite() && alignmentQc != null && alignmentQc.getFastQcMetrics() != null)) {
runFastqc = false;
}
- if (skipValues.contains(AlignmentQcParams.HS_METRICS_SKIP_VALUE)
- ||
- (!analysisParams.isOverwrite() && alignmentQc != null && alignmentQc.getHsMetrics() != null)
- ||
- StringUtils.isEmpty(analysisParams.getBedFile())
- ||
- StringUtils.isEmpty(analysisParams.getDictFile())) {
- runHsmetrics = false;
- }
}
} catch (CatalogException e) {
throw new ToolException("Error accessing to the BAM file '" + analysisParams.getBamFile() + "'", e);
}
-
- if (runHsmetrics) {
- try {
- AnalysisUtils.getCatalogFile(analysisParams.getBedFile(), study, catalogManager.getFileManager(), token);
- } catch (CatalogException e) {
- throw new ToolException("Error accessing to the BED file '" + analysisParams.getBedFile() + "'", e);
- }
-
- try {
- AnalysisUtils.getCatalogFile(analysisParams.getDictFile(), study, catalogManager.getFileManager(), token);
- } catch (CatalogException e) {
- throw new ToolException("Error accessing to the dictionary file '" + analysisParams.getDictFile() + "'", e);
- }
- }
}
@Override
@@ -136,7 +112,6 @@ protected void run() throws ToolException {
String statsJobId = null;
String flagStatsJobId = null;
String fastQcMetricsJobId = null;
- String hsMetricsJobId = null;
try {
if (runFlagStats) {
@@ -189,29 +164,10 @@ protected void run() throws ToolException {
addWarning("Error launching job for Alignment FastQC Metrics Analysis: " + e.getMessage());
}
- try {
- if (runHsmetrics) {
- // HS metrics
- params = new AlignmentHsMetricsParams(analysisParams.getBamFile(), analysisParams.getBedFile(),
- analysisParams.getDictFile(), null).toParams(new ObjectMap(ParamConstants.STUDY_PARAM, study));
-
- OpenCGAResult hsMetricsJobResult = catalogManager.getJobManager()
- .submit(study, AlignmentHsMetricsAnalysis.ID, Enums.Priority.MEDIUM, params, null,
- "Job generated by " + getId() + " - " + getJobId(), Collections.emptyList(), Collections.emptyList(),
- token);
- hsMetricsJobId = hsMetricsJobResult.first().getId();
- addEvent(Event.Type.INFO, "Submit job " + hsMetricsJobId + " to compute HS metrics (" + AlignmentHsMetricsAnalysis.ID
- + ")");
- }
- } catch (CatalogException e) {
- addWarning("Error launching job for Alignment HS Metrics Analysis: " + e.getMessage());
- }
-
// Wait for those jobs before saving QC
SamtoolsFlagstats samtoolsFlagstats = null;
SamtoolsStats samtoolsStats = null;
FastQcMetrics fastQcMetrics = null;
- HsMetrics hsMetrics = null;
if (flagStatsJobId != null) {
try {
@@ -249,17 +205,6 @@ protected void run() throws ToolException {
addWarning("Error waiting for job '" + fastQcMetricsJobId + "' (Alignment FastQC Metrics Analysis): " + e.getMessage());
}
}
- if (hsMetricsJobId != null) {
- try {
- if (waitFor(hsMetricsJobId)) {
- Job job = getJob(hsMetricsJobId);
- logger.info("Alignment HS Metrics Analysis, job.outDir = " + job.getOutDir());
- hsMetrics = AlignmentHsMetricsAnalysis.parseResults(Paths.get(job.getOutDir().getUri().getPath()));
- }
- } catch (Exception e) {
- addWarning("Error waiting for job '" + hsMetricsJobId + "' (Alignment FastQC Metrics Analysis): " + e.getMessage());
- }
- }
// Update quality control for the catalog file
catalogBamFile = AnalysisUtils.getCatalogFile(analysisParams.getBamFile(), study, catalogManager.getFileManager(), token);
@@ -284,10 +229,6 @@ protected void run() throws ToolException {
qc.getAlignment().setFastQcMetrics(fastQcMetrics);
saveQc = true;
}
- if (hsMetrics != null) {
- qc.getAlignment().setHsMetrics(hsMetrics);
- saveQc = true;
- }
if (saveQc) {
catalogManager.getFileManager().update(getStudy(), catalogBamFile.getId(), new FileUpdateParams().setQualityControl(qc),
@@ -324,7 +265,7 @@ private boolean waitFor(String jobId) throws ToolException {
return status.equals(Enums.ExecutionStatus.DONE) ? true : false;
}
- private Job getJob(String jobId) throws ToolException {
+ private Job getJob(String jobId) {
Job job = null;
try {
Query query = new Query("id", jobId);
diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/ClinicalInterpretationManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/ClinicalInterpretationManager.java
index a5ce5a6cf96..2e9f8a022aa 100644
--- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/ClinicalInterpretationManager.java
+++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/ClinicalInterpretationManager.java
@@ -35,7 +35,6 @@
import org.opencb.biodata.tools.clinical.ClinicalVariantCreator;
import org.opencb.biodata.tools.clinical.DefaultClinicalVariantCreator;
import org.opencb.biodata.tools.pedigree.ModeOfInheritance;
-import org.opencb.cellbase.client.rest.CellBaseClient;
import org.opencb.commons.datastore.core.DataResult;
import org.opencb.commons.datastore.core.FacetField;
import org.opencb.commons.datastore.core.Query;
@@ -98,7 +97,6 @@ public class ClinicalInterpretationManager extends StorageManager {
private ClinicalVariantEngine clinicalVariantEngine;
private VariantStorageManager variantStorageManager;
- protected CellBaseClient cellBaseClient;
protected AlignmentStorageManager alignmentStorageManager;
private VariantCatalogQueryUtils catalogQueryUtils;
@@ -138,7 +136,6 @@ public ClinicalInterpretationManager(CatalogManager catalogManager, StorageEngin
this.clinicalAnalysisManager = catalogManager.getClinicalAnalysisManager();
this.variantStorageManager = new VariantStorageManager(catalogManager, StorageEngineFactory.get(storageConfiguration));
- this.cellBaseClient = new CellBaseClient(storageConfiguration.getCellbase().toClientConfiguration());
this.alignmentStorageManager = new AlignmentStorageManager(catalogManager, StorageEngineFactory.get(storageConfiguration));
this.catalogQueryUtils = new VariantCatalogQueryUtils(catalogManager);
diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/sample/qc/SampleQcAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/sample/qc/SampleQcAnalysis.java
index f55b4ca6bb0..a103568d020 100644
--- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/sample/qc/SampleQcAnalysis.java
+++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/sample/qc/SampleQcAnalysis.java
@@ -18,11 +18,19 @@
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
-import org.opencb.biodata.models.clinical.qc.SampleQcVariantStats;
+import org.opencb.biodata.formats.alignment.picard.HsMetrics;
+import org.opencb.biodata.formats.alignment.samtools.SamtoolsFlagstats;
+import org.opencb.biodata.formats.alignment.samtools.SamtoolsStats;
+import org.opencb.biodata.formats.sequence.fastqc.FastQcMetrics;
+import org.opencb.biodata.models.clinical.qc.*;
import org.opencb.commons.datastore.core.Event;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.AnalysisUtils;
+import org.opencb.opencga.analysis.alignment.qc.AlignmentFastQcMetricsAnalysis;
+import org.opencb.opencga.analysis.alignment.qc.AlignmentFlagStatsAnalysis;
+import org.opencb.opencga.analysis.alignment.qc.AlignmentHsMetricsAnalysis;
+import org.opencb.opencga.analysis.alignment.qc.AlignmentStatsAnalysis;
import org.opencb.opencga.analysis.individual.qc.IndividualQcUtils;
import org.opencb.opencga.analysis.tools.OpenCgaToolScopeStudy;
import org.opencb.opencga.analysis.variant.genomePlot.GenomePlotAnalysis;
@@ -32,10 +40,17 @@
import org.opencb.opencga.core.api.ParamConstants;
import org.opencb.opencga.core.common.JacksonUtils;
import org.opencb.opencga.core.exceptions.ToolException;
+import org.opencb.opencga.core.models.alignment.AlignmentFileQualityControl;
+import org.opencb.opencga.core.models.alignment.AlignmentQcParams;
import org.opencb.opencga.core.models.common.Enums;
import org.opencb.opencga.core.models.file.File;
+import org.opencb.opencga.core.models.file.FileQualityControl;
+import org.opencb.opencga.core.models.file.FileUpdateParams;
import org.opencb.opencga.core.models.job.Job;
import org.opencb.opencga.core.models.sample.Sample;
+import org.opencb.opencga.core.models.sample.SampleQualityControl;
+import org.opencb.opencga.core.models.sample.SampleUpdateParams;
+import org.opencb.opencga.core.models.sample.SampleVariantQualityControlMetrics;
import org.opencb.opencga.core.models.variant.GenomePlotAnalysisParams;
import org.opencb.opencga.core.models.variant.MutationalSignatureAnalysisParams;
import org.opencb.opencga.core.models.variant.SampleQcAnalysisParams;
@@ -46,9 +61,9 @@
import java.nio.file.Path;
import java.nio.file.Paths;
-import java.util.Collections;
-import java.util.Map;
+import java.util.*;
+import static org.opencb.opencga.analysis.variant.mutationalSignature.MutationalSignatureAnalysis.*;
import static org.opencb.opencga.core.models.study.StudyPermissions.Permissions.WRITE_SAMPLES;
@Tool(id = SampleQcAnalysis.ID, resource = Enums.Resource.SAMPLE, description = SampleQcAnalysis.DESCRIPTION)
@@ -64,7 +79,8 @@ public class SampleQcAnalysis extends OpenCgaToolScopeStudy {
private Path genomePlotConfigPath;
private boolean runVariantStats = true;
- private boolean runSignature = true;
+ private boolean runSignatureCatalogue = true;
+ private boolean runSignatureFitting = true;
private boolean runGenomePlot = true;
@Override
@@ -94,38 +110,51 @@ protected void check() throws Exception {
throw new ToolException("Sample '" + analysisParams.getSample() + "' not found.");
}
- String msg;
-
- // Check variant stats
- final String OPENCGA_ALL = "ALL";
- if (OPENCGA_ALL.equals(analysisParams.getVsId())) {
- msg = "Invalid parameters: " + OPENCGA_ALL + " is a reserved word, you can not use as a variant stats ID";
- addWarning(msg);
- logger.warn(msg);
- runVariantStats = false;
- }
-
- if (StringUtils.isEmpty(analysisParams.getVsId()) && analysisParams.getVsQuery() != null
- && !analysisParams.getVsQuery().toParams().isEmpty()) {
- msg = "Invalid parameters: if variant stats ID is empty, variant stats query must be empty";
- addWarning(msg);
- logger.warn(msg);
- runVariantStats = false;
- }
- if (StringUtils.isNotEmpty(analysisParams.getVsId())
- && (analysisParams.getVsQuery() == null || analysisParams.getVsQuery().toParams().isEmpty())) {
- msg = "Invalid parameters: if you provide a variant stats ID, variant stats query can not be empty";
- addWarning(msg);
- logger.warn(msg);
- runVariantStats = false;
+ // Prepare flags
+ String skip = null;
+ if (StringUtils.isNotEmpty(analysisParams.getSkip())) {
+ skip = analysisParams.getSkip().toLowerCase().replace(" ", "");
}
- if (StringUtils.isEmpty(analysisParams.getVsId())) {
- analysisParams.setVsId(OPENCGA_ALL);
+ if (StringUtils.isNotEmpty(skip)) {
+ Set skipValues = new HashSet<>(Arrays.asList(skip.split(",")));
+ if (skipValues.contains(SampleQcAnalysisParams.VARIANT_STATS_SKIP_VALUE)) {
+ runVariantStats = false;
+ }
+ if (skipValues.contains(SampleQcAnalysisParams.SIGNATURE_SKIP_VALUE)
+ || skipValues.contains(SampleQcAnalysisParams.SIGNATURE_CATALOGUE_SKIP_VALUE)) {
+ runSignatureCatalogue = false;
+ }
+ if (skipValues.contains(SampleQcAnalysisParams.SIGNATURE_SKIP_VALUE)
+ || skipValues.contains(SampleQcAnalysisParams.SIGNATURE_FITTING_SKIP_VALUE)) {
+ runSignatureFitting = false;
+ }
+ if (skipValues.contains(SampleQcAnalysisParams.GENOME_PLOT_SKIP_VALUE)) {
+ runGenomePlot = false;
+ }
}
- if (analysisParams.getVsQuery() == null) {
- runVariantStats = false;
- } else {
+ // Check variant stats
+ if (runVariantStats) {
+ final String OPENCGA_ALL = "ALL";
+ if (OPENCGA_ALL.equals(analysisParams.getVsId())) {
+ new ToolException("Invalid parameters: " + OPENCGA_ALL + " is a reserved word, you can not use as a variant stats ID");
+ }
+
+ if (StringUtils.isEmpty(analysisParams.getVsId()) && analysisParams.getVsQuery() != null
+ && !analysisParams.getVsQuery().toParams().isEmpty()) {
+ new ToolException("Invalid parameters: if variant stats ID is empty, variant stats query must be empty");
+ }
+ if (StringUtils.isNotEmpty(analysisParams.getVsId())
+ && (analysisParams.getVsQuery() == null || analysisParams.getVsQuery().toParams().isEmpty())) {
+ new ToolException("Invalid parameters: if you provide a variant stats ID, variant stats query can not be empty");
+ }
+ if (StringUtils.isEmpty(analysisParams.getVsId())) {
+ analysisParams.setVsId(OPENCGA_ALL);
+ }
+
+ if (analysisParams.getVsQuery() == null) {
+ new ToolException("Invalid parameters: variant stats query is empty");
+ }
if (sample.getQualityControl() != null && sample.getQualityControl().getVariant() != null) {
if (CollectionUtils.isNotEmpty(sample.getQualityControl().getVariant().getVariantStats())
&& OPENCGA_ALL.equals(analysisParams.getVsId())) {
@@ -133,10 +162,8 @@ protected void check() throws Exception {
} else {
for (SampleQcVariantStats variantStats : sample.getQualityControl().getVariant().getVariantStats()) {
if (variantStats.getId().equals(analysisParams.getVsId())) {
- msg = "Invalid parameters: variant stats ID '" + analysisParams.getVsId() + "' is already used";
- addWarning(msg);
- logger.warn(msg);
- runVariantStats = false;
+ throw new ToolException("Invalid parameters: variant stats ID '" + analysisParams.getVsId()
+ + "' is already used");
}
}
}
@@ -144,23 +171,33 @@ protected void check() throws Exception {
}
// Check mutational signature
- if (StringUtils.isEmpty(analysisParams.getMsQuery())) {
- runSignature = false;
+ if (runSignatureCatalogue) {
+ if (StringUtils.isEmpty(analysisParams.getMsQuery())) {
+ new ToolException("Invalid parameters: mutational signature query is empty");
+ }
}
- if (runSignature && !sample.isSomatic()) {
- msg = "Skipping mutational signature: sample '" + sample.getId() + "' is not somatic.";
+ if (runSignatureCatalogue && !sample.isSomatic()) {
+ String msg = "Skipping mutational signature catalog analysis: sample '" + sample.getId() + "' is not somatic.";
addWarning(msg);
logger.warn(msg);
- runSignature = false;
+ runSignatureCatalogue = false;
+ }
+
+ if (runSignatureFitting && !sample.isSomatic()) {
+ String msg = "Skipping mutational signature fitting analysis: sample '" + sample.getId() + "' is not somatic.";
+ addWarning(msg);
+ logger.warn(msg);
+ runSignatureFitting = false;
}
// Check genome plot
- if (StringUtils.isEmpty(analysisParams.getGpConfigFile())) {
- runGenomePlot = false;
- } else {
+ if (runGenomePlot) {
+ if (StringUtils.isEmpty(analysisParams.getGpConfigFile())) {
+ new ToolException("Invalid parameters: genome plot configuration file is empty");
+ }
if (runGenomePlot && !sample.isSomatic()) {
- msg = "Skipping genome plot: sample '" + sample.getId() + "' is not somatic.";
+ String msg = "Skipping genome plot: sample '" + sample.getId() + "' is not somatic.";
addWarning(msg);
logger.warn(msg);
runGenomePlot = false;
@@ -169,25 +206,21 @@ protected void check() throws Exception {
catalogManager.getFileManager(), getToken());
genomePlotConfigPath = Paths.get(genomePlotConfFile.getUri().getPath());
if (!genomePlotConfigPath.toFile().exists()) {
- msg = "Invalid parameters: genome plot configuration file does not exist (" + genomePlotConfigPath + ")";
- addWarning(msg);
- logger.warn(msg);
- runGenomePlot = false;
+ new ToolException("Invalid parameters: genome plot configuration file does not exist (" + genomePlotConfigPath + ")");
}
}
}
-
}
@Override
protected void run() throws ToolException {
step(() -> {
- try {
- Map params;
- OpenCGAResult variantStatsJobResult;
- OpenCGAResult signatureJobResult;
- OpenCGAResult genomePlotJobResult;
+ Map params;
+ String variantStatsJobId = null;
+ String signatureJobId = null;
+ String genomePlotJobId = null;
+ try {
if (runVariantStats) {
// Run variant stats
params = new SampleVariantStatsAnalysisParams(Collections.singletonList(analysisParams.getSample()), null, null, true,
@@ -195,64 +228,144 @@ protected void run() throws ToolException {
analysisParams.getVsQuery())
.toParams(new ObjectMap(ParamConstants.STUDY_PARAM, getStudy()));
- variantStatsJobResult = catalogManager.getJobManager()
- .submit(getStudy(), SampleVariantStatsAnalysis.ID, Enums.Priority.MEDIUM, params, null, "Job generated by "
+ OpenCGAResult variantStatsJobResult = catalogManager.getJobManager()
+ .submit(study, SampleVariantStatsAnalysis.ID, Enums.Priority.MEDIUM, params, null, "Job generated by "
+ getId() + " - " + getJobId(), Collections.emptyList(), Collections.emptyList(), token);
- addEvent(Event.Type.INFO, "Submit job " + variantStatsJobResult.first().getId() + " to compute sample variant stats ("
- + SampleVariantStatsAnalysis.ID + ")");
+ variantStatsJobId = variantStatsJobResult.first().getId();
+ addEvent(Event.Type.INFO, "Submit job " + variantStatsJobId + " to compute stats (" + SampleVariantStatsAnalysis.ID
+ + ")");
}
+ } catch (CatalogException e) {
+ addWarning("Error launching job for sample variant stats analysis: " + e.getMessage());
+ variantStatsJobId = null;
+ }
- if (runSignature) {
+ try {
+ if (runSignatureCatalogue || runSignatureFitting) {
// Run mutational signature
+ logger.info("Preparing to submit the mutational signature analysis job");
- // Be sure to update sample quality control
- ObjectMap query = JacksonUtils.getDefaultObjectMapper().readValue(analysisParams.getMsQuery(), ObjectMap.class);
- query.append(MutationalSignatureAnalysis.QC_UPDATE_KEYNAME, true);
- String queryString = query.toJson();
+ String skip = null;
+ if (!runSignatureCatalogue) {
+ skip = MutationalSignatureAnalysisParams.SIGNATURE_CATALOGUE_SKIP_VALUE;
+ } else if (!runSignatureFitting) {
+ skip = MutationalSignatureAnalysisParams.SIGNATURE_FITTING_SKIP_VALUE;
+ }
params = new MutationalSignatureAnalysisParams()
.setId(analysisParams.getMsId())
.setDescription(analysisParams.getMsDescription())
- .setQuery(queryString)
+ .setSample(analysisParams.getSample())
+ .setQuery(analysisParams.getMsQuery())
+ .setFitId(analysisParams.getMsFitId())
.setFitMethod(analysisParams.getMsFitMethod())
- .setSigVersion(analysisParams.getMsSigVersion())
- .setOrgan(analysisParams.getMsOrgan())
- .setnBoot(analysisParams.getMsNBoot())
- .setThresholdPerc(analysisParams.getMsThresholdPerc())
- .setThresholdPval(analysisParams.getMsThresholdPval())
- .setMaxRareSigs(analysisParams.getMsMaxRareSigs())
- .setSignaturesFile(analysisParams.getMsSignaturesFile())
- .setRareSignaturesFile(analysisParams.getMsRareSignaturesFile())
+ .setFitSigVersion(analysisParams.getMsFitSigVersion())
+ .setFitOrgan(analysisParams.getMsFitOrgan())
+ .setFitNBoot(analysisParams.getMsFitNBoot())
+ .setFitThresholdPerc(analysisParams.getMsFitThresholdPerc())
+ .setFitThresholdPval(analysisParams.getMsFitThresholdPval())
+ .setFitMaxRareSigs(analysisParams.getMsFitMaxRareSigs())
+ .setFitSignaturesFile(analysisParams.getMsFitSignaturesFile())
+ .setFitRareSignaturesFile(analysisParams.getMsFitRareSignaturesFile())
+ .setSkip(skip)
.toParams(new ObjectMap(ParamConstants.STUDY_PARAM, getStudy()));
- signatureJobResult = catalogManager.getJobManager()
+ OpenCGAResult signatureJobResult = catalogManager.getJobManager()
.submit(getStudy(), MutationalSignatureAnalysis.ID, Enums.Priority.MEDIUM, params, null, "Job generated by "
+ getId() + " - " + getJobId(), Collections.emptyList(), Collections.emptyList(), token);
- addEvent(Event.Type.INFO, "Submit job " + signatureJobResult.first().getId() + " to compute the mutational signature ("
+ signatureJobId = signatureJobResult.first().getId();
+ logger.info("Submitted job {} to compute the mutational signature analysis {}", signatureJobId,
+ MutationalSignatureAnalysis.ID);
+ addEvent(Event.Type.INFO, "Submit job " + signatureJobId + " to compute the mutational signature ("
+ MutationalSignatureAnalysis.ID + ")");
}
+ } catch (CatalogException e) {
+ throw new ToolException(e);
+ }
+
+ try {
if (runGenomePlot) {
// Run genome plot
params = new GenomePlotAnalysisParams(analysisParams.getSample(), analysisParams.getGpId(),
analysisParams.getGpDescription(), analysisParams.getGpConfigFile(), null)
.toParams(new ObjectMap(ParamConstants.STUDY_PARAM, getStudy()));
- genomePlotJobResult = catalogManager.getJobManager()
+ OpenCGAResult genomePlotJobResult = catalogManager.getJobManager()
.submit(getStudy(), GenomePlotAnalysis.ID, Enums.Priority.MEDIUM, params, null,
"Job generated by " + getId() + " - " + getJobId(), Collections.emptyList(), Collections.emptyList(),
token);
- addEvent(Event.Type.INFO, "Submit job " + genomePlotJobResult.first().getId() + " to compute genome plot ("
- + GenomePlotAnalysis.ID + ")");
+ genomePlotJobId = genomePlotJobResult.first().getId();
+ addEvent(Event.Type.INFO, "Submit job " + genomePlotJobId + " to compute genome plot (" + GenomePlotAnalysis.ID
+ + ")");
}
+ } catch (CatalogException e) {
+ addWarning("Error launching job for sample genome plot analysis: " + e.getMessage());
+ genomePlotJobId = null;
+ }
- // Wait for those jobs ???
-// waitFor(variantStatsJobResult.first().getId());
-// waitFor(signatureJobResult.first().getId());
-// waitFor(genomePlotJobResult.first().getId());
- } catch (CatalogException e) {
- throw new ToolException(e);
+ // Wait for those jobs before saving QC
+ GenomePlot genomePlot = null;
+
+ if (variantStatsJobId != null) {
+ try {
+ logger.info("Waiting for variant stats job: {} ...", variantStatsJobId);
+ AnalysisUtils.waitFor(variantStatsJobId, getStudy(), catalogManager.getJobManager(), getToken());
+ // Sample quality control is updated in the variant stats analysis, nothing more to do here
+ } catch (Exception e) {
+ addWarning("Error waiting for job '" + variantStatsJobId + "' (sample variant stats): " + e.getMessage());
+ }
+ }
+
+ if (signatureJobId != null) {
+ try {
+ logger.info("Waiting for mutational signature job: {} ...", signatureJobId);
+ AnalysisUtils.waitFor(signatureJobId, getStudy(), catalogManager.getJobManager(), getToken());
+ } catch (Exception e) {
+ addWarning("Error waiting for job '" + signatureJobId + "' (mutational signature analysis): " + e.getMessage());
+ }
+ }
+
+ if (genomePlotJobId != null) {
+ try {
+ if (AnalysisUtils.waitFor(genomePlotJobId, getStudy(), catalogManager.getJobManager(), getToken())) {
+ Job job = AnalysisUtils.getJob(genomePlotJobId, getStudy(), catalogManager.getJobManager(), getToken());
+
+ // Parse configuration file
+ GenomePlotConfig plotConfig = JacksonUtils.getDefaultObjectMapper().readerFor(GenomePlotConfig.class)
+ .readValue(genomePlotConfigPath.toFile());
+
+ // Parse genome plot results
+ genomePlot = GenomePlotAnalysis.parseResults(Paths.get(job.getOutDir().getUri().getPath()),
+ analysisParams.getGpDescription(), plotConfig);
+ }
+ } catch (Exception e) {
+ addWarning("Error waiting for job '" + genomePlotJobId + "' (genome plot analysis): " + e.getMessage());
+ }
+ }
+
+ // Update quality control for the sample
+ logger.info("Preparing to save quality control for sample {}", analysisParams.getSample());
+ Sample sample = IndividualQcUtils.getValidSampleById(getStudy(), analysisParams.getSample(), catalogManager, token);
+ if (sample == null) {
+ throw new ToolException("Can not access to the sample " + analysisParams.getSample() + " in order to save quality control");
+ }
+ SampleQualityControl qc = sample.getQualityControl();
+
+ // Sanity check
+ if (qc == null) {
+ qc = new SampleQualityControl();
+ } else if (qc.getVariant() == null) {
+ qc.setVariant(new SampleVariantQualityControlMetrics());
+ }
+
+ if (genomePlot != null) {
+ qc.getVariant().setGenomePlot(genomePlot);
+
+ catalogManager.getSampleManager().update(getStudy(), sample.getId(), new SampleUpdateParams().setQualityControl(qc),
+ QueryOptions.empty(), getToken());
+ logger.info("Quality control saved for sample {}", sample.getId());
}
});
}
diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/genomePlot/GenomePlotAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/genomePlot/GenomePlotAnalysis.java
index 85332389073..f7f63f37706 100644
--- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/genomePlot/GenomePlotAnalysis.java
+++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/genomePlot/GenomePlotAnalysis.java
@@ -35,6 +35,8 @@
import org.opencb.opencga.core.tools.annotations.ToolParams;
import org.opencb.opencga.core.tools.variant.GenomePlotAnalysisExecutor;
+import java.io.IOException;
+import java.nio.file.Path;
import java.nio.file.Paths;
@Tool(id = GenomePlotAnalysis.ID, resource = Enums.Resource.VARIANT)
@@ -113,6 +115,18 @@ protected void run() throws ToolException {
});
}
+ public static GenomePlot parseResults(Path outDir, String description, GenomePlotConfig plotConfig) throws IOException {
+ // Get image file
+ for (java.io.File imgFile : outDir.toFile().listFiles()) {
+ if (imgFile.getName().endsWith(GenomePlotAnalysis.SUFFIX_FILENAME)) {
+ int index = imgFile.getAbsolutePath().indexOf("JOBS/");
+ String relativeFilePath = (index == -1 ? imgFile.getName() : imgFile.getAbsolutePath().substring(index));
+ return new GenomePlot("", description, plotConfig, relativeFilePath);
+ }
+ }
+ return null;
+ }
+
public String getStudy() {
return study;
}
diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/hrdetect/HRDetectAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/hrdetect/HRDetectAnalysis.java
new file mode 100644
index 00000000000..23d04db3b65
--- /dev/null
+++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/hrdetect/HRDetectAnalysis.java
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2015-2020 OpenCB
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.opencb.opencga.analysis.variant.hrdetect;
+
+import com.mongodb.client.ListCollectionsIterable;
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.collections4.MapUtils;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.opencb.biodata.models.clinical.qc.HRDetect;
+import org.opencb.biodata.models.clinical.qc.Signature;
+import org.opencb.biodata.models.clinical.qc.SignatureFitting;
+import org.opencb.commons.datastore.core.ObjectMap;
+import org.opencb.commons.datastore.core.QueryOptions;
+import org.opencb.opencga.analysis.AnalysisUtils;
+import org.opencb.opencga.analysis.ResourceUtils;
+import org.opencb.opencga.analysis.individual.qc.IndividualQcUtils;
+import org.opencb.opencga.analysis.tools.OpenCgaToolScopeStudy;
+import org.opencb.opencga.catalog.exceptions.CatalogException;
+import org.opencb.opencga.core.common.JacksonUtils;
+import org.opencb.opencga.core.exceptions.ToolException;
+import org.opencb.opencga.core.models.common.Enums;
+import org.opencb.opencga.core.models.individual.Individual;
+import org.opencb.opencga.core.models.sample.Sample;
+import org.opencb.opencga.core.models.sample.SampleQualityControl;
+import org.opencb.opencga.core.models.sample.SampleUpdateParams;
+import org.opencb.opencga.core.models.sample.SampleVariantQualityControlMetrics;
+import org.opencb.opencga.core.models.variant.HRDetectAnalysisParams;
+import org.opencb.opencga.core.models.variant.MutationalSignatureAnalysisParams;
+import org.opencb.opencga.core.response.OpenCGAResult;
+import org.opencb.opencga.core.tools.annotations.Tool;
+import org.opencb.opencga.core.tools.annotations.ToolParams;
+import org.opencb.opencga.core.tools.variant.HRDetectAnalysisExecutor;
+import org.opencb.opencga.core.tools.variant.MutationalSignatureAnalysisExecutor;
+import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+
+@Tool(id = HRDetectAnalysis.ID, resource = Enums.Resource.VARIANT)
+public class HRDetectAnalysis extends OpenCgaToolScopeStudy {
+
+ public static final String ID = "hr-detect";
+ public static final String DESCRIPTION = "Run HRDetect analysis for a given somatic sample.";
+
+ public final static String HRDETECT_SCORES_FILENAME_DEFAULT = "data_matrix.tsv";
+
+ @ToolParams
+ private HRDetectAnalysisParams hrdetectParams = new HRDetectAnalysisParams();
+
+ private Sample somaticSample;
+ private Sample germlineSample;
+ private String assembly;
+ private Path pathSnvFittingRData;
+ private Path pathSvFittingRData;
+ private ObjectMap cnvQuery;
+ private ObjectMap indelQuery;
+
+    /**
+     * Validate the HRDetect parameters and resolve the data used later by {@link #run()}.
+     * <p>
+     * Checks that study, assembly, sample ID, SNV/SV fitting IDs and CNV/INDEL queries are provided; that the sample is somatic and
+     * has mutational signatures in its quality control; that both fittings exist and point to an existing .rData file; and that the
+     * individual of the sample has a germline sample. The somatic sample is set in the INDEL query when the query has no sample.
+     *
+     * @throws Exception A {@link ToolException} when a parameter is missing or inconsistent; any other exception is propagated from
+     *                   the catalog calls or from the JSON parsing of the queries
+     */
+    @Override
+    protected void check() throws Exception {
+        super.check();
+        setUpStorageEngineExecutor(study);
+
+        if (StringUtils.isEmpty(getStudy())) {
+            throw new ToolException("Missing study");
+        }
+
+        assembly = ResourceUtils.getAssembly(catalogManager, study, token);
+        if (StringUtils.isEmpty(assembly)) {
+            throw new ToolException("Missing assembly for study '" + study + "'");
+        }
+
+        if (StringUtils.isEmpty(hrdetectParams.getSampleId())) {
+            throw new ToolException("Missing sample ID");
+        }
+
+        if (StringUtils.isEmpty(hrdetectParams.getSnvFittingId())) {
+            throw new ToolException("Missing mutational signature fitting ID for SNV");
+        }
+
+        if (StringUtils.isEmpty(hrdetectParams.getSvFittingId())) {
+            throw new ToolException("Missing mutational signature fitting ID for SV");
+        }
+
+        if (StringUtils.isEmpty(hrdetectParams.getCnvQuery())) {
+            throw new ToolException("Missing CNV query");
+        }
+
+        if (StringUtils.isEmpty(hrdetectParams.getIndelQuery())) {
+            throw new ToolException("Missing INDEL query");
+        }
+
+        // Check sample: it must be somatic and contain mutational signatures
+        somaticSample = checkSample(hrdetectParams.getSampleId());
+        if (!somaticSample.isSomatic()) {
+            throw new ToolException("Sample '" + somaticSample.getId() + "' must be somatic");
+        }
+        checkSampleQualityControl(somaticSample);
+
+        // Look for the SNV and SV fittings among all the signatures of the sample
+        SignatureFitting snvFitting = null;
+        SignatureFitting svFitting = null;
+        List<Signature> signatures = somaticSample.getQualityControl().getVariant().getSignatures();
+        for (Signature signature : signatures) {
+            if (CollectionUtils.isNotEmpty(signature.getFittings())) {
+                for (SignatureFitting fitting : signature.getFittings()) {
+                    if (hrdetectParams.getSnvFittingId().equals(fitting.getId()) && snvFitting == null) {
+                        // Take the first SNV fitting matching ID
+                        snvFitting = fitting;
+                    } else if (hrdetectParams.getSvFittingId().equals(fitting.getId()) && svFitting == null) {
+                        // Take the first SV fitting matching ID
+                        svFitting = fitting;
+                    }
+                }
+            }
+        }
+
+        if (snvFitting == null) {
+            throw new ToolException("Unable to compute HRDetect analysis. No SNV fitting with ID '" + hrdetectParams.getSnvFittingId()
+                    + "' found for sample '" + hrdetectParams.getSampleId() + "'");
+        }
+        if (svFitting == null) {
+            throw new ToolException("Unable to compute HRDetect analysis. No SV fitting with ID '" + hrdetectParams.getSvFittingId()
+                    + "' found for sample '" + hrdetectParams.getSampleId() + "'");
+        }
+
+        // getFittingRDataFile returns null when the fitting has no .rData file, so check it before dereferencing
+        pathSnvFittingRData = getFittingRDataFile(snvFitting.getFiles());
+        if (pathSnvFittingRData == null) {
+            throw new ToolException("Unable to compute HRDetect analysis. No .rData file found for SNV fitting with ID '"
+                    + hrdetectParams.getSnvFittingId() + "' for sample '" + hrdetectParams.getSampleId() + "'");
+        }
+        if (!pathSnvFittingRData.toFile().exists()) {
+            throw new ToolException("Unable to compute HRDetect analysis. No .rData file found for SNV fitting '"
+                    + pathSnvFittingRData.toAbsolutePath() + "' with ID '" + hrdetectParams.getSnvFittingId() + "' for sample '"
+                    + hrdetectParams.getSampleId() + "'");
+        }
+
+        pathSvFittingRData = getFittingRDataFile(svFitting.getFiles());
+        if (pathSvFittingRData == null) {
+            throw new ToolException("Unable to compute HRDetect analysis. No .rData file found for SV fitting with ID '"
+                    + hrdetectParams.getSvFittingId() + "' for sample '" + hrdetectParams.getSampleId() + "'");
+        }
+        if (!pathSvFittingRData.toFile().exists()) {
+            throw new ToolException("Unable to compute HRDetect analysis. No .rData file found for SV fitting '"
+                    + pathSvFittingRData.toAbsolutePath() + "' with ID '" + hrdetectParams.getSvFittingId() + "' for sample '"
+                    + hrdetectParams.getSampleId() + "'");
+        }
+
+        // Check CNV query: the germline sample of the same individual is required
+        cnvQuery = JacksonUtils.getDefaultObjectMapper().readValue(hrdetectParams.getCnvQuery(), ObjectMap.class);
+        Individual individual = IndividualQcUtils.getIndividualBySampleId(getStudy(), hrdetectParams.getSampleId(), getCatalogManager(),
+                getToken());
+        if (individual == null) {
+            throw new ToolException("Unable to compute HRDetect analysis. No individual found for sample '"
+                    + hrdetectParams.getSampleId() + "', that individual must have at least two samples: somatic and germline");
+        }
+        List<Sample> samples = individual.getSamples();
+        if (samples == null || samples.size() < 2) {
+            throw new ToolException("For CNV query processing, individual (" + individual.getId() + ") must have at least two"
+                    + " samples: somatic and germline");
+        }
+        for (Sample sample : samples) {
+            if (!sample.isSomatic()) {
+                // Take the first non-somatic sample as the germline one
+                germlineSample = sample;
+                break;
+            }
+        }
+        if (germlineSample == null) {
+            throw new ToolException("Germline sample not found for individual '" + individual.getId() + "', it is mandatory for CNV query"
+                    + " processing");
+        }
+
+        // Check INDEL query: when present, its sample must be the somatic sample
+        indelQuery = JacksonUtils.getDefaultObjectMapper().readValue(hrdetectParams.getIndelQuery(), ObjectMap.class);
+        if (!indelQuery.containsKey(VariantQueryParam.SAMPLE.key())) {
+            logger.info("Setting sample in INDEL query");
+            indelQuery.put(VariantQueryParam.SAMPLE.key(), somaticSample.getId());
+        }
+        if (!somaticSample.getId().equals(indelQuery.getString(VariantQueryParam.SAMPLE.key()))) {
+            throw new ToolException("Mismatch sample from INDEL query '" + indelQuery.getString(VariantQueryParam.SAMPLE.key()) + "' and"
+                    + " sample '" + somaticSample.getId() + "'");
+        }
+
+        // Log messages
+        logger.info("HRDetect ID: {}", hrdetectParams.getId());
+        logger.info("Study: {}", study);
+        logger.info("Assembly: {}", assembly);
+        logger.info("Somatic sample ID: {}", somaticSample.getId());
+        logger.info("Germline sample ID: {}", germlineSample.getId());
+        logger.info("Signature fitting ID for SNV: {}", hrdetectParams.getSnvFittingId());
+        logger.info("Signature fitting ID for SV: {}", hrdetectParams.getSvFittingId());
+        logger.info("CNV query: {}", cnvQuery.toJson());
+        logger.info("INDEL query: {}", indelQuery.toJson());
+        logger.info("y (SNV3): {}", hrdetectParams.getSnv3CustomName());
+        logger.info("z (SNV8): {}", hrdetectParams.getSnv8CustomName());
+        logger.info("Y (SV3): {}", hrdetectParams.getSv3CustomName());
+        logger.info("Z (SV8): {}", hrdetectParams.getSv8CustomName());
+        logger.info("Bootstrap: {}", hrdetectParams.isBootstrap());
+    }
+
+    /**
+     * Execute the HRDetect executor with the values resolved in {@link #check()}, then parse the results from the output directory
+     * and append them to the HRDetect list stored in the quality control of the catalog sample.
+     *
+     * @throws ToolException If the step fails
+     */
+    @Override
+    protected void run() throws ToolException {
+        step(getId(), () -> {
+            // Configure and launch the executor
+            getToolExecutor(HRDetectAnalysisExecutor.class)
+                    .setStudy(study)
+                    .setSomaticSample(somaticSample.getId())
+                    .setGermlineSample(germlineSample.getId())
+                    .setAssembly(assembly)
+                    .setSnvRDataPath(pathSnvFittingRData)
+                    .setSvRDataPath(pathSvFittingRData)
+                    .setCnvQuery(cnvQuery)
+                    .setIndelQuery(indelQuery)
+                    .setSnv3CustomName(hrdetectParams.getSnv3CustomName())
+                    .setSnv8CustomName(hrdetectParams.getSnv8CustomName())
+                    .setSv3CustomName(hrdetectParams.getSv3CustomName())
+                    .setSv8CustomName(hrdetectParams.getSv8CustomName())
+                    .setBootstrap(hrdetectParams.isBootstrap())
+                    .execute();
+
+            // Fetch the sample again from catalog and parse the HRDetect output
+            Sample catalogSample = checkSample(hrdetectParams.getSampleId());
+            HRDetect parsedHrDetect = parseResult(getOutDir());
+
+            // Create the missing levels of the quality control before adding the new HRDetect
+            SampleQualityControl qualityControl = catalogSample.getQualityControl();
+            if (qualityControl == null) {
+                qualityControl = new SampleQualityControl();
+            }
+            if (qualityControl.getVariant() == null) {
+                qualityControl.setVariant(new SampleVariantQualityControlMetrics());
+            }
+            SampleVariantQualityControlMetrics variantMetrics = qualityControl.getVariant();
+            if (variantMetrics.getHrDetects() == null) {
+                variantMetrics.setHrDetects(new ArrayList<>());
+            }
+            variantMetrics.getHrDetects().add(parsedHrDetect);
+
+            // Save the updated quality control
+            SampleUpdateParams updateParams = new SampleUpdateParams().setQualityControl(qualityControl);
+            catalogManager.getSampleManager().update(getStudy(), catalogSample.getId(), updateParams, QueryOptions.empty(), getToken());
+        });
+    }
+
+    /**
+     * Build the {@link HRDetect} object from the analysis parameters and the scores file written in the given directory.
+     * <p>
+     * Scores are read from {@link #HRDETECT_SCORES_FILENAME_DEFAULT}: the first line contains the labels and the second line the
+     * values, where the value of the i-th label is read from column i + 1. Values that are missing or not numeric are stored as
+     * {@link Float#NaN}. Scores are left unset when the file does not exist or has fewer than two lines.
+     *
+     * @param dir Directory containing the HRDetect results
+     * @return HRDetect object with ID, description, fitting IDs, queries, custom names (if any) and scores (if any)
+     * @throws IOException If the CNV/INDEL queries cannot be parsed or the scores file cannot be read
+     */
+    public HRDetect parseResult(Path dir) throws IOException {
+        HRDetect result = new HRDetect()
+                .setId(hrdetectParams.getId())
+                .setDescription(hrdetectParams.getDescription())
+                .setSnvFittingId(hrdetectParams.getSnvFittingId())
+                .setSvFittingId(hrdetectParams.getSvFittingId())
+                .setCnvQuery(JacksonUtils.getDefaultObjectMapper().readValue(hrdetectParams.getCnvQuery(), ObjectMap.class))
+                .setIndelQuery(JacksonUtils.getDefaultObjectMapper().readValue(hrdetectParams.getIndelQuery(), ObjectMap.class));
+
+        // Set other params
+        ObjectMap params = new ObjectMap();
+        if (StringUtils.isNotEmpty(hrdetectParams.getSnv3CustomName())) {
+            params.append("snv3CustomName", hrdetectParams.getSnv3CustomName());
+        }
+        if (StringUtils.isNotEmpty(hrdetectParams.getSnv8CustomName())) {
+            params.append("snv8CustomName", hrdetectParams.getSnv8CustomName());
+        }
+        if (StringUtils.isNotEmpty(hrdetectParams.getSv3CustomName())) {
+            params.append("sv3CustomName", hrdetectParams.getSv3CustomName());
+        }
+        if (StringUtils.isNotEmpty(hrdetectParams.getSv8CustomName())) {
+            params.append("sv8CustomName", hrdetectParams.getSv8CustomName());
+        }
+        if (params.size() > 0) {
+            result.setParams(params);
+        }
+
+        // Read scores
+        ObjectMap scores = new ObjectMap();
+        File scoresFile = dir.resolve(HRDETECT_SCORES_FILENAME_DEFAULT).toFile();
+        if (scoresFile.exists()) {
+            List<String> lines = FileUtils.readLines(scoresFile, Charset.defaultCharset());
+            if (lines.size() > 1) {
+                String[] labels = lines.get(0).split("\t");
+                String[] values = lines.get(1).split("\t");
+                for (int i = 0; i < labels.length; i++) {
+                    float score = Float.NaN;
+                    // The value row may be shorter than the label row: do not read beyond its last column
+                    if (i + 1 < values.length) {
+                        try {
+                            score = Float.parseFloat(values[i + 1]);
+                        } catch (NumberFormatException e) {
+                            // Not a number, keep NaN
+                            score = Float.NaN;
+                        }
+                    }
+                    scores.put(labels[i], score);
+                }
+            }
+        }
+        if (MapUtils.isNotEmpty(scores)) {
+            result.setScores(scores);
+        }
+
+        // TODO: files to be added ?
+
+        return result;
+    }
+
+    /**
+     * Fetch a sample of the study from catalog.
+     * <p>
+     * As a side effect, {@code study} is replaced by the fully qualified name returned by the study manager.
+     *
+     * @param sampleId Sample ID
+     * @return The catalog sample
+     * @throws ToolException    If the catalog query does not return exactly one sample
+     * @throws CatalogException If the study or the sample cannot be retrieved from catalog
+     */
+    private Sample checkSample(String sampleId) throws ToolException, CatalogException {
+        study = catalogManager.getStudyManager().get(study, QueryOptions.empty(), token).first().getFqn();
+        OpenCGAResult<Sample> sampleResult = catalogManager.getSampleManager().get(study, sampleId, QueryOptions.empty(), token);
+        if (sampleResult.getNumResults() != 1) {
+            // Report the sample actually requested, not the one from the tool parameters
+            throw new ToolException("Unable to compute HRDetect analysis. Sample '" + sampleId + "' not found");
+        }
+
+        return sampleResult.first();
+    }
+
+    /**
+     * Ensure that a somatic sample has mutational signatures in its quality control. Non-somatic samples are not checked.
+     *
+     * @param sample Sample to check
+     * @throws ToolException If the sample is somatic and its quality control has no mutational signatures
+     */
+    private void checkSampleQualityControl(Sample sample) throws ToolException {
+        // Signatures are only required for somatic samples
+        if (!sample.isSomatic()) {
+            return;
+        }
+
+        SampleQualityControl qualityControl = sample.getQualityControl();
+        boolean hasSignatures = qualityControl != null
+                && qualityControl.getVariant() != null
+                && CollectionUtils.isNotEmpty(qualityControl.getVariant().getSignatures());
+        if (!hasSignatures) {
+            throw new ToolException("Unable to compute HRDetect analysis. No mutational signatures found for sample '"
+                    + hrdetectParams.getSampleId() + "'");
+        }
+    }
+
+    /**
+     * Get the path of the first file of a signature fitting whose name ends with {@code rData}.
+     * <p>
+     * The file name is resolved against the base directory that {@link AnalysisUtils#getJobBaseDir(String)} returns for the
+     * absolute output directory of this tool.
+     *
+     * @param files File names of the signature fitting
+     * @return Path to the .rData file, or {@code null} if the list is empty or contains no such file
+     */
+    private Path getFittingRDataFile(List<String> files) {
+        if (CollectionUtils.isEmpty(files)) {
+            return null;
+        }
+        Path absoluteOutDir = getOutDir().toAbsolutePath();
+        Path basePath = Paths.get(AnalysisUtils.getJobBaseDir(absoluteOutDir.toString()));
+        for (String file : files) {
+            if (file.endsWith("rData")) {
+                Path path = basePath.resolve(file);
+                logger.info("RData file found: {}; outdir = {}; path = {}", file, absoluteOutDir, path.toAbsolutePath());
+                return path;
+            }
+        }
+        return null;
+    }
+}
+
diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/hrdetect/HRDetectLocalAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/hrdetect/HRDetectLocalAnalysisExecutor.java
new file mode 100644
index 00000000000..4c96ad7b3b0
--- /dev/null
+++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/hrdetect/HRDetectLocalAnalysisExecutor.java
@@ -0,0 +1,240 @@
+/*
+ * Copyright 2015-2020 OpenCB
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.opencb.opencga.analysis.variant.hrdetect;
+
+import htsjdk.samtools.reference.BlockCompressedIndexedFastaSequenceFile;
+import htsjdk.samtools.reference.FastaSequenceIndex;
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.util.GZIIndex;
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.opencb.biodata.models.variant.StudyEntry;
+import org.opencb.biodata.models.variant.Variant;
+import org.opencb.biodata.models.variant.avro.VariantType;
+import org.opencb.commons.datastore.core.Query;
+import org.opencb.commons.datastore.core.QueryOptions;
+import org.opencb.commons.datastore.core.QueryResultWriter;
+import org.opencb.commons.exec.Command;
+import org.opencb.commons.utils.DockerUtils;
+import org.opencb.opencga.analysis.ResourceUtils;
+import org.opencb.opencga.analysis.StorageToolExecutor;
+import org.opencb.opencga.catalog.exceptions.CatalogException;
+import org.opencb.opencga.core.common.GitRepositoryState;
+import org.opencb.opencga.core.exceptions.ToolException;
+import org.opencb.opencga.core.exceptions.ToolExecutorException;
+import org.opencb.opencga.core.response.OpenCGAResult;
+import org.opencb.opencga.core.response.VariantQueryResult;
+import org.opencb.opencga.core.tools.annotations.ToolExecutor;
+import org.opencb.opencga.core.tools.variant.HRDetectAnalysisExecutor;
+import org.opencb.opencga.core.tools.variant.MutationalSignatureAnalysisExecutor;
+import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
+import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam;
+import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator;
+import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.*;
+
+import static org.opencb.opencga.analysis.variant.mutationalSignature.MutationalSignatureAnalysis.CATALOGUES_FILENAME_DEFAULT;
+
+@ToolExecutor(id="opencga-local", tool = HRDetectAnalysis.ID,
+ framework = ToolExecutor.Framework.LOCAL, source = ToolExecutor.Source.STORAGE)
+public class HRDetectLocalAnalysisExecutor extends HRDetectAnalysisExecutor
+ implements StorageToolExecutor {
+
+ public final static String R_DOCKER_IMAGE = "opencb/opencga-ext-tools:" + GitRepositoryState.get().getBuildVersion();
+
+ private final static String CNV_FILENAME = "cnv.tsv";
+ private final static String INDEL_FILENAME = "indel.vcf";
+ private final static String INDEL_SORTED_FILENAME = "indel.sorted.vcf";
+ private final static String INDEL_GZ_FILENAME = "indel.sorted.vcf.gz";
+ private final static String INPUT_TABLE_FILENAME = "inputTable.tsv";
+
+ private final static String VIRTUAL_VOLUMEN_DATA = "/data/";
+ private final static String VIRTUAL_VOLUMEN_SNV = "/snv/";
+ private final static String VIRTUAL_VOLUMEN_SV = "/sv/";
+
+ private Path opencgaHome;
+
+ private Logger logger = LoggerFactory.getLogger(this.getClass());
+
+ @Override
+ public void run() throws ToolException, CatalogException, IOException, StorageEngineException {
+ opencgaHome = Paths.get(getExecutorParams().getString("opencgaHome"));
+
+ // Prepare CNV data
+ prepareCNVData();
+
+ // Prepare INDEL data
+ prepareINDELData();
+
+ // Prepare input table
+ prepareInputTable();
+
+ // Run R script for fitting signature
+ executeRScript();
+ }
+
+    /**
+     * Write the CNV input file ({@link #CNV_FILENAME}) in the output directory.
+     * <p>
+     * The CNV query is run for the somatic and germline samples; for each variant, one line is written with a segment number,
+     * chromosome, start, end and the TCN/MCN sample data values of the germline (normal) and somatic (tumour) samples. Variants
+     * without studies, or whose TCN/MCN values are not integers, are reported in the file {@code cnvs.discarded} instead.
+     *
+     * @throws FileNotFoundException If any of the two output files cannot be created
+     */
+    private void prepareCNVData() throws ToolExecutorException, StorageEngineException, CatalogException, FileNotFoundException {
+        Query query = new Query(getCnvQuery());
+        query.put(VariantQueryParam.STUDY.key(), getStudy());
+        query.put(VariantQueryParam.SAMPLE.key(), getSomaticSample() + "," + getGermlineSample());
+
+        QueryOptions queryOptions = new QueryOptions();
+        queryOptions.append(QueryOptions.INCLUDE, "id,studies");
+
+        logger.info("CNV query: {}", query);
+        logger.info("CNV query options: {}", queryOptions);
+
+        // try-with-resources: both writers are closed even if the query or the iteration fails
+        try (PrintWriter pwOut = new PrintWriter(getOutDir().resolve("cnvs.discarded").toFile());
+             PrintWriter pw = new PrintWriter(getOutDir().resolve(CNV_FILENAME).toAbsolutePath().toString())) {
+            pw.println("seg_no\tChromosome\tchromStart\tchromEnd\ttotal.copy.number.inNormal\tminor.copy.number.inNormal\t"
+                    + "total.copy.number.inTumour\tminor.copy.number.inTumour");
+
+            // NOTE(review): the iterator is not closed here, as in the original code - confirm whether it has to be closed explicitly
+            VariantDBIterator iterator = getVariantStorageManager().iterator(query, queryOptions, getToken());
+            int count = 0;
+            while (iterator.hasNext()) {
+                Variant variant = iterator.next();
+
+                if (CollectionUtils.isEmpty(variant.getStudies())) {
+                    pwOut.println(variant.toStringSimple() + "\tStudies is empty");
+                } else {
+                    StudyEntry studyEntry = variant.getStudies().get(0);
+                    try {
+                        // Parse all the values first, so the segment number only increases for the lines actually written
+                        int normalTcn = Integer.parseInt(studyEntry.getSampleData(getGermlineSample(), "TCN"));
+                        int normalMcn = Integer.parseInt(studyEntry.getSampleData(getGermlineSample(), "MCN"));
+                        int tumourTcn = Integer.parseInt(studyEntry.getSampleData(getSomaticSample(), "TCN"));
+                        int tumourMcn = Integer.parseInt(studyEntry.getSampleData(getSomaticSample(), "MCN"));
+
+                        // new StringBuilder(int) only sets the initial capacity: the segment number must be appended explicitly
+                        StringBuilder sb = new StringBuilder()
+                                .append(++count)
+                                .append("\t").append(variant.getChromosome())
+                                .append("\t").append(variant.getStart())
+                                .append("\t").append(variant.getEnd())
+                                .append("\t").append(normalTcn)
+                                .append("\t").append(normalMcn)
+                                .append("\t").append(tumourTcn)
+                                .append("\t").append(tumourMcn);
+
+                        pw.println(sb);
+                    } catch (NumberFormatException e) {
+                        pwOut.println(variant.toStringSimple() + "\tError parsing TCN/MCN values: " + e.getMessage());
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Write the INDEL input file in the output directory: the INDEL query is exported to VCF ({@link #INDEL_FILENAME}), sorted by
+     * chromosome and position ({@link #INDEL_SORTED_FILENAME}), and then compressed with bgzip and indexed with tabix, both run
+     * through the docker image {@link #R_DOCKER_IMAGE}.
+     *
+     * @throws ToolExecutorException If the exported VCF file does not exist after the export
+     * @throws IOException           If the temporary sorting script cannot be written, or propagated from the docker execution
+     */
+    private void prepareINDELData() throws ToolExecutorException, StorageEngineException, CatalogException, IOException {
+        Query query = new Query(getIndelQuery());
+        query.put(VariantQueryParam.STUDY.key(), getStudy());
+
+        QueryOptions queryOptions = new QueryOptions();
+        queryOptions.append(QueryOptions.INCLUDE, "id,studies")
+                .append(QueryOptions.SORT, true);
+
+        logger.info("INDEL query: {}", query);
+        logger.info("INDEL query options: {}", queryOptions);
+
+        Path indelPath = getOutDir().resolve(INDEL_FILENAME);
+        getVariantStorageManager().exportData(indelPath.toAbsolutePath().toString(), VariantWriterFactory.VariantOutputFormat.VCF, null,
+                query, queryOptions, getToken());
+
+        if (!indelPath.toFile().exists()) {
+            // The exception was previously created but never thrown, so a failed export went unnoticed
+            throw new ToolExecutorException("Error exporting VCF file with INDEL variants");
+        }
+
+        // Workaround to sort, waiting for exporting to do it
+        File sortVcfFile = getOutDir().resolve("sort_vcf.sh").toFile();
+        try (PrintWriter pw = new PrintWriter(sortVcfFile)) {
+            pw.println("#!/bin/sh");
+            // Header lines (starting with #) are printed as they are, the remaining lines are piped to sort
+            pw.println("cat $1 | awk '$1 ~ /^#/ {print $0;next} {print $0 | \"sort -k1,1 -k2,2n\"}' > $2");
+        }
+        new Command("bash " + sortVcfFile.getAbsolutePath()
+                + " " + indelPath.toAbsolutePath()
+                + " " + getOutDir().resolve(INDEL_SORTED_FILENAME).toFile())
+                .run();
+        // Best effort removal of the temporary script
+        sortVcfFile.delete();
+
+        // BGZIP
+        AbstractMap.SimpleEntry<String, String> outputBinding = new AbstractMap.SimpleEntry<>(getOutDir()
+                .toAbsolutePath().toString(), VIRTUAL_VOLUMEN_DATA);
+        String cmdline = DockerUtils.run(R_DOCKER_IMAGE, null, outputBinding, "bgzip " + VIRTUAL_VOLUMEN_DATA + INDEL_SORTED_FILENAME,
+                null);
+        logger.info("Docker command line: " + cmdline);
+
+        // TABIX
+        cmdline = DockerUtils.run(R_DOCKER_IMAGE, null, outputBinding, "tabix -p vcf " + VIRTUAL_VOLUMEN_DATA + INDEL_GZ_FILENAME, null);
+        logger.info("Docker command line: " + cmdline);
+    }
+
+    /**
+     * Write the input table ({@link #INPUT_TABLE_FILENAME}) in the output directory: a header line followed by one line with the
+     * somatic sample and the paths of the INDEL and CNV files under {@link #VIRTUAL_VOLUMEN_DATA}.
+     *
+     * @throws FileNotFoundException If the input table file cannot be created
+     */
+    private void prepareInputTable() throws FileNotFoundException {
+        String tableFilename = getOutDir().resolve(INPUT_TABLE_FILENAME).toAbsolutePath().toString();
+        try (PrintWriter writer = new PrintWriter(tableFilename)) {
+            String indelFile = VIRTUAL_VOLUMEN_DATA + INDEL_GZ_FILENAME;
+            String cnvFile = VIRTUAL_VOLUMEN_DATA + CNV_FILENAME;
+
+            writer.println("sample\tIndels_vcf_files\tCNV_tab_files");
+            writer.println(getSomaticSample() + "\t" + indelFile + "\t" + cnvFile);
+        }
+    }
+
+    /**
+     * Build the command line of the hrDetect R script and run it through the docker image {@link #R_DOCKER_IMAGE}.
+     * <p>
+     * The parent directories of the SNV and SV .rData files are bound to {@link #VIRTUAL_VOLUMEN_SNV} and
+     * {@link #VIRTUAL_VOLUMEN_SV}, and the output directory to {@link #VIRTUAL_VOLUMEN_DATA}. Custom signature names are only
+     * passed when they are not empty, the bootstrap flag only when bootstrap is {@code true}, and the genome version only for the
+     * assemblies GRCh37 and GRCh38.
+     *
+     * @throws IOException Propagated from the docker execution
+     */
+    private void executeRScript() throws IOException {
+        // Input
+        List<AbstractMap.SimpleEntry<String, String>> inputBindings = new ArrayList<>();
+        inputBindings.add(new AbstractMap.SimpleEntry<>(getSnvRDataPath().toFile().getParent(), VIRTUAL_VOLUMEN_SNV));
+        inputBindings.add(new AbstractMap.SimpleEntry<>(getSvRDataPath().toFile().getParent(), VIRTUAL_VOLUMEN_SV));
+
+        // Output
+        AbstractMap.SimpleEntry<String, String> outputBinding = new AbstractMap.SimpleEntry<>(getOutDir()
+                .toAbsolutePath().toString(), VIRTUAL_VOLUMEN_DATA);
+
+        // Command
+        StringBuilder scriptParams = new StringBuilder("R CMD Rscript --vanilla ")
+                .append("/opt/opencga/signature.tools.lib/scripts/hrDetect")
+                .append(" -x ").append(VIRTUAL_VOLUMEN_SNV).append(getSnvRDataPath().toFile().getName())
+                .append(" -X ").append(VIRTUAL_VOLUMEN_SV).append(getSvRDataPath().toFile().getName())
+                .append(" -i ").append(VIRTUAL_VOLUMEN_DATA).append(INPUT_TABLE_FILENAME)
+                .append(" -o ").append(VIRTUAL_VOLUMEN_DATA);
+
+        // NOTE(review): custom names are appended to the command line as they are - confirm they are validated upstream
+        if (StringUtils.isNotEmpty(getSnv3CustomName())) {
+            scriptParams.append(" -y ").append(getSnv3CustomName());
+        }
+        if (StringUtils.isNotEmpty(getSnv8CustomName())) {
+            scriptParams.append(" -z ").append(getSnv8CustomName());
+        }
+        if (StringUtils.isNotEmpty(getSv3CustomName())) {
+            scriptParams.append(" -Y ").append(getSv3CustomName());
+        }
+        if (StringUtils.isNotEmpty(getSv8CustomName())) {
+            // -Z takes the SV8 custom name (the SV3 name was passed here by mistake)
+            scriptParams.append(" -Z ").append(getSv8CustomName());
+        }
+        // -b is a flag: add it only when bootstrap is requested, not whenever the value is set
+        if (Boolean.TRUE.equals(getBootstrap())) {
+            scriptParams.append(" -b");
+        }
+
+        switch (getAssembly()) {
+            case "GRCh37": {
+                scriptParams.append(" --genomev=hg19");
+                break;
+            }
+            case "GRCh38": {
+                scriptParams.append(" --genomev=hg38");
+                break;
+            }
+            default: {
+                logger.warn("Unknown assembly '{}': genome version is not passed to the hrDetect script", getAssembly());
+                break;
+            }
+        }
+
+        String cmdline = DockerUtils.run(R_DOCKER_IMAGE, inputBindings, outputBinding, scriptParams.toString(), null);
+        logger.info("Docker command line: " + cmdline);
+    }
+}
diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java
index 1a2f35154d9..6d01bb88cc2 100644
--- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java
+++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java
@@ -32,8 +32,6 @@
import org.opencb.biodata.tools.variant.converters.ga4gh.Ga4ghVariantConverter;
import org.opencb.biodata.tools.variant.converters.ga4gh.factories.AvroGa4GhVariantFactory;
import org.opencb.biodata.tools.variant.converters.ga4gh.factories.ProtoGa4GhVariantFactory;
-import org.opencb.cellbase.core.config.SpeciesProperties;
-import org.opencb.cellbase.core.result.CellBaseDataResponse;
import org.opencb.commons.datastore.core.*;
import org.opencb.commons.datastore.core.result.Error;
import org.opencb.commons.datastore.solr.SolrManager;
@@ -551,11 +549,7 @@ public OpenCGAResult setCellbaseConfiguration(String project, CellBaseConfi
engine.getConfiguration().setCellbase(cellbaseConfiguration);
engine.reloadCellbaseConfiguration();
- CellBaseDataResponse species = engine.getCellBaseUtils().getCellBaseClient().getMetaClient().species();
- if (species == null || species.firstResult() == null) {
- throw new IllegalArgumentException("Unable to access cellbase url '" + cellbaseConfiguration.getUrl() + "'"
- + " version '" + cellbaseConfiguration.getVersion() + "'");
- }
+ engine.getCellBaseUtils().validateCellBaseConnection();
if (engine.getMetadataManager().exists()) {
List jobDependsOn = new ArrayList<>(1);
diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantAnnotationOperationManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantAnnotationOperationManager.java
index 88cbff56756..204886426d4 100644
--- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantAnnotationOperationManager.java
+++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantAnnotationOperationManager.java
@@ -124,7 +124,8 @@ private void synchronizeProjectMetadata(String projectStr, String token) throws
Project project = catalogManager.getProjectManager().get(projectStr, null, token).first();
ProjectOrganism organism = project.getOrganism();
int currentRelease = project.getCurrentRelease();
- CatalogStorageMetadataSynchronizer.updateProjectMetadata(variantStorageEngine.getMetadataManager(), organism, currentRelease);
+ CatalogStorageMetadataSynchronizer.updateProjectMetadata(variantStorageEngine.getMetadataManager(), organism, currentRelease,
+ project.getCellbase());
}
private String buildOutputFileName(String alias, String region) {
diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantFileIndexerOperationManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantFileIndexerOperationManager.java
index bd4404f7118..046f8d4d13a 100644
--- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantFileIndexerOperationManager.java
+++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantFileIndexerOperationManager.java
@@ -167,7 +167,8 @@ private void updateProject(String studyFqn, String token) throws CatalogExceptio
release = project.getCurrentRelease();
// Add species, assembly and release
- CatalogStorageMetadataSynchronizer.updateProjectMetadata(variantStorageEngine.getMetadataManager(), project.getOrganism(), release);
+ CatalogStorageMetadataSynchronizer.updateProjectMetadata(variantStorageEngine.getMetadataManager(), project.getOrganism(), release,
+ project.getCellbase());
}
/**
diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/metadata/CatalogStorageMetadataSynchronizer.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/metadata/CatalogStorageMetadataSynchronizer.java
index 6653acb4730..4702fcfebae 100644
--- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/metadata/CatalogStorageMetadataSynchronizer.java
+++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/metadata/CatalogStorageMetadataSynchronizer.java
@@ -33,6 +33,7 @@
import org.opencb.opencga.catalog.utils.FileMetadataReader;
import org.opencb.opencga.catalog.utils.ParamUtils;
import org.opencb.opencga.core.common.BatchUtils;
+import org.opencb.opencga.core.config.storage.CellBaseConfiguration;
import org.opencb.opencga.core.models.cohort.Cohort;
import org.opencb.opencga.core.models.cohort.CohortStatus;
import org.opencb.opencga.core.models.cohort.CohortUpdateParams;
@@ -48,7 +49,7 @@
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager;
import org.opencb.opencga.storage.core.metadata.models.*;
-import org.opencb.opencga.storage.core.variant.annotation.annotators.AbstractCellBaseVariantAnnotator;
+import org.opencb.opencga.storage.core.utils.CellBaseUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -113,12 +114,12 @@ public static void updateProjectMetadata(CatalogManager catalog, VariantStorageM
sessionId)
.first();
- updateProjectMetadata(scm, p.getOrganism(), p.getCurrentRelease());
+ updateProjectMetadata(scm, p.getOrganism(), p.getCurrentRelease(), p.getCellbase());
}
- public static void updateProjectMetadata(VariantStorageMetadataManager scm, ProjectOrganism organism, int release)
+ public static void updateProjectMetadata(VariantStorageMetadataManager scm, ProjectOrganism organism, int release, CellBaseConfiguration cellbase)
throws StorageEngineException {
- String scientificName = AbstractCellBaseVariantAnnotator.toCellBaseSpeciesName(organism.getScientificName());
+ String scientificName = CellBaseUtils.toCellBaseSpeciesName(organism.getScientificName());
scm.updateProjectMetadata(projectMetadata -> {
if (projectMetadata == null) {
@@ -126,6 +127,7 @@ public static void updateProjectMetadata(VariantStorageMetadataManager scm, Proj
}
projectMetadata.setSpecies(scientificName);
projectMetadata.setAssembly(organism.getAssembly());
+ projectMetadata.setDataRelease(cellbase.getDataRelease());
projectMetadata.setRelease(release);
return projectMetadata;
});
diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureAnalysis.java
index 6558baf3a86..c81b966dcaf 100644
--- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureAnalysis.java
+++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureAnalysis.java
@@ -16,22 +16,28 @@
package org.opencb.opencga.analysis.variant.mutationalSignature;
+import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.clinical.qc.Signature;
import org.opencb.biodata.models.clinical.qc.SignatureFitting;
+import org.opencb.biodata.models.clinical.qc.SignatureFittingScore;
import org.opencb.commons.datastore.core.ObjectMap;
+import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.AnalysisUtils;
import org.opencb.opencga.analysis.ResourceUtils;
import org.opencb.opencga.analysis.tools.OpenCgaToolScopeStudy;
import org.opencb.opencga.catalog.exceptions.CatalogException;
+import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.core.common.JacksonUtils;
import org.opencb.opencga.core.exceptions.ToolException;
+import org.opencb.opencga.core.exceptions.ToolExecutorException;
import org.opencb.opencga.core.models.common.Enums;
import org.opencb.opencga.core.models.sample.Sample;
import org.opencb.opencga.core.models.sample.SampleQualityControl;
import org.opencb.opencga.core.models.sample.SampleUpdateParams;
+import org.opencb.opencga.core.models.sample.SampleVariantQualityControlMetrics;
import org.opencb.opencga.core.models.variant.MutationalSignatureAnalysisParams;
import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.core.tools.annotations.Tool;
@@ -45,6 +51,9 @@
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
+import java.util.Locale;
+
+import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.STUDY;
@Tool(id = MutationalSignatureAnalysis.ID, resource = Enums.Resource.VARIANT)
public class MutationalSignatureAnalysis extends OpenCgaToolScopeStudy {
@@ -53,22 +62,36 @@ public class MutationalSignatureAnalysis extends OpenCgaToolScopeStudy {
public static final String DESCRIPTION = "Run mutational signature analysis for a given sample.";
public final static String SIGNATURE_COEFFS_FILENAME = "exposures.tsv";
- public final static String SIGNATURE_FITTING_FILENAME = "signature_summary.png";
public final static String CATALOGUES_FILENAME_DEFAULT = "catalogues.tsv";
-
- public final static String QC_UPDATE_KEYNAME = "qcUpdate";
+ public final static String MUTATIONAL_SIGNATURE_DATA_MODEL_FILENAME = "mutational_signature.json";
+ public final static String MUTATIONAL_SIGNATURE_FITTING_DATA_MODEL_FILENAME = "mutational_signature_fitting.json";
+
+ public static final String CLUSTERED = "clustered";
+ public static final String NON_CLUSTERED = "non-clustered";
+ public static final String LENGTH_NA = "na";
+ public static final String LENGTH_1_10Kb= "1-10Kb";
+ public static final String LENGTH_10Kb_100Kb = "10-100Kb";
+ public static final String LENGTH_100Kb_1Mb = "100Kb-1Mb";
+ public static final String LENGTH_1Mb_10Mb = "1Mb-10Mb";
+ public static final String LENGTH_10Mb = ">10Mb";
+ public static final String TYPE_DEL = "del";
+ public static final String TYPE_TDS = "tds";
+ public static final String TYPE_INV = "inv";
+ public static final String TYPE_TRANS = "trans";
@ToolParams
private MutationalSignatureAnalysisParams signatureParams = new MutationalSignatureAnalysisParams();
- private String sample;
+ private Sample sample;
private String assembly;
private ObjectMap query;
- private String catalogues;
private String signaturesFile;
private String rareSignaturesFile;
+ private boolean runCatalogue = true;
+ private boolean runFitting = true;
+
@Override
protected void check() throws Exception {
super.check();
@@ -78,100 +101,108 @@ protected void check() throws Exception {
throw new ToolException("Missing study");
}
- // Two behaviours: using catalogues or using sample/query
- if (StringUtils.isNotEmpty(signatureParams.getCatalogues())) {
- // Fitting from file containing the counts
- // Check if that file exists
- OpenCGAResult fileResult = getCatalogManager().getFileManager().get(study,
- signatureParams.getCatalogues(), QueryOptions.empty(), getToken());
- if (fileResult.getNumResults() == 0) {
- throw new ToolException("Catalogues file '" + signatureParams.getCatalogues() + "' does not exist in study '"
- + study + "'");
+ if (StringUtils.isEmpty(signatureParams.getSample())) {
+ throw new ToolException("Missing sample. It is mandatory to run mutational signature analysis");
+ }
+
+ // Check sample
+ study = catalogManager.getStudyManager().get(study, QueryOptions.empty(), token).first().getFqn();
+ OpenCGAResult sampleResult = catalogManager.getSampleManager().get(study, signatureParams.getSample(),
+ QueryOptions.empty(), token);
+ if (sampleResult.getNumResults() != 1) {
+ throw new ToolException("Unable to compute mutational signature analysis. Sample '" + signatureParams.getSample()
+ + "' not found");
+ }
+ sample = sampleResult.first();
+
+ if (StringUtils.isNotEmpty(signatureParams.getSkip())) {
+ if (signatureParams.getSkip().contains(MutationalSignatureAnalysisParams.SIGNATURE_CATALOGUE_SKIP_VALUE)) {
+ runCatalogue = false;
}
- if (fileResult.getNumResults() > 1) {
- throw new ToolException("Multiple files '" + signatureParams.getCatalogues() + "' found in study '" + study + "'");
+ if (signatureParams.getSkip().contains(MutationalSignatureAnalysisParams.SIGNATURE_FITTING_SKIP_VALUE)) {
+ runFitting = false;
}
- catalogues = fileResult.first().getUri().toURL().getPath();
- logger.info("Signagture catalogues file: {}", catalogues);
- } else if (StringUtils.isNotEmpty(signatureParams.getCataloguesContent())) {
- // Fitting from counts
- FileUtils.write(getOutDir().resolve(CATALOGUES_FILENAME_DEFAULT).toFile(), signatureParams.getCataloguesContent(),
- Charset.defaultCharset(), false);
- catalogues = getOutDir().resolve(CATALOGUES_FILENAME_DEFAULT).toString();
- logger.info("Signagture catalogues file: {}", catalogues);
- } else {
- // Fitting from sample/query
+ }
+
+ // Check 'catalogue' processing
+ if (runCatalogue) {
if (signatureParams.getQuery() == null) {
- throw new ToolException("Missing signature query");
+ throw new ToolException("Missing signature query. It is mandatory to compute mutational signature catalogue");
}
+
query = JacksonUtils.getDefaultObjectMapper().readValue(signatureParams.getQuery(), ObjectMap.class);
- logger.info("Signagture query: {}", signatureParams.getQuery());
- if (!query.containsKey(VariantQueryParam.SAMPLE.key())) {
- throw new ToolException("Missing sample in the signature query");
- }
- if (StringUtils.isEmpty(query.getString(VariantQueryParam.SAMPLE.key()))) {
- throw new ToolException("Sample is empty in the signature query");
+ if (!query.containsKey(VariantQueryParam.SAMPLE.key())
+ || StringUtils.isEmpty(query.getString(VariantQueryParam.SAMPLE.key()))) {
+ query.put(VariantQueryParam.SAMPLE.key(), signatureParams.getSample());
+ } else {
+ // Check mismatch sample
+ String tmpSample = query.getString(VariantQueryParam.SAMPLE.key());
+ if (tmpSample.contains(":")) {
+ tmpSample = tmpSample.split(":")[0];
+ }
+ if (!tmpSample.equals(signatureParams.getSample())) {
+ throw new ToolException("Mismatch sample name, from sample parameter (" + signatureParams.getSample() + "), and from"
+ + " the query (" + query.getString(VariantQueryParam.SAMPLE.key()) + ")");
+ }
}
+ }
- // Get sample
- sample = query.getString(VariantQueryParam.SAMPLE.key());
- if (sample.contains(":")) {
- sample = sample.split(":")[0];
+ // Check 'fitting' processing
+ if (runFitting) {
+ if (StringUtils.isEmpty(signatureParams.getId())) {
+ throw new ToolException("Missing signature catalogue ID (counts ID). It is mandatory to compute signature fitting");
}
- // Get assembly
- assembly = ResourceUtils.getAssembly(catalogManager, study, token);
- if (StringUtils.isEmpty(assembly)) {
- throw new ToolException("Missing assembly for study '" + study + "'");
+ // Check that signature (catalogue) ID exists for the sample
+ boolean found = false;
+ if (sample.getQualityControl() != null && sample.getQualityControl().getVariant() != null
+ && CollectionUtils.isNotEmpty(sample.getQualityControl().getVariant().getSignatures())) {
+ for (Signature signature : sample.getQualityControl().getVariant().getSignatures()) {
+ if (signatureParams.getId().equals(signature.getId())) {
+ found = true;
+ break;
+ }
+ }
}
- // TODO: improve this
- switch (assembly.toUpperCase()) {
- case "GRCH37":
- assembly = "GRCh37";
- break;
- case "GRCH38":
- assembly = "GRCh38";
- break;
- default:
- break;
+ if (!found && !runCatalogue) {
+ throw new ToolException("Signature catalogue ID (counts ID) '" + signatureParams.getId() + "' not found for the sample"
+ + " '" + signatureParams.getSample() + "'");
+ }
- try {
- // Check sample
- study = catalogManager.getStudyManager().get(study, QueryOptions.empty(), token).first().getFqn();
- OpenCGAResult sampleResult = catalogManager.getSampleManager().get(study, sample, QueryOptions.empty(), token);
- if (sampleResult.getNumResults() != 1) {
- throw new ToolException("Unable to compute mutational signature analysis. Sample '" + sample + "' not found");
- }
-
- // Check signatures file
- if (StringUtils.isNotEmpty(signatureParams.getSignaturesFile())) {
- org.opencb.opencga.core.models.file.File catalogFile = AnalysisUtils.getCatalogFile(signatureParams.getSignaturesFile(),
- getStudy(), catalogManager.getFileManager(), getToken());
- signaturesFile = catalogFile.getUri().getPath();
- }
+ // Check signatures file
+ if (StringUtils.isNotEmpty(signatureParams.getFitSignaturesFile())) {
+ org.opencb.opencga.core.models.file.File catalogFile = AnalysisUtils.getCatalogFile(signatureParams.getFitSignaturesFile(),
+ getStudy(), catalogManager.getFileManager(), getToken());
+ signaturesFile = catalogFile.getUri().getPath();
+ }
- // Check rare signatures file
- if (StringUtils.isNotEmpty(signatureParams.getRareSignaturesFile())) {
- org.opencb.opencga.core.models.file.File catalogFile = AnalysisUtils.getCatalogFile(
- signatureParams.getRareSignaturesFile(), getStudy(), catalogManager.getFileManager(), getToken());
- rareSignaturesFile = catalogFile.getUri().getPath();
- }
- } catch (CatalogException e) {
- throw new ToolException(e);
+ // Check rare signatures file
+ if (StringUtils.isNotEmpty(signatureParams.getFitRareSignaturesFile())) {
+ org.opencb.opencga.core.models.file.File catalogFile = AnalysisUtils.getCatalogFile(
+ signatureParams.getFitRareSignaturesFile(), getStudy(), catalogManager.getFileManager(), getToken());
+ rareSignaturesFile = catalogFile.getUri().getPath();
}
}
+ // Get assembly
+ assembly = getAssembly(study, catalogManager, token);
+
// Log messages
- logger.info("Signagture fitting method: {}", signatureParams.getFitMethod());
- logger.info("Signagture sig. version: {}", signatureParams.getSigVersion());
- logger.info("Signagture organ: {}", signatureParams.getOrgan());
- logger.info("Signagture n boot: {}", signatureParams.getnBoot());
- logger.info("Signagture threshold percentage: {}", signatureParams.getThresholdPerc());
- logger.info("Signagture threshold p-value: {}", signatureParams.getThresholdPval());
- logger.info("Signagture max. rare sigs.: {}", signatureParams.getMaxRareSigs());
- logger.info("Signagture signatures file: {}", signaturesFile);
- logger.info("Signagture rare signatures file: {}", rareSignaturesFile);
+ logger.info("Signature id: {}", signatureParams.getId());
+ logger.info("Signature description: {}", signatureParams.getDescription());
+ logger.info("Signature sample: {}", signatureParams.getSample());
+ logger.info("Signature query: {}", signatureParams.getQuery());
+ logger.info("Signature fit id: {}", signatureParams.getFitId());
+ logger.info("Signature fit method: {}", signatureParams.getFitMethod());
+ logger.info("Signature fit sig. version: {}", signatureParams.getFitSigVersion());
+ logger.info("Signature fit organ: {}", signatureParams.getFitOrgan());
+ logger.info("Signature fit n boot: {}", signatureParams.getFitNBoot());
+ logger.info("Signature fit threshold percentage: {}", signatureParams.getFitThresholdPerc());
+ logger.info("Signature fit threshold p-value: {}", signatureParams.getFitThresholdPval());
+ logger.info("Signature fit max. rare sigs.: {}", signatureParams.getFitMaxRareSigs());
+ logger.info("Signature fit signatures file: {}", signaturesFile);
+ logger.info("Signature fit rare signatures file: {}", rareSignaturesFile);
+ logger.info("Skip: {}", signatureParams.getSkip());
}
@Override
@@ -180,135 +211,250 @@ protected void run() throws ToolException {
MutationalSignatureAnalysisExecutor toolExecutor = getToolExecutor(MutationalSignatureAnalysisExecutor.class);
toolExecutor.setStudy(study)
- .setSample(sample)
+ .setSample(signatureParams.getSample())
.setAssembly(assembly)
.setQueryId(signatureParams.getId())
.setQueryDescription(signatureParams.getDescription())
+ // NOTE(review): setSample(signatureParams.getSample()) is already called earlier in this same chain — this second call looks redundant; confirm and drop one
+ .setSample(signatureParams.getSample())
.setQuery(query)
- .setCatalogues(catalogues)
+ .setFitId(signatureParams.getFitId())
.setFitMethod(signatureParams.getFitMethod())
- .setSigVersion(signatureParams.getSigVersion())
- .setOrgan(signatureParams.getOrgan())
- .setnBoot(signatureParams.getnBoot())
- .setThresholdPerc(signatureParams.getThresholdPerc())
- .setThresholdPval(signatureParams.getThresholdPval())
- .setMaxRareSigs(signatureParams.getMaxRareSigs())
+ .setSigVersion(signatureParams.getFitSigVersion())
+ .setOrgan(signatureParams.getFitOrgan())
+ .setnBoot(signatureParams.getFitNBoot())
+ .setThresholdPerc(signatureParams.getFitThresholdPerc())
+ .setThresholdPval(signatureParams.getFitThresholdPval())
+ .setMaxRareSigs(signatureParams.getFitMaxRareSigs())
.setSignaturesFile(signaturesFile)
.setRareSignaturesFile(rareSignaturesFile)
+ .setSkip(signatureParams.getSkip())
.execute();
+ });
+
+ // Get sample quality control again in case it was updated during the mutational signature analysis
+ OpenCGAResult sampleResult;
+ try {
+ sampleResult = catalogManager.getSampleManager().get(study, signatureParams.getSample(),
+ QueryOptions.empty(), token);
+ } catch (CatalogException e) {
+ throw new ToolException("After mutational signature analysis, it could not get sample from OpenCGA catalog", e);
+ }
+ if (sampleResult.getNumResults() != 1) {
+ throw new ToolException("After mutational signature analysis, it could not get sample '" + signatureParams.getSample() + "'"
+ + " from OpenCGA catalog: number of occurrences found: " + sampleResult.getNumResults());
+ }
- // Update quality control for the catalog sample
- if (signatureParams.getQuery() != null && query.containsKey(QC_UPDATE_KEYNAME)) {
- // Remove quality control update key
- query.remove(QC_UPDATE_KEYNAME);
+ if (StringUtils.isEmpty(signatureParams.getId())) {
+ // Nothing to do
+ return;
+ }
- OpenCGAResult sampleResult = getCatalogManager().getSampleManager().get(getStudy(), sample, QueryOptions.empty(),
- getToken());
- Sample sample = sampleResult.first();
- if (sample != null) {
+ // Only save results in sample quality control, if the signature ID is not empty
+ sample = sampleResult.first();
+ SampleQualityControl qc = sample.getQualityControl();
- Signature signature = parse(getOutDir());
- SampleQualityControl qc = sampleResult.first().getQualityControl();
- if (qc == null) {
- qc = new SampleQualityControl();
+ // Sanity check
+ if (qc == null) {
+ qc = new SampleQualityControl();
+ }
+ if (qc.getVariant() == null) {
+ qc.setVariant(new SampleVariantQualityControlMetrics());
+ }
+ if (qc.getVariant().getSignatures() == null) {
+ qc.getVariant().setSignatures(new ArrayList<>());
+ }
+
+ Signature signature = null;
+ SignatureFitting signatureFitting = null;
+ try {
+ File signatureFile = getOutDir().resolve(MUTATIONAL_SIGNATURE_DATA_MODEL_FILENAME).toFile();
+ if (signatureFile.exists()) {
+ signature = JacksonUtils.getDefaultObjectMapper().readerFor(Signature.class).readValue(signatureFile);
+ }
+ File signatureFittingFile = getOutDir().resolve(MUTATIONAL_SIGNATURE_FITTING_DATA_MODEL_FILENAME).toFile();
+ if (signatureFittingFile.exists()) {
+ signatureFitting = JacksonUtils.getDefaultObjectMapper().readerFor(SignatureFitting.class).readValue(signatureFittingFile);
+ }
+ } catch (IOException e) {
+ throw new ToolException("Something happened when parsing result files from mutational signature (or fitting)", e);
+ }
+ if (signature != null) {
+ logger.info("Adding new mutational signature to the signature data model before saving quality control");
+ qc.getVariant().getSignatures().add(signature);
+ }
+ if (signatureFitting != null) {
+ for (Signature sig : qc.getVariant().getSignatures()) {
+ if (StringUtils.isNotEmpty(sig.getId())) {
+ if (sig.getId().equals(signatureParams.getId())) {
+ if (CollectionUtils.isEmpty(sig.getFittings())) {
+ sig.setFittings(new ArrayList<>());
+ }
+ logger.info("Fitting {} was added to the mutational signature {} before saving quality control",
+ signatureParams.getFitId(), signatureParams.getId());
+ sig.getFittings().add(signatureFitting);
+ break;
}
- qc.getVariant().getSignatures().add(signature);
+ }
+ }
+ }
+ // Update sample quality control
+ try {
+ catalogManager.getSampleManager().update(getStudy(), sample.getId(), new SampleUpdateParams().setQualityControl(qc),
+ QueryOptions.empty(), getToken());
+ logger.info("Quality control saved for sample {}", sample.getId());
+ } catch (CatalogException e) {
+ throw new ToolException("Something happened when saving sample quality control", e);
+ }
+ }
+
+ public static String getContextIndexFilename(String sample, String assembly) {
+ return "OPENCGA_" + sample + "_" + assembly + "_genome_context.csv";
+ }
- catalogManager.getSampleManager().update(getStudy(), sample.getId(), new SampleUpdateParams().setQualityControl(qc),
- QueryOptions.empty(), getToken());
+ public static String getAssembly(String study, CatalogManager catalogManager, String token) throws CatalogException, ToolException {
+ String assembly = ResourceUtils.getAssembly(catalogManager, study, token);
+ if (StringUtils.isEmpty(assembly)) {
+ throw new ToolException("Missing assembly for study '" + study + "'");
+ }
+ // TODO: improve this
+ switch (assembly.toUpperCase()) {
+ case "GRCH37":
+ assembly = "GRCh37";
+ break;
+ case "GRCH38":
+ assembly = "GRCh38";
+ break;
+ default:
+ break;
+ }
+ return assembly;
+ }
+
+ public static File getGenomeContextFile(String sample, String study, CatalogManager catalogManager, String token)
+ throws CatalogException, ToolException {
+ File indexFile = null;
+ String assembly = MutationalSignatureAnalysis.getAssembly(study, catalogManager, token);
+ String indexFilename = getContextIndexFilename(sample, assembly);
+ try {
+ Query fileQuery = new Query("name", indexFilename);
+ QueryOptions fileQueryOptions = new QueryOptions("include", "uri");
+ OpenCGAResult fileResult = catalogManager.getFileManager().search(study, fileQuery,
+ fileQueryOptions, token);
+
+ long maxSize = 0;
+ for (org.opencb.opencga.core.models.file.File file : fileResult.getResults()) {
+ File auxFile = new File(file.getUri().getPath());
+ if (auxFile.exists() && auxFile.length() > maxSize) {
+ maxSize = auxFile.length();
+ indexFile = auxFile;
}
}
- });
+ } catch (CatalogException e) {
+ throw new ToolExecutorException(e);
+ }
+
+ return indexFile;
}
- public Signature parse(Path dir) throws IOException {
- Signature result = new Signature(signatureParams.getId(), signatureParams.getDescription(), query, "SNV", null, null, null);
+ public static List parseCatalogueResults(Path dir) throws IOException {
+ List sigCounts = null;
// Context counts
File contextFile = dir.resolve(CATALOGUES_FILENAME_DEFAULT).toFile();
if (contextFile.exists()) {
List lines = FileUtils.readLines(contextFile, Charset.defaultCharset());
- List sigCounts = new ArrayList<>(lines.size() - 1);
+ sigCounts = new ArrayList<>(lines.size() - 1);
for (int i = 1; i < lines.size(); i++) {
String[] fields = lines.get(i).split("\t");
sigCounts.add(new Signature.GenomeContextCount(fields[0], Math.round(Float.parseFloat((fields[1])))));
}
- result.setCounts(sigCounts);
+ }
+
+ return sigCounts;
+ }
+
+ public static SignatureFitting parseFittingResults(Path outDir, String fitId, String fitMethod, String fitSigVersion, Integer fitNBoot,
+ String fitOrgan, Float fitThresholdPerc, Float fitThresholdPval,
+ Integer fitMaxRareSigs) throws IOException {
+ // Check for fitting coeffs. file
+ File coeffsFile = outDir.resolve(SIGNATURE_COEFFS_FILENAME).toFile();
+ if (!coeffsFile.exists()) {
+ return null;
}
// Signature fitting
- File coeffsFile = dir.resolve(SIGNATURE_COEFFS_FILENAME).toFile();
- if (coeffsFile.exists()) {
- SignatureFitting fitting = new SignatureFitting()
- .setMethod(signatureParams.getFitMethod())
- .setSignatureVersion(signatureParams.getSigVersion());
-
- // Set source from fit method
- if (StringUtils.isNotEmpty(getSignatureParams().getSigVersion())) {
- if (getSignatureParams().getSigVersion().startsWith("COSMIC")) {
- fitting.setSignatureSource("COSMIC");
- } else if (getSignatureParams().getSigVersion().startsWith("RefSig")) {
- fitting.setSignatureSource("RefSig");
- }
+ SignatureFitting fitting = new SignatureFitting();
+ if (StringUtils.isNotEmpty(fitId)) {
+ fitting.setId(fitId);
+ }
+ if (StringUtils.isNotEmpty(fitMethod)) {
+ fitting.setMethod(fitMethod);
+ }
+ if (StringUtils.isNotEmpty(fitSigVersion)) {
+ fitting.setSignatureVersion(fitSigVersion);
+ if (fitSigVersion.startsWith("COSMIC")) {
+ fitting.setSignatureSource("COSMIC");
+ } else if (fitSigVersion.startsWith("RefSig")) {
+ fitting.setSignatureSource("RefSig");
}
+ }
- // Set fitting scores
- List lines = FileUtils.readLines(coeffsFile, Charset.defaultCharset());
- String[] labels = lines.get(0).split("\t");
- String[] values = lines.get(1).split("\t");
- List scores = new ArrayList<>(labels.length);
- for (int i = 0; i < labels.length; i++) {
- String label = labels[i];
- if (label.contains("_")) {
- String[] splits = label.split("_");
- label = splits[splits.length - 1];
- }
- scores.add(new SignatureFitting.Score(label, Double.parseDouble(values[i + 1])));
+ // Set fitting scores
+ List lines = FileUtils.readLines(coeffsFile, Charset.defaultCharset());
+ String[] labels = lines.get(0).split("\t");
+ String[] values = lines.get(1).split("\t");
+ List scores = new ArrayList<>(labels.length);
+ for (int i = 0; i < labels.length; i++) {
+ String label = labels[i];
+ if (label.contains("_")) {
+ String[] splits = label.split("_");
+ label = splits[splits.length - 1];
}
- fitting.setScores(scores);
-
- // Set files
- List files = new ArrayList<>();
- for (File file : getOutDir().toFile().listFiles()) {
- if (file.getName().endsWith("pdf")) {
- files.add(file.getName());
- } else if (file.isDirectory()) {
- for (File file2 : file.listFiles()) {
- if (file2.getName().endsWith("pdf")) {
- files.add(file.getName() + "/" + file2.getName());
- }
+ scores.add(new SignatureFittingScore(label, Double.parseDouble(values[i + 1])));
+ }
+ fitting.setScores(scores);
+
+ // Set files
+ List files = new ArrayList<>();
+ for (File file : outDir.toFile().listFiles()) {
+ if (file.getName().equals("catalogues.pdf")) {
+ continue;
+ }
+ if (file.getName().endsWith("pdf") || file.getName().equals("fitData.rData")) {
+ files.add(AnalysisUtils.getJobFileRelativePath(file.getAbsolutePath()));
+ } else if (file.isDirectory()) {
+ for (File file2 : file.listFiles()) {
+ if (file2.getName().endsWith("pdf")) {
+ files.add(AnalysisUtils.getJobFileRelativePath(file2.getAbsolutePath()));
}
}
}
- fitting.setFiles(files);
+ }
+ fitting.setFiles(files);
- // Set params
- ObjectMap params = new ObjectMap();
- if (signatureParams.getnBoot() != null) {
- params.append("nBoot", signatureParams.getnBoot());
- }
- if (StringUtils.isNotEmpty(signatureParams.getOrgan())) {
- params.append("organ", signatureParams.getOrgan());
- }
- if (signatureParams.getThresholdPerc() != null) {
- params.append("thresholdPerc", signatureParams.getThresholdPerc());
- }
- if (signatureParams.getThresholdPval() != null) {
- params.append("thresholdPval", signatureParams.getThresholdPval());
- }
- if (signatureParams.getMaxRareSigs() != null) {
- params.append("maxRareSigs", signatureParams.getMaxRareSigs());
- }
- if (params.size() > 0) {
- fitting.setParams(params);
- }
+ // Set params
+ ObjectMap params = new ObjectMap();
+ if (fitNBoot != null) {
+ params.append("nBoot", fitNBoot);
+ }
+ if (StringUtils.isNotEmpty(fitOrgan)) {
+ params.append("organ", fitOrgan);
+ }
+ if (fitThresholdPerc != null) {
+ params.append("thresholdPerc", fitThresholdPerc);
+ }
+ if (fitThresholdPval != null) {
+ params.append("thresholdPval", fitThresholdPval);
+ }
+ if (fitMaxRareSigs != null) {
+ params.append("maxRareSigs", fitMaxRareSigs);
+ }
+ if (params.size() > 0) {
fitting.setParams(params);
-
- // Set fitting signature
- result.setFitting(fitting);
}
+ // NOTE(review): this unconditional call makes the 'params.size() > 0' guard immediately before it redundant
+ // (params is always set, even when empty) — confirm which behaviour is intended and remove the other
+ fitting.setParams(params);
- return result;
+ return fitting;
}
public MutationalSignatureAnalysisParams getSignatureParams() {
diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java
index f0816d528e5..55173c3bd3a 100644
--- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java
+++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java
@@ -20,18 +20,28 @@
import htsjdk.samtools.reference.FastaSequenceIndex;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.GZIIndex;
+import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
+import org.opencb.biodata.models.clinical.qc.Signature;
+import org.opencb.biodata.models.clinical.qc.SignatureFitting;
import org.opencb.biodata.models.variant.Variant;
+import org.opencb.biodata.models.variant.avro.BreakendMate;
+import org.opencb.biodata.models.variant.avro.FileEntry;
import org.opencb.biodata.models.variant.avro.VariantType;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.utils.DockerUtils;
+import org.opencb.commons.utils.FileUtils;
import org.opencb.opencga.analysis.ResourceUtils;
import org.opencb.opencga.analysis.StorageToolExecutor;
import org.opencb.opencga.catalog.exceptions.CatalogException;
+import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.core.common.GitRepositoryState;
+import org.opencb.opencga.core.common.JacksonUtils;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.exceptions.ToolExecutorException;
+import org.opencb.opencga.core.models.sample.Sample;
+import org.opencb.opencga.core.models.variant.MutationalSignatureAnalysisParams;
import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.core.response.VariantQueryResult;
import org.opencb.opencga.core.tools.annotations.ToolExecutor;
@@ -47,7 +57,7 @@
import java.nio.file.Paths;
import java.util.*;
-import static org.opencb.opencga.analysis.variant.mutationalSignature.MutationalSignatureAnalysis.CATALOGUES_FILENAME_DEFAULT;
+import static org.opencb.opencga.analysis.variant.mutationalSignature.MutationalSignatureAnalysis.*;
@ToolExecutor(id="opencga-local", tool = MutationalSignatureAnalysis.ID,
framework = ToolExecutor.Framework.LOCAL, source = ToolExecutor.Source.STORAGE)
@@ -65,114 +75,83 @@ public class MutationalSignatureLocalAnalysisExecutor extends MutationalSignatur
+ /**
+ * Runs the mutational signature steps that are not listed in the 'skip' parameter.
+ * <ul>
+ * <li>If 'skip' contains both the catalogue and the fitting values, only the genome context file is checked/created.</li>
+ * <li>Unless the catalogue is skipped, it is computed: SNV when the query type equals SNV, SV for any other value.</li>
+ * <li>Unless the fitting is skipped, the signature fitting is computed.</li>
+ * </ul>
+ * The method returns early, without fitting, when the SNV query matches no variant.
+ */
public void run() throws ToolException, CatalogException, IOException, StorageEngineException {
opencgaHome = Paths.get(getExecutorParams().getString("opencgaHome"));
- if (StringUtils.isEmpty(getCatalogues())) {
+ // Check genome context file for that sample, and create it if necessary
+ if (StringUtils.isNotEmpty(getSkip())
+ && getSkip().contains(MutationalSignatureAnalysisParams.SIGNATURE_CATALOGUE_SKIP_VALUE)
+ && getSkip().contains(MutationalSignatureAnalysisParams.SIGNATURE_FITTING_SKIP_VALUE)) {
+ // Only compute genome context file
+ // TODO: overwrite support !
+ File indexFile = checkGenomeContextFile();
+ logger.info("Checking genome context file {} for sample {}", indexFile.getAbsolutePath(), getSample());
+ }
+
+ if (StringUtils.isEmpty(getSkip()) || (!getSkip().contains(MutationalSignatureAnalysisParams.SIGNATURE_CATALOGUE_SKIP_VALUE))) {
// Get first variant to check where the genome context is stored
Query query = new Query();
if (getQuery() != null) {
query.putAll(getQuery());
}
- // Ovewrite study and type (SNV)
- query.append(VariantQueryParam.STUDY.key(), getStudy()).append(VariantQueryParam.TYPE.key(), VariantType.SNV);
-
- QueryOptions queryOptions = new QueryOptions();
- queryOptions.append(QueryOptions.INCLUDE, "id");
- queryOptions.append(QueryOptions.LIMIT, "1");
+ // Choose the catalogue to compute from the variant type in the query: SNV, otherwise SV
+ String type = query.getString(VariantQueryParam.TYPE.key());
+ if (type.equals(VariantType.SNV.name())) {
+ // SNV
+ logger.info("Computing catalogue (mutational signature) for SNV variants");
+
+ // TODO: overwrite support !
+ File indexFile = checkGenomeContextFile();
+ logger.info("Mutational signature analysis is using the genome context file {} for sample {}", indexFile.getAbsolutePath(),
+ getSample());
+
+ query.append(VariantQueryParam.STUDY.key(), getStudy()).append(VariantQueryParam.TYPE.key(), VariantType.SNV);
+
+ QueryOptions queryOptions = new QueryOptions();
+ queryOptions.append(QueryOptions.INCLUDE, "id");
+ queryOptions.append(QueryOptions.LIMIT, "1");
+
+ VariantQueryResult variantQueryResult = getVariantStorageManager().get(query, queryOptions, getToken());
+ Variant variant = variantQueryResult.first();
+ if (variant == null) {
+ // Nothing to do
+ addWarning("None variant found for that mutational signature query");
+ return;
+ }
- VariantQueryResult variantQueryResult = getVariantStorageManager().get(query, queryOptions, getToken());
- Variant variant = variantQueryResult.first();
- if (variant == null) {
- // Nothing to do
- addWarning("None variant found for that mutational signature query");
- return;
+ // Run mutational analysis taking into account that the genome context is stored in an index file,
+ // if the genome context file does not exist, it will be created !!!
+ computeSignatureCatalogueSNV(indexFile);
+ } else {
+ // SV
+ logger.info("Computing catalogue (mutational signature) for SV variants");
+ computeSignatureCatalogueSV();
}
-
- // Run mutational analysis taking into account that the genome context is stored in an index file,
- // if the genome context file does not exist, it will be created !!!
- computeFromContextFile();
}
- // Run R script for fitting signature
- executeRScript();
-// if (StringUtils.isEmpty(getOrgan()) && (StringUtils.isEmpty(getSigVersion()) || getSigVersion().startsWith("Ref"))) {
-// addWarning("Since the parameter 'organ' is missing and RefSig is been used, the fitting signature will not be computed.");
-// } else {
-// executeRScript();
-// }
+ if (StringUtils.isEmpty(getSkip()) || (!getSkip().contains(MutationalSignatureAnalysisParams.SIGNATURE_FITTING_SKIP_VALUE))) {
+ // Run R script for fitting signature
+ computeSignatureFitting();
+ }
}
- private void computeFromContextFile() throws ToolExecutorException {
+ /**
+ * Returns the genome context index file for the current sample, creating it in the output directory when the
+ * lookup fails or the file found does not exist on disk.
+ *
+ * @return the existing or newly created genome context file
+ * @throws ToolExecutorException if the file still does not exist after trying to create it
+ */
+ private File checkGenomeContextFile() throws ToolExecutorException {
// Context index filename
- File indexFile = null;
- String indexFilename = getContextIndexFilename();
+ File indexFile;
try {
- Query fileQuery = new Query("name", indexFilename);
- QueryOptions fileQueryOptions = new QueryOptions("include", "uri");
- OpenCGAResult fileResult = getVariantStorageManager()
- .getCatalogManager()
- .getFileManager().search(getStudy(), fileQuery, fileQueryOptions, getToken());
-
- long maxSize = 0;
- for (org.opencb.opencga.core.models.file.File file : fileResult.getResults()) {
- File auxFile = new File(file.getUri().getPath());
- if (auxFile.exists() && auxFile.length() > maxSize) {
- maxSize = auxFile.length();
- indexFile = auxFile;
- }
- }
- } catch (CatalogException e) {
- throw new ToolExecutorException(e);
+ indexFile = MutationalSignatureAnalysis.getGenomeContextFile(getSample(), getStudy(), getVariantStorageManager().getCatalogManager(), getToken());
+ } catch (CatalogException | ToolException e) {
+ // Best effort: a failed lookup is handled like a missing file, but the cause is logged instead of being dropped
+ logger.warn("Could not retrieve the genome context file for sample {}, it will be created: {}", getSample(), e.getMessage());
+ indexFile = null;
}
-
- if (indexFile == null) {
- // The genome context file does not exist, we have to create it !!!
- indexFile = getOutDir().resolve(indexFilename).toFile();
- createGenomeContextFile(indexFile);
+ if (indexFile != null && indexFile.exists()) {
+ return indexFile;
}
+ // The genome context file does not exist, we have to create it !!!
+ indexFile = getOutDir().resolve(MutationalSignatureAnalysis.getContextIndexFilename(getSample(), getAssembly())).toFile();
+ createGenomeContextFile(indexFile);
+
if (!indexFile.exists()) {
throw new ToolExecutorException("Could not create the genome context index file for sample " + getSample());
}
-
- try {
- // Read context index
- Map indexMap = new HashMap<>();
- BufferedReader br = new BufferedReader(new FileReader(indexFile));
- String line;
- while ((line = br.readLine()) != null) {
- String[] parts = line.split("\t");
- indexMap.put(parts[0], parts[1]);
- }
-
- // Get variant iterator
- Query query = new Query();
- if (getQuery() != null) {
- query.putAll(getQuery());
- }
- // Ovewrite study and type (SNV)
- query.append(VariantQueryParam.STUDY.key(), getStudy()).append(VariantQueryParam.TYPE.key(), VariantType.SNV);
-
- QueryOptions queryOptions = new QueryOptions(QueryOptions.INCLUDE, "id");
-
- VariantDBIterator iterator = getVariantStorageManager().iterator(query, queryOptions, getToken());
-
- Map> countMap = initCountMap();
-
- while (iterator.hasNext()) {
- Variant variant = iterator.next();
-
- // Update count map
- updateCountMap(variant, indexMap.get(variant.toString()), countMap);
- }
-
- // Write context counts
- File cataloguesFile = getOutDir().resolve(CATALOGUES_FILENAME_DEFAULT).toFile();
- writeCountMap(getSample(), countMap, cataloguesFile);
-
- // Update the parameter catalogues
- setCatalogues(cataloguesFile.getAbsolutePath());
- } catch (IOException | CatalogException | StorageEngineException | ToolException e) {
- throw new ToolExecutorException(e);
- }
+ return indexFile;
}
private void createGenomeContextFile(File indexFile) throws ToolExecutorException {
@@ -252,13 +231,312 @@ private void updateCountMap(Variant variant, String sequence, Map indexMap = new HashMap<>();
+ BufferedReader br = new BufferedReader(new FileReader(indexFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split("\t");
+ indexMap.put(parts[0], parts[1]);
+ }
+
+ // Get variant iterator
+ Query query = new Query();
+ if (getQuery() != null) {
+ query.putAll(getQuery());
+ }
+ // Overwrite study and type (SNV)
+ query.append(VariantQueryParam.STUDY.key(), getStudy()).append(VariantQueryParam.TYPE.key(), VariantType.SNV);
+
+ QueryOptions queryOptions = new QueryOptions(QueryOptions.INCLUDE, "id");
+
+ VariantDBIterator iterator = getVariantStorageManager().iterator(query, queryOptions, getToken());
+
+ Map> countMap = initCountMap();
+
+ while (iterator.hasNext()) {
+ Variant variant = iterator.next();
+
+ // Update count map
+ updateCountMap(variant, indexMap.get(variant.toString()), countMap);
+ }
+
+ // Write context counts
+ File cataloguesFile = getOutDir().resolve(CATALOGUES_FILENAME_DEFAULT).toFile();
+ writeCountMap(getSample(), countMap, cataloguesFile);
+
+ // Check catalogue file before parsing and creating the mutational signature data model
+ if (!cataloguesFile.exists()) {
+ throw new ToolExecutorException("Something wrong happened: counts file " + CATALOGUES_FILENAME_DEFAULT + " could not be"
+ + " generated");
+ }
+ List genomeContextCounts = parseCatalogueResults(getOutDir());
+ Signature signature = new Signature()
+ .setId(getQueryId())
+ .setDescription(getQueryDescription())
+ .setQuery(query)
+ .setType("SNV")
+ .setCounts(genomeContextCounts);
+
+ JacksonUtils.getDefaultObjectMapper().writerFor(Signature.class).writeValue(getOutDir()
+ .resolve(MutationalSignatureAnalysis.MUTATIONAL_SIGNATURE_DATA_MODEL_FILENAME).toFile(), signature);
+ } catch (IOException | CatalogException | StorageEngineException | ToolException e) {
+ throw new ToolExecutorException(e);
+ }
+ }
+
+    /**
+     * Computes the mutational signature catalogue for structural variants.
+     * <p>
+     * The SVs matching the query are clustered by {@link #computeClusteredFile(Query, QueryOptions)}; the resulting
+     * rows are counted by context (clustered flag, type and, except for translocations, length), and the counts are
+     * written both to the catalogue file (CATALOGUES_FILENAME_DEFAULT) and, as a {@link Signature} with type "SV",
+     * to the data model file (MUTATIONAL_SIGNATURE_DATA_MODEL_FILENAME) in the output directory.
+     *
+     * @throws ToolExecutorException wrapping any I/O, catalog, storage or tool error raised while computing
+     */
+    public void computeSignatureCatalogueSV() throws ToolExecutorException {
+        try {
+            // Get variant iterator
+            Query query = new Query();
+            if (getQuery() != null) {
+                query.putAll(getQuery());
+            }
+            // Overwrite study and types related to SV
+            query.put(VariantQueryParam.STUDY.key(), getStudy());
+            query.put(VariantQueryParam.TYPE.key(), VariantType.DELETION + "," + VariantType.BREAKEND + "," + VariantType.DUPLICATION + ","
+                    + VariantType.TANDEM_DUPLICATION + "," + VariantType.INVERSION + "," + VariantType.TRANSLOCATION);
+
+            QueryOptions queryOptions = new QueryOptions(QueryOptions.INCLUDE, "id,sv,studies");
+
+            logger.info("Query: {}", query.toJson());
+            logger.info("Query options: {}", queryOptions.toJson());
+
+            File clusteredFile = computeClusteredFile(query, queryOptions);
+
+            // Columns of the clustered file:
+            // chrom1 start1 end1 chrom2 start2 end2 length type sample id is.clustered
+            // 0      1      2    3      4      5    6      7    8      9  10
+            Map<String, Integer> countMap = new HashMap<>();
+            // try-with-resources: the reader is closed even if a line fails to be processed
+            try (BufferedReader br = FileUtils.newBufferedReader(clusteredFile.toPath())) {
+                // Skip header line
+                String line = br.readLine();
+                while ((line = br.readLine()) != null) {
+                    String[] split = line.split("\t");
+                    if (split.length != 11) {
+                        logger.warn("Skipping line {}: it does not contain 11 elements", line);
+                        continue;
+                    }
+                    String clusteredKey = split[10].equals("FALSE") ? NON_CLUSTERED : CLUSTERED;
+                    String lengthKey = split[6];
+                    String typeKey = split[7];
+
+                    // Translocation contexts carry no length component
+                    String key = clusteredKey + "_" + typeKey;
+                    if (!typeKey.equals(TYPE_TRANS)) {
+                        key += ("_" + lengthKey);
+                    }
+                    countMap.merge(key, 1, Integer::sum);
+                }
+            }
+
+            // Build the genome context counts object for SV; contexts that were not observed get a zero count
+            List<Signature.GenomeContextCount> genomeContextCounts = new LinkedList<>();
+            for (String clustered : Arrays.asList(CLUSTERED, NON_CLUSTERED)) {
+                for (String type : Arrays.asList(TYPE_DEL, TYPE_TDS, TYPE_INV)) {
+                    for (String length : Arrays.asList(LENGTH_1_10Kb, LENGTH_10Kb_100Kb, LENGTH_100Kb_1Mb, LENGTH_1Mb_10Mb,
+                            LENGTH_10Mb)) {
+                        String key = clustered + "_" + type + "_" + length;
+                        genomeContextCounts.add(new Signature.GenomeContextCount(key, countMap.getOrDefault(key, 0)));
+                    }
+                }
+                String key = clustered + "_" + TYPE_TRANS;
+                genomeContextCounts.add(new Signature.GenomeContextCount(key, countMap.getOrDefault(key, 0)));
+            }
+
+            // Write catalogue file from the genome context counts; the writer is closed even if writing fails
+            try (PrintWriter pw = new PrintWriter(getOutDir().resolve(CATALOGUES_FILENAME_DEFAULT).toFile())) {
+                pw.write(query.getString(VariantQueryParam.SAMPLE.key()));
+                pw.write("\n");
+                for (Signature.GenomeContextCount counts : genomeContextCounts) {
+                    pw.write(counts.getContext() + "\t" + counts.getTotal() + "\n");
+                }
+            }
+
+            Signature signature = new Signature()
+                    .setId(getQueryId())
+                    .setDescription(getQueryDescription())
+                    .setQuery(query)
+                    .setType("SV")
+                    .setCounts(genomeContextCounts);
+
+            JacksonUtils.getDefaultObjectMapper().writerFor(Signature.class).writeValue(getOutDir()
+                    .resolve(MutationalSignatureAnalysis.MUTATIONAL_SIGNATURE_DATA_MODEL_FILENAME).toFile(), signature);
+        } catch (IOException | CatalogException | StorageEngineException | ToolException e) {
+            throw new ToolExecutorException(e);
+        }
+    }
+
+    /**
+     * Dumps the SVs returned by the query into a tab-separated file (in.clustered.bedpe) in the output directory and
+     * runs the sv_clustering.R script through Docker to produce out.clustered.bedpe.
+     * <p>
+     * Variants without a breakend mate, or for which no type or length key can be derived, are left out.
+     *
+     * @param query        variant query used to fetch the SVs
+     * @param queryOptions options passed along with the query
+     * @return the file written by the R script
+     * @throws ToolException if writing the input, running the script fails, or the output file is missing
+     * @throws StorageEngineException if the variant iterator cannot be created
+     * @throws CatalogException if the variant iterator cannot be created
+     */
+    private File computeClusteredFile(Query query, QueryOptions queryOptions) throws ToolException, StorageEngineException,
+            CatalogException {
+        // NOTE(review): the iterator is never closed here - confirm whether VariantDBIterator must be closed
+        VariantDBIterator iterator = getVariantStorageManager().iterator(query, queryOptions, getToken());
+
+        // $ Rscript sv_clustering.R ./test.bedpe ./out.bedpe
+        File inputFile = getOutDir().resolve("in.clustered.bedpe").toFile();
+        File outputFile = getOutDir().resolve("out.clustered.bedpe").toFile();
+        try {
+            // try-with-resources: the writer is flushed and closed even if iterating the variants fails
+            try (PrintWriter pw = new PrintWriter(inputFile)) {
+                pw.println("chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tlength\ttype\tsample");
+                while (iterator.hasNext()) {
+                    Variant variant = iterator.next();
+                    if (variant.getSv() == null || variant.getSv().getBreakend() == null || variant.getSv().getBreakend().getMate() == null) {
+                        continue;
+                    }
+                    String typeKey = getTypeKey(variant);
+                    String lengthKey = getLengthKey(variant);
+                    if (typeKey != null && lengthKey != null) {
+                        BreakendMate mate = variant.getSv().getBreakend().getMate();
+                        pw.println(variant.getChromosome() + "\t" + variant.getStart() + "\t" + variant.getEnd() + "\t"
+                                + mate.getChromosome() + "\t" + mate.getPosition() + "\t" + mate.getPosition() + "\t"
+                                + lengthKey + "\t" + typeKey + "\t" + getSample());
+                    }
+                }
+            }
+
+            // Build command line to run R script via docker image
+            // Input binding
+            List<AbstractMap.SimpleEntry<String, String>> inputBindings = new ArrayList<>();
+            inputBindings.add(new AbstractMap.SimpleEntry<>(opencgaHome.resolve("analysis/" + MutationalSignatureAnalysis.ID)
+                    .toAbsolutePath().toString(), "/script"));
+
+            // Output binding
+            AbstractMap.SimpleEntry<String, String> outputBinding = new AbstractMap.SimpleEntry<>(getOutDir().toAbsolutePath().toString(),
+                    "/jobdir");
+
+            String rParams = "R CMD Rscript --vanilla /script/sv_clustering.R"
+                    + " /jobdir/" + inputFile.getName()
+                    + " /jobdir/" + outputFile.getName();
+
+            // Execute R script in docker
+            DockerUtils.run(MutationalSignatureLocalAnalysisExecutor.R_DOCKER_IMAGE, inputBindings, outputBinding, rParams, null);
+        } catch (Exception e) {
+            throw new ToolException(e);
+        }
+
+        // Check output file
+        if (!outputFile.exists()) {
+            throw new ToolException("Something wrong when computing the clustered values.");
+        }
+
+        return outputFile;
+    }
+
+ // Placeholder: ignores the given variant and always returns NON_CLUSTERED.
+ // NOTE(review): no caller is visible in this change - confirm it is still needed.
+ private String getClusteredKey(Variant variant) {
+ return NON_CLUSTERED;
+ }
+
+    /**
+     * Maps a variant to one of the SV type keys (TYPE_DEL, TYPE_TDS, TYPE_INV, TYPE_TRANS).
+     * <p>
+     * The variant's own type name is the starting point; it is replaced by the upper-cased EXT_SVTYPE or SVCLASS
+     * value of the first file entry (first study only) that carries either attribute.
+     *
+     * @param variant variant to classify
+     * @return the type key, or null when the resulting type name is not one of the recognised values
+     */
+    private String getTypeKey(Variant variant) {
+        String svType = "";
+        if (variant.getType() != null) {
+            svType = variant.getType().name();
+        }
+
+        if (CollectionUtils.isNotEmpty(variant.getStudies())
+                && CollectionUtils.isNotEmpty(variant.getStudies().get(0).getFiles())) {
+            for (FileEntry fileEntry : variant.getStudies().get(0).getFiles()) {
+                if (fileEntry.getData() == null) {
+                    continue;
+                }
+                // EXT_SVTYPE takes precedence over SVCLASS within the same file entry
+                if (fileEntry.getData().containsKey("EXT_SVTYPE")) {
+                    svType = fileEntry.getData().get("EXT_SVTYPE").toUpperCase(Locale.ROOT);
+                    break;
+                }
+                if (fileEntry.getData().containsKey("SVCLASS")) {
+                    svType = fileEntry.getData().get("SVCLASS").toUpperCase(Locale.ROOT);
+                    break;
+                }
+            }
+        }
+
+        switch (svType) {
+            case "DEL":
+            case "DELETION":
+                return TYPE_DEL;
+            case "DUP":
+            case "TDS":
+            case "DUPLICATION":
+            case "TANDEM_DUPLICATION":
+                return TYPE_TDS;
+            case "INV":
+            case "INVERSION":
+                return TYPE_INV;
+            case "TR":
+            case "TRANS":
+            case "TRANSLOCATION":
+                return TYPE_TRANS;
+            default:
+                return null;
+        }
+    }
+
+    /**
+     * Maps a variant to a length key, based on the distance between its start and the position of its breakend mate.
+     *
+     * @param variant variant to classify
+     * @return one of the LENGTH_* keys when both ends are on the same chromosome; LENGTH_NA when they are on
+     *         different chromosomes and the variant type is TRANSLOCATION; null otherwise, including when the
+     *         variant has no breakend mate
+     */
+    private String getLengthKey(Variant variant) {
+        if (variant.getSv() == null || variant.getSv().getBreakend() == null || variant.getSv().getBreakend().getMate() == null) {
+            return null;
+        }
+
+        BreakendMate mate = variant.getSv().getBreakend().getMate();
+        if (!variant.getChromosome().equals(mate.getChromosome())) {
+            // NOTE(review): this reads variant.getType(), whereas getTypeKey() may take the type from file attributes -
+            // confirm translocations reported only through EXT_SVTYPE/SVCLASS are meant to yield null here
+            return variant.getType() == VariantType.TRANSLOCATION ? LENGTH_NA : null;
+        }
+
+        int distance = Math.abs(mate.getPosition() - variant.getStart());
+        if (distance <= 10000) {
+            return LENGTH_1_10Kb;
+        }
+        if (distance <= 100000) {
+            return LENGTH_10Kb_100Kb;
+        }
+        if (distance <= 1000000) {
+            return LENGTH_100Kb_1Mb;
+        }
+        if (distance <= 10000000) {
+            return LENGTH_1Mb_10Mb;
+        }
+        return LENGTH_10Mb;
+    }
+
+ private void computeSignatureFitting() throws IOException, ToolException, CatalogException {
+ File cataloguesFile = getOutDir().resolve(CATALOGUES_FILENAME_DEFAULT).toFile();
+ if (!cataloguesFile.exists()) {
+ // Get counts from sample
+ CatalogManager catalogManager = getVariantStorageManager().getCatalogManager();
+ // Check sample
+ String study = catalogManager.getStudyManager().get(getStudy(), QueryOptions.empty(), getToken()).first().getFqn();
+ OpenCGAResult sampleResult = catalogManager.getSampleManager().get(study, getSample(), QueryOptions.empty(),
+ getToken());
+ if (sampleResult.getNumResults() != 1) {
+ throw new ToolException("Unable to compute mutational signature analysis. Sample '" + getSample() + "' not found");
+ }
+ Sample sample = sampleResult.first();
+ logger.info("Searching catalogue counts from quality control for sample " + getSample());
+ if (sample.getQualityControl() != null && sample.getQualityControl().getVariant() != null
+ && CollectionUtils.isNotEmpty(sample.getQualityControl().getVariant().getSignatures())) {
+ logger.info("Searching in " + sample.getQualityControl().getVariant().getSignatures().size() + " signatures");
+ for (Signature signature : sample.getQualityControl().getVariant().getSignatures()) {
+ logger.info("Matching ? " + getQueryId() + " vs " + signature.getId());
+ if (getQueryId().equals(signature.getId())) {
+ // Write catalogue file
+ try (PrintWriter pw = new PrintWriter(cataloguesFile)) {
+ pw.println(getSample());
+ for (Signature.GenomeContextCount count : signature.getCounts()) {
+ pw.println(count.getContext() + "\t" + count.getTotal());
+ }
+ pw.close();
+ } catch (Exception e) {
+ throw new ToolException("Error writing catalogue output file: " + cataloguesFile.getName(), e);
+ }
+ logger.info("Found catalogue {} and written in {}", signature.getId(), cataloguesFile.getAbsolutePath());
+ break;
+ }
+ }
+ }
+ if (!cataloguesFile.exists()) {
+ throw new ToolException("Could not find mutational signagure catalogue (counts) file: " + cataloguesFile.getName());
+ }
+ }
+
List> inputBindings = new ArrayList<>();
- inputBindings.add(new AbstractMap.SimpleEntry<>(inputPath, "/data/input"));
+ inputBindings.add(new AbstractMap.SimpleEntry<>(getOutDir().toAbsolutePath().toString(), "/data/input"));
if (StringUtils.isNotEmpty(getSignaturesFile())) {
File signaturesFile = new File(getSignaturesFile());
if (signaturesFile.exists()) {
@@ -275,7 +553,7 @@ private void executeRScript() throws IOException {
.toAbsolutePath().toString(), "/data/output");
StringBuilder scriptParams = new StringBuilder("R CMD Rscript --vanilla ")
.append("/opt/opencga/signature.tools.lib/scripts/signatureFit")
- .append(" --catalogues=/data/input/").append(new File(getCatalogues()).getName())
+ .append(" --catalogues=/data/input/").append(cataloguesFile.getName())
.append(" --outdir=/data/output");
if (StringUtils.isNotEmpty(getFitMethod())) {
scriptParams.append(" --fitmethod=").append(getFitMethod());
@@ -318,5 +596,16 @@ private void executeRScript() throws IOException {
String cmdline = DockerUtils.run(R_DOCKER_IMAGE, inputBindings, outputBinding, scriptParams.toString(),
null);
logger.info("Docker command line: " + cmdline);
+
+ // Check fitting file before parsing and creating the mutational signature fitting data model
+ File signatureCoeffsFile = getOutDir().resolve(SIGNATURE_COEFFS_FILENAME).toFile();
+ if (!signatureCoeffsFile.exists()) {
+ throw new ToolExecutorException("Something wrong happened: signature coeffs. file " + SIGNATURE_COEFFS_FILENAME + " could not"
+ + " be generated");
+ }
+ SignatureFitting signatureFitting = parseFittingResults(getOutDir(), getFitId(), getFitMethod(), getSigVersion(), getnBoot(),
+ getOrgan(), getThresholdPerc(), getThresholdPval(), getMaxRareSigs());
+ JacksonUtils.getDefaultObjectMapper().writerFor(SignatureFitting.class).writeValue(getOutDir()
+ .resolve(MutationalSignatureAnalysis.MUTATIONAL_SIGNATURE_FITTING_DATA_MODEL_FILENAME).toFile(), signatureFitting);
}
}
diff --git a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/OpenCGATestExternalResource.java b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/OpenCGATestExternalResource.java
index a31c301ec86..a30c0b4f4b1 100644
--- a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/OpenCGATestExternalResource.java
+++ b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/OpenCGATestExternalResource.java
@@ -32,14 +32,12 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
+import java.io.*;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.nio.file.StandardCopyOption;
import java.text.SimpleDateFormat;
import java.util.Date;
@@ -171,6 +169,12 @@ public Path isolateOpenCGA() throws IOException {
// Files.copy(inputStream, opencgaHome.resolve("examples")
// .resolve("1k.chr1.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), StandardCopyOption.REPLACE_EXISTING);
+ // Analysis
+ Files.createDirectories(opencgaHome.resolve("analysis/mutational-signature"));
+
+ inputStream = new FileInputStream("../opencga-app/app/analysis/mutational-signature/sv_clustering.R");
+ Files.copy(inputStream, opencgaHome.resolve("analysis/mutational-signature/sv_clustering.R"), StandardCopyOption.REPLACE_EXISTING);
+
return opencgaHome;
}
diff --git a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java
index 8d13a664edf..b2bd0a2227f 100644
--- a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java
+++ b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java
@@ -19,18 +19,19 @@
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.mutable.MutableInt;
import org.hamcrest.CoreMatchers;
-import org.junit.AfterClass;
-import org.junit.Assume;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.*;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.opencb.biodata.models.clinical.Disorder;
import org.opencb.biodata.models.clinical.Phenotype;
+import org.opencb.biodata.models.clinical.qc.HRDetect;
import org.opencb.biodata.models.clinical.qc.SampleQcVariantStats;
+import org.opencb.biodata.models.clinical.qc.Signature;
+import org.opencb.biodata.models.clinical.qc.SignatureFitting;
import org.opencb.biodata.models.core.SexOntologyTermAnnotation;
import org.opencb.biodata.models.variant.StudyEntry;
import org.opencb.biodata.models.variant.Variant;
+import org.opencb.biodata.models.variant.avro.VariantType;
import org.opencb.biodata.models.variant.metadata.SampleVariantStats;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
@@ -38,8 +39,10 @@
import org.opencb.opencga.TestParamConstants;
import org.opencb.opencga.analysis.tools.ToolRunner;
import org.opencb.opencga.analysis.variant.gwas.GwasAnalysis;
+import org.opencb.opencga.analysis.variant.hrdetect.HRDetectAnalysis;
import org.opencb.opencga.analysis.variant.knockout.KnockoutAnalysis;
import org.opencb.opencga.analysis.variant.manager.VariantStorageManager;
+import org.opencb.opencga.analysis.variant.mutationalSignature.MutationalSignatureAnalysis;
import org.opencb.opencga.analysis.variant.operations.VariantIndexOperationTool;
import org.opencb.opencga.analysis.variant.operations.VariantSampleIndexOperationTool;
import org.opencb.opencga.analysis.variant.samples.SampleEligibilityAnalysis;
@@ -68,16 +71,19 @@
import org.opencb.opencga.core.models.individual.Location;
import org.opencb.opencga.core.models.operations.variant.VariantSampleIndexParams;
import org.opencb.opencga.core.models.sample.Sample;
+import org.opencb.opencga.core.models.sample.SampleQualityControl;
import org.opencb.opencga.core.models.sample.SampleReferenceParam;
import org.opencb.opencga.core.models.sample.SampleUpdateParams;
import org.opencb.opencga.core.models.user.Account;
import org.opencb.opencga.core.models.variant.*;
+import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.core.tools.result.ExecutionResult;
import org.opencb.opencga.core.tools.result.ExecutionResultManager;
import org.opencb.opencga.storage.core.StorageEngineFactory;
import org.opencb.opencga.storage.core.metadata.models.VariantScoreMetadata;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
import org.opencb.opencga.storage.core.variant.VariantStorageOptions;
+import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam;
import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory;
import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine;
@@ -90,6 +96,8 @@
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
+import java.net.URI;
+import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
@@ -97,6 +105,7 @@
import static org.hamcrest.CoreMatchers.hasItem;
import static org.junit.Assert.*;
+import static org.opencb.opencga.storage.core.variant.VariantStorageBaseTest.getResourceUri;
@RunWith(Parameterized.class)
public class VariantAnalysisTest {
@@ -115,10 +124,15 @@ public class VariantAnalysisTest {
private static String son = "NA19685";
private static String daughter = "NA19600";
+ public static final String CANCER_STUDY = "cancer";
+ private static String cancer_sample = "AR2.10039966-01T";
+ private static String germline_sample = "AR2.10039966-01G";
+
+
@Parameterized.Parameters(name = "{0}")
public static Object[][] parameters() {
return new Object[][]{
- {MongoDBVariantStorageEngine.STORAGE_ENGINE_ID},
+// NOTE(review): the MongoDB engine is commented out, so these parameterized tests only run against Hadoop - confirm this is intentional
+// {MongoDBVariantStorageEngine.STORAGE_ENGINE_ID},
{HadoopVariantStorageEngine.STORAGE_ENGINE_ID}
};
}
@@ -172,13 +186,16 @@ public void setUp() throws Throwable {
setUpCatalogManager();
-
file = opencga.createFile(STUDY, "variant-test-file.vcf.gz", token);
variantStorageManager.index(STUDY, file.getId(), opencga.createTmpOutdir("_index"), new ObjectMap(VariantStorageOptions.ANNOTATE.key(), true), token);
for (int i = 0; i < file.getSampleIds().size(); i++) {
+ String id = file.getSampleIds().get(i);
+ if (id.equals(son)) {
+ SampleUpdateParams updateParams = new SampleUpdateParams().setSomatic(true);
+ catalogManager.getSampleManager().update(STUDY, id, updateParams, null, token);
+ }
if (i % 2 == 0) {
- String id = file.getSampleIds().get(i);
SampleUpdateParams updateParams = new SampleUpdateParams().setPhenotypes(Collections.singletonList(PHENOTYPE));
catalogManager.getSampleManager().update(STUDY, id, updateParams, null, token);
}
@@ -217,6 +234,20 @@ public void setUp() throws Throwable {
individuals.stream().map(Individual::getId).collect(Collectors.toList()), new QueryOptions(),
token);
+ // Cancer (SV)
+ ObjectMap config = new ObjectMap();
+// config.put(VariantStorageOptions.ANNOTATE.key(), true);
+ config.put(VariantStorageOptions.LOAD_SPLIT_DATA.key(), VariantStorageEngine.SplitData.MULTI);
+
+ file = opencga.createFile(CANCER_STUDY, "AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz", token);
+ variantStorageManager.index(CANCER_STUDY, file.getId(), opencga.createTmpOutdir("_index"), config, token);
+ file = opencga.createFile(CANCER_STUDY, "AR2.10039966-01T.copynumber.caveman.vcf.gz", token);
+ variantStorageManager.index(CANCER_STUDY, file.getId(), opencga.createTmpOutdir("_index"), config, token);
+ file = opencga.createFile(CANCER_STUDY, "AR2.10039966-01T_vs_AR2.10039966-01G.annot.pindel.vcf.gz", token);
+ variantStorageManager.index(CANCER_STUDY, file.getId(), opencga.createTmpOutdir("_index"), config, token);
+
+ SampleUpdateParams updateParams = new SampleUpdateParams().setSomatic(true);
+ catalogManager.getSampleManager().update(CANCER_STUDY, cancer_sample, updateParams, null, token);
opencga.getStorageConfiguration().getVariant().setDefaultEngine(storageEngine);
VariantStorageEngine engine = opencga.getStorageEngineFactory().getVariantStorageEngine(storageEngine, DB_NAME);
@@ -257,6 +288,19 @@ public void setUpCatalogManager() throws IOException, CatalogException {
catalogManager.getSampleManager().create(STUDY, sample, null, token);
}
+ // Cancer
+ List samples = new ArrayList<>();
+ catalogManager.getStudyManager().create(projectId, CANCER_STUDY, null, "Phase 1", "Done", null, null, null, null, null, token);
+ Sample sample = new Sample().setId(cancer_sample).setSomatic(true);
+ samples.add(sample);
+// catalogManager.getSampleManager().create(CANCER_STUDY, sample, null, token);
+ sample = new Sample().setId(germline_sample);
+ samples.add(sample);
+// catalogManager.getSampleManager().create(CANCER_STUDY, sample, null, token);
+ Individual individual = catalogManager.getIndividualManager()
+ .create(CANCER_STUDY, new Individual("AR2.10039966-01", "AR2.10039966-01", new Individual(), new Individual(), new Location(), SexOntologyTermAnnotation.initMale(), null, null, null, null, "",
+ samples, false, 0, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), IndividualInternal.init(), Collections.emptyMap()), Collections.emptyList(), new QueryOptions(ParamConstants.INCLUDE_RESULT_PARAM, true), token).first();
+ assertEquals(2, individual.getSamples().size());
}
@Test
@@ -745,7 +789,289 @@ public void testVariantSecondarySampleIndex() throws Exception {
// checkExecutionResult(er, false);
}
- public void checkExecutionResult(ExecutionResult er) {
+ /**
+  * Runs the mutational signature analysis in fitting-only mode (the "catalogue" step is skipped) for an SNV
+  * signature that is first loaded from a JSON resource and stored in the cancer sample quality control.
+  * The test checks that both fitting output files exist and that a fitting with the requested fit ID can be
+  * found in the sample quality control read back from catalog.
+  */
+ @Test
+ public void testMutationalSignatureFittingSNV() throws Exception {
+     Path outDir = Paths.get(opencga.createTmpOutdir("_mutational_signature_fitting_snv"));
+     System.out.println("outDir = " + outDir);
+
+     // Store the pre-computed SNV catalogue in the sample quality control so the fitting can refer to it by ID
+     URI uri = getResourceUri("mutational-signature-catalogue-snv.json");
+     Path path = Paths.get(uri.getPath());
+     Signature signature = JacksonUtils.getDefaultObjectMapper().readerFor(Signature.class).readValue(path.toFile());
+     SampleQualityControl qc = new SampleQualityControl();
+     qc.getVariant().setSignatures(Collections.singletonList(signature));
+     SampleUpdateParams updateParams = new SampleUpdateParams().setQualityControl(qc);
+     catalogManager.getSampleManager().update(CANCER_STUDY, cancer_sample, updateParams, null, token);
+
+     MutationalSignatureAnalysisParams params = new MutationalSignatureAnalysisParams();
+     params.setSample(cancer_sample);
+     params.setId(signature.getId());
+     params.setFitId("fitting-1");
+     params.setFitMethod("FitMS");
+     params.setFitSigVersion("RefSigv2");
+     params.setFitOrgan("Breast");
+     params.setFitNBoot(200);
+     params.setFitThresholdPerc(5.0f);
+     params.setFitThresholdPval(0.05f);
+     params.setFitMaxRareSigs(1);
+     params.setSkip("catalogue");
+
+     toolRunner.execute(MutationalSignatureAnalysis.class, params, new ObjectMap(ParamConstants.STUDY_PARAM, CANCER_STUDY),
+             outDir, null, token);
+
+     // Assert existence BEFORE reading: reading a missing file would throw and hide the assertion
+     java.io.File coeffsFile = outDir.resolve(MutationalSignatureAnalysis.SIGNATURE_COEFFS_FILENAME).toFile();
+     assertTrue(coeffsFile.exists());
+     byte[] bytes = Files.readAllBytes(coeffsFile.toPath());
+     System.out.println(new String(bytes));
+
+     java.io.File fittingFile = outDir.resolve(MutationalSignatureAnalysis.MUTATIONAL_SIGNATURE_FITTING_DATA_MODEL_FILENAME).toFile();
+     assertTrue(fittingFile.exists());
+     bytes = Files.readAllBytes(fittingFile.toPath());
+     System.out.println(new String(bytes));
+
+     // The fitting must be attached to the matching signature of the sample
+     OpenCGAResult<Sample> sampleResult = catalogManager.getSampleManager().get(CANCER_STUDY, cancer_sample, QueryOptions.empty(), token);
+     Sample sample = sampleResult.first();
+     List<Signature> signatures = sample.getQualityControl().getVariant().getSignatures();
+     for (Signature sig : signatures) {
+         if (sig.getId().equals(signature.getId())) {
+             for (SignatureFitting fitting : sig.getFittings()) {
+                 if (fitting.getId().equals(params.getFitId())) {
+                     System.out.println(JacksonUtils.getDefaultObjectMapper().writerFor(SignatureFitting.class).writeValueAsString(fitting));
+                     return;
+                 }
+             }
+         }
+     }
+     fail("Mutational signature fitting not found in sample quality control");
+ }
+
+ /**
+  * Runs the mutational signature analysis in catalogue-only mode (the "fitting" step is skipped) for the SV
+  * variants of the cancer sample. The test checks that the catalogue and signature output files exist and that
+  * a signature with the requested ID can be found in the sample quality control read back from catalog.
+  */
+ @Test
+ public void testMutationalSignatureCatalogueSV() throws Exception {
+     Path outDir = Paths.get(opencga.createTmpOutdir("_mutational_signature_catalogue_sv"));
+     System.out.println("outDir = " + outDir);
+
+     Path opencgaHome = opencga.getOpencgaHome();
+     System.out.println("OpenCGA home = " + opencgaHome);
+
+     MutationalSignatureAnalysisParams params = new MutationalSignatureAnalysisParams();
+     params.setSample(cancer_sample);
+     params.setId("catalogue-1");
+     params.setDescription("Catalogue #1");
+     // Restrict the catalogue to the structural variants of the cancer sample
+     VariantQuery query = new VariantQuery();
+     query.sample(cancer_sample);
+     query.type(VariantType.SV.name());
+     params.setQuery(query.toJson());
+     params.setSkip("fitting");
+
+     toolRunner.execute(MutationalSignatureAnalysis.class, params, new ObjectMap(ParamConstants.STUDY_PARAM, CANCER_STUDY),
+             outDir, null, token);
+
+     // Assert existence BEFORE reading: reading a missing file would throw and hide the assertion
+     java.io.File catalogueFile = outDir.resolve(MutationalSignatureAnalysis.CATALOGUES_FILENAME_DEFAULT).toFile();
+     assertTrue(catalogueFile.exists());
+     byte[] bytes = Files.readAllBytes(catalogueFile.toPath());
+     System.out.println(new String(bytes));
+
+     java.io.File signatureFile = outDir.resolve(MutationalSignatureAnalysis.MUTATIONAL_SIGNATURE_DATA_MODEL_FILENAME).toFile();
+     assertTrue(signatureFile.exists());
+     bytes = Files.readAllBytes(signatureFile.toPath());
+     System.out.println(new String(bytes));
+
+     OpenCGAResult<Sample> sampleResult = catalogManager.getSampleManager().get(CANCER_STUDY, cancer_sample, QueryOptions.empty(), token);
+     Sample sample = sampleResult.first();
+     List<Signature> signatures = sample.getQualityControl().getVariant().getSignatures();
+     for (Signature signature : signatures) {
+         if (signature.getId().equals(params.getId())) {
+             return;
+         }
+     }
+     fail("Signature not found in sample quality control");
+ }
+
+ /**
+  * Runs the mutational signature analysis in fitting-only mode (the "catalogue" step is skipped) for an SV
+  * signature. A signatures TSV resource is linked into the study and passed as both the signatures file and the
+  * rare signatures file; the SV signature itself is loaded from a JSON resource and stored in the cancer sample
+  * quality control. The test checks that both fitting output files exist.
+  */
+ @Test
+ public void testMutationalSignatureFittingSV() throws Exception {
+     Path outDir = Paths.get(opencga.createTmpOutdir("_mutational_signature_fitting"));
+     System.out.println("outDir = " + outDir);
+
+     // Link the signatures file into catalog and look up its file ID by name
+     URI uri = getResourceUri("2019_01_10_all_PCAWG_sigs_rearr.tsv");
+     catalogManager.getFileManager().createFolder(CANCER_STUDY, "signature", true, "", new QueryOptions(), token);
+     catalogManager.getFileManager().link(CANCER_STUDY, uri, "signature", new ObjectMap(), token);
+     String filename = Paths.get(uri.toURL().getFile()).toFile().getName();
+     File file = catalogManager.getFileManager().get(CANCER_STUDY, filename, null, token).first();
+     String signatureFileId = file.getId();
+
+     // Store the SV signature in the sample quality control so the fitting can refer to it by ID
+     uri = getResourceUri("mutational-signature-sv.json");
+     Path path = Paths.get(uri.getPath());
+     Signature signature = JacksonUtils.getDefaultObjectMapper().readerFor(Signature.class).readValue(path.toFile());
+     SampleQualityControl qc = new SampleQualityControl();
+     qc.getVariant().setSignatures(Collections.singletonList(signature));
+     SampleUpdateParams updateParams = new SampleUpdateParams().setQualityControl(qc);
+     catalogManager.getSampleManager().update(CANCER_STUDY, cancer_sample, updateParams, null, token);
+
+     MutationalSignatureAnalysisParams params = new MutationalSignatureAnalysisParams();
+     params.setSample(cancer_sample);
+     params.setId(signature.getId());
+     params.setFitId("fitting-1");
+     params.setFitMethod("FitMS");
+     params.setFitSigVersion("RefSigv2");
+     params.setFitOrgan("Breast");
+     params.setFitNBoot(200);
+     params.setFitThresholdPerc(5.0f);
+     params.setFitThresholdPval(0.05f);
+     params.setFitMaxRareSigs(1);
+     params.setFitSignaturesFile(signatureFileId);
+     params.setFitRareSignaturesFile(signatureFileId);
+     params.setSkip("catalogue");
+
+     toolRunner.execute(MutationalSignatureAnalysis.class, params, new ObjectMap(ParamConstants.STUDY_PARAM, CANCER_STUDY),
+             outDir, null, token);
+
+     // Assert existence BEFORE reading: reading a missing file would throw and hide the assertion
+     java.io.File coeffsFile = outDir.resolve(MutationalSignatureAnalysis.SIGNATURE_COEFFS_FILENAME).toFile();
+     assertTrue(coeffsFile.exists());
+     byte[] bytes = Files.readAllBytes(coeffsFile.toPath());
+     System.out.println(new String(bytes));
+
+     java.io.File fittingFile = outDir.resolve(MutationalSignatureAnalysis.MUTATIONAL_SIGNATURE_FITTING_DATA_MODEL_FILENAME).toFile();
+     assertTrue(fittingFile.exists());
+     bytes = Files.readAllBytes(fittingFile.toPath());
+     System.out.println(new String(bytes));
+ }
+
+ /**
+  * End-to-end HRDetect test: stores an SNV and an SV signature in the cancer sample quality control, runs one
+  * fitting for each of them, and then runs the HRDetect analysis referring to both fittings.
+  * The test checks that the HRDetect scores file exists and that the HRDetect result with the requested ID,
+  * read back from the sample quality control, contains the expected "del.mh.prop" score.
+  */
+ @Test
+ public void testHRDetect() throws Exception {
+     Path snvFittingOutDir = Paths.get(opencga.createTmpOutdir("_snv_fitting"));
+     Path svFittingOutDir = Paths.get(opencga.createTmpOutdir("_sv_fitting"));
+     Path hrdetectOutDir = Paths.get(opencga.createTmpOutdir("_hrdetect"));
+
+     // Read SNV signature
+     URI uri = getResourceUri("mutational-signature-catalogue-snv.json");
+     Path path = Paths.get(uri.getPath());
+     Signature snvSignature = JacksonUtils.getDefaultObjectMapper().readerFor(Signature.class).readValue(path.toFile());
+
+     // Read SV signature
+     uri = getResourceUri("mutational-signature-sv.json");
+     path = Paths.get(uri.getPath());
+     Signature svSignature = JacksonUtils.getDefaultObjectMapper().readerFor(Signature.class).readValue(path.toFile());
+
+     // Update quality control for the cancer sample
+     SampleQualityControl qc = new SampleQualityControl();
+     qc.getVariant().setSignatures(Arrays.asList(snvSignature, svSignature));
+     SampleUpdateParams updateParams = new SampleUpdateParams().setQualityControl(qc);
+     catalogManager.getSampleManager().update(CANCER_STUDY, cancer_sample, updateParams, null, token);
+
+     // SNV fitting
+     MutationalSignatureAnalysisParams params = new MutationalSignatureAnalysisParams();
+     params.setSample(cancer_sample);
+     params.setId(snvSignature.getId());
+     params.setFitId("snv-fitting-1");
+     params.setFitMethod("FitMS");
+     params.setFitSigVersion("RefSigv2");
+     params.setFitOrgan("Breast");
+     params.setFitNBoot(100);
+     params.setFitThresholdPerc(5.0f);
+     params.setFitThresholdPval(0.05f);
+     params.setFitMaxRareSigs(1);
+     params.setSkip("catalogue");
+
+     toolRunner.execute(MutationalSignatureAnalysis.class, params, new ObjectMap(ParamConstants.STUDY_PARAM, CANCER_STUDY),
+             snvFittingOutDir, null, token);
+
+     java.io.File snvSignatureFittingFile = snvFittingOutDir.resolve(MutationalSignatureAnalysis.MUTATIONAL_SIGNATURE_FITTING_DATA_MODEL_FILENAME).toFile();
+     assertTrue(snvSignatureFittingFile.exists());
+     SignatureFitting snvFitting = JacksonUtils.getDefaultObjectMapper().readerFor(SignatureFitting.class).readValue(snvSignatureFittingFile);
+     assertEquals(params.getFitId(), snvFitting.getId());
+
+     // SV fitting: link the signatures file into catalog and look up its file ID by name
+     uri = getResourceUri("2019_01_10_all_PCAWG_sigs_rearr.tsv");
+     catalogManager.getFileManager().createFolder(CANCER_STUDY, "signature", true, "", new QueryOptions(), token);
+     catalogManager.getFileManager().link(CANCER_STUDY, uri, "signature", new ObjectMap(), token);
+     String filename = Paths.get(uri.toURL().getFile()).toFile().getName();
+     File file = catalogManager.getFileManager().get(CANCER_STUDY, filename, null, token).first();
+     String signatureFileId = file.getId();
+
+     params = new MutationalSignatureAnalysisParams();
+     params.setSample(cancer_sample);
+     params.setId(svSignature.getId());
+     params.setFitId("fitting-sv-1");
+     params.setFitMethod("FitMS");
+     params.setFitSigVersion("RefSigv2");
+     params.setFitOrgan("Breast");
+     params.setFitNBoot(100);
+     params.setFitThresholdPerc(5.0f);
+     params.setFitThresholdPval(0.05f);
+     params.setFitMaxRareSigs(1);
+     params.setFitSignaturesFile(signatureFileId);
+     params.setFitRareSignaturesFile(signatureFileId);
+     params.setSkip("catalogue");
+
+     toolRunner.execute(MutationalSignatureAnalysis.class, params, new ObjectMap(ParamConstants.STUDY_PARAM, CANCER_STUDY),
+             svFittingOutDir, null, token);
+
+     java.io.File svSignatureFittingFile = svFittingOutDir.resolve(MutationalSignatureAnalysis.MUTATIONAL_SIGNATURE_FITTING_DATA_MODEL_FILENAME).toFile();
+     assertTrue(svSignatureFittingFile.exists());
+     SignatureFitting svFitting = JacksonUtils.getDefaultObjectMapper().readerFor(SignatureFitting.class).readValue(svSignatureFittingFile);
+     assertEquals(params.getFitId(), svFitting.getId());
+
+     // HRDetect
+     HRDetectAnalysisParams hrdParams = new HRDetectAnalysisParams();
+     hrdParams.setId("hrd-1");
+     hrdParams.setSampleId(cancer_sample);
+     hrdParams.setSnvFittingId(snvFitting.getId());
+     hrdParams.setSvFittingId(svFitting.getId());
+     hrdParams.setCnvQuery("{\"sample\": \"" + cancer_sample + "\", \"type\": \"" + VariantType.CNV + "\"}");
+     hrdParams.setIndelQuery("{\"sample\": \"" + cancer_sample + "\", \"type\": \"" + VariantType.INDEL + "\"}");
+     hrdParams.setBootstrap(true);
+
+     toolRunner.execute(HRDetectAnalysis.class, hrdParams, new ObjectMap(ParamConstants.STUDY_PARAM, CANCER_STUDY), hrdetectOutDir, null, token);
+
+     // Assert existence BEFORE reading: reading a missing file would throw and hide the assertion
+     java.io.File hrDetectFile = hrdetectOutDir.resolve(HRDetectAnalysis.HRDETECT_SCORES_FILENAME_DEFAULT).toFile();
+     assertTrue(hrDetectFile.exists());
+     byte[] bytes = Files.readAllBytes(hrDetectFile.toPath());
+     System.out.println(new String(bytes));
+
+     OpenCGAResult<Sample> sampleResult = catalogManager.getSampleManager().get(CANCER_STUDY, cancer_sample, QueryOptions.empty(), token);
+     Sample sample = sampleResult.first();
+     List<HRDetect> hrDetects = sample.getQualityControl().getVariant().getHrDetects();
+     for (HRDetect hrDetect : hrDetects) {
+         // Match on the ID requested for this run (the entry was previously compared with itself)
+         if (hrDetect.getId().equals(hrdParams.getId())) {
+             if (hrDetect.getScores().containsKey("del.mh.prop")) {
+                 // JUnit order is (expected, actual, delta)
+                 Assert.assertEquals(0.172413793103448f, hrDetect.getScores().getFloat("del.mh.prop"), 0.00001f);
+                 return;
+             }
+         }
+     }
+     fail("HRDetect result not found in sample quality control");
+ }
+
+ /**
+  * Copies a pre-computed HRDetect scores resource into a fresh output directory under the default scores file
+  * name, sets up an HRDetectAnalysis on that directory and checks that parseResult returns the expected
+  * "hrd" and "Probability" scores.
+  */
+ @Test
+ public void testHRDetectParseResults() throws Exception {
+     // Stage the scores resource where the analysis expects to find its output
+     Path outDir = Paths.get(opencga.createTmpOutdir("_hrdetect"));
+     java.io.File scoresResource = Paths.get(getResourceUri("hrdetect_output_38.tsv").getPath()).toFile();
+     java.io.File scoresTarget = outDir.resolve(HRDetectAnalysis.HRDETECT_SCORES_FILENAME_DEFAULT).toFile();
+     FileUtils.copyFile(scoresResource, scoresTarget);
+
+     HRDetectAnalysisParams analysisParams = new HRDetectAnalysisParams();
+     analysisParams.setId("hrd-1");
+     analysisParams.setSampleId(cancer_sample);
+     analysisParams.setSnvFittingId("snvFittingId");
+     analysisParams.setSvFittingId("svFittingId");
+     analysisParams.setCnvQuery("{\"sample\": \"" + cancer_sample + "\", \"type\": \"" + VariantType.CNV + "\"}");
+     analysisParams.setIndelQuery("{\"sample\": \"" + cancer_sample + "\", \"type\": \"" + VariantType.INDEL + "\"}");
+
+     HRDetectAnalysis hrDetectAnalysis = new HRDetectAnalysis();
+     hrDetectAnalysis.setUp(opencga.getOpencgaHome().toString(), catalogManager, variantStorageManager, analysisParams.toObjectMap(),
+             outDir, "job-1", token);
+
+     HRDetect parsed = hrDetectAnalysis.parseResult(outDir);
+     parsed.getScores().forEach((name, score) -> System.out.println(name + " -> " + score));
+
+     assertTrue(parsed.getScores().containsKey("hrd"));
+     assertEquals(-0.102769986f, parsed.getScores().getFloat("hrd"), 0.00001f);
+     assertTrue(parsed.getScores().containsKey("Probability"));
+     assertEquals(0.998444f, parsed.getScores().getFloat("Probability"), 0.00001f);
+ }
+
+ public void checkExecutionResult(ExecutionResult er) {
checkExecutionResult(er, true);
}
diff --git a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtilsTest.java b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtilsTest.java
index b789e5b96c7..c03587a1011 100644
--- a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtilsTest.java
+++ b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtilsTest.java
@@ -188,7 +188,7 @@ public static void setUp() throws Exception {
.setVersion("v5")
.setDefaultSpecies("hsapiens")
.setRest(new RestConfig(Collections.singletonList("https://ws.zettagenomics.com/cellbase"), 10000));
- cellBaseUtils = new CellBaseUtils(new CellBaseClient(clientConfiguration), assembly);
+ cellBaseUtils = new CellBaseUtils(new CellBaseClient(clientConfiguration));
Region cadm1 = cellBaseUtils.getGeneRegion("CADM1");
diff --git a/opencga-app/app/analysis/mutational-signature/sv_clustering.R b/opencga-app/app/analysis/mutational-signature/sv_clustering.R
new file mode 100644
index 00000000000..83f5fe2d459
--- /dev/null
+++ b/opencga-app/app/analysis/mutational-signature/sv_clustering.R
@@ -0,0 +1,884 @@
+library(optparse)
+
+#' The BEDPE data frame should contain the following columns: "chrom1", "start1", "end1", "chrom2", "start2", "end2" and "sample" (sample name).
+
+## Cluster the rearrangements of a BEDPE table and write the annotated table to a file.
+##
+## sv_bedpe: path of a tab-separated BEDPE file with a header line, or an already
+##           loaded data frame
+## out_fpath: path of the tab-separated output file
+## kmin, kmin.samples, gamma.sdev, PEAK.FACTOR, thresh.dist, gamma, kmin.filter:
+##           passed unchanged to rearrangement.clustering_bedpe
+##
+## The function is called for its side effect (the output file); its value is the
+## value of write.table.
+clustering <- function(sv_bedpe,
+                       out_fpath,
+                       kmin,
+                       kmin.samples,
+                       gamma.sdev,
+                       PEAK.FACTOR,
+                       thresh.dist,
+                       gamma,
+                       kmin.filter) {
+  ## Read from the argument rather than from the global 'args' vector, so that the
+  ## function does not depend on how the script was invoked.
+  if (!is.data.frame(sv_bedpe)) {
+    sv_bedpe <- read.table(sv_bedpe, sep = "\t", header = TRUE, stringsAsFactors = FALSE, check.names = FALSE)
+  }
+  clustering.result <- rearrangement.clustering_bedpe(sv_bedpe=sv_bedpe,
+                                                      kmin=kmin,
+                                                      kmin.samples=kmin.samples,
+                                                      gamma.sdev=gamma.sdev,
+                                                      PEAK.FACTOR=PEAK.FACTOR,
+                                                      thresh.dist=thresh.dist,
+                                                      gamma=gamma,
+                                                      kmin.filter=kmin.filter)
+  sv_bedpe <- clustering.result$sv_bedpe
+  write.table(sv_bedpe, file = out_fpath, row.names = FALSE, sep = "\t", quote = FALSE)
+}
+
+## Add, per chromosome, the previous position ('prevPos') and the distance to it
+## ('distPrev') to a table with columns 'chr' and 'position'.
+##
+## subs.type: data frame with at least the columns 'chr' and 'position'
+## first.chrom.na: if TRUE the first row of every chromosome gets prevPos = NA,
+##                 otherwise prevPos = 0
+##
+## Returns the rows grouped by chromosome (in order of first appearance) and sorted
+## by position within each chromosome. Distances equal to 0 are replaced by 1.
+calcIntermutDist <- function (subs.type, first.chrom.na = FALSE) {
+
+  ## value used as "previous position" for the first row of each chromosome
+  first.prev <- if (first.chrom.na) NA else 0
+
+  processed <- data.frame()
+  for (chrom in unique(subs.type$chr)) {
+    ## rows of this chromosome only, sorted by position
+    on.chrom <- subs.type[which(subs.type$chr == chrom), , drop = FALSE]
+    on.chrom <- on.chrom[order(on.chrom$position), ]
+
+    ## previous position = positions shifted down by one row
+    on.chrom$prevPos <- c(first.prev, head(on.chrom$position, -1))
+    on.chrom$distPrev <- on.chrom$position - on.chrom$prevPos
+
+    processed <- rbind(processed, on.chrom)
+  }
+
+  processed$distPrev[processed$distPrev == 0] <- 1
+  processed
+}
+
+## Annotate candidate regions with a binomial p-value and a breakpoint density.
+##
+## kat.regions: data frame with the columns 'number.bps', 'start.bp' and 'end.bp'
+## chrom.bps: data frame with a 'pos' column; only used when bp.rate is NA
+## bp.rate: background breakpoint rate; when NA it is estimated as the number of
+##          rows of chrom.bps divided by the span between its smallest and largest 'pos'
+##
+## Returns kat.regions with three added columns: 'pvalue', 'd.seg' (breakpoints per
+## unit of region length) and 'rate.factor' (d.seg divided by bp.rate).
+assignPvalues <- function(kat.regions, chrom.bps, bp.rate=NA) {
+
+  if (is.na(bp.rate)) {
+    ## estimate the rate from the breakpoints themselves
+    span.bp <- max(chrom.bps$pos) - min(chrom.bps$pos)
+    bp.rate <- nrow(chrom.bps) / span.bp
+  }
+
+  region.length <- kat.regions$end.bp - kat.regions$start.bp
+
+  ## assume binomial distribution
+  kat.regions$pvalue <- 1 - pbinom(kat.regions$number.bps, region.length, bp.rate)
+  kat.regions$d.seg <- (kat.regions$number.bps / region.length)
+  kat.regions$rate.factor <- kat.regions$d.seg / bp.rate
+
+  kat.regions
+}
+
+## Fill in the summary columns of every candidate region.
+##
+## kat.regions.all: data frame of regions; 'firstBp' and 'lastBp' give the first and
+##                  last row of 'subs' belonging to each region
+## subs: data frame of breakpoints with the columns 'pos', 'is.clustered' and 'sample',
+##       and optionally 'pf'
+## segInterDist: optional vector indexed like the rows of 'subs'; when non-empty it is
+##               used to fill 'avgDist.bp' for regions where that value is still NA
+##
+## Returns kat.regions.all with start.bp, end.bp, length.bp, number.bps,
+## number.bps.clustered, avgDist.bp, no.samples and (if 'pf' is present) no.del,
+## no.dup, no.inv and no.trn updated. A table without rows is returned unchanged.
+hotspotInfo <- function(kat.regions.all, subs, segInterDist=c()) {
+  if (!(nrow(kat.regions.all) > 0)) {
+    return(kat.regions.all)
+  }
+
+  for (r in seq_len(nrow(kat.regions.all))) {
+
+    ## rows of the breakpoints in the hotspot
+    members <- kat.regions.all$firstBp[r]:kat.regions.all$lastBp[r]
+    hotspot.bps <- subs[members, ]
+
+    kat.regions.all[r, 'start.bp'] <- min(hotspot.bps$pos)
+    kat.regions.all[r, 'end.bp'] <- max(hotspot.bps$pos)
+    kat.regions.all[r, 'length.bp'] <- kat.regions.all[r, 'end.bp'] - kat.regions.all[r, 'start.bp']
+    kat.regions.all[r, 'number.bps'] <- nrow(hotspot.bps)
+    kat.regions.all[r, 'number.bps.clustered'] <- sum(hotspot.bps$is.clustered)
+
+    ## only (re)compute the average distance where it has not been set yet
+    have.dist <- length(segInterDist) > 0
+    avg.missing <- is.na(kat.regions.all[r, 'avgDist.bp'])
+    if (have.dist & avg.missing) {
+      kat.regions.all[r, 'avgDist.bp'] <- mean(segInterDist[members])
+    }
+    kat.regions.all[r, 'no.samples'] <- length(unique(hotspot.bps$sample))
+
+    if ('pf' %in% colnames(hotspot.bps)) {
+      pf <- hotspot.bps$pf
+      kat.regions.all[r, 'no.del'] <- sum(pf == 2, na.rm = TRUE)
+      kat.regions.all[r, 'no.dup'] <- sum(pf == 4, na.rm = TRUE)
+      kat.regions.all[r, 'no.inv'] <- sum(pf == 1 | pf == 8, na.rm = TRUE)
+      kat.regions.all[r, 'no.trn'] <- sum(pf == 32, na.rm = TRUE)
+    }
+  }
+
+  kat.regions.all
+}
+
+## Extract the regions whose fitted inter-breakpoint distance is at or below a threshold.
+##
+## res: segmentation result; only res$yhat (fitted value per row of 'subs') is used
+## imd: threshold on the fitted distance; rows with yhat <= imd belong to a peak
+## subs: data frame of breakpoints of one chromosome with the columns 'chr' and 'pos',
+##       plus the columns read by hotspotInfo
+## kmin.samples: minimum number of distinct samples a region must contain
+## pvalue.thresh: regions with a p-value above this value are dropped
+## rate.factor.thresh: regions with a rate factor below this value are dropped
+## doMerging: if TRUE, directly adjacent regions are merged and their summaries recomputed
+## kmin.filter: if not NA, minimum number of breakpoints a region must contain
+## bp.rate: background breakpoint rate passed to assignPvalues (NA = estimate it)
+##
+## Returns a data frame with one row per retained region; an empty data frame when
+## no row is at or below the threshold.
+extract.kat.regions <- function (res, imd, subs, kmin.samples=10, pvalue.thresh=1, rate.factor.thresh=1, doMerging=FALSE, kmin.filter=NA, bp.rate=NA) {
+
+  segInterDist <- res$yhat
+  kataegis.threshold <- imd
+
+  kat.regions.all = data.frame()
+
+  positions <- subs$pos
+
+  katLoci = (segInterDist<=kataegis.threshold) # flag specifying if a point is in a peak
+
+  if(sum(katLoci)>0) {
+
+    ## a region starts where the flag switches on, or where two consecutive
+    ## in-peak points have different fitted values (boundary between two peaks)
+    start.regions = which(katLoci[-1] & !(katLoci[-(length(katLoci))]) # katLoci breakpoints
+                          | (katLoci[-1] & katLoci[-(length(katLoci))] & segInterDist[-1] != segInterDist[-length(katLoci)] )
+    )+1 # endpoints between peaks
+    if (katLoci[1]) {start.regions <- c(1, start.regions)}
+
+    end.regions = which(!(katLoci[-1]) & katLoci[-(length(katLoci))] #
+                        | (katLoci[-1] & katLoci[-(length(katLoci))] & segInterDist[-1] != segInterDist[-length(katLoci)] )
+    ) #
+    if (katLoci[length(katLoci)]) {end.regions <- c( end.regions, length(katLoci))}
+
+    # handling special cases
+    if(length(end.regions)+length(start.regions)>0) { # if there are any discontinuities in the segmentation at all
+      if (length(end.regions)==1 & length(start.regions)==0){
+        start.regions <- 1
+      } else if (length(start.regions)==1 & length(end.regions)==0){
+        end.regions <- length(positions)
+      } else if ((end.regions[1]<start.regions[1]) && (start.regions[length(start.regions)]>end.regions[length(end.regions)])) {
+        # starts and ends are the same length, but missing both endpoints
+
+        start.regions <- c(1,start.regions)
+        end.regions <- c(end.regions, length(positions))
+
+      } else if (end.regions[1]<start.regions[1]) {
+        # starts will be one shorter
+
+        start.regions <- c(1, start.regions)
+
+      } else if (start.regions[length(start.regions)]>end.regions[length(end.regions)]) {
+        # ends will be one shorter
+
+        end.regions <- c(end.regions, length(positions))
+      }
+
+      ## fail loudly instead of dropping into the interactive debugger, which
+      ## is not usable when the script runs in batch mode
+      if (length(start.regions)!=length(end.regions)) {
+        stop("extract.kat.regions: found ", length(start.regions), " region starts but ",
+             length(end.regions), " region ends")
+      }
+
+      # prepare a data structure that will be later filled up
+      kat.regions.all <- data.frame(
+        chr=subs$chr[1],
+        start.bp=rep(NA,length(start.regions)), # start coordinate [bp]
+        end.bp=rep(NA,length(start.regions)), # end coordinate [bp]
+        length.bp=rep(NA,length(start.regions)), # length [bp]
+        number.bps=rep(NA,length(start.regions)),
+        number.bps.clustered=rep(NA,length(start.regions)),
+        avgDist.bp=rep(NA,length(start.regions)),
+        no.samples=rep(NA,length(start.regions)),
+        no.del =rep(NA,length(start.regions)),
+        no.dup =rep(NA,length(start.regions)),
+        no.inv= rep(NA,length(start.regions)),
+        no.trn = rep(NA,length(start.regions)),
+        firstBp=start.regions,
+        lastBp=end.regions )
+
+      kat.regions.all <- hotspotInfo(kat.regions.all, subs, segInterDist)
+
+      ## size of the step in the fitted value when entering / leaving each region
+      step.segInterDist.left <- rep(NA, length(segInterDist))
+      step.segInterDist.left[2:length(segInterDist)] <- segInterDist[2:length(segInterDist)]- segInterDist[1:(length(segInterDist)-1)]
+      step.segInterDist.right <- rep(NA, length(segInterDist))
+      step.segInterDist.right[1:(length(segInterDist)-1)] <- segInterDist[1:(length(segInterDist)-1)]- segInterDist[2:(length(segInterDist))]
+
+      kat.regions.all$step.left <- step.segInterDist.left[start.regions]
+      kat.regions.all$step.right <- step.segInterDist.right[end.regions]
+
+      # run the filters on the regions of increased frequency
+      # make sure there are at least kmin samples
+      if ((!is.null(kat.regions.all)) && (nrow(kat.regions.all)>0)) {
+        kat.regions.all <- subset(kat.regions.all, no.samples>=kmin.samples)
+      }
+
+      # make sure there are at least kmin.filter breakpoints
+      if (!is.na(kmin.filter)) {
+        kat.regions.all <- subset(kat.regions.all, number.bps>=kmin.filter)
+      }
+
+      # make sure the p-value is less than the threshold
+      if ((!is.null(kat.regions.all)) && (nrow(kat.regions.all)>0)) {
+        kat.regions.all <- assignPvalues(kat.regions.all, subs, bp.rate=bp.rate)
+        kat.regions.all <- subset(kat.regions.all, pvalue<=pvalue.thresh)
+        # only keep the hotspots that exceed the threshold
+        kat.regions.all <- subset(kat.regions.all, rate.factor>=rate.factor.thresh)
+      }
+
+      # merge segments if both were found to be peaks
+      if (doMerging) {
+        if(nrow(kat.regions.all)>1){
+          for(r in 2:nrow(kat.regions.all)){
+            if (kat.regions.all$lastBp[r-1] == (kat.regions.all$firstBp[r]-1)) {
+              # merge two segments
+              kat.regions.all$firstBp[r] <- kat.regions.all$firstBp[r-1]
+              kat.regions.all$firstBp[r-1] <- NA
+              kat.regions.all$lastBp[r-1] <- NA
+              kat.regions.all$avgDist.bp[r] <- NA # this will need to be updated as segments are being merged
+            }
+          }
+        }
+        # remove some of the merged segments
+        kat.regions.all <- subset(kat.regions.all, !is.na(firstBp) & !is.na(lastBp))
+
+        # update the info on hotspots that might have changed when they were merged
+        kat.regions.all <- hotspotInfo( kat.regions.all , subs, segInterDist)
+        kat.regions.all <- assignPvalues(kat.regions.all, subs, bp.rate=bp.rate)
+      } # end merging
+
+    } # end if there are discontinuities in the segmentation
+  } # if there are any points under the inter-mutation distance threshold
+
+  kat.regions.all
+
+}
+
+#PCF-ALGORITHM (KL):
+### EXACT version
+exactPcf <- function(y, kmin=5, gamma, yest) {
+  ## Implementation of exact PCF by Potts-filtering
+  ## y: numeric vector to be segmented
+  ## kmin: minimal length of plateaus
+  ## gamma: penalty for each discontinuity
+  ## yest: if TRUE the fitted value of every element is returned as 'yhat'
+  ##
+  ## Returns a list with Lengde (segment lengths), sta (segment start indices),
+  ## mean (segment means), nIntervals (number of segments) and, if yest, yhat.
+  N <- length(y)
+  yhat <- rep(0,N);
+  ## too short to hold two plateaus: a single segment at the overall mean
+  if (N < 2*kmin) {
+    if (yest) {
+      return(list(Lengde = N, sta = 1, mean = mean(y), nIntervals=1, yhat=rep(mean(y),N)))
+    } else {
+      return(list(Lengde = N, sta = 1, mean = mean(y), nIntervals=1))
+    }
+  }
+  initSum <- sum(y[1:kmin])
+  initKvad <- sum(y[1:kmin]^2)
+  initAve <- initSum/kmin;
+  bestCost <- rep(0,N)
+  bestCost[kmin] <- initKvad - initSum*initAve
+  bestSplit <- rep(0,N)
+  bestAver <- rep(0,N)
+  bestAver[kmin] <- initAve
+  Sum <- rep(0,N)
+  Kvad <- rep(0,N)
+  Aver <- rep(0,N)
+  Cost <- rep(0,N)
+  kminP1=kmin+1
+  ## positions kmin+1 .. 2*kmin-1 cannot end a second plateau yet:
+  ## only the single-segment solution is possible
+  for (k in (kminP1):(2*kmin-1)) {
+    Sum[kminP1:k]<-Sum[kminP1:k]+y[k]
+    Aver[kminP1:k] <- Sum[kminP1:k]/((k-kmin):1)
+    Kvad[kminP1:k] <- Kvad[kminP1:k]+y[k]^2
+    bestAver[k] <- (initSum+Sum[kminP1])/k
+    bestCost[k] <- (initKvad+Kvad[kminP1])-k*bestAver[k]^2
+  }
+  ## from 2*kmin on, compare the best split position with the single-segment solution
+  for (n in (2*kmin):N) {
+    yn <- y[n]
+    yn2 <- yn^2
+    Sum[kminP1:n] <- Sum[kminP1:n]+yn
+    Aver[kminP1:n] <- Sum[kminP1:n]/((n-kmin):1)
+    Kvad[kminP1:n] <- Kvad[kminP1:n]+yn2
+    nMkminP1=n-kmin+1
+    Cost[kminP1:nMkminP1] <- bestCost[kmin:(n-kmin)]+Kvad[kminP1:nMkminP1]-Sum[kminP1:nMkminP1]*Aver[kminP1:nMkminP1]+gamma
+    Pos <- which.min(Cost[kminP1:nMkminP1])+kmin
+    cost <- Cost[Pos]
+    aver <- Aver[Pos]
+    totAver <- (Sum[kminP1]+initSum)/n
+    totCost <- (Kvad[kminP1]+initKvad) - n*totAver*totAver
+
+    ## fail loudly instead of dropping into the interactive debugger, which
+    ## is not usable when the script runs in batch mode
+    if (length(totCost)==0 || length(cost)==0) {
+      stop("exactPcf: could not compute a cost for position ", n)
+    }
+    if (totCost < cost) {
+      Pos <- 1
+      cost <- totCost
+      aver <- totAver
+    }
+    bestCost[n] <- cost
+    bestAver[n] <- aver
+    bestSplit[n] <- Pos-1
+  }
+  ## first backward pass: count the segments (and fill yhat if requested)
+  n <- N
+  antInt <- 0
+  if(yest){
+    while (n > 0) {
+      yhat[(bestSplit[n]+1):n] <- bestAver[n]
+      n <- bestSplit[n]
+      antInt <- antInt+1
+    }
+  } else {
+    while (n > 0) {
+      n <- bestSplit[n]
+      antInt <- antInt+1
+    }
+  }
+  ## second backward pass: collect start, length and mean of every segment
+  n <- N #nProbes
+  lengde <- rep(0,antInt)
+  start <- rep(0,antInt)
+  verdi <- rep(0,antInt)
+  oldSplit <- n
+  antall <- antInt
+  while (n > 0) {
+    start[antall] <- bestSplit[n]+1
+    lengde[antall] <- oldSplit-bestSplit[n]
+    verdi[antall] <- bestAver[n]
+    n <- bestSplit[n]
+    oldSplit <- n
+    antall <- antall-1
+  }
+  if (yest) {
+    return(list(Lengde = lengde, sta = start, mean = verdi, nIntervals=antInt, yhat=yhat))
+  } else {
+    return(list(Lengde = lengde, sta = start, mean = verdi, nIntervals=antInt))
+  }
+}
+
+
+
+## Choose the PCF implementation and filter fractions from the length of x:
+## fewer than 1000 values   -> runFastPcf with fractions 0.15 / 0.15
+## fewer than 15000 values  -> runFastPcf with fractions 0.12 / 0.05
+## otherwise                -> runPcfSubset with fractions 0.12 / 0.05
+## kmin, gamma and yest are passed on unchanged; the chosen function's result is returned.
+selectFastPcf <- function(x,kmin,gamma,yest){
+  n.values <- length(x)
+  if (n.values < 1000) {
+    return(runFastPcf(x,kmin,gamma,0.15,0.15,yest))
+  }
+  if (n.values < 15000) {
+    return(runFastPcf(x,kmin,gamma,0.12,0.05,yest))
+  }
+  return(runPcfSubset(x,kmin,gamma,0.12,0.05,yest))
+}
+
+
+## Segment x in one pass: mark potential breakpoints with filterMarkS4, compact x
+## between those marks and run PottsCompact on the compacted arrays.
+## frac1 and frac2 are passed to filterMarkS4; kmin, gamma and yest to PottsCompact.
+runFastPcf <- function(x,kmin,gamma,frac1,frac2,yest){
+  n.values <- length(x)
+
+  ## filter length: at most 8, smaller for short inputs
+  filter.len <- min(8, floor(n.values/6))
+
+  candidates <- filterMarkS4(x,kmin,filter.len,1,frac1,frac2,0.02,0.9)
+  ## the last position always closes a segment
+  candidates[n.values] <- TRUE
+
+  packed <- compact(x,candidates)
+  PottsCompact(kmin,gamma,packed$Nr,packed$Sum,packed$Sq,yest)
+}
+
+## Segment a long vector x by refining the potential breakpoints window by window
+## before running PottsCompact once on the whole vector.
+## frac1 and frac2 are passed to filterMarkS4; kmin, gamma and yest to the Potts steps.
+## NOTE(review): the first window indexes 1:SUBSIZE, so x is presumably expected to
+## hold at least SUBSIZE values (selectFastPcf only calls this for 15000 or more).
+runPcfSubset <- function(x,kmin,gamma,frac1,frac2,yest){
+ ## window size; consecutive windows advance by 4/5 of it
+ SUBSIZE <- 5000
+ antGen <- length(x)
+ ## raw potential breakpoints for the whole vector
+ mark<-filterMarkS4(x,kmin,8,1,frac1,frac2,0.02,0.9)
+ ## first window: force a mark on its last position, compact and refine with markWithPotts
+ markInit<-c(mark[1:(SUBSIZE-1)],TRUE)
+ compX<-compact(x[1:SUBSIZE],markInit)
+ ## mark2 collects the refined marks on the original scale
+ mark2 <- rep(FALSE,antGen)
+ mark2[1:SUBSIZE] <- markWithPotts(kmin,gamma,compX$Nr,compX$Sum,compX$Sq,SUBSIZE)
+ mark2[4*SUBSIZE/5]<-TRUE
+ start <- 4*SUBSIZE/5+1
+ while(start + SUBSIZE < antGen){
+ slutt<-start+SUBSIZE-1
+ ## refined marks before 'start', raw filter marks inside the current window
+ markSub<-c(mark2[1:(start-1)],mark[start:slutt])
+ markSub[slutt] <- TRUE
+ compX<-compact(x[1:slutt],markSub)
+ mark2[1:slutt] <- markWithPotts(kmin,gamma,compX$Nr,compX$Sum,compX$Sq,slutt)
+ start <- start+4*SUBSIZE/5
+ mark2[start-1]<-TRUE
+ }
+ ## final pass over the whole vector: refined marks plus raw marks for the remaining tail
+ markSub<-c(mark2[1:(start-1)],mark[start:antGen])
+ compX<-compact(x,markSub)
+ result <- PottsCompact(kmin,gamma,compX$Nr,compX$Sum,compX$Sq,yest)
+ return(result)
+}
+
+PottsCompact <- function(kmin, gamma, nr, res, sq, yest) {
+ ## Potts filtering on compact array;
+ ## kmin: minimal length of plateau
+ ## gamma: penalty for discontinuity
+ ## nr: number of values between breakpoints
+ ## res: sum of values between breakpoints
+ ## sq: sum of squares of values between breakpoints
+ ## yest: passed to findEst; if TRUE the result also contains 'yhat'
+ ##
+ ## Returns the list built by findEst, except when fewer than 2*kmin values are
+ ## available in total: then the list holds only 'yhat' (the overall mean repeated).
+
+ N <- length(nr)
+ Ant <- rep(0,N)
+ Sum <- rep(0,N)
+ Kvad <- rep(0,N)
+ Cost <- rep(0,N)
+ ## too few values for two plateaus: single segment at the overall mean
+ if (sum(nr) < 2*kmin){
+ estim <- list()
+ estim$yhat <- rep( sum(res)/sum(nr),sum(nr))
+ return(estim)
+ }
+ initAnt <- nr[1]
+ initSum <- res[1]
+ initKvad <- sq[1]
+ initAve <- initSum/initAnt
+ bestCost <- rep(0,N)
+ bestCost[1] <- initKvad - initSum*initAve
+ bestSplit <- rep(0,N)
+ k <- 2
+ ## while fewer than 2*kmin values are covered only the single-segment cost is recorded
+ while(sum(nr[1:k]) < 2*kmin) {
+ Ant[2:k] <- Ant[2:k]+nr[k]
+ Sum[2:k]<-Sum[2:k]+res[k]
+ Kvad[2:k] <- Kvad[2:k]+sq[k]
+ bestCost[k] <- (initKvad+Kvad[2])-(initSum+Sum[2])^2/(initAnt+Ant[2])
+ k <- k+1
+ }
+ for (n in k:N) {
+ ## Ant/Sum/Kvad[j] accumulate count, sum and sum of squares of blocks j..n
+ Ant[2:n] <- Ant[2:n]+nr[n]
+ Sum[2:n] <- Sum[2:n]+res[n]
+ Kvad[2:n] <- Kvad[2:n]+sq[n]
+ ## the last segment must hold at least kmin values
+ limit <- n
+ while(limit > 2 & Ant[limit] < kmin) {limit <- limit-1}
+ ## 1:limit-1 is (1:limit)-1, i.e. 0:(limit-1); index 0 is dropped, leaving 1:(limit-1)
+ Cost[2:limit] <- bestCost[1:limit-1]+Kvad[2:limit]-Sum[2:limit]^2/Ant[2:limit]
+ Pos <- which.min(Cost[2:limit])+ 1
+ cost <- Cost[Pos]+gamma
+ ## cost of covering blocks 1..n with a single segment (no penalty)
+ totCost <- (Kvad[2]+initKvad) - (Sum[2]+initSum)^2/(Ant[2]+initAnt)
+ if (totCost < cost) {
+ Pos <- 1
+ cost <- totCost
+ }
+ bestCost[n] <- cost
+ bestSplit[n] <- Pos-1
+ }
+
+ if (yest) {
+ ## NOTE(review): this yhat is never read; the returned yhat comes from findEst
+ yhat<-rep(0,N)
+ res<-findEst(bestSplit,N,nr,res,TRUE)
+ } else {
+ res<-findEst(bestSplit,N,nr,res,FALSE)
+ }
+ return(res)
+}
+
+## Accumulate, for every stretch ending at a marked position, the number of
+## observations, their sum and their sum of squares.
+## y: numeric vector; mark: logical vector selecting the potential breakpoints
+## Returns list(Nr, Sum, Sq).
+## Note: a function with the same name is defined again further down in this
+## file; when the whole file is sourced the later definition is the one in effect.
+compact <- function(y,mark){
+  marked.pos <- seq_along(y)[mark]
+  Ncomp <- length(marked.pos)
+  earlier <- 1:(Ncomp-1)
+
+  ## difference between a cumulative series taken at each mark and at the mark before
+  between.marks <- function(at.marks) at.marks - c(0, at.marks[earlier])
+
+  counts <- between.marks(marked.pos)
+  sums <- between.marks(cumsum(y)[mark])
+  squares <- between.marks(cumsum(y^2)[mark])
+  return(list(Nr=counts,Sum=sums,Sq=squares))
+}
+
+## Turn the split pointers of a Potts run on compacted data into segments on the
+## original scale.
+## bestSplit: for every compact position, the compact position where the previous
+##            segment ends (0 = none)
+## N: number of compact positions
+## Nr: number of original observations per compact position
+## Sum: sum of the original observations per compact position
+## yest: if TRUE the per-observation fitted values 'yhat' are added to the result
+## Returns list(Lengde, sta, mean, nIntervals[, yhat]).
+findEst <- function(bestSplit,N,Nr,Sum,yest){
+  ## follow the split pointers backwards from N, collecting segment lengths
+  seg.len <- rep(0,N)
+  n.seg <- 0
+  pos <- N
+  while (pos > 0) {
+    n.seg <- n.seg+1
+    seg.len[n.seg] <- pos-bestSplit[pos]
+    pos <- bestSplit[pos]
+  }
+  ## the walk found the segments last to first
+  seg.len <- seg.len[n.seg:1]
+
+  len.orig <- rep(0,n.seg)
+  seg.mean <- rep(0,n.seg)
+  bound <- rep(1,n.seg+1)       # segment starts on the compact scale
+  bound.orig <- rep(1,n.seg+1)  # segment starts on the original scale
+  for (i in 1:n.seg) {
+    bound[i+1] <- bound[i]+seg.len[i]
+    members <- bound[i]:(bound[i+1]-1)
+    len.orig[i] <- sum(Nr[members])
+    bound.orig[i+1] <- bound.orig[i]+len.orig[i]
+    seg.mean[i] <- sum(Sum[members])/len.orig[i]
+  }
+
+  out <- list(Lengde=len.orig,sta=bound.orig[1:n.seg],mean=seg.mean,nIntervals=n.seg)
+  if (yest) {
+    ## every original observation gets the mean of its segment
+    yhat <- rep(0,bound.orig[n.seg+1]-1)
+    for (i in 1:n.seg) {
+      yhat[bound.orig[i]:(bound.orig[i+1]-1)] <- seg.mean[i]
+    }
+    out$yhat <- yhat
+  }
+  return(out)
+}
+
+
+markWithPotts <- function(kmin, gamma, nr, res, sq, subsize) {
+ ## Potts filtering on compact array;
+ ## kmin: minimal length of plateau
+ ## gamma: penalty for discontinuity
+ ## nr: number of values between breakpoints
+ ## res: sum of values between breakpoints
+ ## sq: sum of squares of values between breakpoints
+ ## subsize: length of the logical vector returned by findMarks
+ ##
+ ## Returns the result of findMarks: the compact positions chosen as split points
+ ## at any step, translated into marks on the original scale.
+
+ N <- length(nr)
+ Ant <- rep(0,N)
+ Sum <- rep(0,N)
+ Kvad <- rep(0,N)
+ Cost <- rep(0,N)
+ ## compact positions that were selected as a split point at least once
+ markSub <- rep(FALSE,N)
+ initAnt <- nr[1]
+ initSum <- res[1]
+ initKvad <- sq[1]
+ initAve <- initSum/initAnt
+ bestCost <- rep(0,N)
+ bestCost[1] <- initKvad - initSum*initAve
+ bestSplit <- rep(0,N)
+ k <- 2
+ ## while fewer than 2*kmin values are covered only the single-segment cost is recorded
+ while(sum(nr[1:k]) < 2*kmin) {
+ Ant[2:k] <- Ant[2:k]+nr[k]
+ Sum[2:k]<-Sum[2:k]+res[k]
+ Kvad[2:k] <- Kvad[2:k]+sq[k]
+ bestCost[k] <- (initKvad+Kvad[2])-(initSum+Sum[2])^2/(initAnt+Ant[2])
+ k <- k+1
+ }
+ for (n in k:N) {
+ ## Ant/Sum/Kvad[j] accumulate count, sum and sum of squares of blocks j..n
+ Ant[2:n] <- Ant[2:n]+nr[n]
+ Sum[2:n] <- Sum[2:n]+res[n]
+ Kvad[2:n] <- Kvad[2:n]+sq[n]
+ ## the last segment must hold at least kmin values
+ limit <- n
+ while(limit > 2 & Ant[limit] < kmin) {limit <- limit-1}
+ ## 1:limit-1 is (1:limit)-1, i.e. 0:(limit-1); index 0 is dropped, leaving 1:(limit-1)
+ Cost[2:limit] <- bestCost[1:limit-1]+Kvad[2:limit]-Sum[2:limit]^2/Ant[2:limit]
+ Pos <- which.min(Cost[2:limit])+ 1
+ cost <- Cost[Pos]+gamma
+ ## cost of covering blocks 1..n with a single segment (no penalty)
+ totCost <- (Kvad[2]+initKvad) - (Sum[2]+initSum)^2/(Ant[2]+initAnt)
+ if (totCost < cost) {
+ Pos <- 1
+ cost <- totCost
+ }
+ bestCost[n] <- cost
+ bestSplit[n] <- Pos-1
+ ## when Pos is 1 this assigns to index 0, which leaves markSub unchanged
+ markSub[Pos-1] <- TRUE
+ }
+ help<-findMarks(markSub,nr,subsize)
+ return(help=help)
+}
+
+
+## Translate marks on the compact scale into marks on the original scale.
+## markSub: logical marks on the compact scale
+## Nr: number of original observations per compact position
+## subsize: length of the returned logical vector
+## Returns a logical vector in which the last original observation covered by
+## each marked compact position is TRUE; all FALSE when nothing is marked.
+findMarks <- function(markSub,Nr,subsize){
+  mark <- rep(FALSE,subsize) ## marks in original scale
+  if (sum(markSub) < 1) {
+    return(mark)
+  }
+
+  ## compact positions closing a stretch, and the position opening the same stretch
+  stretch.end <- which(markSub)
+  stretch.start <- head(c(1, stretch.end + 1), -1)
+
+  ## running count of original observations covered so far
+  covered <- 0
+  for (i in seq_along(stretch.end)) {
+    covered <- covered + sum(Nr[stretch.start[i]:stretch.end[i]])
+    mark[covered] <- TRUE
+  }
+  return(mark)
+}
+
+
+compact <- function(y, mark) {
+  ## Accumulates numbers of observations, sums and sums of squares between
+  ## potential breakpoints.
+  ## y: array to be compacted
+  ## mark: logical array of potential breakpoints (TRUE = last position of a block)
+  ## Returns: list with one entry per marked position:
+  ##   Nr  - number of observations in the block ending at that position
+  ##   Sum - sum of y over the block
+  ##   Sq  - sum of y^2 over the block
+  ## Differencing against a leading 0 keeps the result correct when there is
+  ## exactly one mark (indexing with 1:(n-1) yields 1:0 there and produced a
+  ## spurious extra zero-length block) and returns empty vectors when nothing
+  ## is marked.
+  markedPos <- seq_along(y)[mark]
+  cumY <- cumsum(y)[mark]
+  cumY2 <- cumsum(y^2)[mark]
+  list(Nr = diff(c(0, markedPos)),
+       Sum = diff(c(0, cumY)),
+       Sq = diff(c(0, cumY2)))
+}
+
+filterMarkS4 <- function(x,kmin,L,L2,frac1,frac2,frac3,thres){
+ ## Marks potential breakpoints, partially by two highpass filters of
+ ## width 6*L and 6*L2 (L>L2), then by a filter searching for potential
+ ## kmin long segments.
+ ## x: array to be filtered
+ ## kmin: minimal segment length; no marks are kept closer than kmin to either end
+ ## L, L2: scale of the long and the short highpass filter
+ ## frac1, frac2, frac3: fractions used to derive the quantile thresholds of
+ ##   the long filter, the short filter and the kmin filter respectively
+ ## thres: factor applied to the local (7-point) maximum when selecting values
+ ## Returns: logical vector of length(x); TRUE marks a potential breakpoint
+ lengdeArr <- length(x)
+ ## Cumulative sum with a leading 0, so xc[i+1] is the sum of x[1..i].
+ xc<-cumsum(x)
+ xc<-c(0,xc)
+ ind11<-1:(lengdeArr-6*L+1)
+ ind12<-ind11+L
+ ind13<-ind11+3*L
+ ind14<-ind11+5*L
+ ind15<-ind11+6*L
+
+ ## Response of the 6*L filter, zero-padded back to length lengdeArr.
+ cost1<-abs(4*xc[ind13]-xc[ind11]-xc[ind12]-xc[ind14]-xc[ind15])
+ cost1<-c(rep(0,3*L-1),cost1,rep(0,3*L))
+ ##mark shortening in here
+ in1<-1:(lengdeArr-6)
+ in2<-in1+1
+ in3<-in1+2
+ in4<-in1+3
+ in5<-in1+4
+ in6<-in1+5
+ in7<-in1+6
+ ## test: maximum of cost1 over a 7-point window, zero-padded by 3 on each side.
+ test<-pmax(cost1[in1],cost1[in2],cost1[in3],cost1[in4],cost1[in5],cost1[in6],cost1[in7])
+ test<-c(rep(0,3),test,rep(0,3))
+ ## Quantile threshold computed on the values that reach thres times the
+ ## local maximum; the fraction is capped at 0.8.
+ cost1B<-cost1[cost1>=thres*test]
+ frac1B<-min(0.8,frac1*length(cost1)/length(cost1B))
+ limit <- quantile(cost1B,(1-frac1B),names=FALSE)
+ ## NOTE(review): the selection above uses thres*test but this line uses a
+ ## fixed 0.9*test - confirm the difference is intended.
+ mark<-(cost1>limit)&(cost1>0.9*test)
+
+
+ ind21<-1:(lengdeArr-6*L2+1)
+ ind22<-ind21+L2
+ ind23<-ind21+3*L2
+ ind24<-ind21+5*L2
+ ind25<-ind21+6*L2
+ ## Response of the 6*L2 filter; marks are the values above the (1-frac2) quantile.
+ cost2<-abs(4*xc[ind23]-xc[ind21]-xc[ind22]-xc[ind24]-xc[ind25])
+ limit2 <- quantile(cost2,(1-frac2),names=FALSE)
+ mark2<-(cost2>limit2)
+ mark2<-c(rep(0,3*L2-1),mark2,rep(0,3*L2))
+ ## Force marks in the zero-padded border regions of the long filter.
+ if(3*L>kmin){
+ mark[kmin:(3*L-1)]<-TRUE
+ mark[(lengdeArr-3*L+1):(lengdeArr-kmin)]<-TRUE
+ }
+ else
+ {
+ mark[kmin]<- TRUE
+ mark[lengdeArr-kmin]<-TRUE
+ }
+
+ ## NOTE(review): lengdeArr is already a length, so length(lengdeArr) is
+ ## always 1 and this condition can never be TRUE; the kmin-segment filter
+ ## below is therefore never applied and the else branch always runs. The
+ ## test was presumably meant to be lengdeArr>(3*kmin+1) - confirm before
+ ## changing, since enabling the branch alters the marks that are produced.
+ if((kmin>1)&&(length(lengdeArr)>(3*kmin+1))){
+ ind1<-1:(lengdeArr-3*kmin+1)
+ ind2<-ind1+3*kmin
+ ind3<-ind1+kmin
+ ind4<-ind1+2*kmin
+ shortAb <- abs(3*(xc[ind4]-xc[ind3])-(xc[ind2]-xc[ind1]))
+ in1<-1:(length(shortAb)-6)
+ in2<-in1+1
+ in3<-in1+2
+ in4<-in1+3
+ in5<-in1+4
+ in6<-in1+5
+ in7<-in1+6
+ test<-pmax(shortAb[in1],shortAb[in2],shortAb[in3],shortAb[in4],shortAb[in5],shortAb[in6],shortAb[in7])
+ test<-c(rep(0,3),test,rep(0,3))
+ cost1C<-shortAb[shortAb>=thres*test]
+ frac1C<-min(0.8,frac3*length(shortAb)/length(cost1C))
+ limit3 <- quantile(cost1C,(1-frac1C),names=FALSE)
+ markH1<-(shortAb>limit3)&(shortAb>thres*test)
+ ## The same marks shifted to two offsets, each padded to length lengdeArr.
+ markH2<-c(rep(FALSE,(kmin-1)),markH1,rep(FALSE,2*kmin))
+ markH3<-c(rep(FALSE,(2*kmin-1)),markH1,rep(FALSE,kmin))
+ mark<-mark|mark2|markH2|markH3
+ } else {
+ mark<-mark|mark2
+ }
+
+ ## Re-apply the border rules after merging: nothing is marked within
+ ## kmin-1 positions of either end, and the last position is always marked.
+ if(3*L>kmin){
+ mark[1:(kmin-1)]<-FALSE
+ mark[kmin:(3*L-1)]<-TRUE
+ mark[(lengdeArr-3*L+1):(lengdeArr-kmin)]<-TRUE
+ mark[(lengdeArr-kmin+1):(lengdeArr-1)]<-FALSE
+ mark[lengdeArr]<-TRUE
+ }
+ else
+ {
+ mark[1:(kmin-1)]<-FALSE
+ mark[(lengdeArr-kmin+1):(lengdeArr-1)]<-FALSE
+ mark[lengdeArr]<-TRUE
+ mark[kmin]<- TRUE
+ mark[lengdeArr-kmin]<-TRUE
+ }
+
+ return(mark)
+}
+
+medianFilter <- function(x, k) {
+  ## Running median of x using a window of 2*k + 1 points.
+  ## x: numeric vector to smooth
+  ## k: half-width of the window
+  ## When the window is wider than x it is shrunk to the largest odd width
+  ## that fits (runmed requires an odd width); an empty x uses a width of 1.
+  ## Returns: the result of runmed(x, k=<width>, endrule="median")
+  nPoints <- length(x)
+  window <- 2*k + 1
+  if (window > nPoints) {
+    window <- if (nPoints == 0) {
+      1
+    } else if (nPoints %% 2 == 0) {
+      nPoints - 1
+    } else {
+      nPoints
+    }
+  }
+  runmed(x, k=window, endrule="median")
+}
+
+getMad <- function(x, k=25) {
+  ## Robust spread estimate: mad() of the residuals of x around its running
+  ## median (window half-width k, see medianFilter).
+  ## Observations equal to zero are dropped first; they are likely to be
+  ## imputed and should not contribute to the estimate.
+  ## Returns: a single numeric value
+  nonZero <- x[x != 0]
+  residual <- nonZero - medianFilter(nonZero, k)
+  mad(residual)
+}
+
+rearrangement.clustering_bedpe <- function(sv_bedpe,
+                                           kmin=10,# how many points at minimum in a peak, for the pcf algorithm
+                                           kmin.samples=kmin, # how many different samples at minimum in a peak
+                                           gamma.sdev=25,
+                                           PEAK.FACTOR=4,
+                                           thresh.dist=NA,
+                                           gamma=NA,
+                                           kmin.filter=kmin # if the pcf parameter is different from the definition of a peak
+) {
+  ## Flags rearrangements that have a breakpoint inside a region where the
+  ## distances between neighbouring breakpoints are unusually short, using a
+  ## piecewise constant fit (exactPcf) of the inter-breakpoint distances.
+  ##
+  ## sv_bedpe: data frame with columns chrom1, start1, chrom2, start2, sample
+  ## kmin: minimum number of points in a segment, passed to exactPcf
+  ## kmin.samples: NOTE(review): accepted but currently unused -
+  ##               extract.kat.regions is always called with kmin.samples=1
+  ## gamma.sdev: gamma is gamma.sdev * getMad(distances) when gamma is NA
+  ## PEAK.FACTOR: thresh.dist is (expected distance / PEAK.FACTOR) when
+  ##              thresh.dist is NA
+  ## thresh.dist: distance threshold passed to extract.kat.regions
+  ## gamma: pcf penalty passed to exactPcf
+  ## kmin.filter: passed through to extract.kat.regions
+  ##
+  ## Returns: list with
+  ##   sv_bedpe    - the input plus columns id and is.clustered
+  ##   kat.regions - rbind of the regions returned by extract.kat.regions
+
+  ## Nothing to cluster: 1:nrow() would yield c(1, 0) on an empty table and
+  ## fail, so return the input with the two added columns left empty.
+  if (nrow(sv_bedpe) == 0) {
+    sv_bedpe$id <- integer(0)
+    sv_bedpe$is.clustered <- logical(0)
+    return(list(sv_bedpe=sv_bedpe, kat.regions=data.frame()))
+  }
+
+  #add an id to the rearrangement
+  sv_bedpe$id <- seq_len(nrow(sv_bedpe))
+
+  #functions below expect rows to be organised by chromosomes and ordered by position on the chromosome
+
+  #prepare a dataframe for the calculation: both breakpoints of every
+  #rearrangement are stacked into one long table
+  bp.cols <- c('chr', 'position', 'sample')
+  rearrs.left <- sv_bedpe[,c('chrom1','start1','sample')]
+  names(rearrs.left) <- bp.cols
+  rearrs.right <- sv_bedpe[,c('chrom2','start2','sample')]
+  names(rearrs.right) <- bp.cols
+  rearrs.cncd <- rbind(rearrs.left, rearrs.right)
+  rearrs.cncd$isLeft <- c(rep(TRUE, nrow(rearrs.left)), rep(FALSE, nrow(rearrs.right)))
+  rearrs.cncd$id <- c(sv_bedpe$id, sv_bedpe$id)
+
+  #reorder: chromosomes in order of first appearance, positions ascending
+  sample.bps <- NULL
+  for (chrom_i in unique(rearrs.cncd$chr)){
+    tmptab <- rearrs.cncd[rearrs.cncd$chr==chrom_i,,drop=FALSE]
+    tmptab <- tmptab[order(tmptab$position),,drop=FALSE]
+    sample.bps <- rbind(sample.bps,tmptab)
+  }
+  rownames(sample.bps) <- seq_len(nrow(sample.bps))
+
+  #run the algorithm
+  genome.size <- 3 * 10^9
+  MIN.BPS <- 10 # minimal number of breakpoints on a chromosome to do any segmentation
+
+  logScale <- FALSE
+
+  # expected distance between breakpoints if they were spread evenly
+  exp.dist <- genome.size/nrow(sample.bps)
+
+  if (logScale) {
+    sample.bps$intermut.dist <- log10(calcIntermutDist(sample.bps, first.chrom.na=FALSE)$distPrev) # calculate the distances between the breakpoints
+    if (is.na(thresh.dist)) {
+      thresh.dist <- log10(exp.dist/PEAK.FACTOR) # calculate the threshold to call a peak
+    }
+  } else {
+    sample.bps$intermut.dist <- calcIntermutDist(sample.bps, first.chrom.na=FALSE)$distPrev
+    if (is.na(thresh.dist)) {
+      thresh.dist <- exp.dist/PEAK.FACTOR
+    }
+  }
+
+  if (is.na(gamma) && !is.na(gamma.sdev)) {
+    # scale the penalty by the median absolute deviation (see getMad)
+    sdev <- getMad(sample.bps$intermut.dist)
+    gamma <- gamma.sdev*sdev
+  }
+
+  sample.bps$is.clustered.single <- rep(FALSE, nrow(sample.bps))
+
+  all.kat.regions <- data.frame()
+
+  for (chrom in unique(sample.bps$chr)) { # loop over chromosomes
+
+    sample.bps.flag <- sample.bps$chr==chrom # breakpoints on a current chromosome
+
+    if (sum(sample.bps.flag)>MIN.BPS) { # if there are enough breakpoints on a chromosome to run pcf
+
+      data.points <- sample.bps$intermut.dist[sample.bps.flag]
+
+      res <- exactPcf(data.points, kmin, gamma, TRUE)
+
+      #reorder results
+      sample.bps$mean.intermut.dist[sample.bps.flag] <- res$yhat
+
+      # prepare the points for pcf
+      subs <- data.frame(chr=sample.bps$chr[sample.bps.flag], pos=sample.bps$position[sample.bps.flag], sample=sample.bps$sample[sample.bps.flag])
+      kat.regions <- extract.kat.regions(res, thresh.dist, subs, doMerging=TRUE, kmin.samples=1, kmin.filter= kmin.filter) # extract peaks, this is special case as we want at least kmin samples
+
+      all.kat.regions <- rbind(all.kat.regions, kat.regions)
+      if (!is.null(kat.regions) && nrow(kat.regions)>0) { # if there are any kataegis regions found on this chromosome
+        chrom.rows <- which(sample.bps.flag)
+        for (k in seq_len(nrow(kat.regions))) {
+          # firstBp/lastBp index breakpoints within this chromosome
+          sample.bps$is.clustered.single[chrom.rows[kat.regions$firstBp[k]:kat.regions$lastBp[k]]] <- TRUE # call all breakpoints as clustered
+        }
+      }
+    } else {
+
+      sample.bps$mean.intermut.dist[sample.bps.flag] <- mean(sample.bps$intermut.dist[sample.bps.flag])
+    }
+  }
+
+  if (!logScale) { # even if pcf was run on non-logged distances, I log the output
+    sample.bps$intermut.dist <- log10(sample.bps$intermut.dist)
+    sample.bps$mean.intermut.dist <- log10(sample.bps$mean.intermut.dist)
+  }
+
+  # a rearrangement is in a cluster if any of its breakpoints are
+  clustered.ids <- sample.bps$id[sample.bps$is.clustered.single]
+  sample.bps$is.clustered <- sample.bps$id %in% clustered.ids
+
+  # mark both breakpoints of a rearrangement as clustered if any is
+  sv_bedpe$is.clustered <- sv_bedpe$id %in% sample.bps$id[sample.bps$is.clustered]
+
+  list(sv_bedpe=sv_bedpe, kat.regions=all.kat.regions)
+}
+
+
+## Command-line interface: seven optional tuning parameters followed by
+## exactly two positional arguments (input sv_bedpe, output path).
+## NOTE(review): every option is declared type="integer", including
+## --thresh_dist and --gamma, so fractional values cannot be supplied -
+## confirm that is intended.
+## NOTE(review): the defaults here differ from the defaults of
+## rearrangement.clustering_bedpe (peak_factor 10 vs PEAK.FACTOR=4,
+## kmin_samples 1 vs kmin.samples=kmin) - confirm which set is intended.
+option_list <- list(
+ make_option(c("--kmin"), type="integer", default=10, help="How many points at minimum in a peak, for the pcf algorithm"),
+ make_option(c("--kmin_samples"), type="integer", default=1, help="How many different samples at minimum in a peak"),
+ make_option(c("--gamma_sdev"), type="integer", default=25, help="Gamma standard deviation"),
+ make_option(c("--peak_factor"), type="integer", default=10, help="Peak factor"),
+ make_option(c("--thresh_dist"), type="integer", default=NA, help="Threshold distance"),
+ make_option(c("--gamma"), type="integer", default=NA, help="Gamma"),
+ make_option(c("--kmin_filter"), type="integer", default=10, help="Kmin filter")
+)
+parser <- OptionParser(usage = "%prog [options] sv_bedpe out_fpath", option_list=option_list)
+## positional_arguments = 2 makes parse_args return a list holding both
+## the parsed options ($options) and the two positional values ($args).
+arguments <- parse_args(parser, positional_arguments = 2)
+opt <- arguments$options
+args <- arguments$args
+
+## args[1] is sv_bedpe and args[2] is out_fpath, in the order given by the
+## usage string; clustering() is presumably defined earlier in this script.
+clustering(args[1],
+ args[2],
+ kmin=opt$kmin,
+ kmin.samples=opt$kmin_samples,
+ gamma.sdev=opt$gamma_sdev,
+ PEAK.FACTOR=opt$peak_factor,
+ thresh.dist=opt$thresh_dist,
+ gamma=opt$gamma,
+ kmin.filter=opt$kmin_filter)
diff --git a/opencga-app/app/cloud/docker/opencga-ext-tools/Dockerfile b/opencga-app/app/cloud/docker/opencga-ext-tools/Dockerfile
index f626cf00c63..c082c0dbc9a 100644
--- a/opencga-app/app/cloud/docker/opencga-ext-tools/Dockerfile
+++ b/opencga-app/app/cloud/docker/opencga-ext-tools/Dockerfile
@@ -13,7 +13,7 @@ WORKDIR /opt/opencga/signature.tools.lib
RUN apt-get update -y && DEBIAN_FRONTEND="noninteractive" TZ="Europe/London" apt-get install -y \
libcurl4 git libgmp-dev libcurl4-openssl-dev libgit2-dev libssl-dev libssh-dev libxml2-dev libfontconfig1-dev libharfbuzz-dev libfribidi-dev \
libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev \
- gnuplot pandoc samtools bcftools fastqc plink1.9 bwa r-base && \
+ gnuplot pandoc samtools bcftools tabix fastqc plink1.9 bwa r-base && \
## Installation dependencies using R install.packages() is slower than apt-get but final size is 400GB smaller.
R -e "install.packages(c('BiocManager', 'RCircos', 'nnls', 'ggplot2', 'jsonlite', 'optparse', 'knitr', 'configr', 'dplyr', 'rmarkdown', 'tidyr', 'httr'))" && \
R -e "BiocManager::install('BiocStyle')" && \
@@ -21,10 +21,12 @@ RUN apt-get update -y && DEBIAN_FRONTEND="noninteractive" TZ="Europe/London" apt
R -e 'install.packages(c("devtools", "getopt"), repos="https://www.stats.bris.ac.uk/R/")' && \
git clone https://github.com/Nik-Zainal-Group/signature.tools.lib.git /opt/opencga/signature.tools.lib && \
git checkout d3d73db497b5b83abc55d6cd55840c34ed542628 && \
- sed -i '/BSgenome/d' DESCRIPTION && \
- R -e 'devtools::install(repos="https://www.stats.bris.ac.uk/R/")' && \
+ sed -i '/Mmusculus/d' DESCRIPTION && \
+ sed -i '/Cfamiliaris/d' DESCRIPTION && \
+ sed -i '/1000genomes/d' DESCRIPTION && \
+ R -e 'options(timeout = 300);devtools::install(repos="https://www.stats.bris.ac.uk/R/")' && \
## Clean up
rm -rf /var/lib/apt/lists/* /tmp/* /opt/opencga/signature.tools.lib/.git && \
strip --remove-section=.note.ABI-tag /usr/lib/x86_64-linux-gnu/libQt5Core.so.5
-WORKDIR /opt/opencga
+WORKDIR /opt/opencga
\ No newline at end of file
diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml
index ccad8318be7..890b211ed9a 100644
--- a/opencga-app/pom.xml
+++ b/opencga-app/pom.xml
@@ -22,7 +22,7 @@
org.opencb.opencga
opencga
- 2.4.13-SNAPSHOT
+ 2.6.0-SNAPSHOT
../pom.xml
diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/CatalogCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/CatalogCommandExecutor.java
index 5c743b66c9c..5554d04d94f 100644
--- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/CatalogCommandExecutor.java
+++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/CatalogCommandExecutor.java
@@ -36,7 +36,6 @@
import javax.ws.rs.client.WebTarget;
import javax.ws.rs.core.Response;
import java.io.IOException;
-import java.net.URISyntaxException;
import java.nio.file.Paths;
import java.util.Collections;
@@ -178,7 +177,7 @@ private void install() throws CatalogException {
try (CatalogManager catalogManager = new CatalogManager(configuration)) {
catalogManager.installCatalogDB(configuration.getAdmin().getSecretKey(), commandOptions.commonOptions.adminPassword,
- commandOptions.email, commandOptions.organization, commandOptions.force, true);
+ commandOptions.email, commandOptions.organization, commandOptions.force);
}
}
diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/InternalCliOptionsParser.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/InternalCliOptionsParser.java
index a0562e7c9d3..f11d519c846 100644
--- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/InternalCliOptionsParser.java
+++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/InternalCliOptionsParser.java
@@ -49,6 +49,7 @@
import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.GatkCommandOptions.GATK_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.GenomePlotCommandOptions.GENOME_PLOT_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.GwasCommandOptions.GWAS_RUN_COMMAND;
+import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.HRDetectCommandOptions.HRDETECT_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.IndividualQcCommandOptions.INDIVIDUAL_QC_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.InferredSexCommandOptions.INFERRED_SEX_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.JulieRunCommandOptions.JULIE_RUN_COMMAND;
@@ -169,6 +170,7 @@ public InternalCliOptionsParser() {
variantSubCommands.addCommand(KNOCKOUT_RUN_COMMAND, variantCommandOptions.knockoutCommandOptions);
variantSubCommands.addCommand(SAMPLE_ELIGIBILITY_RUN_COMMAND, variantCommandOptions.sampleEligibilityCommandOptions);
variantSubCommands.addCommand(MUTATIONAL_SIGNATURE_RUN_COMMAND, variantCommandOptions.mutationalSignatureCommandOptions);
+ variantSubCommands.addCommand(HRDETECT_RUN_COMMAND, variantCommandOptions.hrDetectCommandOptions);
variantSubCommands.addCommand(GENOME_PLOT_RUN_COMMAND, variantCommandOptions.genomePlotInternalCommandOptions);
variantSubCommands.addCommand(MENDELIAN_ERROR_RUN_COMMAND, variantCommandOptions.mendelianErrorCommandOptions);
variantSubCommands.addCommand(INFERRED_SEX_RUN_COMMAND, variantCommandOptions.inferredSexCommandOptions);
diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/AlignmentCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/AlignmentCommandExecutor.java
index 1ed8c9fa275..3553384e71a 100644
--- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/AlignmentCommandExecutor.java
+++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/AlignmentCommandExecutor.java
@@ -156,8 +156,6 @@ private void qcRun() throws ToolException {
ObjectMap params = new AlignmentQcParams(
cliOptions.bamFile,
- cliOptions.bedFile,
- cliOptions.dictFile,
cliOptions.skip,
cliOptions.overwrite,
cliOptions.outdir
diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java
index 552f0c60edd..8c6ac66073c 100644
--- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java
+++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java
@@ -34,6 +34,7 @@
import org.opencb.opencga.analysis.variant.VariantExportTool;
import org.opencb.opencga.analysis.variant.genomePlot.GenomePlotAnalysis;
import org.opencb.opencga.analysis.variant.gwas.GwasAnalysis;
+import org.opencb.opencga.analysis.variant.hrdetect.HRDetectAnalysis;
import org.opencb.opencga.analysis.variant.inferredSex.InferredSexAnalysis;
import org.opencb.opencga.analysis.variant.julie.JulieTool;
import org.opencb.opencga.analysis.variant.knockout.KnockoutAnalysis;
@@ -89,6 +90,7 @@
import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.GatkCommandOptions.GATK_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.GenomePlotCommandOptions.GENOME_PLOT_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.GwasCommandOptions.GWAS_RUN_COMMAND;
+import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.HRDetectCommandOptions.HRDETECT_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.IndividualQcCommandOptions.INDIVIDUAL_QC_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.InferredSexCommandOptions.INFERRED_SEX_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.KnockoutCommandOptions.KNOCKOUT_RUN_COMMAND;
@@ -218,6 +220,9 @@ public void execute() throws Exception {
case MUTATIONAL_SIGNATURE_RUN_COMMAND:
mutationalSignature();
break;
+ case HRDETECT_RUN_COMMAND:
+ hrDetect();
+ break;
case GENOME_PLOT_RUN_COMMAND:
genomePlot();
break;
@@ -796,29 +801,52 @@ private void mutationalSignature() throws Exception {
VariantCommandOptions.MutationalSignatureCommandOptions cliOptions = variantCommandOptions.mutationalSignatureCommandOptions;
// Check signature release
- checkSignatureVersion(cliOptions.sigVersion);
+ checkSignatureVersion(cliOptions.fitSigVersion);
ObjectMap params = new MutationalSignatureAnalysisParams(
cliOptions.id,
cliOptions.description,
+ cliOptions.sample,
cliOptions.query,
- cliOptions.catalogues,
- cliOptions.cataloguesContent,
+ cliOptions.fitId,
cliOptions.fitMethod,
- cliOptions.nBoot,
- cliOptions.sigVersion,
- cliOptions.organ,
- cliOptions.thresholdPerc,
- cliOptions.thresholdPval,
- cliOptions.maxRareSigs,
- cliOptions.signaturesFile,
- cliOptions.rareSignaturesFile,
+ cliOptions.fitNBoot,
+ cliOptions.fitSigVersion,
+ cliOptions.fitOrgan,
+ cliOptions.fitThresholdPerc,
+ cliOptions.fitThresholdPval,
+ cliOptions.fitMaxRareSigs,
+ cliOptions.fitSignaturesFile,
+ cliOptions.fitRareSignaturesFile,
+ cliOptions.skip,
cliOptions.outdir)
.toObjectMap(cliOptions.commonOptions.params).append(ParamConstants.STUDY_PARAM, cliOptions.study);
toolRunner.execute(MutationalSignatureAnalysis.class, params, Paths.get(cliOptions.outdir), jobId, token);
}
+    /** Runs HRDetectAnalysis with parameters taken from the parsed CLI options. */
+    private void hrDetect() throws Exception {
+        VariantCommandOptions.HRDetectCommandOptions hrDetectOptions = variantCommandOptions.hrDetectCommandOptions;
+
+        // Constructor arguments are positional; keep this order unchanged.
+        HRDetectAnalysisParams analysisParams = new HRDetectAnalysisParams(
+                hrDetectOptions.id, hrDetectOptions.description, hrDetectOptions.sample,
+                hrDetectOptions.snvFittingId, hrDetectOptions.svFittingId,
+                hrDetectOptions.cnvQuery, hrDetectOptions.indelQuery,
+                hrDetectOptions.snv3CustomName, hrDetectOptions.snv8CustomName,
+                hrDetectOptions.sv3CustomName, hrDetectOptions.sv8CustomName,
+                hrDetectOptions.bootstrap, hrDetectOptions.outdir);
+
+        // Convert to an ObjectMap (passing the common CLI params) and attach the study.
+        ObjectMap params = analysisParams
+                .toObjectMap(hrDetectOptions.commonOptions.params)
+                .append(ParamConstants.STUDY_PARAM, hrDetectOptions.study);
+
+        // NOTE(review): --outdir is declared optional; Paths.get(null) throws - confirm it is always set.
+        toolRunner.execute(HRDetectAnalysis.class, params, Paths.get(hrDetectOptions.outdir), jobId, token);
+    }
+
private void genomePlot() throws Exception {
VariantCommandOptions.GenomePlotInternalCommandOptions cliOptions = variantCommandOptions.genomePlotInternalCommandOptions;
@@ -911,7 +939,7 @@ private void sampleQc() throws Exception {
VariantCommandOptions.SampleQcCommandOptions cliOptions = variantCommandOptions.sampleQcCommandOptions;
// Check signature release
- checkSignatureVersion(cliOptions.signatureSigVersion);
+ checkSignatureVersion(cliOptions.signatureFitSigVersion);
// Build variant query from cli options
AnnotationVariantQueryParams variantStatsQuery = ToolParams.fromParams(AnnotationVariantQueryParams.class,
@@ -925,18 +953,20 @@ private void sampleQc() throws Exception {
cliOptions.signatureId,
cliOptions.signatureDescription,
cliOptions.signatureQuery,
+ cliOptions.signatureFitId,
cliOptions.signatureFitMethod,
- cliOptions.signatureNBoot,
- cliOptions.signatureSigVersion,
- cliOptions.signatureOrgan,
- cliOptions.signatureThresholdPerc,
- cliOptions.signatureThresholdPval,
- cliOptions.signatureMaxRareSigs,
- cliOptions.signatureSignaturesFile,
- cliOptions.signatureRareSignaturesFile,
+ cliOptions.signatureFitNBoot,
+ cliOptions.signatureFitSigVersion,
+ cliOptions.signatureFitOrgan,
+ cliOptions.signatureFitThresholdPerc,
+ cliOptions.signatureFitThresholdPval,
+ cliOptions.signatureFitMaxRareSigs,
+ cliOptions.signatureFitSignaturesFile,
+ cliOptions.signatureFitRareSignaturesFile,
cliOptions.genomePlotId,
cliOptions.genomePlotDescr,
cliOptions.genomePlotConfigFile,
+ cliOptions.skip,
cliOptions.outdir)
.toObjectMap(cliOptions.commonOptions.params).append(ParamConstants.STUDY_PARAM, cliOptions.study);
diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java
index a4c2251f6cb..cae48c1095e 100644
--- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java
+++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java
@@ -24,6 +24,7 @@
import org.opencb.opencga.analysis.variant.VariantExportTool;
import org.opencb.opencga.analysis.variant.genomePlot.GenomePlotAnalysis;
import org.opencb.opencga.analysis.variant.gwas.GwasAnalysis;
+import org.opencb.opencga.analysis.variant.hrdetect.HRDetectAnalysis;
import org.opencb.opencga.analysis.variant.inferredSex.InferredSexAnalysis;
import org.opencb.opencga.analysis.variant.julie.JulieTool;
import org.opencb.opencga.analysis.variant.knockout.KnockoutAnalysis;
@@ -128,6 +129,7 @@ public class VariantCommandOptions {
public final KnockoutCommandOptions knockoutCommandOptions;
public final SampleEligibilityCommandOptions sampleEligibilityCommandOptions;
public final MutationalSignatureCommandOptions mutationalSignatureCommandOptions;
+ public final HRDetectCommandOptions hrDetectCommandOptions;
public final GenomePlotCommandOptions genomePlotCommandOptions;
public final GenomePlotInternalCommandOptions genomePlotInternalCommandOptions;
public final MendelianErrorCommandOptions mendelianErrorCommandOptions;
@@ -196,6 +198,7 @@ public VariantCommandOptions(GeneralCliOptions.CommonCommandOptions commonComman
this.knockoutCommandOptions = new KnockoutCommandOptions();
this.sampleEligibilityCommandOptions = new SampleEligibilityCommandOptions();
this.mutationalSignatureCommandOptions = new MutationalSignatureCommandOptions();
+ this.hrDetectCommandOptions = new HRDetectCommandOptions();
this.genomePlotCommandOptions = new GenomePlotCommandOptions();
this.genomePlotInternalCommandOptions = new GenomePlotInternalCommandOptions();
this.mendelianErrorCommandOptions = new MendelianErrorCommandOptions();
@@ -1319,6 +1322,10 @@ public class MutationalSignatureCommandOptions {
@Parameter(names = {"--study"}, description = "Study where all the samples belong to.")
public String study;
+ @Parameter(names = {"--sample"}, description = "Sample ID.", required = true)
+ public String sample;
+
+ // Signature catalogue
@Parameter(names = {"--id"}, description = FieldConstants.MUTATIONAL_SIGNATURE_ID_DESCRIPTION)
public String id;
@@ -1329,40 +1336,96 @@ public class MutationalSignatureCommandOptions {
public String query;
// For fitting method
-
- @Parameter(names = {"--catalogues"}, description = FieldConstants.MUTATIONAL_SIGNATURE_CATALOGUES_DESCRIPTION)
- public String catalogues;
-
- @Parameter(names = {"--catalogues-content"}, description = FieldConstants.MUTATIONAL_SIGNATURE_CATALOGUES_CONTENT_DESCRIPTION)
- public String cataloguesContent;
+ @Parameter(names = {"--fit-id"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_ID_DESCRIPTION)
+ public String fitId;
@Parameter(names = {"--fit-method"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_METHOD_DESCRIPTION)
public String fitMethod = "FitMS";
- @Parameter(names = {"--n-boot"}, description = FieldConstants.MUTATIONAL_SIGNATURE_N_BOOT_DESCRIPTION)
- public Integer nBoot;
+ @Parameter(names = {"--fit-n-boot"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_N_BOOT_DESCRIPTION)
+ public Integer fitNBoot;
+
+ @Parameter(names = {"--fit-sig-version"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_SIG_VERSION_DESCRIPTION)
+ public String fitSigVersion = "RefSigv2";
+
+ @Parameter(names = {"--fit-organ"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_ORGAN_DESCRIPTION)
+ public String fitOrgan;
+
+ @Parameter(names = {"--fit-threshold-perc"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_THRESHOLD_PERC_DESCRIPTION)
+ public Float fitThresholdPerc = 5f;
+
+ @Parameter(names = {"--fit-threshold-pval"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_THRESHOLD_PVAL_DESCRIPTION)
+ public Float fitThresholdPval = 0.05f;
+
+ @Parameter(names = {"--fit-max-rare-sigs"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_MAX_RARE_SIGS_DESCRIPTION)
+ public Integer fitMaxRareSigs = 1;
+
+ @Parameter(names = {"--fit-signatures-file"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_SIGNATURES_FILE_DESCRIPTION)
+ public String fitSignaturesFile;
+
+ @Parameter(names = {"--fit-rare-signatures-file"},
+ description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_RARE_SIGNATURES_FILE_DESCRIPTION)
+ public String fitRareSignaturesFile;
+
+ // Other
+ @Parameter(names = {"--skip"}, description = FieldConstants.SAMPLE_QUALITY_CONTROL_SKIP_DESCRIPTION)
+ public String skip;
+
+ @Parameter(names = {"-o", "--outdir"}, description = FieldConstants.JOB_OUT_DIR_DESCRIPTION, arity = 1, required = false)
+ public String outdir;
+ }
+
+ @Parameters(commandNames = HRDetectCommandOptions.HRDETECT_RUN_COMMAND, commandDescription = HRDetectAnalysis.DESCRIPTION)
+ public class HRDetectCommandOptions {
+ public static final String HRDETECT_RUN_COMMAND = HRDetectAnalysis.ID + "-run";
- @Parameter(names = {"--sig-version"}, description = FieldConstants.MUTATIONAL_SIGNATURE_SIG_VERSION_DESCRIPTION)
- public String sigVersion = "RefSigv2";
+ @ParametersDelegate
+ public GeneralCliOptions.CommonCommandOptions commonOptions = commonCommandOptions;
+
+ @ParametersDelegate
+ public Object internalJobOptions = internalJobOptionsObject;
+
+ @Parameter(names = {"--study"}, description = "Study where the sample belong to.")
+ public String study;
+
+ @Parameter(names = {"--sample-id"}, description = FieldConstants.SAMPLE_ID_DESCRIPTION, required = true)
+ public String sample;
- @Parameter(names = {"--organ"}, description = FieldConstants.MUTATIONAL_SIGNATURE_ORGAN_DESCRIPTION)
- public String organ;
+ @Parameter(names = {"--id"}, description = FieldConstants.HRDETECT_ID_DESCRIPTION, arity = 1)
+ public String id;
+
+ @Parameter(names = {"--description"}, description = FieldConstants.HRDETECT_DESCRIPTION_DESCRIPTION, arity = 1)
+ public String description;
+
+ @Parameter(names = {"--snv-fitting-id"}, description = FieldConstants.HRDETECT_SNV_FITTING_ID_DESCRIPTION, required = true,
+ arity = 1)
+ public String snvFittingId;
+
+ @Parameter(names = {"--sv-fitting-id"}, description = FieldConstants.HRDETECT_SV_FITTING_ID_DESCRIPTION, required = true, arity = 1)
+ public String svFittingId;
- @Parameter(names = {"--threshold-perc"}, description = FieldConstants.MUTATIONAL_SIGNATURE_THRESHOLD_PERC_DESCRIPTION)
- public Float thresholdPerc = 5f;
+ @Parameter(names = {"--cnv-query"}, description = FieldConstants.HRDETECT_CNV_QUERY_DESCRIPTION, required = true, arity = 1)
+ public String cnvQuery;
- @Parameter(names = {"--threshold-pval"}, description = FieldConstants.MUTATIONAL_SIGNATURE_THRESHOLD_PVAL_DESCRIPTION)
- public Float thresholdPval = 0.05f;
+ @Parameter(names = {"--indel-query"}, description = FieldConstants.HRDETECT_INDEL_QUERY_DESCRIPTION, required = true, arity = 1)
+ public String indelQuery;
- @Parameter(names = {"--max-rare-sigs"}, description = FieldConstants.MUTATIONAL_SIGNATURE_MAX_RARE_SIGS_DESCRIPTION)
- public Integer maxRareSigs = 1;
+ @Parameter(names = {"--snv3custom-name"}, description = FieldConstants.HRDETECT_SNV3_CUSTOM_NAME_DESCRIPTION, arity = 1)
+ public String snv3CustomName;
- @Parameter(names = {"--signatures-file"}, description = FieldConstants.MUTATIONAL_SIGNATURE_SIGNATURES_FILE_DESCRIPTION)
- public String signaturesFile;
+ @Parameter(names = {"--snv8custom-name"}, description = FieldConstants.HRDETECT_SNV8_CUSTOM_NAME_DESCRIPTION, arity = 1)
+ public String snv8CustomName;
- @Parameter(names = {"--rare-signatures-file"}, description = FieldConstants.MUTATIONAL_SIGNATURE_RARE_SIGNATURES_FILE_DESCRIPTION)
- public String rareSignaturesFile;
+ @Parameter(names = {"--sv3custom-name"}, description = FieldConstants.HRDETECT_SV3_CUSTOM_NAME_DESCRIPTION, arity = 1)
+ public String sv3CustomName;
+ @Parameter(names = {"--sv8custom-name"}, description = FieldConstants.HRDETECT_SV8_CUSTOM_NAME_DESCRIPTION, arity = 1)
+ public String sv8CustomName;
+
+ @Parameter(names = {"--bootstrap"}, description = FieldConstants.HRDETECT_BOOTSTRAP_DESCRIPTION, arity = 1)
+ public Boolean bootstrap;
+
+ // Other
@Parameter(names = {"-o", "--outdir"}, description = FieldConstants.JOB_OUT_DIR_DESCRIPTION, arity = 1, required = false)
public String outdir;
}
@@ -1595,32 +1658,36 @@ public class SampleQcCommandOptions {
@Parameter(names = {"--ms-query"}, description = FieldConstants.MUTATIONAL_SIGNATURE_QUERY_DESCRIPTION)
public String signatureQuery;
+ @Parameter(names = {"--ms-fit-id"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_ID_DESCRIPTION)
+ public String signatureFitId;
+
@Parameter(names = {"--ms-fit-method"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_METHOD_DESCRIPTION)
public String signatureFitMethod = "FitMS";
- @Parameter(names = {"--ms-n-boot"}, description = FieldConstants.MUTATIONAL_SIGNATURE_N_BOOT_DESCRIPTION)
- public Integer signatureNBoot;
+ @Parameter(names = {"--ms-fit-n-boot"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_N_BOOT_DESCRIPTION)
+ public Integer signatureFitNBoot;
- @Parameter(names = {"--ms-sig-version"}, description = FieldConstants.MUTATIONAL_SIGNATURE_SIG_VERSION_DESCRIPTION)
- public String signatureSigVersion = "RefSigv2";
+ @Parameter(names = {"--ms-fit-sig-version"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_SIG_VERSION_DESCRIPTION)
+ public String signatureFitSigVersion = "RefSigv2";
- @Parameter(names = {"--ms-organ"}, description = FieldConstants.MUTATIONAL_SIGNATURE_ORGAN_DESCRIPTION)
- public String signatureOrgan;
+ @Parameter(names = {"--ms-fit-organ"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_ORGAN_DESCRIPTION)
+ public String signatureFitOrgan;
- @Parameter(names = {"--ms-threshold-perc"}, description = FieldConstants.MUTATIONAL_SIGNATURE_THRESHOLD_PERC_DESCRIPTION)
- public Float signatureThresholdPerc = 5f;
+ @Parameter(names = {"--ms-fit-threshold-perc"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_THRESHOLD_PERC_DESCRIPTION)
+ public Float signatureFitThresholdPerc = 5f;
- @Parameter(names = {"--ms-threshold-pval"}, description = FieldConstants.MUTATIONAL_SIGNATURE_THRESHOLD_PVAL_DESCRIPTION)
- public Float signatureThresholdPval = 0.05f;
+ @Parameter(names = {"--ms-fit-threshold-pval"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_THRESHOLD_PVAL_DESCRIPTION)
+ public Float signatureFitThresholdPval = 0.05f;
- @Parameter(names = {"--ms-max-rare-sigs"}, description = FieldConstants.MUTATIONAL_SIGNATURE_MAX_RARE_SIGS_DESCRIPTION)
- public Integer signatureMaxRareSigs = 1;
+ @Parameter(names = {"--ms-fit-max-rare-sigs"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_MAX_RARE_SIGS_DESCRIPTION)
+ public Integer signatureFitMaxRareSigs = 1;
- @Parameter(names = {"--ms-signatures-file"}, description = FieldConstants.MUTATIONAL_SIGNATURE_SIGNATURES_FILE_DESCRIPTION)
- public String signatureSignaturesFile;
+ @Parameter(names = {"--ms-fit-signatures-file"}, description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_SIGNATURES_FILE_DESCRIPTION)
+ public String signatureFitSignaturesFile;
- @Parameter(names = {"--ms-rare-signatures-file"}, description = FieldConstants.MUTATIONAL_SIGNATURE_RARE_SIGNATURES_FILE_DESCRIPTION)
- public String signatureRareSignaturesFile;
+ @Parameter(names = {"--ms-fit-rare-signatures-file"},
+ description = FieldConstants.MUTATIONAL_SIGNATURE_FIT_RARE_SIGNATURES_FILE_DESCRIPTION)
+ public String signatureFitRareSignaturesFile;
// Genome plot
@@ -1633,6 +1700,10 @@ public class SampleQcCommandOptions {
@Parameter(names = {"--gpcf", "--gp-config-file"}, description = FieldConstants.GENOME_PLOT_CONFIGURATION_FILE_DESCRIPTION)
public String genomePlotConfigFile;
+ // Other
+ @Parameter(names = {"--skip"}, description = FieldConstants.SAMPLE_QUALITY_CONTROL_SKIP_DESCRIPTION)
+ public String skip;
+
@Parameter(names = {"-o", "--outdir"}, description = FieldConstants.JOB_OUT_DIR_DESCRIPTION)
public String outdir;
}
diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpenCgaCompleter.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpenCgaCompleter.java
index 30ea9641968..8f6a7d65564 100644
--- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpenCgaCompleter.java
+++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpenCgaCompleter.java
@@ -1,5 +1,5 @@
/*
-* Copyright 2015-2022-12-13 OpenCB
+* Copyright 2015-2023-01-10 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -45,7 +45,7 @@ public abstract class OpenCgaCompleter implements Completer {
.map(Candidate::new)
.collect(toList());
- private List variantList = asList( "aggregationstats","annotation-metadata","annotation-query","circos-run","cohort-stats-delete","cohort-stats-info","cohort-stats-run","exomiser-run","export-run","family-genotypes","family-qc-run","file-delete","gatk-run","genome-plot-run","gwas-run","index-run","individual-qc-run","inferred-sex-run","knockout-gene-query","knockout-individual-query","knockout-run","mendelian-error-run","metadata","mutational-signature-query","mutational-signature-run","plink-run","query","relatedness-run","rvtests-run","sample-aggregation-stats","sample-eligibility-run","sample-qc-run","sample-query","sample-run","sample-stats-query","sample-stats-run","stats-export-run","stats-run")
+ private List variantList = asList( "aggregationstats","annotation-metadata","annotation-query","circos-run","cohort-stats-delete","cohort-stats-info","cohort-stats-run","exomiser-run","export-run","family-genotypes","family-qc-run","file-delete","gatk-run","genome-plot-run","gwas-run","hr-detect-run","index-run","individual-qc-run","inferred-sex-run","knockout-gene-query","knockout-individual-query","knockout-run","mendelian-error-run","metadata","mutational-signature-query","mutational-signature-run","plink-run","query","relatedness-run","rvtests-run","sample-aggregation-stats","sample-eligibility-run","sample-qc-run","sample-query","sample-run","sample-stats-query","sample-stats-run","stats-export-run","stats-run")
.stream()
.map(Candidate::new)
.collect(toList());
diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpencgaCliOptionsParser.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpencgaCliOptionsParser.java
index 6622d548c29..9b32de2528f 100644
--- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpencgaCliOptionsParser.java
+++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpencgaCliOptionsParser.java
@@ -1,5 +1,5 @@
/*
-* Copyright 2015-2022-12-13 OpenCB
+* Copyright 2015-2023-01-10 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -75,6 +75,7 @@ public OpencgaCliOptionsParser() {
analysisVariantSubCommands.addCommand("gatk-run", analysisVariantCommandOptions.runGatkCommandOptions);
analysisVariantSubCommands.addCommand("genome-plot-run", analysisVariantCommandOptions.runGenomePlotCommandOptions);
analysisVariantSubCommands.addCommand("gwas-run", analysisVariantCommandOptions.runGwasCommandOptions);
+ analysisVariantSubCommands.addCommand("hr-detect-run", analysisVariantCommandOptions.runHrDetectCommandOptions);
analysisVariantSubCommands.addCommand("index-run", analysisVariantCommandOptions.runIndexCommandOptions);
analysisVariantSubCommands.addCommand("individual-qc-run", analysisVariantCommandOptions.runIndividualQcCommandOptions);
analysisVariantSubCommands.addCommand("inferred-sex-run", analysisVariantCommandOptions.runInferredSexCommandOptions);
diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisAlignmentCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisAlignmentCommandExecutor.java
index c25e53d68e9..3378e6f4e1f 100644
--- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisAlignmentCommandExecutor.java
+++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisAlignmentCommandExecutor.java
@@ -485,8 +485,6 @@ private RestResponse runQc() throws Exception {
} else {
ObjectMap beanParams = new ObjectMap();
putNestedIfNotEmpty(beanParams, "bamFile",commandOptions.bamFile, true);
- putNestedIfNotEmpty(beanParams, "bedFile",commandOptions.bedFile, true);
- putNestedIfNotEmpty(beanParams, "dictFile",commandOptions.dictFile, true);
putNestedIfNotEmpty(beanParams, "skip",commandOptions.skip, true);
putNestedIfNotNull(beanParams, "overwrite",commandOptions.overwrite, true);
putNestedIfNotEmpty(beanParams, "outdir",commandOptions.outdir, true);
diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java
index 000ed437d45..b0238128d84 100644
--- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java
+++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java
@@ -42,6 +42,7 @@
import org.opencb.opencga.core.models.variant.GatkWrapperParams;
import org.opencb.opencga.core.models.variant.GenomePlotAnalysisParams;
import org.opencb.opencga.core.models.variant.GwasAnalysisParams;
+import org.opencb.opencga.core.models.variant.HRDetectAnalysisParams;
import org.opencb.opencga.core.models.variant.IndividualQcAnalysisParams;
import org.opencb.opencga.core.models.variant.InferredSexAnalysisParams;
import org.opencb.opencga.core.models.variant.KnockoutAnalysisParams;
@@ -137,6 +138,9 @@ public void execute() throws Exception {
case "gwas-run":
queryResponse = runGwas();
break;
+ case "hr-detect-run":
+ queryResponse = runHrDetect();
+ break;
case "index-run":
queryResponse = runIndex();
break;
@@ -783,6 +787,56 @@ private RestResponse runGwas() throws Exception {
return openCGAClient.getVariantClient().runGwas(gwasAnalysisParams, queryParams);
}
+ private RestResponse runHrDetect() throws Exception {
+ // Runs the "hr-detect-run" subcommand: collects query parameters, builds an HRDetectAnalysisParams body and passes both to the variant client.
+ logger.debug("Executing runHrDetect in Analysis - Variant command line");
+
+ AnalysisVariantCommandOptions.RunHrDetectCommandOptions commandOptions = analysisVariantCommandOptions.runHrDetectCommandOptions;
+ // Query parameters: putIfNotEmpty is used for each, so only values the user actually supplied are expected to be sent.
+ ObjectMap queryParams = new ObjectMap();
+ queryParams.putIfNotEmpty("study", commandOptions.study);
+ queryParams.putIfNotEmpty("jobId", commandOptions.jobId);
+ queryParams.putIfNotEmpty("jobDescription", commandOptions.jobDescription);
+ queryParams.putIfNotEmpty("jobDependsOn", commandOptions.jobDependsOn);
+ queryParams.putIfNotEmpty("jobTags", commandOptions.jobTags);
+ if (queryParams.get("study") == null && OpencgaMain.isShellMode()) { // no study given: in shell mode fall back to the session's current study
+ queryParams.putIfNotEmpty("study", sessionManager.getSession().getCurrentStudy());
+ }
+
+ // The request body comes from one of three sources, checked in this order: jsonDataModel flag, jsonFile, individual options.
+ HRDetectAnalysisParams hRDetectAnalysisParams= null;
+ if (commandOptions.jsonDataModel) { // print a default-constructed params object as JSON and return without calling the client
+ hRDetectAnalysisParams = new HRDetectAnalysisParams();
+ RestResponse res = new RestResponse<>();
+ res.setType(QueryType.VOID);
+ PrintUtils.println(getObjectAsJSON(hRDetectAnalysisParams));
+ return res;
+ } else if (commandOptions.jsonFile != null) { // read the whole params object from the given JSON file
+ hRDetectAnalysisParams = JacksonUtils.getDefaultObjectMapper()
+ .readValue(new java.io.File(commandOptions.jsonFile), HRDetectAnalysisParams.class);
+ } else { // assemble the params from the individual command-line options
+ ObjectMap beanParams = new ObjectMap();
+ putNestedIfNotEmpty(beanParams, "id",commandOptions.id, true);
+ putNestedIfNotEmpty(beanParams, "description",commandOptions.description, true);
+ putNestedIfNotEmpty(beanParams, "sampleId",commandOptions.sampleId, true);
+ putNestedIfNotEmpty(beanParams, "snvFittingId",commandOptions.snvFittingId, true);
+ putNestedIfNotEmpty(beanParams, "svFittingId",commandOptions.svFittingId, true);
+ putNestedIfNotEmpty(beanParams, "cnvQuery",commandOptions.cnvQuery, true);
+ putNestedIfNotEmpty(beanParams, "indelQuery",commandOptions.indelQuery, true);
+ putNestedIfNotEmpty(beanParams, "snv3CustomName",commandOptions.snv3CustomName, true);
+ putNestedIfNotEmpty(beanParams, "snv8CustomName",commandOptions.snv8CustomName, true);
+ putNestedIfNotEmpty(beanParams, "sv3CustomName",commandOptions.sv3CustomName, true);
+ putNestedIfNotEmpty(beanParams, "sv8CustomName",commandOptions.sv8CustomName, true);
+ putNestedIfNotNull(beanParams, "bootstrap",commandOptions.bootstrap, true);
+ putNestedIfNotEmpty(beanParams, "outdir",commandOptions.outdir, true);
+ // Convert the map via JSON on a copied mapper with FAIL_ON_UNKNOWN_PROPERTIES enabled, so a key HRDetectAnalysisParams does not declare fails deserialization.
+ hRDetectAnalysisParams = JacksonUtils.getDefaultObjectMapper().copy()
+ .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true)
+ .readValue(beanParams.toJson(), HRDetectAnalysisParams.class);
+ }
+ return openCGAClient.getVariantClient().runHrDetect(hRDetectAnalysisParams, queryParams);
+ }
+
private RestResponse runIndex() throws Exception {
logger.debug("Executing runIndex in Analysis - Variant command line");
@@ -1086,6 +1140,7 @@ private RestResponse queryMutationalSignature() throws Exception {
ObjectMap queryParams = new ObjectMap();
queryParams.putIfNotEmpty("study", commandOptions.study);
queryParams.putIfNotEmpty("sample", commandOptions.sample);
+ queryParams.putIfNotEmpty("type", commandOptions.type);
queryParams.putIfNotEmpty("ct", commandOptions.ct);
queryParams.putIfNotEmpty("biotype", commandOptions.biotype);
queryParams.putIfNotEmpty("fileData", commandOptions.fileData);
@@ -1099,17 +1154,8 @@ private RestResponse queryMutationalSignature() throws Exception {
queryParams.putIfNotEmpty("panelFeatureType", commandOptions.panelFeatureType);
queryParams.putIfNotEmpty("panelRoleInCancer", commandOptions.panelRoleInCancer);
queryParams.putIfNotNull("panelIntersection", commandOptions.panelIntersection);
- queryParams.putIfNotEmpty("catalogues", commandOptions.catalogues);
- queryParams.putIfNotEmpty("cataloguesContent", commandOptions.cataloguesContent);
- queryParams.putIfNotEmpty("fitMethod", commandOptions.fitMethod);
- queryParams.putIfNotNull("nBoot", commandOptions.nBoot);
- queryParams.putIfNotEmpty("sigVersion", commandOptions.sigVersion);
- queryParams.putIfNotEmpty("organ", commandOptions.organ);
- queryParams.putIfNotNull("thresholdPerc", commandOptions.thresholdPerc);
- queryParams.putIfNotNull("thresholdPval", commandOptions.thresholdPval);
- queryParams.putIfNotNull("maxRareSigs", commandOptions.maxRareSigs);
- queryParams.putIfNotEmpty("signaturesFile", commandOptions.signaturesFile);
- queryParams.putIfNotEmpty("rareSignaturesFile", commandOptions.rareSignaturesFile);
+ queryParams.putIfNotEmpty("msId", commandOptions.msId);
+ queryParams.putIfNotEmpty("msDescription", commandOptions.msDescription);
if (queryParams.get("study") == null && OpencgaMain.isShellMode()) {
queryParams.putIfNotEmpty("study", sessionManager.getSession().getCurrentStudy());
}
@@ -1148,18 +1194,19 @@ private RestResponse runMutationalSignature() throws Exception {
ObjectMap beanParams = new ObjectMap();
putNestedIfNotEmpty(beanParams, "id",commandOptions.id, true);
putNestedIfNotEmpty(beanParams, "description",commandOptions.description, true);
+ putNestedIfNotEmpty(beanParams, "sample",commandOptions.sample, true);
putNestedIfNotEmpty(beanParams, "query",commandOptions.query, true);
- putNestedIfNotEmpty(beanParams, "catalogues",commandOptions.catalogues, true);
- putNestedIfNotEmpty(beanParams, "cataloguesContent",commandOptions.cataloguesContent, true);
+ putNestedIfNotEmpty(beanParams, "fitId",commandOptions.fitId, true);
putNestedIfNotEmpty(beanParams, "fitMethod",commandOptions.fitMethod, true);
- putNestedIfNotNull(beanParams, "nBoot",commandOptions.nBoot, true);
- putNestedIfNotEmpty(beanParams, "sigVersion",commandOptions.sigVersion, true);
- putNestedIfNotEmpty(beanParams, "organ",commandOptions.organ, true);
- putNestedIfNotNull(beanParams, "thresholdPerc",commandOptions.thresholdPerc, true);
- putNestedIfNotNull(beanParams, "thresholdPval",commandOptions.thresholdPval, true);
- putNestedIfNotNull(beanParams, "maxRareSigs",commandOptions.maxRareSigs, true);
- putNestedIfNotEmpty(beanParams, "signaturesFile",commandOptions.signaturesFile, true);
- putNestedIfNotEmpty(beanParams, "rareSignaturesFile",commandOptions.rareSignaturesFile, true);
+ putNestedIfNotNull(beanParams, "fitNBoot",commandOptions.fitNBoot, true);
+ putNestedIfNotEmpty(beanParams, "fitSigVersion",commandOptions.fitSigVersion, true);
+ putNestedIfNotEmpty(beanParams, "fitOrgan",commandOptions.fitOrgan, true);
+ putNestedIfNotNull(beanParams, "fitThresholdPerc",commandOptions.fitThresholdPerc, true);
+ putNestedIfNotNull(beanParams, "fitThresholdPval",commandOptions.fitThresholdPval, true);
+ putNestedIfNotNull(beanParams, "fitMaxRareSigs",commandOptions.fitMaxRareSigs, true);
+ putNestedIfNotEmpty(beanParams, "fitSignaturesFile",commandOptions.fitSignaturesFile, true);
+ putNestedIfNotEmpty(beanParams, "fitRareSignaturesFile",commandOptions.fitRareSignaturesFile, true);
+ putNestedIfNotEmpty(beanParams, "skip",commandOptions.skip, true);
putNestedIfNotEmpty(beanParams, "outdir",commandOptions.outdir, true);
mutationalSignatureAnalysisParams = JacksonUtils.getDefaultObjectMapper().copy()
@@ -1517,18 +1564,20 @@ private RestResponse runSampleQc() throws Exception {
putNestedIfNotEmpty(beanParams, "msId",commandOptions.msId, true);
putNestedIfNotEmpty(beanParams, "msDescription",commandOptions.msDescription, true);
putNestedIfNotEmpty(beanParams, "msQuery",commandOptions.msQuery, true);
+ putNestedIfNotEmpty(beanParams, "msFitId",commandOptions.msFitId, true);
putNestedIfNotEmpty(beanParams, "msFitMethod",commandOptions.msFitMethod, true);
- putNestedIfNotNull(beanParams, "msNBoot",commandOptions.msNBoot, true);
- putNestedIfNotEmpty(beanParams, "msSigVersion",commandOptions.msSigVersion, true);
- putNestedIfNotEmpty(beanParams, "msOrgan",commandOptions.msOrgan, true);
- putNestedIfNotNull(beanParams, "msThresholdPerc",commandOptions.msThresholdPerc, true);
- putNestedIfNotNull(beanParams, "msThresholdPval",commandOptions.msThresholdPval, true);
- putNestedIfNotNull(beanParams, "msMaxRareSigs",commandOptions.msMaxRareSigs, true);
- putNestedIfNotEmpty(beanParams, "msSignaturesFile",commandOptions.msSignaturesFile, true);
- putNestedIfNotEmpty(beanParams, "msRareSignaturesFile",commandOptions.msRareSignaturesFile, true);
+ putNestedIfNotNull(beanParams, "msFitNBoot",commandOptions.msFitNBoot, true);
+ putNestedIfNotEmpty(beanParams, "msFitSigVersion",commandOptions.msFitSigVersion, true);
+ putNestedIfNotEmpty(beanParams, "msFitOrgan",commandOptions.msFitOrgan, true);
+ putNestedIfNotNull(beanParams, "msFitThresholdPerc",commandOptions.msFitThresholdPerc, true);
+ putNestedIfNotNull(beanParams, "msFitThresholdPval",commandOptions.msFitThresholdPval, true);
+ putNestedIfNotNull(beanParams, "msFitMaxRareSigs",commandOptions.msFitMaxRareSigs, true);
+ putNestedIfNotEmpty(beanParams, "msFitSignaturesFile",commandOptions.msFitSignaturesFile, true);
+ putNestedIfNotEmpty(beanParams, "msFitRareSignaturesFile",commandOptions.msFitRareSignaturesFile, true);
putNestedIfNotEmpty(beanParams, "gpId",commandOptions.gpId, true);
putNestedIfNotEmpty(beanParams, "gpDescription",commandOptions.gpDescription, true);
putNestedIfNotEmpty(beanParams, "gpConfigFile",commandOptions.gpConfigFile, true);
+ putNestedIfNotEmpty(beanParams, "skip",commandOptions.skip, true);
putNestedIfNotEmpty(beanParams, "outdir",commandOptions.outdir, true);
sampleQcAnalysisParams = JacksonUtils.getDefaultObjectMapper().copy()
diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/CohortsCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/CohortsCommandExecutor.java
index ad136880929..89a9d6aa6d4 100644
--- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/CohortsCommandExecutor.java
+++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/CohortsCommandExecutor.java
@@ -29,7 +29,6 @@
import org.opencb.opencga.core.models.cohort.CohortCreateParams;
import org.opencb.opencga.core.models.cohort.CohortGenerateParams;
import org.opencb.opencga.core.models.cohort.CohortUpdateParams;
-import org.opencb.opencga.core.models.common.Enums.CohortType;
import org.opencb.opencga.core.models.common.Enums;
import org.opencb.opencga.core.models.common.StatusParams;
import org.opencb.opencga.core.models.common.TsvAnnotationParams;
@@ -269,7 +268,7 @@ private RestResponse