Skip to content

Commit

Permalink
Merge pull request #47 from cBioPortal/rfc79-feedback
Browse files Browse the repository at this point in the history
RFC79: Implement feedback from previous 7 PRs
  • Loading branch information
forus authored Jun 19, 2024
2 parents d15c579 + 96acec5 commit d081f8f
Show file tree
Hide file tree
Showing 41 changed files with 410 additions and 550 deletions.
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,8 @@ To execute an incremental upload, use the -d (or --data_directory) option instea
docker run -it -v $(pwd)/data/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core python importer/metaImport.py -d /data/study_es_0_inc -p /data/api_json -o
```
**Note:**
While the directory should adhere to the standard cBioPortal file formats and study structure, please note the following specific guidelines for incremental uploads:

- Incremental uploads are not supported for all data types. For instance, uploading study metadata, resources, or GSVA data incrementally is currently unsupported.
- The data pertaining to patient or sample IDs should only include entries that are either new or need updates.
While the directory should adhere to the standard cBioPortal file formats and study structure, note that incremental uploads are not supported for all data types.
For instance, uploading study metadata, resources, or GSVA data incrementally is currently unsupported.

This method ensures efficient updates without the need for complete study reuploads, saving time and computational resources.

Expand Down
2 changes: 1 addition & 1 deletion scripts/importer/cbioportal_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ class MetaFileTypes(object):
},
}

# in the order they should be loaded
# order is important! This is the order in which they should be loaded:
INCREMENTAL_UPLOAD_SUPPORTED_META_TYPES = [
MetaFileTypes.PATIENT_ATTRIBUTES,
MetaFileTypes.SAMPLE_ATTRIBUTES,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,10 +211,7 @@ public static void deleteByPatientId(int patientId) throws DaoException {
try {
con = JdbcUtil.getDbConnection(DaoClinicalEvent.class);

pstmt = con.prepareStatement("DELETE clinical_event, clinical_event_data" +
" FROM clinical_event" +
" LEFT JOIN clinical_event_data ON clinical_event_data.CLINICAL_EVENT_ID = clinical_event.CLINICAL_EVENT_ID" +
" WHERE clinical_event.PATIENT_ID = ?");
pstmt = con.prepareStatement("DELETE FROM clinical_event WHERE clinical_event.PATIENT_ID = ?");
pstmt.setInt(1, patientId);
pstmt.executeUpdate();
} catch (SQLException e) {
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoCnaEvent.java
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ public static void removeSampleCnaEvents(int cnaProfileId, List<Integer> sampleI
("DELETE sample_cna_event, alteration_driver_annotation" +
" FROM sample_cna_event" +
" LEFT JOIN alteration_driver_annotation ON alteration_driver_annotation.`ALTERATION_EVENT_ID` = sample_cna_event.`CNA_EVENT_ID`" +
" AND alteration_driver_annotation.`SAMPLE_ID` = sample_cna_event.`SAMPLE_ID`" +
" AND alteration_driver_annotation.`GENETIC_PROFILE_ID` = sample_cna_event.`GENETIC_PROFILE_ID`" +
" WHERE sample_cna_event.`GENETIC_PROFILE_ID` = ? AND sample_cna_event.`SAMPLE_ID` IN (" +
String.join(",", Collections.nCopies(sampleIds.size(), "?"))
+ ")");
Expand Down
7 changes: 4 additions & 3 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoGeneOptimized.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.mskcc.cbio.portal.model.CanonicalGene;
import org.mskcc.cbio.portal.util.EntrezValidator;
import org.mskcc.cbio.portal.util.DataValidator;
import org.mskcc.cbio.portal.util.ProgressMonitor;
import org.mskcc.cbio.portal.util.TsvUtil;

/**
* A Utility Class that speeds access to Gene Info.
Expand Down Expand Up @@ -91,7 +92,7 @@ private synchronized void fillCache() {
if (line.startsWith("#")) {
continue;
}
String[] parts = line.trim().split("\t",-1);
String[] parts = TsvUtil.splitTsvLine(line);
CanonicalGene gene = getGene(Long.parseLong(parts[1]));
if (gene==null) {
ProgressMonitor.logWarning(line+" in config file [resources" + GENE_SYMBOL_DISAMBIGUATION_FILE +
Expand Down Expand Up @@ -323,7 +324,7 @@ public List<CanonicalGene> guessGene(String geneId, String chr) {
}

CanonicalGene gene;
if (EntrezValidator.isaValidEntrezId(geneId)) { // likely to be a entrez gene id
if (DataValidator.isValidNumericSequence(geneId)) { // likely to be a entrez gene id
gene = getGene(Integer.parseInt(geneId));
if (gene!=null) {
return Collections.singletonList(gene);
Expand Down
22 changes: 16 additions & 6 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticAlteration.java
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,20 @@ public HashMap<Integer,HashMap<Integer, String>> getGeneticAlterationMapForEntit
HashMap<Integer, String> mapSampleValue = new HashMap<Integer, String>();
int geneticEntityId = rs.getInt("GENETIC_ENTITY_ID");
String values = rs.getString("VALUES");
//hm.debug..
String valueParts[] = values.split(DELIM, -1);
String[] valueParts = values.split(DELIM, -1);
int valuesLength = valueParts.length;
boolean hasMeaninglessTrailingDelimiter = valuesLength - orderedSampleList.size() == 1 && valueParts[valuesLength - 1].isEmpty();
if (hasMeaninglessTrailingDelimiter) {
// adjust value length to account for the trailing delimiter
valuesLength -= 1;
}
if (valuesLength != orderedSampleList.size()) {
throw new IllegalStateException(
"Data inconsistency detected: The length of the values for genetic profile with Id = "
+ geneticProfileId + " and genetic entity with ID = " + geneticEntityId
+ " (" + valuesLength + " elements) does not match the expected length of the sample list ("
+ orderedSampleList.size() + " elements).");
}
for (int i = 0; i < orderedSampleList.size(); i++) {
String value = valueParts[i];
Integer sampleId = orderedSampleList.get(i);
Expand Down Expand Up @@ -429,19 +441,17 @@ public int getCount() throws DaoException {
* Deletes all Genetic Alteration Records associated with the specified Genetic Profile ID.
*
* @param geneticProfileId Genetic Profile ID.
* @param geneticEntityId Genetic Entity ID.
* @throws DaoException Database Error.
*/
public void deleteAllRecordsInGeneticProfile(long geneticProfileId, long geneticEntityId) throws DaoException {
public void deleteAllRecordsInGeneticProfile(long geneticProfileId) throws DaoException {
Connection con = null;
PreparedStatement pstmt = null;
ResultSet rs = null;
try {
con = JdbcUtil.getDbConnection(DaoGeneticAlteration.class);
pstmt = con.prepareStatement("DELETE from " +
"genetic_alteration WHERE GENETIC_PROFILE_ID=? and GENETIC_ENTITY_ID=?");
"genetic_alteration WHERE GENETIC_PROFILE_ID=?");
pstmt.setLong(1, geneticProfileId);
pstmt.setLong(2, geneticEntityId);
pstmt.executeUpdate();
} catch (SQLException e) {
throw new DaoException(e);
Expand Down
121 changes: 40 additions & 81 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoSampleProfile.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,22 @@

package org.mskcc.cbio.portal.dao;

import org.mskcc.cbio.portal.model.*;

import org.apache.commons.lang3.StringUtils;

import java.sql.*;
import java.util.*;
import org.mskcc.cbio.portal.model.CancerStudy;
import org.mskcc.cbio.portal.model.GeneticProfile;
import org.mskcc.cbio.portal.model.Sample;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;

/**
* Data access object for sample_profile table
Expand All @@ -50,99 +60,48 @@ public final class DaoSampleProfile {
private DaoSampleProfile() {}

private static final int NO_SUCH_PROFILE_ID = -1;
private static final String TABLE_NAME = "sample_profile";

public static int addSampleProfile(Integer sampleId, Integer geneticProfileId, Integer panelId) throws DaoException {
if (MySQLbulkLoader.isBulkLoad()) {

// Add new record using bulk loader. Order of fields is:
// 1. sample ID
// 2. genetic Profile ID
// 3. gene panel ID
if (panelId != null) {
MySQLbulkLoader.getMySQLbulkLoader(TABLE_NAME).insertRecord(
Integer.toString(sampleId),
Integer.toString(geneticProfileId),
Integer.toString(panelId));
} else {
MySQLbulkLoader.getMySQLbulkLoader(TABLE_NAME).insertRecord(
Integer.toString(sampleId),
Integer.toString(geneticProfileId),
null);
}

return 1;
}
/**
 * Inserts or updates sample_profile rows for every sample in {@code sampleIds},
 * all sharing the same genetic profile and (possibly null) gene panel.
 * Convenience overload that wraps each sample id in a {@link SampleProfileTuple}
 * and delegates to the batch upsert.
 *
 * @param sampleIds        internal sample ids to link to the profile
 * @param geneticProfileId internal genetic profile id applied to every sample
 * @param panelId          gene panel id shared by every sample; may be null
 * @throws DaoException on database error from the delegated batch upsert
 */
public static void upsertSampleProfiles(Collection<Integer> sampleIds, Integer geneticProfileId, Integer panelId) throws DaoException {
    List<SampleProfileTuple> tuples = new ArrayList<>(sampleIds.size());
    for (Integer sampleId : sampleIds) {
        tuples.add(new SampleProfileTuple(geneticProfileId, sampleId, panelId));
    }
    upsertSampleProfiles(tuples);
}

// Add new record without using bulk loader
Connection con = null;
PreparedStatement pstmt = null;
ResultSet rs = null;
public record SampleProfileTuple(int geneticProfileId, int sampleId, Integer panelId) {}

try {
con = JdbcUtil.getDbConnection(DaoSampleProfile.class);
pstmt = con.prepareStatement
("INSERT INTO sample_profile (`SAMPLE_ID`, `GENETIC_PROFILE_ID`, `PANEL_ID`) VALUES (?,?,?)");
pstmt.setInt(1, sampleId);
pstmt.setInt(2, geneticProfileId);
if (panelId != null) {
pstmt.setInt(3, panelId);
}
else {
pstmt.setNull(3, java.sql.Types.INTEGER);
}
return pstmt.executeUpdate();
} catch (NullPointerException | SQLException e) {
throw new DaoException(e);
} finally {
JdbcUtil.closeAll(DaoSampleProfile.class, con, pstmt, rs);
public static void upsertSampleProfiles(Collection<SampleProfileTuple> idTuples) throws DaoException {
if (idTuples.isEmpty()) {
return;
}
}

public static void updateSampleProfile(Integer sampleId, Integer geneticProfileId, Integer panelId) throws DaoException {
/**
* Update a record in the sample_profile table when adding gene panel field from the sample profile matrix.
* Can not use the bulk loader, because the sample might already be added, which requires an UPDATE of the
* record.
*/
Connection con = null;
PreparedStatement pstmt = null;
ResultSet rs = null;

try {
con = JdbcUtil.getDbConnection(DaoSampleProfile.class);
if (!sampleExistsInGeneticProfile(sampleId, geneticProfileId)) {

pstmt = con.prepareStatement
("INSERT INTO sample_profile (`SAMPLE_ID`, `GENETIC_PROFILE_ID`, `PANEL_ID`) VALUES (?,?,?)");
pstmt.setInt(1, sampleId);
pstmt.setInt(2, geneticProfileId);
if (panelId != null) {
pstmt.setInt(3, panelId);
} else {
pstmt.setNull(3, java.sql.Types.INTEGER);
}
} else {
pstmt = con.prepareStatement
("UPDATE `sample_profile` SET `PANEL_ID` = ? WHERE (`SAMPLE_ID` = ? AND `GENETIC_PROFILE_ID` = ?)");
if (panelId != null) {
pstmt.setInt(1, panelId);

pstmt = con.prepareStatement
("INSERT INTO sample_profile (`SAMPLE_ID`, `GENETIC_PROFILE_ID`, `PANEL_ID`)" +
" VALUES" +
String.join(",", Collections.nCopies(idTuples.size(), " (?,?,?)")) +
" ON DUPLICATE KEY UPDATE `PANEL_ID` = VALUES(`PANEL_ID`);");
int parameterIndex = 1;
for (SampleProfileTuple idTuple : idTuples) {
pstmt.setInt(parameterIndex++, idTuple.sampleId());
pstmt.setInt(parameterIndex++, idTuple.geneticProfileId());
if (idTuple.panelId() != null) {
pstmt.setInt(parameterIndex, idTuple.panelId());
} else {
pstmt.setNull(1, java.sql.Types.INTEGER);
pstmt.setNull(parameterIndex, java.sql.Types.INTEGER);
}
pstmt.setInt(2, sampleId);
pstmt.setInt(3, geneticProfileId);
parameterIndex++;
}
pstmt.executeUpdate();
} catch (NullPointerException e) {
throw new DaoException(e);
} catch (SQLException e) {
throw new DaoException(e);
} finally {
JdbcUtil.closeAll(DaoSampleProfile.class, con, pstmt, rs);
JdbcUtil.closeAll(DaoSampleProfile.class, con, pstmt, null);
}
}

public static boolean sampleExistsInGeneticProfile(int sampleId, int geneticProfileId)
throws DaoException {
Connection con = null;
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/org/mskcc/cbio/portal/dao/JdbcUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,12 @@ public class JdbcUtil {
public static DataSource getDataSource() {
if (dataSource == null) {
dataSource = new TransactionAwareDataSourceProxy(new JdbcDataSource());
initSpringTx();
setupTransactionManagement();
}
return dataSource;
}

private static void initSpringTx() {
/**
 * (Re)initializes Spring transaction management around the current data source:
 * builds a DataSourceTransactionManager over {@code dataSource} and wraps it in
 * a TransactionTemplate. Must run after {@code dataSource} is assigned, since
 * the manager captures it; invoked when the data source is created or replaced.
 */
private static void setupTransactionManagement() {
    transactionManager = new DataSourceTransactionManager(dataSource);
    transactionTemplate = new TransactionTemplate(transactionManager);
}
Expand All @@ -78,7 +78,7 @@ private static void initSpringTx() {
*/
public static void setDataSource(DataSource value) {
dataSource = value;
initSpringTx();
setupTransactionManagement();
}

public static TransactionTemplate getTransactionTemplate() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.mskcc.cbio.portal.util.ConsoleUtil;
import org.mskcc.cbio.portal.util.FileUtil;
import org.mskcc.cbio.portal.util.ProgressMonitor;
import org.mskcc.cbio.portal.util.TsvUtil;

import java.io.BufferedReader;
import java.io.BufferedWriter;
Expand Down Expand Up @@ -68,7 +69,7 @@ public void convert() throws IOException {
ProgressMonitor.incrementCurValue();
ConsoleUtil.showProgress();
if (!line.startsWith("#")) {
String parts[] = line.split("\t",-1);
String parts[] = TsvUtil.splitTsvLine(line);
if (parts.length<8) {
System.err.println("Wrong line in cosmic: "+line);
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.mskcc.cbio.portal.util.ConsoleUtil;
import org.mskcc.cbio.portal.util.FileUtil;
import org.mskcc.cbio.portal.dao.*;
import org.mskcc.cbio.portal.util.TsvUtil;

import java.io.File;
import java.io.IOException;
Expand Down Expand Up @@ -109,7 +110,7 @@ private HashSet getExcludedCases() throws IOException {

HashSet excludedCaseSet = new HashSet();
while (line != null) {
if (!line.startsWith("#") && line.trim().length() > 0) {
if (TsvUtil.isDataLine(line)) {
String parts[] = line.split("\t");
excludedCaseSet.add(parts[0]);
}
Expand Down
Loading

0 comments on commit d081f8f

Please sign in to comment.