Skip to content

Commit

Permalink
Fix violin plot sample filtering and counting issues (#11122)
Browse files Browse the repository at this point in the history
* Use samples filtered without the numerical clinical data filter when converting patient clinical data to sample clinical data
* Make clinical data sample/patient IDs match legacy IDs, and use stable IDs to count samples when the internal ID is null
  • Loading branch information
onursumer authored Oct 28, 2024
1 parent d4fece4 commit 219d894
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public DensityPlotData getDensityPlotData(List<ClinicalData> sampleClinicalData,
result.setBins(new ArrayList<>());

Map<String, List<ClinicalData>> clinicalDataGroupedBySampleId = sampleClinicalData.stream().
collect(Collectors.groupingBy(ClinicalData::getSampleId));
collect(Collectors.groupingBy(c -> c.getStudyId() + "_" + c.getSampleId()));

List<ClinicalData> extractedXYClinicalData = clinicalDataGroupedBySampleId.entrySet().stream()
.filter(entry -> entry.getValue().size() == 2 &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@ public ClinicalViolinPlotData getClinicalViolinPlotData(
result.setRows(new ArrayList<>());

// collect filtered samples into a set for quick lookup
Set<Integer> samplesForSampleCountsIds =
Set<String> samplesForSampleCountsIds =
samplesForSampleCounts.stream()
.map(Sample::getInternalId)
.map(s -> s.getInternalId() == null ?
s.getCancerStudyIdentifier() + "_" + s.getStableId(): s.getInternalId().toString()
)
.collect(Collectors.toSet());

// clinicalDataMap is a map sampleId->studyId->data
Expand Down Expand Up @@ -214,12 +216,14 @@ public ClinicalViolinPlotData getClinicalViolinPlotData(

@SafeVarargs
private static int countFilteredSamples(
Set<Integer> filteredSampleIds,
Set<String> filteredSampleIds,
List<ClinicalData>... dataLists
) {
return (int) Arrays.stream(dataLists)
.flatMap(Collection::stream)
.map(ClinicalData::getInternalId)
.map(c -> c.getInternalId() == null ?
c.getStudyId() + "_" + c.getSampleId() : c.getInternalId().toString()
)
.filter(filteredSampleIds::contains)
.count();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -343,9 +343,12 @@ public ResponseEntity<ClinicalViolinPlotData> fetchClinicalDataViolinPlots(
@Parameter(required = true, description = "Study view filter")
@Valid @RequestBody(required = false) StudyViewFilter studyViewFilter) {

// fetch the samples by using the provided study view filter
List<Sample> filteredSamples = studyViewColumnarService.getFilteredSamples(interceptedStudyViewFilter);

// get samples that are filtered without the numerical filter - this is violin plot data
// remove the numerical clinical data filter from the study view filter.
// this new modified filter is used to fetch sample and patient clinical data.
// this is required to get the complete violin plot data.
if (interceptedStudyViewFilter.getClinicalDataFilters() != null) {
interceptedStudyViewFilter.getClinicalDataFilters().stream()
.filter(f->f.getAttributeId().equals(numericalAttributeId))
Expand All @@ -362,11 +365,22 @@ public ResponseEntity<ClinicalViolinPlotData> fetchClinicalDataViolinPlots(
List<ClinicalData> patientClinicalDataList = filterNonEmptyClinicalData(
studyViewColumnarService.getPatientClinicalData(interceptedStudyViewFilter, attributeIds)
);

List<ClinicalData> combinedClinicalDataList = Stream.concat(
sampleClinicalDataList.stream(),
convertPatientClinicalDataToSampleClinicalData(patientClinicalDataList, filteredSamples).stream()
).toList();

List<ClinicalData> combinedClinicalDataList;
if (patientClinicalDataList.isEmpty()) {
combinedClinicalDataList = sampleClinicalDataList;
} else {
// we previously fetched sample and patient clinical data with the modified study view filter,
// however filteredSamples reflects only the original unmodified study view filter.
// we need to fetch samples again to get the samples corresponding to this modified filter,
// otherwise patient to sample mapping may be incomplete.
List<Sample> samplesWithoutNumericalFilter = studyViewColumnarService.getFilteredSamples(interceptedStudyViewFilter);

combinedClinicalDataList = Stream.concat(
sampleClinicalDataList.stream(),
convertPatientClinicalDataToSampleClinicalData(patientClinicalDataList, samplesWithoutNumericalFilter).stream()
).toList();
}

// Only mutation count can use log scale
boolean useLogScale = logScale && numericalAttributeId.equals("MUTATION_COUNT");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ public static List<ClinicalData> filterNonEmptyClinicalData(List<ClinicalData> c

public static List<ClinicalData> convertPatientClinicalDataToSampleClinicalData(
List<ClinicalData> patientClinicalDataList,
List<Sample> filteredSamples
List<Sample> samplesWithoutNumericalFilter
) {
List<ClinicalData> sampleClinicalDataList = new ArrayList<>();

Map<String, Map<String, List<Sample>>> patientToSamples = filteredSamples
Map<String, Map<String, List<Sample>>> patientToSamples = samplesWithoutNumericalFilter
.stream()
.collect(Collectors.groupingBy(
s -> s.getCancerStudyIdentifier() + "_" + s.getPatientStableId(),
Sample::getPatientStableId,
Collectors.groupingBy(Sample::getCancerStudyIdentifier)
));

Expand All @@ -41,7 +41,7 @@ public static List<ClinicalData> convertPatientClinicalDataToSampleClinicalData(
newData.setPatientId(d.getPatientId());
newData.setStudyId(d.getStudyId());
newData.setAttrValue(d.getAttrValue());
newData.setSampleId(s.getCancerStudyIdentifier() + "_" + s.getStableId());
newData.setSampleId(s.getStableId());

sampleClinicalDataList.add(newData);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@

<select id="getSampleClinicalDataFromStudyViewFilter" resultType="org.cbioportal.model.ClinicalData">
SELECT
sample_unique_id as sampleId,
patient_unique_id as patientId,
replaceOne(sample_unique_id, concat(cancer_study_identifier, '_'), '') as sampleId,
replaceOne(patient_unique_id, concat(cancer_study_identifier, '_'), '') as patientId,
attribute_name as attrId,
attribute_value as attrValue,
cancer_study_identifier as studyId
Expand All @@ -113,7 +113,7 @@

<select id="getPatientClinicalDataFromStudyViewFilter" resultType="org.cbioportal.model.ClinicalData">
SELECT
patient_unique_id as patientId,
replaceOne(patient_unique_id, concat(cancer_study_identifier, '_'), '') as patientId,
attribute_name as attrId,
attribute_value as attrValue,
cancer_study_identifier as studyId
Expand Down

0 comments on commit 219d894

Please sign in to comment.