Skip to content

Commit

Permalink
Merge pull request #70 from cyenyxe/develop
Browse files Browse the repository at this point in the history
EVA-1190 Report studies browsable since given date

Added a query for `VariantStudySummary` objects by creation date, and modified the test dataset accordingly.
  • Loading branch information
jmmut authored May 2, 2018
2 parents 60d2a96 + 86fcc77 commit 02a5b5f
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import uk.ac.ebi.eva.commons.mongodb.entities.VariantSourceMongo;
import uk.ac.ebi.eva.commons.mongodb.entities.projections.VariantStudySummary;

import java.util.Date;
import java.util.List;

import static org.springframework.data.mongodb.core.aggregation.Aggregation.group;
Expand All @@ -45,8 +46,8 @@ public class VariantStudySummaryService {
/**
* the equivalent intended query is:
* db.files.aggregate([
* {$group:{_id: {studyId:"$sid",studyName:"$sname"}, filesCount:{$sum:1}}},
* {$project:{"studyId" : "$_id.studyId", "studyName" : "$_id.studyName", "_id" : 0, "filesCount":"$filesCount" }}
* {$group: {_id : {studyId : "$sid", studyName : "$sname"}, filesCount : {$sum : 1}}},
* {$project: {"studyId" : "$_id.studyId", "studyName" : "$_id.studyName", "_id" : 0, "filesCount":"$filesCount" }}
* ])
* See also the inner explanation of those 2 stages
*
Expand Down Expand Up @@ -89,6 +90,37 @@ private ProjectionOperation projectAndFlatten() {
.as(VariantStudySummary.STUDY_NAME);
}

/**
 * Lists the studies that own at least one file whose {@code date} field is on or after the given date.
 *
 * The equivalent intended query is:
 * db.files.aggregate([
 *     {$match: {date : {$gte : fromDate}}},
 *     {$group: {_id : {studyId : "$sid", studyName : "$sname"}, filesCount : {$sum : 1}}},
 *     {$project: {"studyId" : "$_id.studyId", "studyName" : "$_id.studyName", "_id" : 0, "filesCount":"$filesCount" }}
 * ])
 * The match stage runs first, so only the files that pass the date filter reach the grouping stage.
 * See also the inner explanation of each of the 3 stages.
 *
 * @param fromDate inclusive lower bound applied to the {@code date} field of each file
 * @return one summary per study that has files matching the filter
 * @see #matchByFromDate(Date)
 * @see #groupAndCount
 * @see #projectAndFlatten
 */
public List<VariantStudySummary> findByFromDate(Date fromDate) {
    // Pipeline order matters: filter by date, then group per study, then flatten the grouped id.
    return mongoTemplate
            .aggregate(
                    Aggregation.newAggregation(
                            matchByFromDate(fromDate),
                            groupAndCount(),
                            projectAndFlatten()),
                    VariantSourceMongo.class,
                    VariantStudySummary.class)
            .getMappedResults();
}

/**
 * Builds the {@code $match} stage that keeps the documents whose {@code date} field is greater than or
 * equal to the given date, i.e. {$match: {date : {$gte : fromDate}}}.
 *
 * @param fromDate inclusive lower bound for the {@code date} field
 */
private MatchOperation matchByFromDate(Date fromDate) {
    Criteria onOrAfterFromDate = Criteria.where("date").gte(fromDate);
    return match(onOrAfterFromDate);
}

/**
* the equivalent intended query is:
* db.files.aggregate([
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
import uk.ac.ebi.eva.commons.mongodb.configuration.MongoRepositoryTestConfiguration;
import uk.ac.ebi.eva.commons.mongodb.entities.projections.VariantStudySummary;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.util.Date;
import java.util.List;
import java.util.TimeZone;

import static com.lordofthejars.nosqlunit.mongodb.MongoDbRule.MongoDbRuleBuilder.newMongoDbRule;
Expand Down Expand Up @@ -98,6 +102,25 @@ public void testListStudies() {
assertEquals(EXPECTED_UNIQUE_STUDIES_COUNT, uniqueStudies.size());
}

/**
 * Checks that {@code findByFromDate} treats its argument as an inclusive lower bound, using dates
 * around the most recent file date in the test dataset (2018-04-23T18:25:43.511Z).
 */
@Test
public void testListStudiesByFromDate() throws ParseException {
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
    // The fixture dates are UTC instants. Parsing in the JVM default zone would shift the query
    // boundaries, e.g. midnight of 2018-04-24 in UTC+06:00 is still before the last fixture date,
    // which makes the "right after" assertion fail on machines in eastern time zones.
    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));

    // Every file in the dataset is dated 2016-01-01 or later.
    List<VariantStudySummary> allStudies = service.findByFromDate(dateFormat.parse("2015-12-31"));
    assertEquals(EXPECTED_UNIQUE_STUDIES_COUNT, allStudies.size());

    List<VariantStudySummary> studiesRightBeforeLastDate = service.findByFromDate(dateFormat.parse("2018-04-22"));
    assertEquals(1, studiesRightBeforeLastDate.size());

    // Midnight of the last date precedes the 18:25 timestamp of the newest files, so they still match.
    List<VariantStudySummary> studiesOnLastDate = service.findByFromDate(dateFormat.parse("2018-04-23"));
    assertEquals(1, studiesOnLastDate.size());

    List<VariantStudySummary> studiesRightAfterLastDate = service.findByFromDate(dateFormat.parse("2018-04-24"));
    assertEquals(0, studiesRightAfterLastDate.size());

    // A date that is always in the future, regardless of when the test runs.
    int nextYear = LocalDate.now().getYear() + 1;
    List<VariantStudySummary> futureStudies = service.findByFromDate(dateFormat.parse(nextYear + "-01-01"));
    assertEquals(0, futureStudies.size());
}

private void assertCorrectCount(int expectedFileCount, VariantStudySummary study) {
int buggedFongoCount = 0;
Expand Down
61 changes: 43 additions & 18 deletions variation-commons-mongodb/src/test/resources/test-data/files.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"stype": "COLLECTION",
"_class": "uk.ac.ebi.eva.commons.models.data.VariantSourceEntity",
"fid": "secondFileId",
"fname": "other.vcf.gz"
"fname": "other.vcf.gz",
"date": { "$date" : "2018-04-23T18:25:43.511Z" }
},
{
"sname": "secondStudyName",
Expand All @@ -18,7 +19,8 @@
"stype": "COLLECTION",
"_class": "uk.ac.ebi.eva.commons.models.data.VariantSourceEntity",
"fid": "thirdFileId",
"fname": "yet.another.vcf.gz"
"fname": "yet.another.vcf.gz",
"date": { "$date" : "2018-04-23T18:25:43.511Z" }
},
{
"sname": "UMCG Cardio GenePanel screening",
Expand All @@ -29,7 +31,8 @@
"stype": "CASE_CONTROL",
"fname": "CAR_Batch123.vcf.gz",
"samp": {},
"sid": "PRJEB6041"
"sid": "PRJEB6041",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sname": "firstStudyName",
Expand All @@ -40,6 +43,7 @@
"_class": "uk.ac.ebi.eva.commons.models.data.VariantSourceEntity",
"fid": "firstFileId",
"fname": "small.vcf.gz",
"date": { "$date" : "2016-01-01T18:25:43.511Z" },
"samp": {
"HG03802": 1460,
"NA19152": 2055,
Expand Down Expand Up @@ -2575,6 +2579,7 @@
},
"stype": "CASE_CONTROL",
"fname": "ALL.chr11.phase3_shapeit2_mvncall_integrated_v3plus_nounphased.rsID.genotypes.vcf.gz",
"date": { "$date" : "2016-01-01T18:25:43.511Z" },
"samp": {
"HG03802": 1460,
"NA19152": 2055,
Expand Down Expand Up @@ -5092,7 +5097,8 @@
"stype": "CASE_CONTROL",
"fname": "ESP6500SI-V2-SSA137.updatedRsIds.chr11.snps_indels.vcf.gz",
"samp": {},
"sid": "PRJEB5439"
"sid": "PRJEB5439",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sname": "Genome of the Netherlands Release 5",
Expand All @@ -5103,7 +5109,8 @@
"stype": "CASE_CONTROL",
"fname": "gonl.chr9.snps_indels.r5.vcf.gz",
"samp": {},
"sid": "PRJEB5829"
"sid": "PRJEB5829",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sname": "The National FINRISK Study",
Expand All @@ -5114,7 +5121,8 @@
"stype": "CASE_CONTROL",
"fname": "FINRISK.EVA.AN_FIXED.EVA.eva_1.vcf.gz",
"samp": {},
"sid": "PRJEB7895"
"sid": "PRJEB7895",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sname": "UK10K Avon Longitudinal Study of Parents and Children (ALSPAC) Variants",
Expand All @@ -5125,7 +5133,8 @@
"stype": "CASE_CONTROL",
"fname": "ALSPAC.beagle.anno.csq.shapeit.20131101.sites.vcf.gz",
"samp": {},
"sid": "PRJEB7217"
"sid": "PRJEB7217",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sname": "UK10K The Department of Twin Research and Genetic Epidemiology (TwinsUK) Variants",
Expand All @@ -5136,7 +5145,8 @@
"stype": "CASE_CONTROL",
"fname": "TWINSUK.beagle.anno.csq.shapeit.20131101.sites.vcf.gz",
"samp": {},
"sid": "PRJEB7218"
"sid": "PRJEB7218",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sname": "GEUVADIS: Genetic European Variation in Disease",
Expand All @@ -5147,7 +5157,8 @@
"stype": "CASE_CONTROL",
"fname": "GEEVS_aggregation_v2.EVA.eva_1.vcf.gz",
"samp": {},
"sid": "PRJEB6042"
"sid": "PRJEB6042",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sname": "Genome of the Netherlands Release 5",
Expand All @@ -5158,7 +5169,8 @@
"stype": "CASE_CONTROL",
"fname": "gonl.chr11.snps_indels.r5.vcf.gz",
"samp": {},
"sid": "PRJEB5829"
"sid": "PRJEB5829",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sname": "MGP: Medical Genome Project healthy controls from Spanish population",
Expand All @@ -5169,7 +5181,8 @@
"stype": "CASE_CONTROL",
"fname": "spv.fixed.V2.vcf.gz",
"samp": {},
"sid": "PRJEB8705"
"sid": "PRJEB8705",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sname": "1000 Genomes Phase 1 Analysis",
Expand All @@ -5189,6 +5202,7 @@
},
"stype": "CASE_CONTROL",
"fname": "ALL.chr11.integrated_phase1_v3.20101123.snps_indels_svs.genotypes.vcf.gz",
"date": { "$date" : "2016-01-01T18:25:43.511Z" },
"samp": {
"NA19152": 754,
"NA18543": 529,
Expand Down Expand Up @@ -6294,7 +6308,8 @@
"stype": "CASE_CONTROL",
"fname": "gonl.SV.r5.eva_1.vcf.gz",
"samp": {},
"sid": "PRJEB5829"
"sid": "PRJEB5829",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sname": "GenomeDK Release 1 GATK-Indels calls",
Expand All @@ -6305,7 +6320,8 @@
"stype": "CASE_CONTROL",
"fname": "genomedk.chr11.gatk_indels.r1.EVA.vcf.gz",
"samp": {},
"sid": "PRJEB8652"
"sid": "PRJEB8652",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sname": "GenomeDK Release 1 SoapAsmVar-Indels calls",
Expand All @@ -6316,7 +6332,8 @@
"stype": "CASE_CONTROL",
"fname": "genomedk.chr11.asmvar_indels.r1.EVA.vcf.gz",
"samp": {},
"sid": "PRJEB8639"
"sid": "PRJEB8639",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sname": "The Exome Aggregation Consortium (ExAC) v0.3",
Expand All @@ -6327,7 +6344,8 @@
"stype": "COLLECTION",
"fname": "ExAC.r0.3.sites.vep.fixed.V3.vcf.gz",
"samp": {},
"sid": "PRJEB8661"
"sid": "PRJEB8661",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sname": "Human Variation Data From dbSNP build 144",
Expand All @@ -6338,7 +6356,8 @@
"stype": "COLLECTION",
"fname": "dbsnp_chr2_20150715_1700.vcf.gz",
"samp": {},
"sid": "PRJX00001"
"sid": "PRJX00001",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sname": "Human Variation Data From dbSNP build 144",
Expand All @@ -6349,7 +6368,8 @@
"stype": "COLLECTION",
"fname": "dbsnp_chr9_20150715_1700.vcf.gz",
"samp": {},
"sid": "PRJX00001"
"sid": "PRJX00001",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sname": "Human Variation Data From dbSNP build 144",
Expand All @@ -6360,7 +6380,8 @@
"stype": "COLLECTION",
"fname": "dbsnp_chr11_20150715_1700.vcf.gz",
"samp": {},
"sid": "PRJX00001"
"sid": "PRJX00001",
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
},
{
"sid": "PRJNA289433",
Expand All @@ -6370,6 +6391,7 @@
"_class": "uk.ac.ebi.eva.commons.models.data.VariantSourceEntity",
"stype": "COLLECTION",
"fname": "chr13.allImputed.EVA.garys.vcf.gz",
"date": { "$date" : "2016-01-01T18:25:43.511Z" },
"samp": {
"MD_CHW_AAR_13084": 7616,
"MD_CHW_AAM_6902": 5625,
Expand Down Expand Up @@ -17014,6 +17036,7 @@
},
"fid": "ERZ367948",
"sname": "Study of Major Depression in Chinese women",
"date": { "$date" : "2016-01-01T18:25:43.511Z" },
"aggregation": "NONE",
"st": {
"nVar": 1203645,
Expand Down Expand Up @@ -27679,6 +27702,7 @@
"fid": "ERZ367955",
"sname": "Study of Major Depression in Chinese women",
"aggregation": "NONE",
"date": { "$date" : "2016-01-01T18:25:43.511Z" },
"st": {
"nVar": 1203645,
"nTi": 824182,
Expand All @@ -27704,6 +27728,7 @@
},
"fid": "ERZ329750",
"sname": "Illumina Platinum Genomes calls for NA12877 and NA12878 against GRCh37",
"date": { "$date" : "2016-01-01T18:25:43.511Z" },
"aggregation": "NONE",
"st": {
"nVar": 5494956,
Expand Down

0 comments on commit 02a5b5f

Please sign in to comment.