Skip to content

Commit 02a5b5f

Browse files
authored
Merge pull request #70 from cyenyxe/develop
EVA-1190 Report studies browsable since given date. Added query of `VariantStudySummary` objects by creation date; test dataset modified accordingly.
2 parents 60d2a96 + 86fcc77 commit 02a5b5f

File tree

3 files changed

+100
-20
lines changed

3 files changed

+100
-20
lines changed

variation-commons-mongodb/src/main/java/uk/ac/ebi/eva/commons/mongodb/services/VariantStudySummaryService.java

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import uk.ac.ebi.eva.commons.mongodb.entities.VariantSourceMongo;
2828
import uk.ac.ebi.eva.commons.mongodb.entities.projections.VariantStudySummary;
2929

30+
import java.util.Date;
3031
import java.util.List;
3132

3233
import static org.springframework.data.mongodb.core.aggregation.Aggregation.group;
@@ -45,8 +46,8 @@ public class VariantStudySummaryService {
4546
/**
4647
* the equivalent intended query is:
4748
* db.files.aggregate([
48-
* {$group:{_id: {studyId:"$sid",studyName:"$sname"}, filesCount:{$sum:1}}},
49-
* {$project:{"studyId" : "$_id.studyId", "studyName" : "$_id.studyName", "_id" : 0, "filesCount":"$filesCount" }}
49+
* {$group: {_id : {studyId : "$sid", studyName : "$sname"}, filesCount : {$sum : 1}}},
50+
* {$project: {"studyId" : "$_id.studyId", "studyName" : "$_id.studyName", "_id" : 0, "filesCount":"$filesCount" }}
5051
* ])
5152
* See also the inner explanation of those 2 stages
5253
*
@@ -89,6 +90,37 @@ private ProjectionOperation projectAndFlatten() {
8990
.as(VariantStudySummary.STUDY_NAME);
9091
}
9192

93+
/**
94+
* the equivalent intended query is:
95+
* db.files.aggregate([
96+
* {$match: {date : {$gte : fromDate}}},
97+
* {$group: {_id : {studyId : "$sid", studyName : "$sname"}, filesCount : {$sum : 1}}},
98+
* {$project: {"studyId" : "$_id.studyId", "studyName" : "$_id.studyName", "_id" : 0, "filesCount":"$filesCount" }}
99+
* ])
100+
* See also the inner explanation of those 3 stages
101+
*
102+
* @see #matchByFromDate(Date)
103+
* @see #groupAndCount
104+
* @see #projectAndFlatten
105+
*/
106+
public List<VariantStudySummary> findByFromDate(Date fromDate) {
107+
Aggregation aggregation = Aggregation.newAggregation(
108+
matchByFromDate(fromDate),
109+
groupAndCount(),
110+
projectAndFlatten()
111+
);
112+
113+
AggregationResults<VariantStudySummary> studies = mongoTemplate.aggregate(aggregation,
114+
VariantSourceMongo.class,
115+
VariantStudySummary.class);
116+
117+
return studies.getMappedResults();
118+
}
119+
120+
private MatchOperation matchByFromDate(Date fromDate) {
121+
return match(Criteria.where("date").gte(fromDate));
122+
}
123+
92124
/**
93125
* the equivalent intended query is:
94126
* db.files.aggregate([

variation-commons-mongodb/src/test/java/uk/ac/ebi/eva/commons/mongodb/services/VariantStudySummaryServiceTest.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@
3232
import uk.ac.ebi.eva.commons.mongodb.configuration.MongoRepositoryTestConfiguration;
3333
import uk.ac.ebi.eva.commons.mongodb.entities.projections.VariantStudySummary;
3434

35+
import java.text.ParseException;
36+
import java.text.SimpleDateFormat;
37+
import java.time.LocalDate;
38+
import java.util.Date;
3539
import java.util.List;
3640

3741
import static com.lordofthejars.nosqlunit.mongodb.MongoDbRule.MongoDbRuleBuilder.newMongoDbRule;
@@ -98,6 +102,25 @@ public void testListStudies() {
98102
assertEquals(EXPECTED_UNIQUE_STUDIES_COUNT, uniqueStudies.size());
99103
}
100104

105+
@Test
106+
public void testListStudiesByFromDate() throws ParseException {
107+
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
108+
List<VariantStudySummary> allStudies = service.findByFromDate(dateFormat.parse("2015-12-31"));
109+
assertEquals(EXPECTED_UNIQUE_STUDIES_COUNT, allStudies.size());
110+
111+
List<VariantStudySummary> studiesRightBeforeLastDate = service.findByFromDate(dateFormat.parse("2018-04-22"));
112+
assertEquals(1, studiesRightBeforeLastDate.size());
113+
114+
List<VariantStudySummary> studiestOnLastDate = service.findByFromDate(dateFormat.parse("2018-04-23"));
115+
assertEquals(1, studiestOnLastDate.size());
116+
117+
List<VariantStudySummary> studiesRightAfterLastDate = service.findByFromDate(dateFormat.parse("2018-04-24"));
118+
assertEquals(0, studiesRightAfterLastDate.size());
119+
120+
int nextYear = LocalDate.now().getYear()+1;
121+
List<VariantStudySummary> futureStudies = service.findByFromDate(dateFormat.parse(nextYear + "-01-01"));
122+
assertEquals(0, futureStudies.size());
123+
}
101124

102125
private void assertCorrectCount(int expectedFileCount, VariantStudySummary study) {
103126
int buggedFongoCount = 0;

variation-commons-mongodb/src/test/resources/test-data/files.json

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
"stype": "COLLECTION",
99
"_class": "uk.ac.ebi.eva.commons.models.data.VariantSourceEntity",
1010
"fid": "secondFileId",
11-
"fname": "other.vcf.gz"
11+
"fname": "other.vcf.gz",
12+
"date": { "$date" : "2018-04-23T18:25:43.511Z" }
1213
},
1314
{
1415
"sname": "secondStudyName",
@@ -18,7 +19,8 @@
1819
"stype": "COLLECTION",
1920
"_class": "uk.ac.ebi.eva.commons.models.data.VariantSourceEntity",
2021
"fid": "thirdFileId",
21-
"fname": "yet.another.vcf.gz"
22+
"fname": "yet.another.vcf.gz",
23+
"date": { "$date" : "2018-04-23T18:25:43.511Z" }
2224
},
2325
{
2426
"sname": "UMCG Cardio GenePanel screening",
@@ -29,7 +31,8 @@
2931
"stype": "CASE_CONTROL",
3032
"fname": "CAR_Batch123.vcf.gz",
3133
"samp": {},
32-
"sid": "PRJEB6041"
34+
"sid": "PRJEB6041",
35+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
3336
},
3437
{
3538
"sname": "firstStudyName",
@@ -40,6 +43,7 @@
4043
"_class": "uk.ac.ebi.eva.commons.models.data.VariantSourceEntity",
4144
"fid": "firstFileId",
4245
"fname": "small.vcf.gz",
46+
"date": { "$date" : "2016-01-01T18:25:43.511Z" },
4347
"samp": {
4448
"HG03802": 1460,
4549
"NA19152": 2055,
@@ -2575,6 +2579,7 @@
25752579
},
25762580
"stype": "CASE_CONTROL",
25772581
"fname": "ALL.chr11.phase3_shapeit2_mvncall_integrated_v3plus_nounphased.rsID.genotypes.vcf.gz",
2582+
"date": { "$date" : "2016-01-01T18:25:43.511Z" },
25782583
"samp": {
25792584
"HG03802": 1460,
25802585
"NA19152": 2055,
@@ -5092,7 +5097,8 @@
50925097
"stype": "CASE_CONTROL",
50935098
"fname": "ESP6500SI-V2-SSA137.updatedRsIds.chr11.snps_indels.vcf.gz",
50945099
"samp": {},
5095-
"sid": "PRJEB5439"
5100+
"sid": "PRJEB5439",
5101+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
50965102
},
50975103
{
50985104
"sname": "Genome of the Netherlands Release 5",
@@ -5103,7 +5109,8 @@
51035109
"stype": "CASE_CONTROL",
51045110
"fname": "gonl.chr9.snps_indels.r5.vcf.gz",
51055111
"samp": {},
5106-
"sid": "PRJEB5829"
5112+
"sid": "PRJEB5829",
5113+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
51075114
},
51085115
{
51095116
"sname": "The National FINRISK Study",
@@ -5114,7 +5121,8 @@
51145121
"stype": "CASE_CONTROL",
51155122
"fname": "FINRISK.EVA.AN_FIXED.EVA.eva_1.vcf.gz",
51165123
"samp": {},
5117-
"sid": "PRJEB7895"
5124+
"sid": "PRJEB7895",
5125+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
51185126
},
51195127
{
51205128
"sname": "UK10K Avon Longitudinal Study of Parents and Children (ALSPAC) Variants",
@@ -5125,7 +5133,8 @@
51255133
"stype": "CASE_CONTROL",
51265134
"fname": "ALSPAC.beagle.anno.csq.shapeit.20131101.sites.vcf.gz",
51275135
"samp": {},
5128-
"sid": "PRJEB7217"
5136+
"sid": "PRJEB7217",
5137+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
51295138
},
51305139
{
51315140
"sname": "UK10K The Department of Twin Research and Genetic Epidemiology (TwinsUK) Variants",
@@ -5136,7 +5145,8 @@
51365145
"stype": "CASE_CONTROL",
51375146
"fname": "TWINSUK.beagle.anno.csq.shapeit.20131101.sites.vcf.gz",
51385147
"samp": {},
5139-
"sid": "PRJEB7218"
5148+
"sid": "PRJEB7218",
5149+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
51405150
},
51415151
{
51425152
"sname": "GEUVADIS: Genetic European Variation in Disease",
@@ -5147,7 +5157,8 @@
51475157
"stype": "CASE_CONTROL",
51485158
"fname": "GEEVS_aggregation_v2.EVA.eva_1.vcf.gz",
51495159
"samp": {},
5150-
"sid": "PRJEB6042"
5160+
"sid": "PRJEB6042",
5161+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
51515162
},
51525163
{
51535164
"sname": "Genome of the Netherlands Release 5",
@@ -5158,7 +5169,8 @@
51585169
"stype": "CASE_CONTROL",
51595170
"fname": "gonl.chr11.snps_indels.r5.vcf.gz",
51605171
"samp": {},
5161-
"sid": "PRJEB5829"
5172+
"sid": "PRJEB5829",
5173+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
51625174
},
51635175
{
51645176
"sname": "MGP: Medical Genome Project healthy controls from Spanish population",
@@ -5169,7 +5181,8 @@
51695181
"stype": "CASE_CONTROL",
51705182
"fname": "spv.fixed.V2.vcf.gz",
51715183
"samp": {},
5172-
"sid": "PRJEB8705"
5184+
"sid": "PRJEB8705",
5185+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
51735186
},
51745187
{
51755188
"sname": "1000 Genomes Phase 1 Analysis",
@@ -5189,6 +5202,7 @@
51895202
},
51905203
"stype": "CASE_CONTROL",
51915204
"fname": "ALL.chr11.integrated_phase1_v3.20101123.snps_indels_svs.genotypes.vcf.gz",
5205+
"date": { "$date" : "2016-01-01T18:25:43.511Z" },
51925206
"samp": {
51935207
"NA19152": 754,
51945208
"NA18543": 529,
@@ -6294,7 +6308,8 @@
62946308
"stype": "CASE_CONTROL",
62956309
"fname": "gonl.SV.r5.eva_1.vcf.gz",
62966310
"samp": {},
6297-
"sid": "PRJEB5829"
6311+
"sid": "PRJEB5829",
6312+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
62986313
},
62996314
{
63006315
"sname": "GenomeDK Release 1 GATK-Indels calls",
@@ -6305,7 +6320,8 @@
63056320
"stype": "CASE_CONTROL",
63066321
"fname": "genomedk.chr11.gatk_indels.r1.EVA.vcf.gz",
63076322
"samp": {},
6308-
"sid": "PRJEB8652"
6323+
"sid": "PRJEB8652",
6324+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
63096325
},
63106326
{
63116327
"sname": "GenomeDK Release 1 SoapAsmVar-Indels calls",
@@ -6316,7 +6332,8 @@
63166332
"stype": "CASE_CONTROL",
63176333
"fname": "genomedk.chr11.asmvar_indels.r1.EVA.vcf.gz",
63186334
"samp": {},
6319-
"sid": "PRJEB8639"
6335+
"sid": "PRJEB8639",
6336+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
63206337
},
63216338
{
63226339
"sname": "The Exome Aggregation Consortium (ExAC) v0.3",
@@ -6327,7 +6344,8 @@
63276344
"stype": "COLLECTION",
63286345
"fname": "ExAC.r0.3.sites.vep.fixed.V3.vcf.gz",
63296346
"samp": {},
6330-
"sid": "PRJEB8661"
6347+
"sid": "PRJEB8661",
6348+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
63316349
},
63326350
{
63336351
"sname": "Human Variation Data From dbSNP build 144",
@@ -6338,7 +6356,8 @@
63386356
"stype": "COLLECTION",
63396357
"fname": "dbsnp_chr2_20150715_1700.vcf.gz",
63406358
"samp": {},
6341-
"sid": "PRJX00001"
6359+
"sid": "PRJX00001",
6360+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
63426361
},
63436362
{
63446363
"sname": "Human Variation Data From dbSNP build 144",
@@ -6349,7 +6368,8 @@
63496368
"stype": "COLLECTION",
63506369
"fname": "dbsnp_chr9_20150715_1700.vcf.gz",
63516370
"samp": {},
6352-
"sid": "PRJX00001"
6371+
"sid": "PRJX00001",
6372+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
63536373
},
63546374
{
63556375
"sname": "Human Variation Data From dbSNP build 144",
@@ -6360,7 +6380,8 @@
63606380
"stype": "COLLECTION",
63616381
"fname": "dbsnp_chr11_20150715_1700.vcf.gz",
63626382
"samp": {},
6363-
"sid": "PRJX00001"
6383+
"sid": "PRJX00001",
6384+
"date": { "$date" : "2016-01-01T18:25:43.511Z" }
63646385
},
63656386
{
63666387
"sid": "PRJNA289433",
@@ -6370,6 +6391,7 @@
63706391
"_class": "uk.ac.ebi.eva.commons.models.data.VariantSourceEntity",
63716392
"stype": "COLLECTION",
63726393
"fname": "chr13.allImputed.EVA.garys.vcf.gz",
6394+
"date": { "$date" : "2016-01-01T18:25:43.511Z" },
63736395
"samp": {
63746396
"MD_CHW_AAR_13084": 7616,
63756397
"MD_CHW_AAM_6902": 5625,
@@ -17014,6 +17036,7 @@
1701417036
},
1701517037
"fid": "ERZ367948",
1701617038
"sname": "Study of Major Depression in Chinese women",
17039+
"date": { "$date" : "2016-01-01T18:25:43.511Z" },
1701717040
"aggregation": "NONE",
1701817041
"st": {
1701917042
"nVar": 1203645,
@@ -27679,6 +27702,7 @@
2767927702
"fid": "ERZ367955",
2768027703
"sname": "Study of Major Depression in Chinese women",
2768127704
"aggregation": "NONE",
27705+
"date": { "$date" : "2016-01-01T18:25:43.511Z" },
2768227706
"st": {
2768327707
"nVar": 1203645,
2768427708
"nTi": 824182,
@@ -27704,6 +27728,7 @@
2770427728
},
2770527729
"fid": "ERZ329750",
2770627730
"sname": "Illumina Platinum Genomes calls for NA12877 and NA12878 against GRCh37",
27731+
"date": { "$date" : "2016-01-01T18:25:43.511Z" },
2770727732
"aggregation": "NONE",
2770827733
"st": {
2770927734
"nVar": 5494956,

0 commit comments

Comments
 (0)