-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
analysis: improve PLINK analysis (variant command line), #126
- Loading branch information
Showing
11 changed files
with
705 additions
and
18 deletions.
There are no files selected for viewing
6 changes: 5 additions & 1 deletion
6
hpg-bigdata-analysis/src/main/java/org/opencb/hpg/bigdata/analysis/AnalysisExecutor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
112 changes: 112 additions & 0 deletions
112
...-analysis/src/main/java/org/opencb/hpg/bigdata/analysis/variant/VariantAnalysisUtils.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
package org.opencb.hpg.bigdata.analysis.variant; | ||
|
||
import org.opencb.hpg.bigdata.core.lib.VariantDataset; | ||
|
||
import java.io.IOException; | ||
import java.util.List; | ||
|
||
/** | ||
* Created by jtarraga on 13/06/17. | ||
*/ | ||
public class VariantAnalysisUtils { | ||
|
||
/** | ||
* Add variant fitlers to the target variant dataset. | ||
* | ||
* @param filterOptions Filters to apply | ||
* @param vd Target variant dataset | ||
* @throws IOException Exception | ||
*/ | ||
public static void addVariantFilters(VariantFilterOptions filterOptions, | ||
VariantDataset vd) throws IOException { | ||
// ID list | ||
if (validList(filterOptions.getIdList())) { | ||
vd.idFilter(filterOptions.getIdList(), false); | ||
} | ||
|
||
// type | ||
if (validList(filterOptions.getTypeList())) { | ||
vd.typeFilter(filterOptions.getTypeList()); | ||
} | ||
|
||
// query for biotype | ||
if (validList(filterOptions.getBiotypeList())) { | ||
vd.annotationFilter("biotype", filterOptions.getBiotypeList()); | ||
} | ||
|
||
// query for study | ||
if (validList(filterOptions.getStudyList())) { | ||
vd.studyFilter("studyId", filterOptions.getStudyList()); | ||
} | ||
|
||
// query for maf (study:cohort) | ||
if (validList(filterOptions.getMafList())) { | ||
vd.studyFilter("stats.maf", filterOptions.getMafList()); | ||
} | ||
|
||
// query for mgf (study:cohort) | ||
if (validList(filterOptions.getMgfList())) { | ||
vd.studyFilter("stats.mgf", filterOptions.getMgfList()); | ||
} | ||
|
||
// query for region | ||
if (validList(filterOptions.getRegionList())) { | ||
vd.regionFilter(filterOptions.getRegionList()); | ||
} | ||
|
||
// query for consequence type (Sequence Ontology term names and accession codes) | ||
if (validList(filterOptions.getConsequenceTypeList())) { | ||
vd.annotationFilter("consequenceTypes.sequenceOntologyTerms", filterOptions.getConsequenceTypeList()); | ||
} | ||
|
||
// query for consequence type (gene names) | ||
if (validList(filterOptions.getGeneList())) { | ||
vd.annotationFilter("consequenceTypes.geneName", filterOptions.getGeneList()); | ||
} | ||
|
||
// query for clinvar (accession) | ||
if (validList(filterOptions.getClinvarList())) { | ||
vd.annotationFilter("variantTraitAssociation.clinvar.accession", filterOptions.getClinvarList()); | ||
} | ||
|
||
// query for cosmic (mutation ID) | ||
if (validList(filterOptions.getCosmicList())) { | ||
vd.annotationFilter("variantTraitAssociation.cosmic.mutationId", filterOptions.getCosmicList()); | ||
} | ||
|
||
// query for conservation (phastCons, phylop, gerp) | ||
if (validList(filterOptions.getConservScoreList())) { | ||
vd.annotationFilter("conservation", filterOptions.getConservScoreList()); | ||
} | ||
|
||
// query for protein substitution scores (polyphen, sift) | ||
if (validList(filterOptions.getSubstScoreList())) { | ||
vd.annotationFilter("consequenceTypes.proteinVariantAnnotation.substitutionScores", filterOptions.getSubstScoreList()); | ||
} | ||
|
||
// query for alternate population frequency (study:population) | ||
if (validList(filterOptions.getPfList())) { | ||
vd.annotationFilter("populationFrequencies.altAlleleFreq", filterOptions.getPfList()); | ||
} | ||
|
||
// query for population minor allele frequency (study:population) | ||
if (validList(filterOptions.getPmafList())) { | ||
vd.annotationFilter("populationFrequencies.refAlleleFreq", filterOptions.getPmafList()); | ||
} | ||
|
||
// query for sample genotypes | ||
// query for number of missing alleles (study:cohort) | ||
// query for number of missing genotypes (study:cohort) | ||
} | ||
|
||
/** | ||
* Sanity check. | ||
* | ||
* @param list list to check | ||
* @return Boolean | ||
*/ | ||
private static boolean validList(List list) { | ||
return (list != null && list.size() > 0); | ||
} | ||
} | ||
|
186 changes: 186 additions & 0 deletions
186
...-analysis/src/main/java/org/opencb/hpg/bigdata/analysis/variant/VariantFilterOptions.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
package org.opencb.hpg.bigdata.analysis.variant; | ||
|
||
import org.opencb.biodata.models.core.Region; | ||
|
||
import java.util.List; | ||
|
||
/** | ||
* Created by jtarraga on 13/06/17. | ||
*/ | ||
public class VariantFilterOptions { | ||
|
||
// Filter ID | ||
private List<String> idList; | ||
|
||
// Filter type | ||
private List<String> typeList; | ||
|
||
// Filter study | ||
private List<String> studyList; | ||
|
||
// Filter biotype | ||
private List<String> biotypeList; | ||
|
||
// Filter regions | ||
private List<Region> regionList; | ||
|
||
// Filter Minor Allele Frequency (maf), study_name::cohort_name[<|>|<=|>=|==|!=]value | ||
// e.g.: 1000g::all>0.4 | ||
private List<String> mafList; | ||
|
||
// Filter Minor Genotype Frequency (mgf), study_name::cohort_name[<|>|<=|>=|==|!=]value | ||
// e.g.: 1000g::all>0.18198 | ||
private List<String> mgfList; | ||
|
||
// Filter consequence type, Sequence Ontology term names or accession codes | ||
// e.g.: transgenic insertion,SO:32234,SO:00124 | ||
private List<String> consequenceTypeList; | ||
|
||
// Filter gene | ||
// e.g.: BIN3,ZNF517 | ||
private List<String> geneList; | ||
|
||
// Filter clinvar | ||
private List<String> clinvarList; | ||
|
||
// Filter cosmic | ||
private List<String> cosmicList; | ||
|
||
// Filter conservation scores (phastCons, phylop, gerp) | ||
// e.g.: phylop<0.3,phastCons<0.1 | ||
private List<String> conservScoreList; | ||
|
||
// Filter protein substitution scores | ||
// e.g.: polyphen>0.3,sift>0.6 | ||
private List<String> substScoreList; | ||
|
||
// Filter alternate population frequency of a given study, study_name::population_name[<|>|<=|>=|==|!=]frequency_value | ||
// e.g.: 1000g::CEU<0.4 | ||
private List<String> pfList; | ||
|
||
// Filter population minor allele frequency of a given study: study_name:: population_name[<|>|<=|>=|==|!=]frequency_value | ||
// e.g.: 1000g::PJL<=0.25 | ||
private List<String> pmafList; | ||
|
||
//public String samples; | ||
|
||
public List<String> getIdList() { | ||
return idList; | ||
} | ||
|
||
public void setIdList(List<String> idList) { | ||
this.idList = idList; | ||
} | ||
|
||
public List<String> getTypeList() { | ||
return typeList; | ||
} | ||
|
||
public void setTypeList(List<String> typeList) { | ||
this.typeList = typeList; | ||
} | ||
|
||
public List<String> getStudyList() { | ||
return studyList; | ||
} | ||
|
||
public void setStudyList(List<String> studyList) { | ||
this.studyList = studyList; | ||
} | ||
|
||
public List<String> getBiotypeList() { | ||
return biotypeList; | ||
} | ||
|
||
public void setBiotypeList(List<String> biotypeList) { | ||
this.biotypeList = biotypeList; | ||
} | ||
|
||
public List<Region> getRegionList() { | ||
return regionList; | ||
} | ||
|
||
public void setRegionList(List<Region> regionList) { | ||
this.regionList = regionList; | ||
} | ||
|
||
public List<String> getMafList() { | ||
return mafList; | ||
} | ||
|
||
public void setMafList(List<String> mafList) { | ||
this.mafList = mafList; | ||
} | ||
|
||
public List<String> getMgfList() { | ||
return mgfList; | ||
} | ||
|
||
public void setMgfList(List<String> mgfList) { | ||
this.mgfList = mgfList; | ||
} | ||
|
||
public List<String> getConsequenceTypeList() { | ||
return consequenceTypeList; | ||
} | ||
|
||
public void setConsequenceTypeList(List<String> consequenceTypeList) { | ||
this.consequenceTypeList = consequenceTypeList; | ||
} | ||
|
||
public List<String> getGeneList() { | ||
return geneList; | ||
} | ||
|
||
public void setGeneList(List<String> geneList) { | ||
this.geneList = geneList; | ||
} | ||
|
||
public List<String> getClinvarList() { | ||
return clinvarList; | ||
} | ||
|
||
public void setClinvarList(List<String> clinvarList) { | ||
this.clinvarList = clinvarList; | ||
} | ||
|
||
public List<String> getCosmicList() { | ||
return cosmicList; | ||
} | ||
|
||
public void setCosmicList(List<String> cosmicList) { | ||
this.cosmicList = cosmicList; | ||
} | ||
|
||
public List<String> getConservScoreList() { | ||
return conservScoreList; | ||
} | ||
|
||
public void setConservScoreList(List<String> conservScoreList) { | ||
this.conservScoreList = conservScoreList; | ||
} | ||
|
||
public List<String> getSubstScoreList() { | ||
return substScoreList; | ||
} | ||
|
||
public void setSubstScoreList(List<String> substScoreList) { | ||
this.substScoreList = substScoreList; | ||
} | ||
|
||
public List<String> getPfList() { | ||
return pfList; | ||
} | ||
|
||
public void setPfList(List<String> pfList) { | ||
this.pfList = pfList; | ||
} | ||
|
||
public List<String> getPmafList() { | ||
return pmafList; | ||
} | ||
|
||
public void setPmafList(List<String> pmafList) { | ||
this.pmafList = pmafList; | ||
} | ||
} |
Oops, something went wrong.