Commit: support phenopacket

kingmanzhang committed Apr 23, 2019
1 parent 55ec0bb commit 27c8307
Showing 5 changed files with 395 additions and 31 deletions.
147 changes: 147 additions & 0 deletions phenomiser-cli/src/main/java/org/jax/cmd/PhenopacketCommand.java
@@ -0,0 +1,147 @@
package org.jax.cmd;

import com.beust.jcommander.Parameter;
import org.jax.Phenomiser;
import org.jax.io.DiseaseParser;
import org.jax.io.HpoParser;
import org.jax.io.PhenopacketImporter;
import org.jax.model.Item2PValueAndSimilarity;
import org.jax.services.AbstractResources;
import org.jax.services.CachedResources;
import org.jax.utils.DiseaseDB;
import org.json.simple.parser.ParseException;
import org.monarchinitiative.phenol.base.PhenolException;
import org.monarchinitiative.phenol.io.obo.hpo.HpoDiseaseAnnotationParser;
import org.monarchinitiative.phenol.ontology.data.TermId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Run a Phenomiser query using the HPO terms of a phenopacket as the query input.
 */
public class PhenopacketCommand extends PhenomiserCommand {

    private static final Logger logger = LoggerFactory.getLogger(PhenopacketCommand.class);
    final String HOME = System.getProperty("user.home");

    @Parameter(names = {"-hpo", "--hpo_path"}, description = "specify the path to hp.obo")
    private String hpoPath;
    @Parameter(names = {"-da", "--disease_annotation"}, description = "specify the path to the disease annotation file")
    private String diseasePath;
    @Parameter(names = {"-cachePath", "--cachePath"}, description = "specify the path to save precomputed data")
    private String cachePath = HOME + File.separator + "Phenomiser_data";
    @Parameter(names = {"-db", "--diseaseDB"},
            description = "choose disease database [OMIM,ORPHA]")
    private String diseaseDB = "OMIM";
    @Parameter(names = {"-pp", "--phenopacket"}, description = "specify the path to a phenopacket file")
    private String phenopacket;

    @Parameter(names = {"-o", "--output"}, description = "specify output path")
    private String outPath;

    private AbstractResources resources;

    @Override
    public void run() {
        // parse the ontology and the disease annotations
        HpoParser hpoParser = new HpoParser(hpoPath);
        hpoParser.init();
        HpoDiseaseAnnotationParser diseaseAnnotationParser = new HpoDiseaseAnnotationParser(diseasePath, hpoParser.getHpo());
        DiseaseParser diseaseParser = new DiseaseParser(diseaseAnnotationParser, hpoParser.getHpo());
        try {
            diseaseParser.init();
        } catch (PhenolException e) {
            e.printStackTrace();
            System.exit(1);
        }

        if (!Files.exists(Paths.get(cachePath))) {
            System.err.print("Cannot find caching data at " + cachePath);
            System.exit(1);
        }

        // the query terms are the HPO terms observed in the phenopacket
        List<TermId> queryList;
        try {
            PhenopacketImporter ppimporter = PhenopacketImporter.fromJson(phenopacket);
            queryList = ppimporter.getHpoTerms();
        } catch (ParseException | IOException e) {
            e.printStackTrace();
            return;
        }

        resources = new CachedResources(hpoParser, diseaseParser, cachePath, Math.min(queryList.size(), 10));
        resources.init();
        Phenomiser.setResources(resources);

        List<DiseaseDB> db = Arrays.stream(diseaseDB.split(",")).map(DiseaseDB::valueOf).collect(Collectors.toList());
        List<Item2PValueAndSimilarity<TermId>> result = Phenomiser.query(queryList, db);

        // output query result
        if (!result.isEmpty()) {
            write_query_result(result, outPath);
        }
    }

    public static Writer getWriter(String path) {
        Writer writer;
        try {
            writer = new FileWriter(new File(path));
        } catch (Exception e) {
            logger.info("out path not found. writing to console: ");
            writer = new OutputStreamWriter(System.out);
        }
        return writer;
    }

    public void write_query_result(List<Item2PValueAndSimilarity<TermId>> result, @Nullable String
            outPath) {

        Writer writer = getWriter(outPath);

        try {
            writer.write("diseaseId\tdiseaseName\tp\tadjust_p\tsimilarityScore\n");
        } catch (IOException e) {
            logger.error("io exception during writing header. writing output aborted.");
            return;
        }
        List<Item2PValueAndSimilarity<TermId>> newList = new ArrayList<>(result);
        Collections.sort(newList);

        newList.forEach(e -> {
            try {
                writer.write(e.getItem().getValue());
                writer.write("\t");
                writer.write(resources.getDiseaseMap().get(e.getItem()).getName());
                writer.write("\t");
                writer.write(Double.toString(e.getRawPValue()));
                writer.write("\t");
                writer.write(Double.toString(e.getAdjustedPValue()));
                writer.write("\t");
                writer.write(Double.toString(e.getSimilarityScore()));
                writer.write("\n");
            } catch (IOException exception) {
                logger.error("IO exception during writing out adjusted p values");
            }
        });

        try {
            writer.close();
        } catch (IOException e) {
            logger.error("IO exception during closing writer");
        }
    }
}
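
For context, the new command chains the existing pieces end to end: parse hp.obo and the disease annotation file, read the observed HPO terms out of the phenopacket JSON, load the cached score distributions, and run a single ranking query. The sketch below shows the same flow called programmatically; it is only an illustration that relies on the constructors and method signatures visible in this diff, and the file paths are placeholders.

import org.jax.Phenomiser;
import org.jax.io.DiseaseParser;
import org.jax.io.HpoParser;
import org.jax.io.PhenopacketImporter;
import org.jax.model.Item2PValueAndSimilarity;
import org.jax.services.AbstractResources;
import org.jax.services.CachedResources;
import org.jax.utils.DiseaseDB;
import org.monarchinitiative.phenol.io.obo.hpo.HpoDiseaseAnnotationParser;
import org.monarchinitiative.phenol.ontology.data.TermId;

import java.io.File;
import java.util.Collections;
import java.util.List;

public class PhenopacketQueryExample {

    public static void main(String[] args) throws Exception {
        // placeholder paths: point these at local copies of hp.obo, the disease
        // annotation file, the Phenomiser cache folder and a phenopacket JSON file
        String hpoPath = "/data/hp.obo";
        String diseasePath = "/data/phenotype.hpoa";
        String cachePath = System.getProperty("user.home") + File.separator + "Phenomiser_data";
        String phenopacketPath = "/data/patient1.json";

        // build the ontology and disease models, as PhenopacketCommand.run() does
        HpoParser hpoParser = new HpoParser(hpoPath);
        hpoParser.init();
        DiseaseParser diseaseParser = new DiseaseParser(
                new HpoDiseaseAnnotationParser(diseasePath, hpoParser.getHpo()), hpoParser.getHpo());
        diseaseParser.init();

        // the query is simply the list of HPO terms observed in the phenopacket
        List<TermId> query = PhenopacketImporter.fromJson(phenopacketPath).getHpoTerms();

        // load precomputed score distributions up to the query size (capped at 10)
        AbstractResources resources =
                new CachedResources(hpoParser, diseaseParser, cachePath, Math.min(query.size(), 10));
        resources.init();
        Phenomiser.setResources(resources);

        // rank OMIM diseases against the phenopacket terms and print a simple table
        List<Item2PValueAndSimilarity<TermId>> ranking =
                Phenomiser.query(query, Collections.singletonList(DiseaseDB.OMIM));
        ranking.forEach(r -> System.out.println(
                r.getItem().getValue() + "\t" + r.getRawPValue() + "\t" + r.getSimilarityScore()));
    }
}

On the command line, the equivalent invocation uses the new -pp/--phenopacket option together with -hpo, -da and, if needed, -cachePath and -o.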
5 changes: 0 additions & 5 deletions phenomiser-cli/src/main/java/org/jax/cmd/QueryCommand.java
@@ -12,7 +12,6 @@
import org.monarchinitiative.phenol.base.PhenolException;
import org.monarchinitiative.phenol.io.obo.hpo.HpoDiseaseAnnotationParser;
import org.monarchinitiative.phenol.ontology.data.TermId;
import org.monarchinitiative.phenol.stats.Item2PValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -95,10 +94,6 @@ public static Writer getWriter(String path) {
    public void write_query_result(List<Item2PValueAndSimilarity<TermId>> result, @Nullable String
            outPath) {

        // if (adjusted_p_value == null) {
        //     return;
        // }

        Writer writer = getWriter(outPath);

        try {
78 changes: 52 additions & 26 deletions phenomiser-core/src/main/java/org/jax/Phenomiser.java
@@ -1,13 +1,12 @@
package org.jax;

import org.h2.mvstore.DataUtils;
import org.jax.model.Item2PValueAndSimilarity;
import org.jax.services.AbstractResources;
import org.jax.services.CachedResources;
import org.jax.services.PValueCalculator;
import org.jax.services.SimilarityScoreCalculator;
import org.jax.utils.DiseaseDB;
import org.jax.utils.Ranker;
import org.monarchinitiative.phenol.ontology.data.Term;
import org.monarchinitiative.phenol.ontology.data.TermId;
import org.monarchinitiative.phenol.stats.BenjaminiHochberg;
import org.monarchinitiative.phenol.stats.Item2PValue;
@@ -81,31 +80,8 @@ public static List<Item2PValueAndSimilarity<TermId>> query(List<TermId> queryTer
        return adjusted;
    }

    /**
     * Query in batch mode with multiple queries. This method optimizes resource usage to avoid repeated file io.
     * @param queries a list of query list.
     * @param dbs a list of disease databases
     * @return a list of disease ranking lists
     */
    public static List<List<Item2PValueAndSimilarity<TermId>>> batchQuery(List<List<TermId>> queries, List<DiseaseDB> dbs) {

        Map<Integer, Integer> listSizes = new HashMap<>(); // from first to last list, count how many Terms each list has
        for (int i = 0; i < queries.size(); i++) {
            listSizes.put(i, queries.get(i).size());
        }

        //process query lists in the order of how many terms they have
        listSizes.values().forEach(listSize -> {

        });

        throw new UnsupportedOperationException("TO implement");
    }

    /**
     * Provide a list of query terms and a disease ID, find the rank of specified disease in the disease ranking
     * @param queryTerms
@@ -129,6 +105,36 @@ public static int findRank(List<TermId> queryTerms, TermId targetDisease, List<D
        return rank;
    }

    /**
     * Query in batch mode with multiple queries. This method optimizes resource usage to avoid repeated file io.
     * @param queries a list of query lists
     * @param dbs a list of disease databases
     * @return a list of disease ranking lists, aligned with the order of the queries
     */
    public static List<List<Item2PValueAndSimilarity<TermId>>> batchQuery(List<List<TermId>> queries, List<DiseaseDB> dbs) {

        // collect the distinct term counts across all query lists
        Set<Integer> termCounts = queries.stream().map(List::size).collect(Collectors.toSet());

        // pre-fill with nulls so each result can be written back at the index of its originating query
        List<List<Item2PValueAndSimilarity<TermId>>> queryResults = new ArrayList<>();
        for (int i = 0; i < queries.size(); i++) {
            queryResults.add(null);
        }

        // process query lists grouped by term count, so the cached score distribution
        // for each count is loaded only once
        termCounts.forEach(termCount -> {
            if (resources instanceof CachedResources) {
                ((CachedResources) resources).cleanAndLoadScoreDistribution(termCount);
            }

            for (int i = 0; i < queries.size(); i++) {
                if (queries.get(i).size() == termCount) {
                    List<Item2PValueAndSimilarity<TermId>> queryResult = query(queries.get(i), dbs);
                    queryResults.set(i, queryResult);
                }
            }
        });

        return queryResults;
    }

    /**
     * Provide multiple query term lists. For each query list, provide a target disease in a separate list. Return the rank of specified disease for each query list.
     * @param queries
@@ -138,7 +144,27 @@ public static int findRank(List<TermId> queryTerms, TermId targetDisease, List<D
     */
    public static int[] batchFindRank(List<List<TermId>> queries,
                                      List<TermId> targetDiseases, List<DiseaseDB> dbs) {
        throw new UnsupportedOperationException("TO implement");

        // collect the distinct term counts across all query lists
        Set<Integer> termCounts = queries.stream().map(List::size).collect(Collectors.toSet());

        int[] ranks = new int[queries.size()];

        // process query lists grouped by term count, so the cached score distribution
        // for each count is loaded only once
        termCounts.forEach(termCount -> {
            if (resources instanceof CachedResources) {
                ((CachedResources) resources).cleanAndLoadScoreDistribution(termCount);
            }

            for (int i = 0; i < queries.size(); i++) {
                if (queries.get(i).size() == termCount) {
                    int rank = findRank(queries.get(i), targetDiseases.get(i), dbs);
                    ranks[i] = rank;
                }
            }
        });

        return ranks;
    }

}
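
The two batch methods added above share one strategy: group the query lists by how many terms they contain, load the cached score distribution for that term count once via cleanAndLoadScoreDistribution, and then answer every query of that size before moving on, so the distribution files are not re-read per query. A small usage sketch follows; it is illustrative only and assumes that Phenomiser.setResources has already been called with initialized resources (as the CLI commands do) and that the caller supplies the query term lists and target disease IDs.

import org.jax.Phenomiser;
import org.jax.model.Item2PValueAndSimilarity;
import org.jax.utils.DiseaseDB;
import org.monarchinitiative.phenol.ontology.data.TermId;

import java.util.Collections;
import java.util.List;

public class BatchQueryExample {

    /**
     * Rank diseases for several query term lists at once. Each inner list holds the HPO
     * terms of one case; the returned list is aligned with the input order.
     */
    public static List<List<Item2PValueAndSimilarity<TermId>>> rankAll(List<List<TermId>> queries) {
        List<DiseaseDB> dbs = Collections.singletonList(DiseaseDB.OMIM);
        // the cached score distribution is reloaded only once per distinct query size
        return Phenomiser.batchQuery(queries, dbs);
    }

    /**
     * For benchmarking: at which rank does the known diagnosis of each case appear?
     * targetDiseases.get(i) is the disease expected for queries.get(i).
     */
    public static int[] ranksOfKnownDiagnoses(List<List<TermId>> queries, List<TermId> targetDiseases) {
        List<DiseaseDB> dbs = Collections.singletonList(DiseaseDB.OMIM);
        return Phenomiser.batchFindRank(queries, targetDiseases, dbs);
    }
}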
