-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmapUniProtGeneName2upId.java
78 lines (71 loc) · 2.85 KB
/
mapUniProtGeneName2upId.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Resource;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * Command-line tool that links subjects of a local Turtle link-set file to
 * UniProt protein resources.
 *
 * <p>For every triple in the input file, the fifth {@code /}-separated segment
 * of the object is taken as a gene name. The UniProt SPARQL endpoint is then
 * asked for every {@code ?protein} that has an {@code rdfs:seeAlso} link to a
 * {@code ?gene} whose {@code rdfs:comment} equals that name. Each protein found
 * is attached to the triple's subject with {@code Skos.relatedMatch}.
 *
 * <p>Results are written as numbered Turtle files. A new file is started
 * whenever the pending output model holds more than
 * {@link #MAX_STATEMENTS_PER_FILE} statements.
 *
 * <p>NOTE(review): {@code Skos} is not imported in this file; it is presumably
 * a vocabulary class living in the same (default) package - confirm.
 */
public class mapUniProtGeneName2upId {

    /** Turtle file holding the link set that is read as input. */
    private static final String INPUT_FILE =
            "/tmp/homo_sapiens_core_71_37_ensembl_Uniprot_genenameLinkSets.ttl";

    /** Output files are named {@code OUTPUT_PREFIX + <counter> + OUTPUT_SUFFIX}. */
    private static final String OUTPUT_PREFIX = "/tmp/Ensembl_uniprot";
    private static final String OUTPUT_SUFFIX = ".ttl";

    /** Remote SPARQL endpoint queried once per input triple. */
    private static final String UNIPROT_ENDPOINT = "http://beta.sparql.uniprot.org";

    /** The pending output model is flushed once it holds more statements than this. */
    private static final long MAX_STATEMENTS_PER_FILE = 10000;

    /** Index of the gene-name segment after splitting the object's string form on "/". */
    private static final int GENE_NAME_SEGMENT = 4;

    /**
     * Reads the link-set file, queries the UniProt endpoint for each triple and
     * writes the resulting {@code Skos.relatedMatch} statements to numbered
     * Turtle files.
     *
     * @param args ignored
     * @throws IOException if the input location cannot be converted to a URL or
     *                     an output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        Model model = ModelFactory.createDefaultModel();
        // toURI().toURL() replaces the deprecated File.toURL(), which does not
        // escape characters that are illegal in URLs.
        model.read(new File(INPUT_FILE).toURI().toURL().toString(), "TURTLE");

        String getQuery = "SELECT DISTINCT * " +
                "WHERE { " +
                " ?ensembl ?p ?upGeneName ." +
                "} ";
        Query localQuery = QueryFactory.create(getQuery);
        QueryExecution localExecution = QueryExecutionFactory.create(localQuery, model);
        try {
            ResultSet rows = localExecution.execSelect();
            Model outputModel = ModelFactory.createDefaultModel();
            int fileCounter = 1;

            while (rows.hasNext()) {
                System.out.println(Runtime.getRuntime().freeMemory());

                // Flush the pending batch before handling the next row so the
                // in-memory output model stays bounded.
                if (outputModel.size() > MAX_STATEMENTS_PER_FILE) {
                    writeModel(outputModel, fileCounter);
                    fileCounter++;
                    outputModel = ModelFactory.createDefaultModel();
                }

                QuerySolution row = rows.next();
                String upGeneName = row.get("upGeneName").toString();
                String geneName = extractGeneName(upGeneName);
                if (geneName == null) {
                    // The pattern above matches every triple in the file, so an
                    // object of another shape must not abort the whole run.
                    System.err.println("Skipping object without a gene-name segment: " + upGeneName);
                    continue;
                }

                Resource ensemblResource = outputModel.createResource(row.get("ensembl").toString());
                addUniProtMatches(outputModel, ensemblResource, geneName);
            }

            // Always write the final (possibly partial) batch.
            writeModel(outputModel, fileCounter);
        } finally {
            localExecution.close();
        }
    }

    /**
     * Returns the gene-name segment of {@code value}, or {@code null} when the
     * value has too few {@code /}-separated segments.
     */
    private static String extractGeneName(String value) {
        String[] segments = value.split("/");
        return segments.length > GENE_NAME_SEGMENT ? segments[GENE_NAME_SEGMENT] : null;
    }

    /**
     * Queries the UniProt endpoint for proteins linked to {@code geneName} and
     * adds one {@code Skos.relatedMatch} statement per protein to
     * {@code ensemblResource}.
     */
    private static void addUniProtMatches(Model outputModel, Resource ensemblResource, String geneName) {
        String upSparql = "PREFIX up: <http://purl.uniprot.org/core/> " +
                "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>" +
                "SELECT ?gene ?protein " +
                "WHERE " +
                "{ " +
                "?gene rdfs:comment '" + escapeSparqlLiteral(geneName) + "' ." +
                "?protein rdfs:seeAlso ?gene . " +
                "}";
        Query remoteQuery = QueryFactory.create(upSparql);
        QueryExecution remoteExecution = QueryExecutionFactory.sparqlService(UNIPROT_ENDPOINT, remoteQuery);
        try {
            ResultSet proteins = remoteExecution.execSelect();
            System.out.println(proteins.getResultVars());
            while (proteins.hasNext()) {
                QuerySolution protein = proteins.next();
                String upURL = protein.get("protein").toString();
                ensemblResource.addProperty(Skos.relatedMatch, outputModel.createResource(upURL));
                System.out.println(upURL);
            }
        } finally {
            // One execution is created per input row; release it promptly.
            remoteExecution.close();
        }
    }

    /**
     * Escapes {@code text} for use inside a single-quoted SPARQL string literal,
     * so that quotes or backslashes in the data cannot break or alter the query.
     */
    private static String escapeSparqlLiteral(String text) {
        return text.replace("\\", "\\\\")
                .replace("'", "\\'")
                .replace("\n", "\\n")
                .replace("\r", "\\r");
    }

    /**
     * Serializes {@code model} as Turtle to the output file with the given
     * number, closing the stream when done.
     */
    private static void writeModel(Model model, int fileNumber) throws IOException {
        try (FileOutputStream out = new FileOutputStream(OUTPUT_PREFIX + fileNumber + OUTPUT_SUFFIX)) {
            model.write(out, "TURTLE");
        }
    }
}