Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add logic for correct CURIE format in Linker #722

Merged
merged 6 commits into from
Sep 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions dataload/configs/idocovid19.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"ontologies": [
{
"id": "idocovid19",
"preferredPrefix": "IDO-COVID-19",
"title": "The COVID-19 Infectious Disease Ontology",
"uri": "https://raw.githubusercontent.com/infectious-disease-ontology-extensions/ido-covid-19/master/ontology/ido%20covid-19",
"description": "The COVID-19 Infectious Disease Ontology (IDO-COVID-19) is an extension of the Infectious Disease Ontology (IDO) and the Virus Infectious Disease Ontology (VIDO). IDO-COVID-19 follows OBO Foundry guidelines, employs the Basic Formal Ontology as its starting point, and covers epidemiology, classification, pathogenesis, and treatment of terms used to represent infection by the SARS-CoV-2 virus strain, and the associated COVID-19 disease.",
"homepage": "https://github.com/infectious-disease-ontology-extensions/ido-covid-19",
"mailing_list": "[email protected]",
"definition_property": [
"http://purl.obolibrary.org/obo/IAO_0000115"
],
"synonym_property": [
"http://www.geneontology.org/formats/oboInOwl#hasExactSynonym"
],
"hierarchical_property": [
"http://purl.obolibrary.org/obo/BFO_0000050"
],
"base_uri": [
"http://purl.obolibrary.org/obo/IDO-COVID-19"
],
"oboSlims": false,
"reasoner": "OWL2",
"ontology_purl": "https://gist.githubusercontent.com/haideriqbal/9b5245af626bd7687831c19c2c8076e8/raw/2c75495f31df0a379062bf12d3fab323eedbb7a9/idocovid19.owl"
},
{
"id": "oio",
"ontology_purl": "https://gist.githubusercontent.com/haideriqbal/4a2b1a9aa81d9fa26cae81e0b0b7730b/raw/527665128b9be9d7d6133f9a796379600151c737/oboInOwl.owl",
"base_uri": ["http://www.geneontology.org/formats/oboInOwl#"]
}
]
}
53 changes: 40 additions & 13 deletions dataload/linker/src/main/java/LinkerPass1.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.SetMultimap;
import com.google.common.io.CountingInputStream;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonParser;
import com.google.gson.*;
import com.google.gson.stream.JsonReader;
import com.google.gson.stream.JsonToken;

Expand Down Expand Up @@ -168,15 +163,31 @@ public static LinkerPass1Result run(String inputJsonFilename) throws IOException
for(var entry : result.iriToDefinitions.entrySet()) {

EntityDefinitionSet definitions = entry.getValue();

// definingOntologyIris -> definingOntologyIds
for(String ontologyIri : definitions.definingOntologyIris) {
for(String ontologyId : result.ontologyIriToOntologyIds.get(ontologyIri)) {
definitions.definingOntologyIds.add(ontologyId);
if (result.ontologyIriToOntologyIds.containsKey(ontologyIri)) {
for(String ontologyId : result.ontologyIriToOntologyIds.get(ontologyIri)) {
definitions.definingOntologyIds.add(ontologyId);
}
}
}

for(EntityDefinition def : definitions.definitions) {
if(def.curie != null && entry.getValue().definingOntologyIds.iterator().hasNext()) {
JsonObject curieObject = def.curie.getAsJsonObject();
if(curieObject.has("value")) {
String curieValue = curieObject.get("value").getAsString();
if(!curieValue.contains(":")) {
var definingOntologyId = entry.getValue().definingOntologyIds.iterator().next();
EntityDefinition definingEntity = entry.getValue().ontologyIdToDefinitions.get(definingOntologyId);
if (definingEntity != null && definingEntity.curie != null) {
curieValue = definingEntity.curie.getAsJsonObject().get("value").getAsString();
curieObject.addProperty("value", curieValue);
result.iriToDefinitions.put(entry.getKey(), definitions);
}
}
}
}
if(definitions.definingOntologyIds.contains(def.ontologyId)) {
def.isDefiningOntology = true;
}
Expand Down Expand Up @@ -235,14 +246,30 @@ public static void parseEntity(JsonReader jsonReader, String entityType, String
curie = jsonParser.parse(jsonReader);
} else if(key.equals("type")) {
types = gson.fromJson(jsonReader, Set.class);
} else if(key.equals("http://www.w3.org/2000/01/rdf-schema#definedBy")) {
} else if(key.equals("http://www.w3.org/2000/01/rdf-schema#isDefinedBy")) {
JsonElement jsonDefinedBy = jsonParser.parse(jsonReader);
if(jsonDefinedBy.isJsonArray()) {
JsonArray arr = jsonDefinedBy.getAsJsonArray();
for(JsonElement el : arr) {
definedBy.add( el.getAsString() );
for(JsonElement isDefinedBy : arr) {
if (isDefinedBy.isJsonObject()) {
JsonObject obj = isDefinedBy.getAsJsonObject();
var value = obj.get("value");
if (value.isJsonObject()) {
definedBy.add(value.getAsJsonObject().get("value").getAsString());
} else
definedBy.add(value.getAsString());
} else
definedBy.add( isDefinedBy.getAsString() );
}
} else {
} else if (jsonDefinedBy.isJsonObject()) {
JsonObject obj = jsonDefinedBy.getAsJsonObject();
var value = obj.get("value");
if (value.isJsonObject()) {
definedBy.add(value.getAsJsonObject().get("value").getAsString());
} else
definedBy.add(value.getAsString());
}
else {
definedBy.add(jsonDefinedBy.getAsString());
}
} else {
Expand Down
88 changes: 87 additions & 1 deletion dataload/linker/src/main/java/LinkerPass2.java
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.stream.JsonReader;
import com.google.gson.stream.JsonToken;
Expand Down Expand Up @@ -151,6 +153,10 @@ private static void writeEntityArray(JsonReader jsonReader, JsonWriter jsonWrite
if(name.equals("iri")) {
entityIri = jsonReader.nextString();
jsonWriter.value(entityIri);
} else if (name.equalsIgnoreCase("curie")) {
processCurieObject(jsonReader, jsonWriter, pass1Result, entityIri);
} else if (name.equalsIgnoreCase("shortForm")) {
processShortFormObject(jsonReader, jsonWriter, pass1Result, entityIri);
} else {
CopyJsonGatheringStrings.copyJsonGatheringStrings(jsonReader, jsonWriter, stringsInEntity);
}
Expand Down Expand Up @@ -320,7 +326,7 @@ private static void writeLinkedEntitiesFromGatheredStrings(JsonWriter jsonWriter
private static void writeIriMapping(JsonWriter jsonWriter, EntityDefinitionSet definitions, String ontologyId) throws IOException {

if(definitions.definingDefinitions.size() > 0) {

// There are ontologies which canonically define this term

jsonWriter.name("definedBy");
Expand Down Expand Up @@ -436,4 +442,84 @@ private static class CurieMapResult {
public String source;
}

/**
 * Streams one "shortForm" JSON object from {@code jsonReader} to {@code jsonWriter}, replacing
 * its "value" with the entity's CURIE from pass 1 with every ':' turned into '_'.
 *
 * <p>The output object always has the shape {@code {"type": [...], "value": "..."}}. Fields
 * other than "type" and "value" are consumed and dropped, as the original output never carried
 * them. If pass 1 yields no CURIE (empty string), the value read from the input is written
 * unchanged instead of being blanked out.
 *
 * @param jsonReader  reader positioned at the start of the short form object
 * @param jsonWriter  writer positioned where a value is expected
 * @param pass1Result pass 1 output used to look up the entity's CURIE
 * @param entityIri   IRI of the entity that owns this short form
 * @throws IOException if reading or writing the JSON stream fails
 */
private static void processShortFormObject(JsonReader jsonReader, JsonWriter jsonWriter, LinkerPass1.LinkerPass1Result pass1Result, String entityIri) throws IOException {
    JsonArray typeArray = new JsonArray();
    String originalShortForm = null;

    jsonReader.beginObject();
    while (jsonReader.peek() != JsonToken.END_OBJECT) {
        String shortFormFieldName = jsonReader.nextName();
        if (shortFormFieldName.equals("type")) {
            // A repeated "type" key replaces the earlier one rather than appending to it.
            typeArray = new JsonArray();
            jsonReader.beginArray();
            while (jsonReader.peek() != JsonToken.END_ARRAY) {
                typeArray.add(jsonReader.nextString());
            }
            jsonReader.endArray();
        } else if (shortFormFieldName.equals("value")) {
            originalShortForm = jsonReader.nextString();
        } else {
            // Every name must be followed by consuming its value; otherwise the next
            // nextName() call fails with IllegalStateException.
            jsonReader.skipValue();
        }
    }
    jsonReader.endObject();

    String processedCurie = getProcessedCurieValue(pass1Result, entityIri);
    String shortFormValue;
    if (!processedCurie.isEmpty()) {
        shortFormValue = processedCurie.replace(":", "_");
    } else if (originalShortForm != null) {
        // No CURIE available from pass 1: keep what the input already had.
        shortFormValue = originalShortForm;
    } else {
        shortFormValue = "";
    }

    // Write the modified short form object
    jsonWriter.beginObject();
    jsonWriter.name("type");
    jsonWriter.beginArray();
    for (JsonElement typeElement : typeArray) {
        jsonWriter.value(typeElement.getAsString());
    }
    jsonWriter.endArray();
    jsonWriter.name("value").value(shortFormValue);
    jsonWriter.endObject();
}

/**
 * Streams one "curie" JSON object from {@code jsonReader} to {@code jsonWriter}, replacing its
 * "value" with the CURIE recorded for the entity in pass 1.
 *
 * <p>The output object always has the shape {@code {"type": [...], "value": "..."}}. Fields
 * other than "type" and "value" are consumed and dropped, as the original output never carried
 * them. If pass 1 yields no CURIE (empty string), the value read from the input is written
 * unchanged instead of being blanked out.
 *
 * @param jsonReader  reader positioned at the start of the curie object
 * @param jsonWriter  writer positioned where a value is expected
 * @param pass1Result pass 1 output used to look up the entity's CURIE
 * @param entityIri   IRI of the entity that owns this curie
 * @throws IOException if reading or writing the JSON stream fails
 */
private static void processCurieObject(JsonReader jsonReader, JsonWriter jsonWriter, LinkerPass1.LinkerPass1Result pass1Result, String entityIri) throws IOException {
    JsonArray typeArray = new JsonArray();
    String originalCurie = null;

    jsonReader.beginObject();
    while (jsonReader.peek() != JsonToken.END_OBJECT) {
        String curieFieldName = jsonReader.nextName();
        if (curieFieldName.equals("type")) {
            // A repeated "type" key replaces the earlier one rather than appending to it.
            typeArray = new JsonArray();
            jsonReader.beginArray();
            while (jsonReader.peek() != JsonToken.END_ARRAY) {
                typeArray.add(jsonReader.nextString());
            }
            jsonReader.endArray();
        } else if (curieFieldName.equals("value")) {
            originalCurie = jsonReader.nextString();
        } else {
            // Every name must be followed by consuming its value; otherwise the next
            // nextName() call fails with IllegalStateException.
            jsonReader.skipValue();
        }
    }
    jsonReader.endObject();

    String curieValue = getProcessedCurieValue(pass1Result, entityIri);
    if (curieValue.isEmpty() && originalCurie != null) {
        // No CURIE available from pass 1: keep what the input already had.
        curieValue = originalCurie;
    }

    // Write the modified curie object
    jsonWriter.beginObject();
    jsonWriter.name("type");
    jsonWriter.beginArray();
    for (JsonElement typeElement : typeArray) {
        jsonWriter.value(typeElement.getAsString());
    }
    jsonWriter.endArray();
    jsonWriter.name("value").value(curieValue);
    jsonWriter.endObject();
}

/**
 * Looks up the CURIE string that pass 1 recorded for an entity.
 *
 * <p>Only the first definition in the entity's definition set is consulted. An empty string is
 * returned whenever no CURIE can be produced: the IRI is null or unknown to pass 1, there are
 * no definitions, the first definition has no curie object, or that object has no "value".
 *
 * @param pass1Result pass 1 output holding the IRI-to-definitions map
 * @param entityIri   IRI of the entity to look up; presumably null when the caller has not
 *                    yet read the entity's "iri" field (verify against caller)
 * @return the CURIE value, or "" when none is available
 */
private static String getProcessedCurieValue(LinkerPass1.LinkerPass1Result pass1Result, String entityIri) {
    if (entityIri == null) {
        return "";
    }
    var def = pass1Result.iriToDefinitions.get(entityIri);
    if (def == null) {
        // IRI was not seen in pass 1.
        return "";
    }
    var definitionIterator = def.definitions.iterator();
    if (!definitionIterator.hasNext()) {
        return "";
    }
    // Pass 1 only sets curie when the entity carried one, so it may be null here.
    var curie = definitionIterator.next().curie;
    if (curie == null || !curie.isJsonObject()) {
        return "";
    }
    JsonObject defCurieObject = curie.getAsJsonObject();
    if (defCurieObject.has("value")) {
        return defCurieObject.get("value").getAsString();
    }
    return "";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ public static void annotateShortForms(OntologyGraph graph) {
if(c.uri == null)
continue;

if (preferredPrefix == null || preferredPrefix.isEmpty()) {
preferredPrefix = graph.config.get("id").toString().toUpperCase();
}

String shortForm = extractShortForm(graph, ontologyBaseUris, preferredPrefix, c.uri);
String curie = shortForm.replaceFirst("_", ":");
Expand Down
Loading