OpenEnergyPlatform · madbkr · Dec 11, 2024 · Dec 11, 2024 · Dec 12, 2024
diff --git a/oekg evaluation/How to use.txt b/oekg evaluation/How to use.txt
@@ -1,21 +1,14 @@
-There are 3 scripts. 
+There are 2 scripts. 
 
-This one is querying a file:
+This one is querying a file or the oekg SPARQL endpoint:
 
-python3 wordFinder.py [path to your rdf data file] [path to the file holding labels].
+python3 labelChecker.py <endpoint|file> <path to the file holding labels> [path to your rdf data file].
+
+If you want to query the endpoint, use "endpoint" as the first argument. Use "file" if you want to query a file. In that case you need to specify the path to the file in the third argument.
 
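-The output will be printed into a file called abstract_evaluation.txt.
-You may need to instal rdflib first.
+The output will be printed into a file called abstractEvaluation.txt.
+You may need to install rdflib first.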
 
-///
-
-This one is querying the OEKG endpoint:
-
-python3 endpointWordFinder.py [path to the file holding labels]
-
-The output will be printed into a file called abstract_evaluation2.txt.
-
-///
 
 The file with all current OEO labels taken from the glossary and current alternative labels extracted from the omn files is provided in this folder and is called allLabels.txt.
 
@@ -24,9 +17,11 @@ The file with all current OEO labels taken from the glossary and current alterna
 This one is for querying...
 1) the number of studys in the sample
 2) the number of scenarios per study
-3) the descriptors of each study
+3) the keywords of each study
+
+python3 oekgQuery.py <endpoint|file> <studynumber|scenarionumber|keywords> [path to your rdf data file]
 
-python3 oekgQuery.py [studynumber|scenarionumber|descriptors]
+If you want to query the endpoint, use "endpoint" as the first argument. Use "file" if you want to query a file. In that case you need to specify the path to the file in the third argument.
 
 Chosing "studynumber" will print the number in the terminal.
-Chosing "scenarionumber" or "descriptors" will output a file named "scenariosPerStudy.txt" or "studyDescriptors.txt" respectively.
+Choosing "scenarionumber" or "keywords" will output a file named "scenariosPerStudy.txt" or "studyDescriptors.txt" respectively. It will also print a dictionary of the results to the terminal.
diff --git a/oekg evaluation/endpointWordFinder.py b/oekg evaluation/endpointWordFinder.py
diff --git a/oekg evaluation/label extraction/labels.py b/oekg evaluation/label extraction/labels.py
@@ -1,44 +1,64 @@
 import re
 import sys
 
-path = sys.argv[1]
-mode = sys.argv[2]
-#all of those were searched for alternative labels:
-#path = r"/home/madeleine/Schreibtisch/ontology/src/ontology/edits/oeo-social.omn"
-#path = r"/home/madeleine/Schreibtisch/ontology/src/ontology/edits/oeo-physical.omn"
-#path = r"/home/madeleine/Schreibtisch/ontology/src/ontology/edits/oeo-sector.omn"
-#path = r"/home/madeleine/Schreibtisch/ontology/src/ontology/edits/oeo-shared.omn"
-
 def write(result, p):
     with open("labels.txt", p) as file:
         for x in result:
             file.write(str(x)+"\n")
 
-def find(start, end):
+def find(start, end, s):
     result = re.findall('%s(.*)%s' % (start, end), s)
     return result
 
-s = ""
-start2 = 'rdfs:label "'
-start = '<http://purl.obolibrary.org/obo/IAO_0000118> "'
-end = '"@en'
-end2 = '"@de'
-
-with open(path, "r") as file:
-    for line in file:
-        s = s + str(line)
-
-if mode == "alt" or mode == "all":
-    res = find(start,end)
-    write(res,"w")
-
-if mode == "ger" or mode == "all":
-    res = find(start,end2)
-    write(res,"a+")
+VALID_MODES = ("alt", "ger", "label", "all")
+
+def main(path, mode):
+    """Extract labels from the file at path and write them to labels.txt.
+
+    mode is "alt" (IAO_0000118 values tagged @en), "ger" (IAO_0000118
+    values tagged @de), "label" (rdfs:label values tagged @en) or "all"
+    (all three, in that order). Only "alt" and "all" truncate labels.txt;
+    "ger" and "label" on their own append to the existing file.
+    """
+    start2 = 'rdfs:label "'
+    start = '<http://purl.obolibrary.org/obo/IAO_0000118> "'
+    end = '"@en'
+    end2 = '"@de'
+
+    with open(path, "r") as file:
+        s = file.read()
+
+    if mode == "alt" or mode == "all":
+        write(find(start, end, s), "w")
+
+    if mode == "ger" or mode == "all":
+        write(find(start, end2, s), "a+")
+
+    if mode == "label" or mode == "all":
+        write(find(start2, end, s), "a+")
+
+def run(argv):
+    """Validate the command-line arguments in argv and call main()."""
+    if len(argv) < 3:
+        print("Error: Missing or invalid argument!")
+        return
+    path, mode = argv[1], argv[2]
+    if mode not in VALID_MODES:
+        print("Error:Not a valid mode")
+        return
+    try:
+        main(path, mode)
+    except OSError as err:
+        # Report the real cause instead of blaming the arguments.
+        print("Error: Could not read or write a file: " + str(err))
+
+run(sys.argv)
+#all of those were searched for alternative labels:
+#path = r"/home/madeleine/Schreibtisch/ontology/src/ontology/edits/oeo-social.omn"
+#path = r"/home/madeleine/Schreibtisch/ontology/src/ontology/edits/oeo-physical.omn"
+#path = r"/home/madeleine/Schreibtisch/ontology/src/ontology/edits/oeo-sector.omn"
+#path = r"/home/madeleine/Schreibtisch/ontology/src/ontology/edits/oeo-shared.omn"
 
-if mode == "label" or mode == "all":
-    res = find(start2,end)
-    write(res,"a+")
 
 
 

diff --git a/oekg evaluation/label extraction/mergeLists.py b/oekg evaluation/label extraction/mergeLists.py
@@ -1,24 +1,43 @@
 import sys
 
-path1 = sys.argv[1]
-path2 = sys.argv[2]
+path1 = None
+path2 = None
+
+try:
+    path1 = sys.argv[1]
+    path2 = sys.argv[2]
+except IndexError:
+    # Fewer than two command-line arguments were given.
+    print("Error: Missing or invalid argument")
+
 
 list1 = []
 list2 = []
 
+if path1 is not None and path2 is not None:
 
-with open(path1, "r") as file:
-    for line in file:
-        list1.append(line.strip())
+    try:
+        with open(path1, "r") as file:
+            for line in file:
+                list1.append(line.strip())
+    except OSError:
+        print("Error: First file not found!")
 
-with open(path2, "r") as file:
-    for line in file:
-        list2.append(line.strip())
+    try:
+        with open(path2, "r") as file:
+            for line in file:
+                list2.append(line.strip())
+    except OSError:
+        print("Error: Second file not found!")
 
-for line in list1:
-    if line not in list2:
-        list2.append(line)
+    # Track seen lines in a set so the merge is linear, not quadratic.
+    seen = set(list2)
+    for line in list1:
+        if line not in seen:
+            seen.add(line)
+            list2.append(line)
 
-with (open("mergedList.txt","w") as file):
-    for x in list2:
-        file.write(str(x) + "\n")
+    # Plain `with` (no parentheses) also parses on Python < 3.10.
+    with open("mergedList.txt","w") as file:
+        for x in list2:
+            file.write(str(x) + "\n")
diff --git a/oekg evaluation/labelChecker.py b/oekg evaluation/labelChecker.py
@@ -0,0 +1,102 @@
+import sys
+import rdflib
+import requests
+
+SPARQL_ENDPOINT = "https://openenergyplatform.org/sparql_query/sparql"
+
+# The prefix is declared in the query itself so that it also resolves when
+# the parsed data file does not bind "DC".
+ABSTRACT_QUERY = """
+PREFIX DC: <http://purl.org/dc/terms/>
+
+SELECT ?sub ?obj WHERE {
+    ?sub DC:abstract ?obj.
+}"""
+
+
+def queryFile(path):
+    """Return the study URIs and abstracts found in the RDF file at path.
+
+    The result is a pair of parallel lists: (subjects, objects).
+    """
+    g = rdflib.Graph()
+    g.parse(path)
+
+    subjects = []
+    objects = []
+    for row in g.query(ABSTRACT_QUERY):
+        subjects.append(str(row.sub))  # stores the study URI
+        objects.append(str(row.obj))  # stores the abstract
+    return subjects, objects
+
+
+def queryEndpoint():
+    """Return the study URIs and abstracts served by the OEKG endpoint.
+
+    The result is a pair of parallel lists: (subjects, objects).
+    """
+    r = requests.get(
+        url=SPARQL_ENDPOINT, params={"query": ABSTRACT_QUERY}, timeout=60
+    )
+    r.raise_for_status()
+
+    subjects = []
+    objects = []
+    # Decode the JSON body instead of regex-matching the raw text: this
+    # unescapes the abstracts and keeps every URI paired with its abstract.
+    for binding in r.json()["results"]["bindings"]:
+        subjects.append(binding["sub"]["value"])
+        objects.append(binding["obj"]["value"])
+    return subjects, objects
+
+
+def checkKeywords(keys, subjects, objects):
+    """Count label occurrences per abstract into abstractEvaluation.txt.
+
+    keys is the path to a text file with one label per line; subjects and
+    objects are the parallel lists returned by the query functions.
+    """
+    with open(keys, "r") as file:
+        # Skip blank lines: an empty label would "match" every abstract.
+        keywords = [line.strip() for line in file if line.strip()]
+
+    with open("abstractEvaluation.txt", "w") as file:
+        for subject, abstract in zip(subjects, objects):
+            file.write("\n")
+            file.write(subject + "\n")
+            for label in keywords:  # check every label
+                count = abstract.count(label)
+                if count:
+                    file.write(label + ": " + str(count) + "\n")
+
+
+def main(argv):
+    """Run the script for the command line argv; return the exit status."""
+    if len(argv) < 2:
+        print("Error: Missing or invalid argument!")
+        return 1
+    mode = argv[1]  # endpoint | file
+    if mode not in ("endpoint", "file"):
+        print("Error: Invalid mode! Use 'endpoint' or 'file' as first argument!")
+        return 1
+    # "file" needs a third argument: the path to the rdf data file.
+    if len(argv) < (4 if mode == "file" else 3):
+        print("Error: Missing or invalid argument!")
+        return 1
+    keys = argv[2]  # path to the file with all the labels
+
+    try:
+        if mode == "endpoint":
+            subjects, objects = queryEndpoint()
+        else:
+            subjects, objects = queryFile(argv[3])
+        checkKeywords(keys, subjects, objects)
+    except Exception as err:
+        # Top-level boundary: report the actual cause of the failure.
+        print("Error: " + type(err).__name__ + ": " + str(err))
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))