-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathwd-query.py
60 lines (56 loc) · 1.64 KB
/
wd-query.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import requests
# URL of the Wikidata SPARQL endpoint; the query functions below send their
# GET requests to it.
wd_url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
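# Get the properties from Wikidata that connect the given subject and object.
# Returns the endpoint's JSON response (a dict) holding the bindings for ?p,
# or None if the response is empty or the request fails.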
def get_wd_properties(s, o):
    """Fetch the Wikidata properties that link subject *s* to object *o*.

    Properties whose IRI matches the regex "wikiba.se" are filtered out by
    the query itself.

    Args:
        s: IRI of the subject WITHOUT angle brackets; the query wraps it in
            ``<...>``.
        o: Object term exactly as it should appear in the SPARQL query. It
            is inserted verbatim, so the caller must supply any angle
            brackets or quotes it needs.

    Returns:
        The decoded JSON response of the endpoint when it is non-empty.
        ``None`` when the response is empty or when the request or the JSON
        decoding raised (the exception is printed, not propagated).
    """
    # NOTE: s and o are concatenated into the query unescaped -- only pass
    # trusted values.
    query = 'SELECT ?p WHERE { <' + s + '> ?p ' + o + ' . FILTER (!regex(STR(?p), "wikiba.se")) }'
    try:
        wd_results = requests.get(wd_url, params={'query': query, 'format': 'json'}).json()
        if wd_results:
            return wd_results
    except Exception as e:
        # Deliberate best effort: report the failure and fall through to None.
        # print(e) with a single argument behaves the same on Python 2 and 3,
        # whereas the former `print e` statement is a SyntaxError on Python 3.
        print(e)
    return None
# get all 'instance of' classes for the items connected to a given property
# a class must have more than 100 items
# and fewer than 5000 items, otherwise hell breaks loose
# limit to 20 classes
def get_classes_with_numbers(prop):
    """Count, per 'instance of' (P31) class, the items that carry *prop*.

    The query keeps only statements whose value does not match the regex
    "entity", groups the matching items by their ``wdt:P31`` class, keeps
    classes with more than 100 and fewer than 5000 items, and returns at
    most 20 of them.

    Args:
        prop: Property identifier that is placed directly after the
            ``wdt:`` prefix in the query. It is inserted verbatim.

    Returns:
        The decoded JSON response of the endpoint (bindings for ``?class``
        and ``?count``) when it is non-empty. ``None`` when the response is
        empty or when the request or the JSON decoding raised (the
        exception is printed, not propagated).
    """
    # The query text is kept flush-left so the string sent to the endpoint
    # is unchanged.
    query = """
SELECT ?class (COUNT(?item) AS ?count)
WHERE
{
?item wdt:%s ?value .
?item wdt:P31 ?class .
FILTER( !regex(str(?value), "entity" ))
} GROUP BY (?class)
HAVING (?count > 100 && ?count < 5000) LIMIT 20
""" % prop
    try:
        wd_results = requests.get(wd_url, params={'query': query, 'format': 'json'}).json()
        if wd_results:
            return wd_results
    except Exception as e:
        # Deliberate best effort: report the failure and fall through to None.
        # print(e) with a single argument behaves the same on Python 2 and 3,
        # whereas the former `print e` statement is a SyntaxError on Python 3.
        print(e)
    return None
# REMINDER: LIMIT 10 must not be active in the query below (it is currently commented out there).
def get_item_triples_for_class_and_property(cl, prop):
    """Fetch all triples of the items of class *cl* that carry *prop*.

    An item qualifies when it has a ``wdt:<prop>`` value that does not match
    the regex "entity" and is a ``wdt:P31`` instance of ``<cl>``. For each
    such item the query returns every distinct (item, predicate, object)
    triple whose predicate matches the regex "wikidata.org".

    Args:
        cl: IRI of the class WITHOUT angle brackets; the query wraps it in
            ``<...>``.
        prop: Property identifier that is placed directly after the
            ``wdt:`` prefix in the query. It is unrelated to the ``?prop``
            SPARQL variable, which ranges over the returned predicates.

    Returns:
        The decoded JSON response of the endpoint (bindings for ``?item``,
        ``?prop`` and ``?object``) when it is non-empty. ``None`` when the
        response is empty or when the request or the JSON decoding raised
        (the exception is printed, not propagated).
    """
    # NOTE(review): the original comment here read "use wdt instead of" and
    # was left unfinished -- confirm what it was meant to replace.
    # The "#LIMIT 10" inside the query string is a SPARQL comment, so no
    # limit is applied. The query text is kept flush-left so the string sent
    # to the endpoint is unchanged.
    query = """
SELECT DISTINCT ?item ?prop ?object
WHERE
{
?item ?prop ?object .
?item wdt:%s ?value .
?item wdt:P31 <%s> .
FILTER( !regex(str(?value), "entity" ))
FILTER( regex(str(?prop), "wikidata.org" ))
} #LIMIT 10
""" % (prop, cl)
    try:
        wd_results = requests.get(wd_url, params={'query': query, 'format': 'json'}).json()
        if wd_results:
            return wd_results
    except Exception as e:
        # Deliberate best effort: report the failure and fall through to None.
        # print(e) with a single argument behaves the same on Python 2 and 3,
        # whereas the former `print e` statement is a SyntaxError on Python 3.
        print(e)
    return None