-
Notifications
You must be signed in to change notification settings - Fork 0
/
sparql.py
76 lines (54 loc) · 2.21 KB
/
sparql.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from SPARQLWrapper import SPARQLWrapper, JSON
from model import Category, Page
class Wikipedia:
    """Small client for category and page lookups on a DBpedia SPARQL endpoint.

    All queries go through a single ``SPARQLWrapper`` instance stored on
    ``self.sparql`` and configured to return JSON.
    """

    # Predicate used to find the pages filed under a category.
    _SUBJECT_PREDICATE = "http://purl.org/dc/terms/subject"
    # Predicate used to find the categories whose broader category is given.
    _BROADER_PREDICATE = "http://www.w3.org/2004/02/skos/core#broader"

    # Doubled braces survive ``str.format`` as the literal SPARQL group braces.
    _SUBJECTS_QUERY = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT *
        WHERE {{ ?subject <{predicate}> <{target}> }}
    """

    def __init__(self, sparql_endpoint="http://ja.dbpedia.org/sparql"):
        """Create a client bound to ``sparql_endpoint``.

        :param sparql_endpoint: URL of the SPARQL endpoint to query.
        """
        self.sparql = SPARQLWrapper(sparql_endpoint)
        self.sparql.setReturnFormat(JSON)

    def update_query(self, query):
        """Set ``query`` as the query the shared wrapper will run next.

        :param query: SPARQL query text.
        """
        self.sparql.setQuery(query)

    def exec_query(self):
        """Run the query set by :meth:`update_query`.

        :return: the converted response of the wrapper; the other methods of
            this class index it as ``result["results"]["bindings"]``.
        """
        return self.sparql.query().convert()

    def _select_subjects(self, predicate, category):
        """Return every ``?subject`` value linked to ``category`` by ``predicate``.

        :param predicate: IRI of the predicate to match, without angle brackets.
        :param category: object whose ``url`` attribute is the category IRI.
        :return: list of the ``?subject`` binding values, in response order.
        """
        # NOTE(review): both IRIs are interpolated into the query text as-is;
        # confirm callers never pass a URL taken from untrusted input.
        query = self._SUBJECTS_QUERY.format(predicate=predicate, target=category.url)
        self.update_query(query)
        bindings = self.exec_query()["results"]["bindings"]
        return [binding["subject"]["value"] for binding in bindings]

    def subject_of(self, category):
        """Fetch the pages that belong to ``category``.

        :param category: category to look up; its ``url`` is used in the query
            and its ``id`` is stored on every returned page.
        :return: list of ``Page`` objects, one per ``?subject`` binding.
        """
        return [
            Page(url=url, category_id=category.id)
            for url in self._select_subjects(self._SUBJECT_PREDICATE, category)
        ]

    def broader_of(self, category):
        """Fetch the direct sub-categories of ``category``.

        These are the resources whose ``skos:broader`` is ``category``.

        :param category: parent category; its ``url`` is used in the query and
            its ``id`` becomes ``parent_id`` of every returned category.
        :return: list of ``Category`` objects, one per ``?subject`` binding.
        """
        return [
            Category(url=url, parent_id=category.id)
            for url in self._select_subjects(self._BROADER_PREDICATE, category)
        ]

    def load(self, category, nested_depth=2):
        """Yield ``category`` followed by its descendants, depth first.

        A category is yielded before its own sub-categories. Sub-categories
        are fetched with :meth:`broader_of` only while ``nested_depth`` is
        positive, so ``nested_depth=0`` yields just ``category`` and sends no
        query. Nothing is de-duplicated: a category reachable through several
        parents is yielded once per path.

        :param category: root category of the traversal.
        :param nested_depth: number of levels to descend below ``category``.
        :return: generator over the root and the categories found beneath it.
        """
        yield category
        if nested_depth <= 0:
            return
        # An empty result simply makes the loop a no-op; no separate check needed.
        for child_category in self.broader_of(category):
            yield from self.load(child_category, nested_depth - 1)
# Script entry point: the interpreter sets ``__name__`` to "__main__" (with the
# double underscores) when this file is executed directly.
if __name__ == '__main__':
    wiki = Wikipedia()
    category = Category(url='http://ja.dbpedia.org/resource/Category:旅行', parent_id=None)
    # NOTE(review): load() is a generator, so this only creates it; no query is
    # sent until ``gen`` is iterated. Confirm whether it should be consumed here.
    gen = wiki.load(category, 3)