-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathJE_add_short_fprints.py
executable file
·92 lines (70 loc) · 2.74 KB
/
JE_add_short_fprints.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Python3
""" Import short audio fingerprints for JE into RDF
Polina Proutskova, August-September 2019
"""
###########################################################
# Data paths
# RDFfile: existing RDF data that is loaded into the graph before fingerprints are added
RDFfile = "TTL/JE_leaders.ttl"
# RDFnewfile: destination the graph is written to once fingerprints have been added
RDFnewfile = "TTL/JE_fprints.ttl"
# FPRINTfile: CSV file the short fingerprints are read from
FPRINTfile = "DATA/id_dtl1000_idonly.csv"
###########################################################
# general import
import csv
import os
import re
import json
import dtlutil
# logging
import logging
# Minimum level handed to the project's log setup helper.
# NOTE(review): presumably this makes the logging.debug/info calls in this
# script visible -- confirm against dtlutil.setup_log.
MIN_LEVEL = logging.DEBUG
dtlutil.setup_log(MIN_LEVEL)
##############################################################
# read in rdf graph
import rdflib
from rdflib.graph import Graph, Store, URIRef, Literal, BNode
from rdflib.namespace import Namespace, RDFS
from rdflib import plugin
from rdflib.plugins import sparql
from rdflib import Namespace
from rdflib.namespace import RDF, FOAF, RDFS, DC, XSD
# Namespace objects plus initNs, which is passed to the SPARQL query
# preparation further down; DTL supplies the fingerprint_short predicate.
MO, TL, EVENT, OLO, DTL, initNs = dtlutil.init_namespaces()
# Create the graph and load the existing data from RDFfile into it.
g = dtlutil.create_graph()
dtlutil.read_in_rdf(g, RDFfile)
# Triple count before any fingerprints are added (logged again after the import).
logging.debug("\ngraph has %i triples", len(g))
##############################################################
# Read the short fingerprints from the CSV file, attach each one to the
# matching signal in the graph, then write the extended graph to RDFnewfile.
logging.info("\nReading fingerprints from %s", FPRINTfile)

# Separators between the fields of a JE file name ("-" and ".").
# Raw string: "\." in a normal string literal is an invalid escape sequence.
_JE_NAME_SEPARATORS = re.compile(r"[-.]")

# The query has the same shape for every CSV row, so it is parsed once here
# and the per-row values are supplied through initBindings.  Binding the
# values (instead of %-formatting them into the query text) also keeps a
# release title containing a quote or backslash from breaking the SPARQL.
# ?tnum, ?cd and ?part_title are bound to plain string literals, which is
# what the quoted "%i"/"%s" literals in the query text used to denote.
SIGNAL_QUERY = rdflib.plugins.sparql.prepareQuery(
    """SELECT ?trackt ?signal
    WHERE {
        ?track RDF:type MO:Track .
        ?track DC:title ?trackt .
        ?track MO:track_number ?tnum .
        ?medium MO:track ?track .
        ?medium DC:title ?mediumt .
        ?medium MO:record_number ?cd .
        ?release MO:record ?medium .
        ?release DC:title ?part_title .
        ?signal MO:published_as ?track .
    }
    """,
    initNs=initNs,
)


def _parse_je_filename(filename):
    """Return (part, cd, tnum) taken from fields 1-3 of a JE file name.

    The name is split on "-" and "."; fields 1, 2 and 3 must be integers.
    Returns None when a field is missing or is not an integer.
    """
    fields = _JE_NAME_SEPARATORS.split(filename)
    try:
        return int(fields[1]), int(fields[2]), int(fields[3])
    except (IndexError, ValueError):
        return None


# Number of JE rows seen so far; printed next to every match.
count = 0
# newline='' is what the csv module asks for when it is given a file object.
with open(FPRINTfile, 'r', newline='') as csvfile:
    for csv_row in csv.reader(csvfile, delimiter=','):
        # Only rows whose first column is a relative path ("../...") matter.
        if not csv_row or not csv_row[0].startswith(".."):
            continue

        # The file name is the fourth "/"-separated component of the path.
        path_parts = csv_row[0].split("/")
        if len(path_parts) < 4 or not path_parts[3].startswith("JE"):
            continue
        filename = path_parts[3]
        count += 1

        numbers = _parse_je_filename(filename)
        if numbers is None or len(csv_row) < 2:
            # Skip the bad row instead of aborting the whole import.
            logging.warning("Skipping malformed fingerprint row for %s",
                            csv_row[0])
            continue
        part, cd, tnum = numbers
        part_title = dtlutil.get_JE_part(part)
        fingerprint = csv_row[1]

        # find signal
        found = g.query(
            SIGNAL_QUERY,
            initBindings={
                "tnum": Literal(str(tnum)),
                "cd": Literal(str(cd)),
                "part_title": Literal(part_title),
            },
        )

        matched = False
        # A distinct loop name keeps the CSV row from being shadowed.
        for match in found:
            matched = True
            track_title, signal = match[0], match[1]
            print(count, track_title, signal)
            g.add((signal, DTL.fingerprint_short, Literal(fingerprint)))
        if not matched:
            logging.warning("No signal found for %s (part %i, cd %i, track %i)",
                            filename, part, cd, tnum)

logging.debug("\ngraph has %i triples", len(g))
dtlutil.write_rdf(g, RDFnewfile)