forked from elinorbgr/ai-project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBackgroundGraph.py
162 lines (137 loc) · 5.11 KB
/
BackgroundGraph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import pickle
from operator import itemgetter
import itertools
import math
import copy
class BackgroundGraph:
    """Weighted word co-occurrence graph persisted to disk with pickle.

    ``self.graph`` maps each word to a dict ``{neighbor: weight}``. Weights
    start as raw co-occurrence counts (``addWords`` updates both directions)
    and are replaced by association scores once ``normalize`` is called.
    """

    # Floor substituted for non-positive probabilities in normalize() so that
    # the divisions and math.log() calls stay defined.
    _MIN_PROBABILITY = 10 ** -15

    def __init__(self, nameCurrentGraph):
        """Create a graph backed by the file *nameCurrentGraph*.

        The graph is loaded from that file when possible; otherwise it
        starts empty.
        """
        self.nameCurrentGraph = nameCurrentGraph
        self.graph = {}
        self.loadFileGraph()

    def addlistWords(self, listWords):
        """Split *listWords* into sentences on "." tokens and add each one.

        The "." separators belong to no sentence, empty sentences are
        skipped, and words following the last "." form a final sentence.
        A progress percentage is printed before each sentence is added.
        """
        listSentences = []
        current = []
        for word in listWords:
            if word == ".":
                if current:
                    listSentences.append(current)
                current = []
            else:
                current.append(word)
        # Keep a trailing fragment that has no closing "." instead of
        # silently dropping its words.
        if current:
            listSentences.append(current)

        total = len(listSentences)
        for i, sentence in enumerate(listSentences):
            print(100.0 * i / total)
            self.addSentence(sentence)

    def addSentence(self, sentence):
        """Call ``addWords`` on every ordered pair of words in *sentence*.

        The cartesian product yields (x, y), (y, x) and (x, x), so each
        co-occurrence inside a sentence is counted twice.
        """
        for x, y in itertools.product(sentence, sentence):
            self.addWords(x, y)

    def loadFileGraph(self):
        """Load the graph from ``self.nameCurrentGraph``.

        If the file cannot be opened, cannot be unpickled, or does not hold
        a dict, a message is printed and ``self.graph`` is left unchanged.
        """
        # SECURITY: pickle.load can execute arbitrary code while loading;
        # only open graph files that come from a trusted source.
        try:
            handle = open(self.nameCurrentGraph, 'rb')
        except IOError:
            print("File couldn't be opened ! Graph will start empty")
            return
        with handle:
            try:
                loaded = pickle.load(handle)
            except Exception:
                # Unpickling can raise many exception types; stay
                # best-effort, but let KeyboardInterrupt/SystemExit through.
                print("The file doesn't contain a graph")
                return
        if isinstance(loaded, dict):
            self.graph = loaded
        else:
            print("The file doesn't contain a graph")

    def saveFileGraph(self):
        """Pickle ``self.graph`` into the file ``self.nameCurrentGraph``."""
        with open(self.nameCurrentGraph, 'wb') as handle:
            pickle.dump(self.graph, handle)

    def addWords(self, word1, word2):
        """Increment the weight between *word1* and *word2* in both directions.

        Missing words and missing edges are created on the fly. When both
        words are equal, the single self-edge is incremented twice.
        """
        for source, target in ((word1, word2), (word2, word1)):
            neighbors = self.graph.setdefault(source, {})
            neighbors[target] = neighbors.get(target, 0) + 1

    def getNeighbors(self, word, N):
        """Return up to *N* neighbors of *word*, heaviest edge first.

        Returns an empty list when *word* is not in the graph or when *N* is
        not positive. Neighbors with equal weight keep their insertion order.
        """
        if word not in self.graph:
            return []
        ranked = sorted(self.graph[word].items(), key=itemgetter(1), reverse=True)
        return [neighbor for neighbor, _ in ranked[:max(N, 0)]]

    def prox(self, word1, word2):
        """Return the weight stored on the edge from *word1* to *word2*.

        Raises:
            KeyError: if *word1* is unknown or has no edge to *word2*.
        """
        return self.graph[word1][word2]

    @classmethod
    def _clamp(cls, probability):
        """Return *probability*, or the floor constant when it is <= 0."""
        return cls._MIN_PROBABILITY if probability <= 0 else probability

    def normalize(self):
        """Replace every edge weight by a log-likelihood-ratio style score.

        For each edge the observed probabilities (p11, p12, p21, p22) are
        compared with the "n" variants derived from the per-word totals, and
        the edge weight is overwritten in place with
        ``-2 * sum(k * log(pn / p))``. The current weights are read as
        counts, so this is meant to run once on a graph of raw counts.
        """
        clamp = self._clamp

        # Per-word totals and the grand total of all edge weights.
        compt_back = {mot: sum(d.values()) for mot, d in self.graph.items()}
        total = float(sum(compt_back.values()))

        # Relative frequency of each neighbor within its word's row. This
        # snapshot is also what is iterated below, so self.graph can be
        # overwritten safely during the loop.
        compt_rel = {
            mot1: {mot2: float(val) / compt_back[mot1] for mot2, val in dic.items()}
            for mot1, dic in self.graph.items()
        }

        for mot1, dic in compt_rel.items():
            count1 = compt_back[mot1]
            for mot2, freq in dic.items():
                count2 = compt_back[mot2]

                # NOTE(review): p11n is count1*count2/total, which looks like
                # an expected count rather than a probability, and k22 below
                # is the grand total rather than the remaining cell count.
                # Both are kept as-is so existing scores do not change;
                # confirm the intended formula before altering them.
                p11 = clamp(freq * count1 / total)
                k11 = self.graph[mot1][mot2]
                p11n = count1 * count2 / total

                p12 = clamp(count2 / total - p11)
                k12 = count2 - k11
                p12n = clamp(count2 / total - p11n)

                p21 = clamp(count1 / total - p11)
                k21 = count1 - k11
                p21n = clamp(count1 / total - p11n)

                p22 = clamp(1 - (p11 + p12 + p21))
                p22n = clamp(1 - (p11n + p12n + p21n))
                k22 = total

                self.graph[mot1][mot2] = -2 * (
                    k11 * math.log(p11n / p11)
                    + k12 * math.log(p12n / p12)
                    + k21 * math.log(p21n / p21)
                    + k22 * math.log(p22n / p22)
                )
def tests():
    """Exercise BackgroundGraph end to end, printing each intermediate result.

    Builds a small graph backed by the file "Test", prints it together with
    a neighbor query and a proximity query, normalizes it and prints the
    same information again, saves it, then reloads the file into a second
    instance and prints that graph.
    """
    # Graph creation
    demo = BackgroundGraph("Test")

    # Graph filling; some pairs are repeated so their weight grows.
    word_pairs = (
        ("chien", "chat"),
        ("souris", "chat"),
        ("laisse", "chien"),
        ("souris", "chat"),
        ("chat", "chien"),
        ("poisson", "chat"),
    )
    for first, second in word_pairs:
        demo.addWords(first, second)

    # Raw counts: whole graph, closest neighbors, proximity between 2 words
    print(demo.graph)
    closest_before = demo.getNeighbors("chat", 2)
    print(closest_before)
    proximity_before = demo.prox("chat", "poisson")
    print(proximity_before)

    # Same queries once the counts have been replaced by scores
    demo.normalize()
    print(demo.graph)
    proximity_after = demo.prox("chat", "poisson")
    print(proximity_after)
    closest_after = demo.getNeighbors("chat", 2)
    print(closest_after)

    # Serialization
    demo.saveFileGraph()

    # Load the preexisting graph back from the file
    reloaded = BackgroundGraph("Test")
    print(reloaded.graph)