-
Notifications
You must be signed in to change notification settings - Fork 49
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[structureLearner] training now works
- Loading branch information
Showing
9 changed files
with
6,167 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import sys | ||
|
||
#scan thru lines like this: | ||
#+-------------------- | ||
#|0.00280174 0 interp(i_uncle,arthur,margaret) | ||
#|0.00276313 1 interp(i_uncle,charles,charlotte) | ||
#|0.00273479 1 interp(i_uncle,arthur,charlotte) | ||
#|0.00273329 1 interp(i_uncle,arthur,colin) | ||
#|0.0025534 0 interp(i_uncle,christopher,victoria) | ||
#|0.00255312 0 interp(i_uncle,andrew,jennifer) | ||
#... | ||
# | ||
#and find average precision | ||
|
||
def averagePrecision(lines, out):
    """Scan ranked, tab-separated result lines and compute average precision.

    Each line must hold three tab-separated fields: score, label, goal.
    Lines are expected in non-increasing score order.  For every line whose
    label is non-zero, a row 'rank<TAB>prec@r<TAB>score<TAB>goal' is written
    to `out`; a positive with a score of zero (or less) is counted with
    precision 0.

    lines -- iterable of text lines (e.g. sys.stdin)
    out   -- object with a write() method that receives the report rows

    Returns the mean of the per-positive precisions, or None when no
    positive example was seen.  Calls sys.exit(-1) when a line has a higher
    score than the line before it.
    """
    # Scores are assumed to never exceed 2.0, so the first line always passes.
    lastScore = 2.0
    n = 0.0
    sumPrec = 0.0
    rank = 0.0
    numPosAtThisRank = 0.0
    out.write('#%s\t%s\t%s\t%s\n' % ('rank', 'prec@r', 'score', 'example'))
    for line in lines:
        rank += 1.0
        (scoreStr, label, goal) = line.strip().split("\t")
        score = float(scoreStr)
        # Sanity check: the input must be sorted by decreasing score.
        if score > lastScore:
            out.write('lines out of order: this line has score > previous score: %s\n' % line)
            sys.exit(-1)
        # Remember this score; without this the check above could never fire.
        lastScore = score
        if int(label):
            if score > 0:
                numPosAtThisRank += 1.0
                prec = numPosAtThisRank / rank
                sumPrec += prec
            else:
                # An unretrieved positive contributes zero precision.
                prec = 0.0
            n += 1.0
            out.write('%f\t%f\t%f\t%s\n' % (rank, prec, score, goal))
    if n > 0:
        return sumPrec / n
    return None


if __name__ == "__main__":
    avgPrec = averagePrecision(sys.stdin, sys.stdout)
    if avgPrec is not None:
        print('#\n#avgPrecision:  %s' % avgPrec)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
import sys | ||
import re | ||
import random | ||
import collections | ||
|
||
# When True, a fact picked as a training example may also be kept as a
# background fact (see splitFacts); when False the two sets never overlap.
DUPLICATE_TRAIN_AND_BACKGROUND = True

# Generate a dataset of examples.  The possible instances of a relation
# P(X,Y) are all pairs X,Y that appear as arguments of some relation.
# Every possible first argument x is used to produce a query P(x,Y),
# and all possible instances are listed as pos/neg answers to that query.

# Command-line argument is the set of relations to use as background data,
# colon-separated, then the set of relations to use as training.
# NOTE(review): the usage string printed by the main section is
# 'trainStem testStem pTrain pSkip' -- this comment looks stale; confirm.

# Kinship relation names that queries are generated for.
rels = set(
    'father mother husband wife son daughter '
    'brother sister uncle aunt nephew niece'.split()
)
# People whose facts go into the training split.
trainFam = set(
    'andrew arthur charles charlotte christine christopher '
    'colin james jennifer margaret penelope victoria'.split()
)
# People whose facts go into the test split.
testFam = set(
    'alfonso angela emilio francesca gina lucia '
    'marco maria pierro roberto sophia tomaso'.split()
)
|
||
def asExamples(facts,instances):
    """Build shuffled (query, positives, negatives) triples from facts.

    facts     -- iterable of (rel, x, y) tuples treated as true
    instances -- collection of (x, y) pairs considered possible answers

    For every relation in the module-level `rels` and every x that occurs
    as a first argument in `facts`, one triple is produced:
      query     'interp(i_<rel>,<x>,Y)'
      positives one goal per y with (rel, x, y) in facts
      negatives one goal per first-argument person y where (x, y) is in
                `instances` but y is not a true answer
    The facts, instances and per-query true answers are echoed to stdout.
    The returned list is in random order.
    """
    print("asExamples facts:")
    print("\n ".join(sorted(str(fact) for fact in facts)))
    print("asExamples instances:")
    print("\n ".join(sorted(str(pair) for pair in instances)))

    shuffler = random.Random()
    shuffler.seed(None)

    # Map (relation, first argument) -> set of true second arguments.
    answers = collections.defaultdict(set)
    people = set()
    for (rel, subj, obj) in facts:
        answers[(rel, subj)].add(obj)
        people.add(subj)

    examples = []
    print("asExamples rels:")
    for rel in rels:
        for subj in people:
            known = answers[(rel, subj)]
            print('%s %s %s' % (rel, subj, known))
            positives = [
                'interp(i_%s,%s,%s)' % (rel, subj, obj) for obj in known
            ]
            negatives = [
                'interp(i_%s,%s,%s)' % (rel, subj, other)
                for other in people
                if (subj, other) in instances and other not in known
            ]
            examples.append(
                ('interp(i_%s,%s,Y)' % (rel, subj), positives, negatives)
            )
    shuffler.shuffle(examples)
    return examples
|
||
def loadKinship(fileName):
    """Read a tab-separated kinship facts file.

    Every non-blank line must have four tab-separated fields; the first is
    ignored and the remaining three are read as (rel, x, y).

    fileName -- path of the file to read

    Returns (facts, instances): `facts` is the set of (rel, x, y) tuples and
    `instances` is the set of (x, y) pairs that occur in any fact.
    Raises ValueError when a non-blank line does not have four fields.
    """
    facts = set()
    instances = set()
    # The context manager closes the file even if a line fails to parse.
    with open(fileName) as fp:
        for line in fp:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            (dummy, rel, x, y) = stripped.split('\t')
            facts.add((rel, x, y))
            instances.add((x, y))
    return facts, instances
|
||
def splitFacts(facts,fam,pTrain,pSkip):
    """Randomly divide one family's facts into examples and background.

    facts  -- iterable of (rel, x, y) tuples
    fam    -- collection of people; only facts whose x is in `fam` are used
    pTrain -- a fact becomes an example when a random draw is below this
    pSkip  -- a fact is eligible as background when a second random draw
              is above this

    An eligible fact that was also picked as an example is added to the
    background only when DUPLICATE_TRAIN_AND_BACKGROUND is true.

    Returns (examples, background), both sets of (rel, x, y) tuples.
    """
    chosenExamples = set()
    chosenBackground = set()
    dice = random.Random()
    dice.seed(None)
    for fact in facts:
        (rel, subj, obj) = fact
        if subj not in fam:
            continue
        # Two independent draws per fact, taken in this order.
        forTrain = dice.random() < pTrain
        forBackground = dice.random() > pSkip
        if forTrain:
            chosenExamples.add((rel, subj, obj))
        if not forBackground:
            continue
        if not forTrain or DUPLICATE_TRAIN_AND_BACKGROUND:
            chosenBackground.add((rel, subj, obj))
    return chosenExamples, chosenBackground
|
||
# Script entry point: split kinship.cfacts into train/test background fact
# files and train/test example files.
#
# usage: trainStem testStem pTrain pSkip
#   trainStem -- path prefix for <trainStem>.cfacts and <trainStem>.trainData
#   testStem  -- path prefix for <testStem>.cfacts and <testStem>.testData
#   pTrain    -- passed to splitFacts as the example-selection threshold
#   pSkip     -- passed to splitFacts as the background-skip threshold
#
# Every output file is written inside a `with` block so the handle is closed
# even when a write or asExamples raises part-way through.
if __name__ == "__main__":
    if len(sys.argv) != 5:
        print('usage: trainStem testStem pTrain pSkip')
        sys.exit(-1)

    trainStem = sys.argv[1]
    testStem = sys.argv[2]
    pTrain = float(sys.argv[3])
    pSkip = float(sys.argv[4])

    facts, instances = loadKinship('kinship.cfacts')
    trainExampleFacts, trainBackgroundFacts = splitFacts(facts, trainFam, pTrain, pSkip)
    testExampleFacts, testBackgroundFacts = splitFacts(facts, testFam, pTrain, pSkip)

    print(trainStem + ".cfacts holds background facts for trainFam")
    with open(trainStem + '.cfacts', 'w') as fp:
        for (p, x, y) in trainBackgroundFacts:
            fp.write('rel\t%s\t%s\t%s\n' % (p, x, y))

    print(testStem + ".cfacts holds background facts for testFam")
    with open(testStem + '.cfacts', 'w') as fp:
        for (p, x, y) in testBackgroundFacts:
            fp.write('rel\t%s\t%s\t%s\n' % (p, x, y))

    # Example files: one query per line, followed by tab-separated answers
    # prefixed with '+' (positive) or '-' (negative).
    print(trainStem + ".trainData holds training facts for trainFam")
    with open(trainStem + '.trainData', 'w') as fp:
        for (q, pos, neg) in asExamples(trainExampleFacts, instances):
            fp.write(q)
            for px in pos:
                fp.write('\t+' + px)
            for nx in neg:
                fp.write('\t-' + nx)
            fp.write('\n')

    print(testStem + ".testData holds training facts for testFam")
    with open(testStem + '.testData', 'w') as fp:
        for (q, pos, neg) in asExamples(testExampleFacts, instances):
            fp.write(q)
            for px in pos:
                fp.write('\t+' + px)
            for nx in neg:
                fp.write('\t-' + nx)
            fp.write('\n')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
rel son alfonso marco | ||
rel aunt angela alfonso | ||
rel daughter sophia lucia | ||
rel mother francesca angela | ||
rel father marco alfonso | ||
rel husband marco lucia | ||
rel uncle tomaso sophia | ||
rel aunt gina sophia | ||
rel brother marco angela | ||
rel sister sophia alfonso | ||
rel niece sophia angela | ||
rel wife francesca pierro | ||
rel wife gina emilio | ||
rel son alfonso lucia | ||
rel sister lucia emilio | ||
rel son marco francesca | ||
rel niece sophia gina | ||
rel daughter sophia marco | ||
rel niece sophia emilio | ||
rel mother maria emilio | ||
rel husband roberto maria | ||
rel uncle emilio alfonso | ||
rel mother maria lucia | ||
rel daughter angela pierro | ||
rel father roberto emilio | ||
rel aunt gina alfonso | ||
rel wife lucia marco | ||
rel father roberto lucia | ||
rel wife angela tomaso | ||
rel daughter lucia maria | ||
rel sister angela marco | ||
rel husband tomaso angela | ||
rel niece sophia tomaso | ||
rel mother francesca marco | ||
rel mother lucia sophia | ||
rel daughter lucia roberto | ||
rel father marco sophia | ||
rel brother alfonso sophia | ||
rel husband emilio gina | ||
rel son emilio roberto | ||
rel father pierro angela | ||
rel wife maria roberto | ||
rel son marco pierro | ||
rel father pierro marco | ||
rel nephew alfonso tomaso | ||
rel nephew alfonso gina | ||
rel uncle emilio sophia | ||
rel brother emilio lucia | ||
rel nephew alfonso angela | ||
rel husband pierro francesca | ||
rel uncle tomaso alfonso |
Oops, something went wrong.