Skip to content

Commit

Permalink
[structureLearner] training now works
Browse files Browse the repository at this point in the history
  • Loading branch information
krivard committed Jun 23, 2015
1 parent 366e61f commit 9407031
Show file tree
Hide file tree
Showing 9 changed files with 6,167 additions and 5 deletions.
11 changes: 6 additions & 5 deletions examples/structureLearning/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ TSRW=l2p:mu=0.0001:eta=1.0
EPOCHS=100
PROVER=dpr
APR=eps=0.0001:alph=0.001
UNNORMALIZED=--unnormalized --weightingScheme tanh
WEIGHTINGSCHEME=--weightingScheme tanh
UNNORMALIZED=--unnormalized $(WEIGHTINGSCHEME)

ifeq ($(strip $(ITERS)),)
ITERS=10
Expand Down Expand Up @@ -133,7 +134,7 @@ pre.${NAME}-test.%.solutions.txt: ${NAME}-test.examples ${NAME}_%.wam
# uml_1_h22_01.wam
# params.h22_01.wts
post.${NAME}-test.%.solutions.txt: ${NAME}-test.examples ${NAME}_%.wam params.%.wts
java ${JOPTS} -cp ${CP} edu.cmu.ml.proppr.QueryAnswerer --programFiles ${NAME}-test.cfacts:$(word 2,$^) --queries $< --solutions $@ --prover ${PROVER} --threads ${THREADS} --apr ${APR} ${UNNORMALIZED} --params $(word 3,$^)
java ${JOPTS} -cp ${CP} edu.cmu.ml.proppr.QueryAnswerer --programFiles ${NAME}.cfacts:$(word 2,$^) --queries $< --solutions $@ --prover ${PROVER} --threads ${THREADS} --apr ${APR} ${UNNORMALIZED} --params $(word 3,$^)

# make e.g. pre.uml_1-train.h22_01.solutions.txt
# from e.g. uml_1-train.examples
Expand All @@ -146,19 +147,19 @@ pre.${NAME}-train.%.solutions.txt: ${NAME}-train.examples ${NAME}_%.wam
# uml_1_h22_01.wam
# params.h22_01.wts
post.${NAME}-train.%.solutions.txt: ${NAME}-train.examples ${NAME}_%.wam params.%.wts
java ${JOPTS} -cp ${CP} edu.cmu.ml.proppr.QueryAnswerer --programFiles ${NAME}-train.cfacts:$(word 2,$^) --queries $< --solutions $@ --prover ${PROVER} --threads ${THREADS} --apr ${APR} ${UNNORMALIZED} --params $(word 3,$^)
java ${JOPTS} -cp ${CP} edu.cmu.ml.proppr.QueryAnswerer --programFiles ${NAME}.cfacts:$(word 2,$^) --queries $< --solutions $@ --prover ${PROVER} --threads ${THREADS} --apr ${APR} ${UNNORMALIZED} --params $(word 3,$^)


# make e.g. params.alone_01.wts
# from e.g. uml_1-train.alone_01.examples.grounded
params.%.wts: ${TRAIN}.%.examples.grounded
java ${JOPTS} -cp ${CP} edu.cmu.ml.proppr.Trainer --train $< --params $@ --threads ${THREADS} --srw ${TSRW} --epochs ${EPOCHS} --apr ${APR}
java ${JOPTS} -cp ${CP} edu.cmu.ml.proppr.Trainer --train $< --params $@ --threads ${THREADS} --srw ${TSRW} --epochs ${EPOCHS} --apr ${APR} ${WEIGHTINGSCHEME}

# make e.g. uml_1-train.h22_01.examples.grounded
# from e.g. uml_1-train.examples
# uml_1_h22_01.wam
${NAME}-train.%.examples.grounded: ${NAME}-train.examples ${NAME}_%.wam
java ${JOPTS} -cp ${CP} edu.cmu.ml.proppr.Grounder --programFiles ${NAME}-train.cfacts:$(word 2,$^) --queries $< --grounded $@ --prover ${PROVER} --threads ${THREADS} --apr ${APR}
java ${JOPTS} -cp ${CP} edu.cmu.ml.proppr.Grounder --programFiles ${NAME}.cfacts:$(word 2,$^) --queries $< --grounded $@ --prover ${PROVER} --threads ${THREADS} --apr ${APR} ${WEIGHTINGSCHEME}

## Additional targets for building an executable program from gradient-generated rules:

Expand Down
45 changes: 45 additions & 0 deletions examples/structureLearning/scripts/avgprec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import sys

#scan thru lines like this:
#+--------------------
#|0.00280174 0 interp(i_uncle,arthur,margaret)
#|0.00276313 1 interp(i_uncle,charles,charlotte)
#|0.00273479 1 interp(i_uncle,arthur,charlotte)
#|0.00273329 1 interp(i_uncle,arthur,colin)
#|0.0025534 0 interp(i_uncle,christopher,victoria)
#|0.00255312 0 interp(i_uncle,andrew,jennifer)
#...
#
#and find average precision

def averagePrecision(lines, out=None):
    """Report precision at each positive example and the average precision.

    lines -- iterable of strings, each holding three tab-separated fields:
             score, integer label (non-zero means positive) and the example
             text.  Lines are expected in non-increasing score order.
    out   -- writable text stream for the report; defaults to sys.stdout.

    One row (rank, precision at that rank, score, example) is written for
    every positive example.  A positive example whose score is not greater
    than zero is counted with precision 0.  Returns the average precision
    over the positive examples, or None when there were none.

    Exits with status -1 (via sys.exit) when a line has a higher score than
    the line before it.
    """
    if out is None:
        out = sys.stdout

    # The first line is checked against this initial bound.
    lastScore = 2.0
    n = 0.0
    sumPrec = 0.0
    rank = 0.0
    numPosAtThisRank = 0.0
    out.write('#%s\t%s\t%s\t%s\n' % ('rank', 'prec@r', 'score', 'example'))
    for line in lines:
        stripped = line.strip()
        if not stripped:
            # A blank line (e.g. a trailing newline) carries no example.
            continue
        rank += 1.0
        (scoreStr, label, goal) = stripped.split("\t")
        score = float(scoreStr)
        if score > lastScore:
            out.write('lines out of order: this line has score > previous score: %s\n' % line)
            sys.exit(-1)
        # Remember this score so the next line is checked against it; without
        # this update the ordering check could never fire for scores <= 2.0.
        lastScore = score
        if not int(label):
            continue
        if score > 0:
            numPosAtThisRank += 1.0
            prec = numPosAtThisRank / rank
            sumPrec += prec
        else:
            # Positive example that received no score: counts as a miss.
            prec = 0.0
        n += 1.0
        out.write('%f\t%f\t%f\t%s\n' % (rank, prec, score, goal))

    if n > 0:
        avgPrec = sumPrec / n
        out.write('#\n#avgPrecision:  %s\n' % avgPrec)
        return avgPrec
    return None


if __name__ == "__main__":
    averagePrecision(sys.stdin)


115 changes: 115 additions & 0 deletions examples/structureLearning/scripts/prepareJointData.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import sys
import re
import random
import collections

# When True, a fact chosen as a training example may also be placed in the
# background set by splitFacts; when False, a fact chosen as an example is
# never added to the background set.
DUPLICATE_TRAIN_AND_BACKGROUND = True

#Generate a dataset of examples. Possible instances of a relation
#P(X,Y) are all pairs X,Y that appear as arguments of some relation.
#All possible first arguments x are used to produce a query p(x,Y),
#and all possible instances are listed as pos/neg answers to the query
#P(x,Y).

#command-line argument is the set of relations to use as background data,
#colon-separated, then the set of relations to use as training

# Relation names for which asExamples generates interp(i_<rel>,x,Y) queries.
rels = set(
    'father mother husband wife son daughter '
    'brother sister uncle aunt nephew niece'.split()
)

# People whose facts are split into the training files.
trainFam = set(
    'andrew arthur charles charlotte christine christopher '
    'colin james jennifer margaret penelope victoria'.split()
)

# People whose facts are split into the test files.
testFam = set(
    'alfonso angela emilio francesca gina lucia '
    'marco maria pierro roberto sophia tomaso'.split()
)

def asExamples(facts,instances):
    """Turn facts into shuffled (query, positives, negatives) examples.

    facts     -- iterable of (rel, x, y) triples.
    instances -- collection of (x, y) pairs that are candidate answers.

    For every relation in the module-level `rels` and every x that occurs as
    the first argument of some fact, one example is produced:
      query -- 'interp(i_<rel>,<x>,Y)'
      pos   -- one 'interp(i_<rel>,<x>,<y>)' string per y with (rel, x, y) in facts
      neg   -- the same form for every such first-argument person y where
               (x, y) is in instances and (rel, x, y) is not a fact
    The inputs and each (rel, x, answers) combination are printed to stdout
    as a debugging aid.  Returns the examples as a list in random order.
    """
    # print(...) with a single argument behaves the same under Python 2 and 3.
    print("asExamples facts:")
    print("\n ".join(sorted([str(f) for f in facts])))
    print("asExamples instances:")
    print("\n ".join(sorted([str(i) for i in instances])))

    # Random() already seeds itself from the system; no explicit seed needed.
    rnd = random.Random()
    trueYs = collections.defaultdict(set)
    people = set()
    for (r,x,y) in facts:
        trueYs[(r,x)].add(y)
        people.add(x)

    result = []
    print("asExamples rels:")
    for r in rels:
        for x in people:
            answers = trueYs[(r,x)]
            print('%s %s %s' % (r, x, answers))
            query = 'interp(i_%s,%s,Y)' % (r,x)
            pos = ['interp(i_%s,%s,%s)' % (r,x,y) for y in answers]
            neg = ['interp(i_%s,%s,%s)' % (r,x,y)
                   for y in people
                   if (x,y) in instances and y not in answers]
            result.append((query,pos,neg))
    rnd.shuffle(result)
    return result

def loadKinship(fileName):
    """Read tab-separated kinship facts from fileName.

    Every non-blank line must hold exactly four tab-separated fields; the
    first field is ignored and the remaining three are read as (rel, x, y).
    Blank lines are skipped.

    Returns a pair (facts, instances): facts is the set of (rel, x, y)
    triples and instances is the set of (x, y) pairs seen in any fact.
    Raises ValueError when a non-blank line does not have four fields.
    """
    facts = set()
    instances = set()
    # The with-block closes the file even if a malformed line raises.
    with open(fileName) as fp:
        for line in fp:
            line = line.strip()
            if not line:
                continue
            (dummy,rel,x,y) = line.split('\t')
            facts.add((rel,x,y))
            instances.add((x,y))
    return facts,instances

def splitFacts(facts,fam,pTrain,pSkip):
    """Randomly divide the facts about one family into examples and background.

    Only facts (rel, x, y) whose x is in fam are considered.  For each such
    fact two independent random draws are made: the fact becomes an example
    when the first draw is below pTrain, and is eligible for the background
    when the second draw is above pSkip.  An eligible fact that is also an
    example is added to the background only if
    DUPLICATE_TRAIN_AND_BACKGROUND is true.

    Returns the pair (examples, background), both sets of (rel, x, y).
    """
    rng = random.Random(None)
    chosenExamples = set()
    chosenBackground = set()
    for (rel, subj, obj) in facts:
        if subj not in fam:
            continue
        triple = (rel, subj, obj)
        # Draw order matters for the random stream: train first, then background.
        inTrain = pTrain > rng.random()
        inBackground = pSkip < rng.random()
        if inTrain:
            chosenExamples.add(triple)
        if not inBackground:
            continue
        if inTrain and not DUPLICATE_TRAIN_AND_BACKGROUND:
            continue
        chosenBackground.add(triple)
    return chosenExamples, chosenBackground

def _writeFacts(fileName, facts):
    """Write each (rel, x, y) triple to fileName as a tab-separated 'rel' line."""
    with open(fileName, 'w') as fp:
        for (p,x,y) in facts:
            fp.write('rel\t%s\t%s\t%s\n' % (p,x,y))


def _writeExamples(fileName, examples):
    """Write one line per (query, pos, neg) example to fileName.

    Each line is the query followed by tab-separated answers, positives
    prefixed with '+' and negatives with '-'.
    """
    with open(fileName, 'w') as fp:
        for (q,pos,neg) in examples:
            fields = [q]
            fields.extend('+' + px for px in pos)
            fields.extend('-' + nx for nx in neg)
            fp.write('\t'.join(fields) + '\n')


# Usage: trainStem testStem pTrain pSkip
# Reads kinship.cfacts from the current directory and writes
# <trainStem>.cfacts, <testStem>.cfacts, <trainStem>.trainData and
# <testStem>.testData.
if __name__ == "__main__":
    if len(sys.argv)!=5:
        print('usage: trainStem testStem pTrain pSkip')
        sys.exit(-1)

    trainStem = sys.argv[1]
    testStem = sys.argv[2]
    pTrain = float(sys.argv[3])
    pSkip = float(sys.argv[4])

    facts,instances = loadKinship('kinship.cfacts')
    trainExampleFacts,trainBackgroundFacts = splitFacts(facts,trainFam,pTrain,pSkip)
    testExampleFacts,testBackgroundFacts = splitFacts(facts,testFam,pTrain,pSkip)

    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(trainStem+".cfacts holds background facts for trainFam")
    _writeFacts(trainStem+'.cfacts', trainBackgroundFacts)
    print(testStem+".cfacts holds background facts for testFam")
    _writeFacts(testStem+'.cfacts', testBackgroundFacts)

    print(trainStem+".trainData holds training facts for trainFam")
    _writeExamples(trainStem+'.trainData', asExamples(trainExampleFacts,instances))
    # Message corrected: this file holds the test examples, not training ones.
    print(testStem+".testData holds test facts for testFam")
    _writeExamples(testStem+'.testData', asExamples(testExampleFacts,instances))
51 changes: 51 additions & 0 deletions examples/structureLearning/tmp-test.cfacts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
rel son alfonso marco
rel aunt angela alfonso
rel daughter sophia lucia
rel mother francesca angela
rel father marco alfonso
rel husband marco lucia
rel uncle tomaso sophia
rel aunt gina sophia
rel brother marco angela
rel sister sophia alfonso
rel niece sophia angela
rel wife francesca pierro
rel wife gina emilio
rel son alfonso lucia
rel sister lucia emilio
rel son marco francesca
rel niece sophia gina
rel daughter sophia marco
rel niece sophia emilio
rel mother maria emilio
rel husband roberto maria
rel uncle emilio alfonso
rel mother maria lucia
rel daughter angela pierro
rel father roberto emilio
rel aunt gina alfonso
rel wife lucia marco
rel father roberto lucia
rel wife angela tomaso
rel daughter lucia maria
rel sister angela marco
rel husband tomaso angela
rel niece sophia tomaso
rel mother francesca marco
rel mother lucia sophia
rel daughter lucia roberto
rel father marco sophia
rel brother alfonso sophia
rel husband emilio gina
rel son emilio roberto
rel father pierro angela
rel wife maria roberto
rel son marco pierro
rel father pierro marco
rel nephew alfonso tomaso
rel nephew alfonso gina
rel uncle emilio sophia
rel brother emilio lucia
rel nephew alfonso angela
rel husband pierro francesca
rel uncle tomaso alfonso
Loading

0 comments on commit 9407031

Please sign in to comment.