Skip to content

Commit

Permalink
use candidates from cosine similarity only (data)
Browse files Browse the repository at this point in the history
Lenz Furrer committed Jun 11, 2018

Verified

This commit was signed with the committer’s verified signature.
aj-stein-nist A.J. Stein
1 parent 4366d5e commit 97164f5
Showing 3 changed files with 32 additions and 30 deletions.
5 changes: 2 additions & 3 deletions config
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
 [DEFAULT]
-timestamp = 20180611-110130
 rootpath = /mnt/storage/karr/users/furrer/prlnk
+timestamp = 20180611-112025
 workers = 10

[general]
@@ -15,8 +15,7 @@ summary_fn = runs/summaries/${timestamp}.txt
prediction_fn = runs/predictions/${timestamp}.tsv

 [candidates]
-generator = SGramCosine(.5, 10, [(2, 1), (3, 1)])
-PhraseVecFixedSet(10, "mean")
+generator = SGramCosine(.5, 20, [(2, 1), (3, 1)])
 oracle = none
 workers = 0

49 changes: 26 additions & 23 deletions log
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
2018-06-11 11:01:33,835 - 'pattern' package not found; tag filters are not available for English
2018-06-11 11:01:33,845 - loading pretrained embeddings...
2018-06-11 11:01:33,845 - loading projection weights from /mnt/storage/karr/users/furrer/prlnk/data/embeddings/wvec_50_haodi-li-et-al.bin
2018-06-11 11:01:39,387 - loaded (309058, 50) matrix from /mnt/storage/karr/users/furrer/prlnk/data/embeddings/wvec_50_haodi-li-et-al.bin
2018-06-11 11:01:39,729 - loading terminology...
2018-06-11 11:01:39,958 - loading vectorizer...
2018-06-11 11:01:39,958 - loading candidate generator...
2018-06-11 11:01:56,543 - preprocessing validation data...
2018-06-11 11:01:56,544 - loading corpus...
2018-06-11 11:01:56,554 - generating candidates with 0 workers...
2018-06-11 11:02:11,068 - generated 6180 pair-wise samples (13250 with duplicates)
2018-06-11 11:02:11,068 - compiling model architecture...
2018-06-11 11:02:11,570 - preprocessing training data...
2018-06-11 11:02:11,570 - loading corpus...
2018-06-11 11:02:11,633 - generating candidates with 0 workers...
2018-06-11 11:03:19,915 - generated 28647 pair-wise samples (84693 with duplicates)
2018-06-11 11:03:19,917 - training CNN...
2018-06-11 11:03:43,450 - Ranking accuracy: 0.707751
2018-06-11 11:04:06,082 - Ranking accuracy: 0.74587
2018-06-11 11:04:28,678 - Ranking accuracy: 0.747141
2018-06-11 11:04:51,120 - Ranking accuracy: 0.743329
2018-06-11 11:04:51,121 - Epoch 00004: early stopping
2018-06-11 11:04:51,121 - done training.
2018-06-11 11:20:28,792 - 'pattern' package not found; tag filters are not available for English
2018-06-11 11:20:28,799 - loading pretrained embeddings...
2018-06-11 11:20:28,799 - loading projection weights from /mnt/storage/karr/users/furrer/prlnk/data/embeddings/wvec_50_haodi-li-et-al.bin
2018-06-11 11:20:34,286 - loaded (309058, 50) matrix from /mnt/storage/karr/users/furrer/prlnk/data/embeddings/wvec_50_haodi-li-et-al.bin
2018-06-11 11:20:34,628 - loading terminology...
2018-06-11 11:20:34,845 - loading vectorizer...
2018-06-11 11:20:34,845 - loading candidate generator...
2018-06-11 11:20:46,545 - preprocessing validation data...
2018-06-11 11:20:46,545 - loading corpus...
2018-06-11 11:20:46,557 - generating candidates with 0 workers...
2018-06-11 11:21:00,534 - generated 5651 pair-wise samples (11585 with duplicates)
2018-06-11 11:21:00,534 - compiling model architecture...
2018-06-11 11:21:01,026 - preprocessing training data...
2018-06-11 11:21:01,027 - loading corpus...
2018-06-11 11:21:01,091 - generating candidates with 0 workers...
2018-06-11 11:22:07,034 - generated 26228 pair-wise samples (71125 with duplicates)
2018-06-11 11:22:07,036 - training CNN...
2018-06-11 11:22:28,634 - Ranking accuracy: 0.715375
2018-06-11 11:22:49,605 - Ranking accuracy: 0.743329
2018-06-11 11:23:10,267 - Ranking accuracy: 0.752224
2018-06-11 11:23:30,968 - Ranking accuracy: 0.753494
2018-06-11 11:23:51,620 - Ranking accuracy: 0.767471
2018-06-11 11:24:12,168 - Ranking accuracy: 0.766201
2018-06-11 11:24:32,784 - Ranking accuracy: 0.770013
2018-06-11 11:24:32,784 - Epoch 00007: early stopping
2018-06-11 11:24:32,784 - done training.
8 changes: 4 additions & 4 deletions results
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
-accuracy 0.7433290978398983
-correct 585
+accuracy 0.770012706480305
+correct 606
 total 787
-unreachable 108
-nocandidates 0
+unreachable 110
+nocandidates 10
 ambiguous 2
 compound 10

0 comments on commit 97164f5

Please sign in to comment.