Merge pull request #9 from Maluuba/oo-api
add OO API
kracwarlock authored Feb 27, 2018
2 parents 8400618 + d2d4970 commit 86584e1
Showing 5 changed files with 108 additions and 4 deletions.
13 changes: 11 additions & 2 deletions README.md
@@ -47,20 +47,29 @@ where each line in the hypothesis file is a generated sentence and the corresponding
lines across the reference files are ground truth reference sentences for the
corresponding hypothesis.

-### Within a script: for the entire corpus ###
+### functional API: for the entire corpus ###

from nlgeval import compute_metrics
metrics_dict = compute_metrics(hypothesis='examples/hyp.txt',
references=['examples/ref1.txt', 'examples/ref2.txt'])

-### Within a script: for only one sentence ###
+### functional API: for only one sentence ###

from nlgeval import compute_individual_metrics
metrics_dict = compute_individual_metrics(references, hypothesis)

where `references` is a list of ground truth reference text strings and
`hypothesis` is the hypothesis text string.
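
For example, a minimal call might look like this (the sentences are illustrative; the metric keys match the scorers listed in `nlgeval/__init__.py`):

    from nlgeval import compute_individual_metrics

    # Illustrative inputs: any list of reference strings plus one hypothesis string.
    references = ["the cat sat on the mat", "a cat was sitting on the mat"]
    hypothesis = "the cat is on the mat"
    metrics_dict = compute_individual_metrics(references, hypothesis)
    print(metrics_dict["Bleu_1"], metrics_dict["ROUGE_L"])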

### object-oriented API: for repeated calls in a script ###

from nlgeval import NLGEval
nlgeval = NLGEval() # loads the models
metrics_dict = nlgeval.evaluate(references, hypothesis)

where `references` is a list of ground truth reference text strings and
`hypothesis` is the hypothesis text string.
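
Because `NLGEval()` loads the models once in the constructor, the same object can be reused cheaply; a short sketch with illustrative data:

    from nlgeval import NLGEval

    nlgeval = NLGEval()  # models load once, up front
    pairs = [(["a reference sentence"], "a hypothesis sentence"),
             (["another reference"], "another hypothesis")]
    for references, hypothesis in pairs:
        print(nlgeval.evaluate(references, hypothesis))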

## Reference ##
If you use this code as part of any published research, please cite the following paper:

81 changes: 81 additions & 0 deletions nlgeval/__init__.py
@@ -126,3 +126,84 @@ def compute_individual_metrics(ref, hyp, no_overlap=False, no_skipthoughts=False, no_glove=False):
ret_scores[name] = value

return ret_scores


class NLGEval:
def __init__(self, no_overlap=False, no_skipthoughts=False, no_glove=False):
self.no_overlap = no_overlap
if not no_overlap:
self.load_scorers()

self.no_skipthoughts = no_skipthoughts
if not self.no_skipthoughts:
self.load_skipthought_model()

self.no_glove = no_glove
if not self.no_glove:
self.load_glove()

def load_scorers(self):
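        # Each scorer is paired with the name(s) of the metric(s) it reports;
        # Bleu returns four scores, so its entry carries a list of four names.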
self.scorers = [
(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
(Meteor(), "METEOR"),
(Rouge(), "ROUGE_L"),
(Cider(), "CIDEr")
]

def load_skipthought_model(self):
from nlgeval.skipthoughts import skipthoughts
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
self.np = np
self.cosine_similarity = cosine_similarity

model = skipthoughts.load_model()
self.skipthought_encoder = skipthoughts.Encoder(model)

def load_glove(self):
from nlgeval.word2vec.evaluate import Embedding
from nlgeval.word2vec.evaluate import eval_emb_metrics
import numpy as np
self.eval_emb_metrics = eval_emb_metrics
self.np = np
self.glove_emb = Embedding()

def evaluate(self, ref, hyp):
assert isinstance(hyp, str)
ref = [a.strip() for a in ref]
refs = {0: ref}
ref_list = [ref]

hyps = {0: [hyp.strip()]}
hyp_list = [hyp]

ret_scores = {}
if not self.no_overlap:
for scorer, method in self.scorers:
score, scores = scorer.compute_score(refs, hyps)
if isinstance(method, list):
for sc, scs, m in zip(score, scores, method):
ret_scores[m] = sc
else:
ret_scores[method] = score

        if not self.no_skipthoughts:
            vector_hyps = self.skipthought_encoder.encode([h.strip() for h in hyp_list], verbose=False)
            ref_list_T = self.np.array(ref_list).T.tolist()
            # List comprehensions rather than map(), so the results are real
            # lists that numpy can reduce under Python 3.
            vector_refs = [self.skipthought_encoder.encode([r.strip() for r in refl], verbose=False)
                           for refl in ref_list_T]
            cosine_similarity = [self.cosine_similarity(refv, vector_hyps).diagonal() for refv in vector_refs]
            # Best-matching reference for each hypothesis, averaged over hypotheses.
            cosine_similarity = self.np.max(cosine_similarity, axis=0).mean()
            ret_scores['SkipThoughtCS'] = cosine_similarity

if not self.no_glove:
glove_hyps = [h.strip() for h in hyp_list]
ref_list_T = self.np.array(ref_list).T.tolist()
            glove_refs = [[r.strip() for r in refl] for refl in ref_list_T]  # a real list, not a lazy map
scores = self.eval_emb_metrics(glove_hyps, glove_refs, emb=self.glove_emb)
scores = scores.split('\n')
for score in scores:
name, value = score.split(':')
value = float(value.strip())
ret_scores[name] = value

return ret_scores
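
The constructor flags make it possible to skip the expensive embedding models entirely. A minimal sketch, assuming only the overlap metrics are wanted:

    from nlgeval import NLGEval

    # Skips loading the skip-thought and GloVe models; the result then
    # contains only Bleu_1..Bleu_4, METEOR, ROUGE_L and CIDEr.
    fast_eval = NLGEval(no_skipthoughts=True, no_glove=True)
    scores = fast_eval.evaluate(["a reference sentence"], "a hypothesis sentence")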
5 changes: 3 additions & 2 deletions nlgeval/word2vec/evaluate.py
@@ -28,11 +28,12 @@ def vec(self, key):
return self.unk


-def eval_emb_metrics(hypothesis, references):
+def eval_emb_metrics(hypothesis, references, emb=None):
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import word_tokenize
import numpy as np
-    emb = Embedding()
+    if emb is None:
+        emb = Embedding()

emb_hyps = []
avg_emb_hyps = []
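
The new `emb` parameter lets a caller load the word vectors once and reuse them across calls, which is what `NLGEval.load_glove` above relies on. A minimal sketch with illustrative sentences:

    from nlgeval.word2vec.evaluate import Embedding, eval_emb_metrics

    emb = Embedding()  # load the word vectors once
    hypotheses = ["the cat is on the mat"]     # one string per example
    references = [["the cat sat on the mat"]]  # one list of references, aligned with hypotheses
    # Repeated calls reuse `emb` instead of rebuilding it each time.
    print(eval_emb_metrics(hypotheses, references, emb=emb))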
Empty file added test/__init__.py
Empty file.
13 changes: 13 additions & 0 deletions test/api.py
@@ -0,0 +1,13 @@
from nlgeval import NLGEval

def test_oo_api():
with open("examples/hyp.txt") as f:
hyp = f.readlines()
with open("examples/ref1.txt") as f:
ref1 = f.readlines()
with open("examples/ref2.txt") as f:
ref2 = f.readlines()

nlge = NLGEval()
res = nlge.evaluate([ref1[0]] + [ref2[0]], hyp[0])
res = nlge.evaluate([ref1[1]] + [ref2[1]], hyp[1])
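    # Both calls reuse the models loaded once by NLGEval() above; repeated
    # evaluation without reloading is the point of the object-oriented API.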
