Merge pull request #9 from Maluuba/oo-api
add OO API
kracwarlock authored Feb 27, 2018
2 parents 8400618 + d2d4970 commit 86584e1
Showing 5 changed files with 108 additions and 4 deletions.
13 changes: 11 additions & 2 deletions README.md
@@ -47,20 +47,29 @@ where each line in the hypothesis file is a generated sentence and the corresponding
lines across the reference files are ground truth reference sentences for the
corresponding hypothesis.

-### Within a script: for the entire corpus ###
+### functional API: for the entire corpus ###

from nlgeval import compute_metrics
metrics_dict = compute_metrics(hypothesis='examples/hyp.txt',
references=['examples/ref1.txt', 'examples/ref2.txt'])

-### Within a script: for only one sentence ###
+### functional API: for only one sentence ###

from nlgeval import compute_individual_metrics
metrics_dict = compute_individual_metrics(references, hypothesis)

where `references` is a list of ground truth reference text strings and
`hypothesis` is the hypothesis text string.
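
For example, a minimal call might look like this (the sentences are illustrative; the metric keys match the scorers listed in `nlgeval/__init__.py`):

    from nlgeval import compute_individual_metrics

    # Illustrative inputs: any list of reference strings plus one hypothesis string.
    references = ["the cat sat on the mat", "a cat was sitting on the mat"]
    hypothesis = "the cat is on the mat"
    metrics_dict = compute_individual_metrics(references, hypothesis)
    print(metrics_dict["Bleu_1"], metrics_dict["ROUGE_L"])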

### object-oriented API: for repeated calls in a script ###

from nlgeval import NLGEval
nlgeval = NLGEval() # loads the models
metrics_dict = nlgeval.evaluate(references, hypothesis)

where `references` is a list of ground truth reference text strings and
`hypothesis` is the hypothesis text string.
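
Because `NLGEval()` loads the models once in the constructor, the same object can be reused cheaply; a short sketch with illustrative data:

    from nlgeval import NLGEval

    nlgeval = NLGEval()  # models load once, up front
    pairs = [(["a reference sentence"], "a hypothesis sentence"),
             (["another reference"], "another hypothesis")]
    for references, hypothesis in pairs:
        print(nlgeval.evaluate(references, hypothesis))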

## Reference ##
If you use this code as part of any published research, please cite the following paper:

81 changes: 81 additions & 0 deletions nlgeval/__init__.py
@@ -126,3 +126,84 @@ def compute_individual_metrics(ref, hyp, no_overlap=False, no_skipthoughts=False, no_glove=False):
ret_scores[name] = value

return ret_scores


class NLGEval:
def __init__(self, no_overlap=False, no_skipthoughts=False, no_glove=False):
self.no_overlap = no_overlap
if not no_overlap:
self.load_scorers()

self.no_skipthoughts = no_skipthoughts
if not self.no_skipthoughts:
self.load_skipthought_model()

self.no_glove = no_glove
if not self.no_glove:
self.load_glove()

def load_scorers(self):
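        # Each scorer is paired with the name(s) of the metric(s) it reports;
        # Bleu returns four scores, so its entry carries a list of four names.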
self.scorers = [
(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
(Meteor(), "METEOR"),
(Rouge(), "ROUGE_L"),
(Cider(), "CIDEr")
]

def load_skipthought_model(self):
from nlgeval.skipthoughts import skipthoughts
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
self.np = np
self.cosine_similarity = cosine_similarity

model = skipthoughts.load_model()
self.skipthought_encoder = skipthoughts.Encoder(model)

def load_glove(self):
from nlgeval.word2vec.evaluate import Embedding
from nlgeval.word2vec.evaluate import eval_emb_metrics
import numpy as np
self.eval_emb_metrics = eval_emb_metrics
self.np = np
self.glove_emb = Embedding()

def evaluate(self, ref, hyp):
assert isinstance(hyp, str)
ref = [a.strip() for a in ref]
refs = {0: ref}
ref_list = [ref]

hyps = {0: [hyp.strip()]}
hyp_list = [hyp]

ret_scores = {}
if not self.no_overlap:
for scorer, method in self.scorers:
score, scores = scorer.compute_score(refs, hyps)
if isinstance(method, list):
for sc, scs, m in zip(score, scores, method):
ret_scores[m] = sc
else:
ret_scores[method] = score

        if not self.no_skipthoughts:
            vector_hyps = self.skipthought_encoder.encode([h.strip() for h in hyp_list], verbose=False)
            ref_list_T = self.np.array(ref_list).T.tolist()
            # List comprehensions rather than map(), so the results are real
            # lists that numpy can reduce under Python 3.
            vector_refs = [self.skipthought_encoder.encode([r.strip() for r in refl], verbose=False)
                           for refl in ref_list_T]
            cosine_similarity = [self.cosine_similarity(refv, vector_hyps).diagonal() for refv in vector_refs]
            # Best-matching reference for each hypothesis, averaged over hypotheses.
            cosine_similarity = self.np.max(cosine_similarity, axis=0).mean()
            ret_scores['SkipThoughtCS'] = cosine_similarity

if not self.no_glove:
glove_hyps = [h.strip() for h in hyp_list]
ref_list_T = self.np.array(ref_list).T.tolist()
            glove_refs = [[r.strip() for r in refl] for refl in ref_list_T]  # a real list, not a lazy map
scores = self.eval_emb_metrics(glove_hyps, glove_refs, emb=self.glove_emb)
scores = scores.split('\n')
for score in scores:
name, value = score.split(':')
value = float(value.strip())
ret_scores[name] = value

return ret_scores
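
The constructor flags make it possible to skip the expensive embedding models entirely. A minimal sketch, assuming only the overlap metrics are wanted:

    from nlgeval import NLGEval

    # Skips loading the skip-thought and GloVe models; the result then
    # contains only Bleu_1..Bleu_4, METEOR, ROUGE_L and CIDEr.
    fast_eval = NLGEval(no_skipthoughts=True, no_glove=True)
    scores = fast_eval.evaluate(["a reference sentence"], "a hypothesis sentence")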
5 changes: 3 additions & 2 deletions nlgeval/word2vec/evaluate.py
@@ -28,11 +28,12 @@ def vec(self, key):
return self.unk


-def eval_emb_metrics(hypothesis, references):
+def eval_emb_metrics(hypothesis, references, emb=None):
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import word_tokenize
import numpy as np
-    emb = Embedding()
+    if emb is None:
+        emb = Embedding()

emb_hyps = []
avg_emb_hyps = []
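
The new `emb` parameter lets a caller load the word vectors once and reuse them across calls, which is what `NLGEval.load_glove` above relies on. A minimal sketch with illustrative sentences:

    from nlgeval.word2vec.evaluate import Embedding, eval_emb_metrics

    emb = Embedding()  # load the word vectors once
    hypotheses = ["the cat is on the mat"]     # one string per example
    references = [["the cat sat on the mat"]]  # one list of references, aligned with hypotheses
    # Repeated calls reuse `emb` instead of rebuilding it each time.
    print(eval_emb_metrics(hypotheses, references, emb=emb))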
Empty file added test/__init__.py
Empty file.
13 changes: 13 additions & 0 deletions test/api.py
@@ -0,0 +1,13 @@
from nlgeval import NLGEval

def test_oo_api():
with open("examples/hyp.txt") as f:
hyp = f.readlines()
with open("examples/ref1.txt") as f:
ref1 = f.readlines()
with open("examples/ref2.txt") as f:
ref2 = f.readlines()

nlge = NLGEval()
res = nlge.evaluate([ref1[0]] + [ref2[0]], hyp[0])
res = nlge.evaluate([ref1[1]] + [ref2[1]], hyp[1])
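    # Both calls reuse the models loaded once by NLGEval() above; repeated
    # evaluation without reloading is the point of the object-oriented API.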
