simple readme

huggingface · Jul 7, 2017 · 8a6e332 · 8a6e332
1 parent 11cadbf
commit 8a6e332
Show file tree

Hide file tree

Showing 4 changed files with 44 additions and 20 deletions.
diff --git a/neuralcoref/__init__.py b/neuralcoref/__init__.py
@@ -1,18 +1 @@
-import sys
 
-__project__ = "neuralcoref"
-__version__ = "0.1"
-__repo__ = "https://github.com/huggingface/neuralcoref"
-
-from neuralcoref.algorithm import Algorithm
-
-def print_version():
-    sv = sys.version_info
-    py_version = "{}.{}.{}".format(sv.major, sv.minor, sv.micro)
-    version_parts = __version__.split(".")
-    s = "{} version: [{}], Python {}".format(__project__, __version__, py_version)
-    s += "\nMajor version: {}  (breaking changes)".format(version_parts[0])
-    s += "\nMinor version: {}  (extra feature)".format(version_parts[1])
-    s += "\nMicro version: {} (commit count)".format(version_parts[2])
-    s += "\nFind out the most recent version at {}".format(__repo__)
-    return s
diff --git a/neuralcoref/algorithm.py b/neuralcoref/algorithm.py
@@ -61,7 +61,7 @@ def get_pair_mentions_score(self, antecedent, mention, pair_features):
         return self._score(first_layer_input, self.pair_mentions_model)
 
 
-class Algorithm:
+class Coref:
     '''
     Main coreference resolution algorithm
     '''

diff --git a/neuralcoref/server.py b/neuralcoref/server.py
@@ -10,13 +10,13 @@
 from wsgiref.simple_server import make_server
 import falcon
 
-from algorithm import Algorithm
+from algorithm import Coref
 from data import MENTION_LABEL
 
 is_python2 = int(sys.version[0]) == 2
 unicode_ = unicode if is_python2 else str
 
-class CorefWrapper(Algorithm):
+class CorefWrapper(Coref):
     def parse_and_get_mentions(self, utterances, utterances_speakers_id=None, context=None,
                                context_speakers_id=None, speakers_names=None):
         self.data.set_utterances(context, context_speakers_id, speakers_names)

diff --git a/readme.md b/readme.md
@@ -1 +1,42 @@
 # Neural coref
+
+A state-of-the-art coreference resolution system based on neural nets.
+
+This coreference resolution system is based on the super fast [spaCy parser](https://spacy.io/) and uses the high quality scoring neural network described in [Deep Reinforcement Learning for Mention-Ranking Coreference Models](http://cs.stanford.edu/people/kevclark/resources/clark-manning-emnlp2016-deep.pdf) by Kevin Clark and Christopher D. Manning, EMNLP 2016.
+
+To learn more about coreference and neuralcoref, check out [our Medium post](https://medium.com/huggingface/state-of-the-art-neural-coreference-resolution-for-chatbots-3302365dcf30).
+
+## Installation
+`pip install neuralcoref`
+
+
+You also need to download an English model for spaCy if you don't already have one.
+`python -m spacy download 'en'`
+
+
+The mention extraction algorithm depends quite strongly on the quality of the parsing, so we recommend using a model with good accuracy.
+On the other hand, the coreference algorithm doesn't use spaCy's model word vectors (although it could), so a good balance between parsing accuracy and model size is, for example, [spaCy's 'en_depent_web_md' model](https://github.com/explosion/spacy-models/releases/en_depent_web_md-1.2.1), which has a parsing accuracy of 90.6% on OntoNotes 5.0.
+
+You can get and use it like this
+````
+python -m spacy download 'en_depent_web_md'
+python -m spacy link en_depent_web_md en_default --force
+````
+
+## Usage
+### Standalone server
+`python server.py` starts a wsgiref simple server on port 8000, endpoint `/coref/`.
+You can retrieve coreferences on a text or dialogue utterances by calling the server.
+Example:
+`curl "http://localhost:8000/coref?text=She%20loves%20him%20so%20much&context=My%20sister%20has%20a%20dog."`
+### Library
+````
+from neuralcoref import Coref
+coref = Coref()
+# retrieve all the coreference resolved clusters
+clusters = coref.one_shot_coref(u"My sister has a dog and she loves him.")
+print(clusters)
+# Show a dictionary of resolved coreferences with the most representative mention of each cluster
+coreferences = coref.get_most_representative()
+print(coreferences)
+````