forked from EducationalTestingService/python-zpar
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
pip-friendly and better organization
- Added setup.py to make zpar pip installable.
- Reorganized code to hide the ctypes stuff from the user and make zpar easier to use.
- Loading branch information
1 parent
bd2661e
commit 9c6508a
Showing
10 changed files
with
362 additions
and
241 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
#!/usr/bin/env python | ||
|
||
# Adapted from https://github.com/Turbo87/py-xcsoar/blob/master/setup.py | ||
|
||
import os | ||
from setuptools import setup | ||
from setuptools.command.install import install | ||
from distutils.command.build import build | ||
from subprocess import call | ||
|
||
import sys | ||
|
||
BASEPATH = os.path.dirname(os.path.abspath(__file__)) | ||
ZPAR_PATH = os.path.join(BASEPATH, 'zpar') | ||
ZPAR_LIB_PATH = os.path.join(ZPAR_PATH, 'dist') | ||
|
||
class build_zpar(build):
    """Build command that also compiles ZPar with ``make``.

    After the stock ``build`` steps have run, ``make`` is invoked and the
    contents of ``ZPAR_PATH`` are copied into the library build folder.
    """

    def run(self):
        """Run the regular build, compile ZPar and stage the result.

        Raises:
            DistutilsExecError: if ``make`` exits with a non-zero status.
        """
        # imported locally so that the block does not depend on a new
        # top-of-file import
        from distutils.errors import DistutilsExecError

        # run original build code
        build.run(self)

        sys.stderr.write('running build_zpar\n')

        # for now the compilation is just calling make
        cmd = ['make']

        # compile the shared library
        def run_make():
            sys.stderr.write('*' * 80 + '\n')
            status = call(cmd)
            sys.stderr.write('*' * 80 + '\n')
            # a failed compilation must abort the build; otherwise an
            # incomplete tree would be copied and installed silently
            if status != 0:
                raise DistutilsExecError(
                    'command {!r} failed with exit status {}'.format(
                        ' '.join(cmd), status))

        # go through self.execute so that distutils logs the step and
        # skips it on a dry run
        self.execute(run_make, [], 'compiling zpar library')

        # copy resulting tool to library build folder
        self.mkpath(self.build_lib)

        if not self.dry_run:
            self.copy_tree(ZPAR_PATH, self.build_lib)
|
||
class install_zpar(install):
    """Install command that also copies the build tree into a ``zpar`` folder."""

    def initialize_options(self):
        """Set up the stock install options plus an unset ``build_scripts``."""
        install.initialize_options(self)
        self.build_scripts = None

    def finalize_options(self):
        """Finalize the stock options, then fill ``build_scripts`` from ``build``."""
        install.finalize_options(self)
        inherited = ('build_scripts', 'build_scripts')
        self.set_undefined_options('build', inherited)

    def run(self):
        """Run the regular install, then copy ``build_lib`` to ``<install_lib>/zpar``."""
        # the stock install steps come first
        install.run(self)

        # then place the ZPar files under the install location
        sys.stderr.write('running install_zpar\n')
        target_dir = os.path.join(self.install_lib, 'zpar')
        self.mkpath(target_dir)
        self.copy_tree(self.build_lib, target_dir)
|
||
|
||
def read(fname):
    """Return the contents of *fname*, resolved relative to this file.

    Args:
        fname: Path of the file to read. It is joined onto the directory
            that contains this script.

    Returns:
        The full text of the file.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    # a context manager closes the handle as soon as the text is read,
    # instead of leaving that to the garbage collector
    with open(path) as handle:
        return handle.read()
|
||
|
||
# Trove classifiers describing the package.
_CLASSIFIERS = [
    'Intended Audience :: Science/Research',
    'Intended Audience :: Developers',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python',
    'Topic :: Software Development',
    'Topic :: Scientific/Engineering',
    'Operating System :: POSIX',
    'Operating System :: Unix',
    'Operating System :: MacOS',
    'Programming Language :: Python :: 2',
    'Programming Language :: Python :: 2.7',
    'Programming Language :: Python :: 3',
    'Programming Language :: Python :: 3.3',
]

# Package metadata; the custom build/install commands defined in this file
# replace the stock ``build`` and ``install`` commands.
setup(
    name='zpar',
    version='0.1',
    description='A Wrapper around the ZPar statistical tagger/parser for English',
    long_description=read('README.md'),
    url='http://www.github.com/EducationalTestingService/python-zpar',
    license='MIT',
    maintainer='Nitin Madnani',
    maintainer_email='[email protected]',
    classifiers=_CLASSIFIERS,
    cmdclass={
        'build': build_zpar,
        'install': install_zpar,
    },
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# License: MIT | ||
''' | ||
:author: Nitin Madnani ([email protected]) | ||
:organization: ETS | ||
''' | ||
|
||
import ctypes as c | ||
|
||
class DepParser(object):
    """The ZPar English Dependency Parser.

    Wraps the ``load_depparser``, ``dep_parse_sentence`` and
    ``dep_parse_file`` functions of the ZPar shared library.
    """

    def __init__(self, modelpath, libptr):
        """Bind the library functions and load the dependency parser model.

        Args:
            modelpath: Path handed to the library's ``load_depparser``.
            libptr: Loaded library object exposing the ZPar functions.

        Raises:
            OSError: if ``load_depparser`` returns a non-zero status.
        """
        super(DepParser, self).__init__()

        # get the library method that loads the parser models
        self._load_depparser = libptr.load_depparser
        self._load_depparser.restype = c.c_int
        self._load_depparser.argtypes = [c.c_char_p]

        # get the library methods that parse sentences and files
        self._dep_parse_sentence = libptr.dep_parse_sentence
        self._dep_parse_sentence.restype = c.c_char_p
        self._dep_parse_sentence.argtypes = [c.c_char_p]

        self._parse_file = libptr.dep_parse_file
        self._parse_file.restype = None
        self._parse_file.argtypes = [c.c_char_p, c.c_char_p]

        if self._load_depparser(modelpath.encode('utf-8')):
            raise OSError('Cannot find dependency parser model at {}\n'.format(modelpath))

    def parse_sentence(self, sentence):
        """Return the dependency parse of *sentence* as a string.

        An empty or whitespace-only sentence yields an empty string and is
        never passed to the native library.

        Raises:
            RuntimeError: if the library returns no result.
        """
        stripped = sentence.strip()
        if not stripped:
            # nothing to parse; do not hand an empty line to native code
            return ''

        zpar_compatible_sentence = (stripped + "\n ").encode('utf-8')
        parsed_sent = self._dep_parse_sentence(zpar_compatible_sentence)
        # ctypes maps a NULL ``char *`` result to None
        if parsed_sent is None:
            raise RuntimeError('ZPar returned no dependency parse for the given sentence')
        return parsed_sent.decode('utf-8')

    def parse_file(self, inputfile, outputfile):
        """Pass both paths, encoded as UTF-8, to the library's ``dep_parse_file``."""
        self._parse_file(inputfile.encode('utf-8'), outputfile.encode('utf-8'))

    def cleanup(self):
        """Drop the references to the library functions."""
        self._load_depparser = None
        self._dep_parse_sentence = None
        self._parse_file = None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# License: MIT | ||
''' | ||
:author: Nitin Madnani ([email protected]) | ||
:organization: ETS | ||
''' | ||
|
||
import ctypes as c | ||
|
||
class Parser(object):
    """The ZPar English Constituency Parser.

    Wraps the ``load_parser``, ``parse_sentence`` and ``parse_file``
    functions of the ZPar shared library.
    """

    def __init__(self, modelpath, libptr):
        """Bind the library functions and load the parser model.

        Args:
            modelpath: Path handed to the library's ``load_parser``.
            libptr: Loaded library object exposing the ZPar functions.

        Raises:
            OSError: if ``load_parser`` returns a non-zero status.
        """
        super(Parser, self).__init__()

        # get the library method that loads the parser models
        self._load_parser = libptr.load_parser
        self._load_parser.restype = c.c_int
        self._load_parser.argtypes = [c.c_char_p]

        # get the library methods that parse sentences and files
        self._parse_sentence = libptr.parse_sentence
        self._parse_sentence.restype = c.c_char_p
        self._parse_sentence.argtypes = [c.c_char_p]

        self._parse_file = libptr.parse_file
        self._parse_file.restype = None
        self._parse_file.argtypes = [c.c_char_p, c.c_char_p]

        if self._load_parser(modelpath.encode('utf-8')):
            raise OSError('Cannot find parser model at {}\n'.format(modelpath))

    def parse_sentence(self, sentence):
        """Return the parse of *sentence* as a string.

        An empty or whitespace-only sentence yields an empty string and is
        never passed to the native library.

        Raises:
            RuntimeError: if the library returns no result.
        """
        stripped = sentence.strip()
        if not stripped:
            # nothing to parse; do not hand an empty line to native code
            return ''

        zpar_compatible_sentence = (stripped + "\n ").encode('utf-8')
        parsed_sent = self._parse_sentence(zpar_compatible_sentence)
        # ctypes maps a NULL ``char *`` result to None
        if parsed_sent is None:
            raise RuntimeError('ZPar returned no parse for the given sentence')
        return parsed_sent.decode('utf-8')

    def parse_file(self, inputfile, outputfile):
        """Pass both paths, encoded as UTF-8, to the library's ``parse_file``."""
        self._parse_file(inputfile.encode('utf-8'), outputfile.encode('utf-8'))

    def cleanup(self):
        """Drop the references to the library functions."""
        self._load_parser = None
        self._parse_sentence = None
        self._parse_file = None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# License: MIT | ||
''' | ||
:author: Nitin Madnani ([email protected]) | ||
:organization: ETS | ||
''' | ||
|
||
import ctypes as c | ||
|
||
class Tagger(object):
    """The ZPar English POS Tagger.

    Wraps the ``load_tagger``, ``tag_sentence`` and ``tag_file`` functions
    of the ZPar shared library.
    """

    def __init__(self, modelpath, libptr):
        """Bind the library functions and load the tagger model.

        Args:
            modelpath: Path handed to the library's ``load_tagger``.
            libptr: Loaded library object exposing the ZPar functions.

        Raises:
            OSError: if ``load_tagger`` returns a non-zero status.
        """
        super(Tagger, self).__init__()

        # get the library method that loads the tagger models
        self._load_tagger = libptr.load_tagger
        self._load_tagger.restype = c.c_int
        self._load_tagger.argtypes = [c.c_char_p]

        # get the library methods that tag sentences and files
        self._tag_sentence = libptr.tag_sentence
        self._tag_sentence.restype = c.c_char_p
        self._tag_sentence.argtypes = [c.c_char_p]

        self._tag_file = libptr.tag_file
        self._tag_file.restype = None
        self._tag_file.argtypes = [c.c_char_p, c.c_char_p]

        if self._load_tagger(modelpath.encode('utf-8')):
            raise OSError('Cannot find tagger model at {}\n'.format(modelpath))

    def tag_sentence(self, sentence):
        """Return the tagged form of *sentence* as a string.

        An empty or whitespace-only sentence yields an empty string and is
        never passed to the native library.

        Raises:
            RuntimeError: if the library returns no result.
        """
        stripped = sentence.strip()
        if not stripped:
            # nothing to tag; do not hand an empty line to native code
            return ''

        zpar_compatible_sentence = (stripped + "\n ").encode('utf-8')
        tagged_sent = self._tag_sentence(zpar_compatible_sentence)
        # ctypes maps a NULL ``char *`` result to None
        if tagged_sent is None:
            raise RuntimeError('ZPar returned no tagging for the given sentence')
        return tagged_sent.decode('utf-8')

    def tag_file(self, inputfile, outputfile):
        """Pass both paths, encoded as UTF-8, to the library's ``tag_file``."""
        self._tag_file(inputfile.encode('utf-8'), outputfile.encode('utf-8'))

    def cleanup(self):
        """Drop the references to the library functions."""
        self._load_tagger = None
        self._tag_sentence = None
        self._tag_file = None
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# License: MIT | ||
''' | ||
:author: Nitin Madnani ([email protected]) | ||
:organization: ETS | ||
''' | ||
|
||
import _ctypes | ||
import ctypes as c | ||
import os | ||
|
||
from .Tagger import Tagger | ||
from .Parser import Parser | ||
from .DepParser import DepParser | ||
|
||
__all__ = ['Tagger', 'Parser', 'DepParser'] | ||
|
||
class ZPar(object):
    """The ZPar wrapper object.

    Loads the ZPar shared library and hands out tagger and parser wrappers
    bound to it. It can be used as a context manager, in which case
    ``close()`` is called on exit.
    """

    def __init__(self, modelpath):
        """Load ``dist/zpar.so`` from this package's directory.

        Args:
            modelpath: Path passed on to every wrapper created by the
                ``get_*`` methods.
        """
        super(ZPar, self).__init__()

        # get a pointer to the zpar shared library
        base_path = os.path.dirname(os.path.abspath(__file__))
        zpar_path = os.path.join(base_path, 'dist', 'zpar.so')
        self.libptr = c.cdll.LoadLibrary(zpar_path)
        self.modelpath = modelpath
        self.tagger = None
        self.parser = None
        self.depparser = None

    def _require_open(self):
        """Raise RuntimeError if the shared library has already been closed."""
        if self.libptr is None:
            raise RuntimeError('This ZPar instance has been closed')

    def close(self):
        """Unload the models and release the shared library.

        Calling ``close()`` again on a closed instance does nothing, so an
        explicit ``close()`` inside a ``with`` block is safe.
        """
        if self.libptr is None:
            # already closed; calling into or dlclose()-ing a stale handle
            # again is not safe
            return

        # unload the models on the C++ side
        self.libptr.unload_models()

        # clean up the data structures on the python side
        for wrapper in (self.tagger, self.parser, self.depparser):
            if wrapper is not None:
                wrapper.cleanup()

        # set all the fields to none to enable clean reuse
        self.tagger = None
        self.parser = None
        self.depparser = None
        self.modelpath = None

        # clean up the CDLL object too, and forget it so that nothing can
        # call into the closed library afterwards
        _ctypes.dlclose(self.libptr._handle)
        self.libptr = None

    def __enter__(self):
        """Enable ZPar to be used as a ContextManager"""
        return self

    def __exit__(self, type, value, traceback):
        """Clean up when done"""
        self.close()

    def get_tagger(self):
        """Create, remember and return a ``Tagger`` bound to the library.

        Raises:
            RuntimeError: if this instance has been closed.
        """
        self._require_open()
        self.tagger = Tagger(self.modelpath, self.libptr)
        return self.tagger

    def get_parser(self):
        """Create, remember and return a ``Parser`` bound to the library.

        Raises:
            RuntimeError: if this instance has been closed.
        """
        self._require_open()
        self.parser = Parser(self.modelpath, self.libptr)
        return self.parser

    def get_depparser(self):
        """Create, remember and return a ``DepParser`` bound to the library.

        Raises:
            RuntimeError: if this instance has been closed.
        """
        self._require_open()
        self.depparser = DepParser(self.modelpath, self.libptr)
        return self.depparser
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.