Skip to content

Commit

Permalink
pip-friendly and better organization
Browse files Browse the repository at this point in the history
- Added setup.py to make zpar pip installable
- Reorganized code to hide the ctypes stuff from the user and make zpar easier to use.
  • Loading branch information
desilinguist committed Oct 4, 2014
1 parent bd2661e commit 9c6508a
Show file tree
Hide file tree
Showing 10 changed files with 362 additions and 241 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ python-zpar: clean /tmp/zpar.zip
cp src/Makefile /tmp/zpar
cp src/reader.h /tmp/zpar/src/include/reader.h
make -C /tmp/zpar zpar.so
mkdir -p dist
cp /tmp/zpar/dist/zpar.so dist/
mkdir -p zpar/dist
cp /tmp/zpar/dist/zpar.so zpar/dist/

/tmp/zpar.zip:
wget -N http://sourceforge.net/projects/zpar/files/latest/zpar.zip -O /tmp/zpar.zip
Expand Down
93 changes: 93 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/usr/bin/env python

# Adapted from https://github.com/Turbo87/py-xcsoar/blob/master/setup.py

import os
from setuptools import setup
from setuptools.command.install import install
from distutils.command.build import build
from subprocess import call

import sys

BASEPATH = os.path.dirname(os.path.abspath(__file__))
ZPAR_PATH = os.path.join(BASEPATH, 'zpar')
ZPAR_LIB_PATH = os.path.join(ZPAR_PATH, 'dist')

class build_zpar(build):
    """Build command that also compiles the ZPar shared library.

    After the stock ``build`` steps it invokes ``make`` (in the current
    working directory) and then copies the contents of ``ZPAR_PATH`` into
    ``self.build_lib`` so a later install step can pick them up.
    """

    def run(self):
        """Run the regular build, compile ZPar and stage the package tree.

        Raises:
            DistutilsExecError: if ``make`` exits with a non-zero status.
        """
        # Imported locally so the block does not depend on a new
        # module-level import.
        from distutils.errors import DistutilsExecError

        # run original build code
        build.run(self)

        sys.stderr.write('running build_zpar\n')

        # for now the compilation is just calling make
        cmd = ['make']

        # compile the shared library; named ``compile_zpar`` so that the
        # builtin ``compile`` is not shadowed
        def compile_zpar():
            sys.stderr.write('*' * 80 + '\n')
            status = call(cmd)
            sys.stderr.write('*' * 80 + '\n')
            # A failed compile must abort the build; otherwise the tree
            # copied below would be staged without a freshly built library.
            if status != 0:
                raise DistutilsExecError(
                    'compiling zpar library failed: {} exited with status {}'
                    .format(' '.join(cmd), status))

        # ``execute`` skips the callable on a dry run
        self.execute(compile_zpar, [], 'compiling zpar library')

        # copy resulting tool to library build folder
        self.mkpath(self.build_lib)

        if not self.dry_run:
            self.copy_tree(ZPAR_PATH, self.build_lib)

class install_zpar(install):
    """Install command that additionally copies the build tree into ``zpar``.

    Once the stock ``install`` has run, everything found under
    ``self.build_lib`` is copied into ``<install_lib>/zpar``.
    """

    def initialize_options(self):
        """Set up the default install options plus ``build_scripts``."""
        install.initialize_options(self)
        self.build_scripts = None

    def finalize_options(self):
        """Finalize the install options and inherit ``build_scripts``."""
        install.finalize_options(self)
        # take ``build_scripts`` from the ``build`` command if still unset
        inherited = ('build_scripts', 'build_scripts')
        self.set_undefined_options('build', inherited)

    def run(self):
        """Perform the regular install, then copy the staged ZPar files."""
        # the stock install steps come first
        install.run(self)

        sys.stderr.write('running install_zpar\n')

        # destination is the ``zpar`` directory inside the library location
        target_dir = os.path.join(self.install_lib, 'zpar')
        self.mkpath(target_dir)
        self.copy_tree(self.build_lib, target_dir)


def read(fname):
    """Return the contents of ``fname`` as a string.

    ``fname`` is resolved relative to the directory containing this file
    (an absolute ``fname`` is used as-is, per ``os.path.join``).
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(path) as handle:
        return handle.read()


# Package metadata for the ``zpar`` distribution. The ``cmdclass`` mapping
# swaps in the custom ``build`` and ``install`` commands defined above.
setup(
name='zpar',
version='0.1',
description='A Wrapper around the ZPar statistical tagger/parser for English',
maintainer='Nitin Madnani',
maintainer_email='[email protected]',
license='MIT',
url='http://www.github.com/EducationalTestingService/python-zpar',
# README.md is looked up relative to this file's directory by ``read``
long_description=read('README.md'),
classifiers=['Intended Audience :: Science/Research',
'Intended Audience :: Developers',
'License :: OSI Approved :: MIT License',
'Programming Language :: Python',
'Topic :: Software Development',
'Topic :: Scientific/Engineering',
'Operating System :: POSIX',
'Operating System :: Unix',
'Operating System :: MacOS',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.3',
],
# route ``build`` and ``install`` through the ZPar-aware command classes
cmdclass={
'build': build_zpar,
'install': install_zpar,
}
)
8 changes: 8 additions & 0 deletions src/zpar.lib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,14 @@ extern "C" int load_parser(const char *sFeaturePath) {
// The function to load the dependency parser model
extern "C" int load_depparser(const char *sFeaturePath) {

// If the tagger is not already loaded, then we need to load
// it since the parser requires the tagger
if (!zpm->tagger) {
if (load_tagger(sFeaturePath)) {
return 1;
}
}

CDepParser *depparser;
std::string sDepParserFeatureFile = std::string(sFeaturePath) + "/depparser";
std::cerr << "Loading dependency parser from " << sDepParserFeatureFile << std::endl;
Expand Down
44 changes: 44 additions & 0 deletions zpar/DepParser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# License: MIT
'''
:author: Nitin Madnani ([email protected])
:organization: ETS
'''

import ctypes as c

class DepParser(object):
    """The ZPar English Dependency Parser

    Thin wrapper around the ``load_depparser``, ``dep_parse_sentence`` and
    ``dep_parse_file`` functions exposed by the ZPar shared library.
    """

    def __init__(self, modelpath, libptr):
        """Bind the library functions and load the dependency parser model.

        Args:
            modelpath: directory containing the ZPar models (text or
                already-encoded bytes).
            libptr: handle to the loaded ZPar shared library.

        Raises:
            OSError: if the library's loader returns a non-zero status.
        """
        super(DepParser, self).__init__()

        # get the library method that loads the parser models
        self._load_depparser = libptr.load_depparser
        self._load_depparser.restype = c.c_int
        self._load_depparser.argtypes = [c.c_char_p]

        # get the library methods that parse sentences and files
        self._dep_parse_sentence = libptr.dep_parse_sentence
        self._dep_parse_sentence.restype = c.c_char_p
        self._dep_parse_sentence.argtypes = [c.c_char_p]

        self._parse_file = libptr.dep_parse_file
        self._parse_file.restype = None
        self._parse_file.argtypes = [c.c_char_p, c.c_char_p]

        # a non-zero return value from the loader signals failure
        if self._load_depparser(self._to_bytes(modelpath)):
            raise OSError('Cannot find dependency parser model at {}\n'.format(modelpath))

    @staticmethod
    def _to_bytes(text):
        """Return ``text`` as UTF-8 bytes, passing through existing bytes.

        Calling ``.encode`` on an already-encoded value fails: a Python 2
        ``str`` with non-ASCII bytes raises ``UnicodeDecodeError`` and a
        Python 3 ``bytes`` object has no ``encode`` at all.
        """
        if isinstance(text, bytes):
            return text
        return text.encode('utf-8')

    def parse_sentence(self, sentence):
        """Dependency-parse one sentence and return the result as text."""
        # strip in the caller's own type first so whitespace handling is
        # unchanged, then append the "\n " terminator passed to ZPar
        zpar_compatible_sentence = self._to_bytes(sentence.strip()) + b"\n "
        parsed_sent = self._dep_parse_sentence(zpar_compatible_sentence)
        return parsed_sent.decode('utf-8')

    def parse_file(self, inputfile, outputfile):
        """Dependency-parse ``inputfile`` and write the output to ``outputfile``."""
        self._parse_file(self._to_bytes(inputfile), self._to_bytes(outputfile))

    def cleanup(self):
        """Drop the references to the library functions."""
        self._load_depparser = None
        self._dep_parse_sentence = None
        self._parse_file = None
44 changes: 44 additions & 0 deletions zpar/Parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# License: MIT
'''
:author: Nitin Madnani ([email protected])
:organization: ETS
'''

import ctypes as c

class Parser(object):
    """The ZPar English Constituency Parser

    Thin wrapper around the ``load_parser``, ``parse_sentence`` and
    ``parse_file`` functions exposed by the ZPar shared library.
    """

    def __init__(self, modelpath, libptr):
        """Bind the library functions and load the parser model.

        Args:
            modelpath: directory containing the ZPar models (text or
                already-encoded bytes).
            libptr: handle to the loaded ZPar shared library.

        Raises:
            OSError: if the library's loader returns a non-zero status.
        """
        super(Parser, self).__init__()

        # get the library method that loads the parser models
        self._load_parser = libptr.load_parser
        self._load_parser.restype = c.c_int
        self._load_parser.argtypes = [c.c_char_p]

        # get the library methods that parse sentences and files
        self._parse_sentence = libptr.parse_sentence
        self._parse_sentence.restype = c.c_char_p
        self._parse_sentence.argtypes = [c.c_char_p]

        self._parse_file = libptr.parse_file
        self._parse_file.restype = None
        self._parse_file.argtypes = [c.c_char_p, c.c_char_p]

        # a non-zero return value from the loader signals failure
        if self._load_parser(self._to_bytes(modelpath)):
            raise OSError('Cannot find parser model at {}\n'.format(modelpath))

    @staticmethod
    def _to_bytes(text):
        """Return ``text`` as UTF-8 bytes, passing through existing bytes.

        Calling ``.encode`` on an already-encoded value fails: a Python 2
        ``str`` with non-ASCII bytes raises ``UnicodeDecodeError`` and a
        Python 3 ``bytes`` object has no ``encode`` at all.
        """
        if isinstance(text, bytes):
            return text
        return text.encode('utf-8')

    def parse_sentence(self, sentence):
        """Parse one sentence and return the result as text."""
        # strip in the caller's own type first so whitespace handling is
        # unchanged, then append the "\n " terminator passed to ZPar
        zpar_compatible_sentence = self._to_bytes(sentence.strip()) + b"\n "
        parsed_sent = self._parse_sentence(zpar_compatible_sentence)
        return parsed_sent.decode('utf-8')

    def parse_file(self, inputfile, outputfile):
        """Parse ``inputfile`` and write the output to ``outputfile``."""
        self._parse_file(self._to_bytes(inputfile), self._to_bytes(outputfile))

    def cleanup(self):
        """Drop the references to the library functions."""
        self._load_parser = None
        self._parse_sentence = None
        self._parse_file = None
45 changes: 45 additions & 0 deletions zpar/Tagger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# License: MIT
'''
:author: Nitin Madnani ([email protected])
:organization: ETS
'''

import ctypes as c

class Tagger(object):
    """The ZPar English POS Tagger

    Thin wrapper around the ``load_tagger``, ``tag_sentence`` and
    ``tag_file`` functions exposed by the ZPar shared library.
    """

    def __init__(self, modelpath, libptr):
        """Bind the library functions and load the tagger model.

        Args:
            modelpath: directory containing the ZPar models (text or
                already-encoded bytes).
            libptr: handle to the loaded ZPar shared library.

        Raises:
            OSError: if the library's loader returns a non-zero status.
        """
        super(Tagger, self).__init__()

        # get the library method that loads the tagger models
        self._load_tagger = libptr.load_tagger
        self._load_tagger.restype = c.c_int
        self._load_tagger.argtypes = [c.c_char_p]

        # get the library methods that tag sentences and files
        self._tag_sentence = libptr.tag_sentence
        self._tag_sentence.restype = c.c_char_p
        self._tag_sentence.argtypes = [c.c_char_p]

        self._tag_file = libptr.tag_file
        self._tag_file.restype = None
        self._tag_file.argtypes = [c.c_char_p, c.c_char_p]

        # a non-zero return value from the loader signals failure
        if self._load_tagger(self._to_bytes(modelpath)):
            raise OSError('Cannot find tagger model at {}\n'.format(modelpath))

    @staticmethod
    def _to_bytes(text):
        """Return ``text`` as UTF-8 bytes, passing through existing bytes.

        Calling ``.encode`` on an already-encoded value fails: a Python 2
        ``str`` with non-ASCII bytes raises ``UnicodeDecodeError`` and a
        Python 3 ``bytes`` object has no ``encode`` at all.
        """
        if isinstance(text, bytes):
            return text
        return text.encode('utf-8')

    def tag_sentence(self, sentence):
        """Tag one sentence and return the result as text."""
        # strip in the caller's own type first so whitespace handling is
        # unchanged, then append the "\n " terminator passed to ZPar
        zpar_compatible_sentence = self._to_bytes(sentence.strip()) + b"\n "
        tagged_sent = self._tag_sentence(zpar_compatible_sentence)
        return tagged_sent.decode('utf-8')

    def tag_file(self, inputfile, outputfile):
        """Tag ``inputfile`` and write the output to ``outputfile``."""
        self._tag_file(self._to_bytes(inputfile), self._to_bytes(outputfile))

    def cleanup(self):
        """Drop the references to the library functions."""
        self._load_tagger = None
        self._tag_sentence = None
        self._tag_file = None

74 changes: 74 additions & 0 deletions zpar/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# License: MIT
'''
:author: Nitin Madnani ([email protected])
:organization: ETS
'''

import _ctypes
import ctypes as c
import os

from .Tagger import Tagger
from .Parser import Parser
from .DepParser import DepParser

__all__ = ['Tagger', 'Parser', 'DepParser']

class ZPar(object):
    """The ZPar wrapper object

    Loads the ``dist/zpar.so`` shared library that sits next to this module
    and hands out tagger / parser / dependency parser wrappers bound to it.
    Can be used as a context manager; ``close()`` runs on exit.
    """

    def __init__(self, modelpath):
        """Load the shared library and remember where the models live.

        Args:
            modelpath: directory containing the ZPar models; passed on to
                each wrapper created by the ``get_*`` methods.
        """
        super(ZPar, self).__init__()

        # get a pointer to the zpar shared library
        base_path = os.path.dirname(os.path.abspath(__file__))
        zpar_path = os.path.join(base_path, 'dist', 'zpar.so')
        self.libptr = c.cdll.LoadLibrary(zpar_path)
        self.modelpath = modelpath
        self.tagger = None
        self.parser = None
        self.depparser = None

    def close(self):
        """Unload the models and release the shared library.

        Safe to call more than once: after the first call ``libptr`` is
        ``None`` and later calls return immediately rather than calling into
        (and ``dlclose``-ing) a handle that has already been closed.
        """
        if self.libptr is None:
            return

        # unload the models on the C++ side
        self.libptr.unload_models()

        # clean up the data structures on the python side
        for component in (self.tagger, self.parser, self.depparser):
            if component:
                component.cleanup()

        # set all the fields to none to enable clean reuse
        self.tagger = None
        self.parser = None
        self.depparser = None
        self.modelpath = None

        # clean up the CDLL object too so that upon reuse, we get a new one;
        # drop our reference first so the closed handle can never be reused
        handle = self.libptr._handle
        self.libptr = None
        _ctypes.dlclose(handle)

    def __enter__(self):
        """Enable ZPar to be used as a ContextManager"""
        return self

    def __exit__(self, type, value, traceback):
        """Clean up when done"""
        self.close()

    def get_tagger(self):
        """Create, remember and return a ``Tagger`` bound to this library."""
        self.tagger = Tagger(self.modelpath, self.libptr)
        return self.tagger

    def get_parser(self):
        """Create, remember and return a ``Parser`` bound to this library."""
        self.parser = Parser(self.modelpath, self.libptr)
        return self.parser

    def get_depparser(self):
        """Create, remember and return a ``DepParser`` bound to this library."""
        self.depparser = DepParser(self.modelpath, self.libptr)
        return self.depparser

4 changes: 2 additions & 2 deletions zpar_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,14 @@
logging.info("Output: {}".format(tagged_sent))

logging.info('Dep Parsing "{}"'.format(test_sentence))
dep_parsed_sent = proxy.dep_parse_sentence(test_sentence)
dep_parsed_sent = proxy.parse_sentence(test_sentence)
logging.info("Output: {}".format(dep_parsed_sent))

logging.info('Tagging file {} into {}'.format(test_file, tag_outfile))
proxy.tag_file(test_file, tag_outfile)

logging.info('Dep Parsing file {} into {}'.format(test_file, dep_outfile))
proxy.dep_parse_file(test_file, dep_outfile)
proxy.parse_file(test_file, dep_outfile)

except socket.error as err:
sys.stderr.write("{}\n".format(err))
Expand Down
Loading

0 comments on commit 9c6508a

Please sign in to comment.