Skip to content

Metapy integration #123

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added Documentation.pdf
Binary file not shown.
Binary file added Project Progress Report.pdf
Binary file not shown.
Binary file added Project Proposal.pdf
Binary file not shown.
16 changes: 14 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# CourseProject
# Enhancing and Extending Metapy versioning and functionality

Please fork this repository and paste the github link of your fork on Microsoft CMT. Detailed instructions are on Coursera under Week 1: Course Project Overview/Week 9 Activities.
Where are the respective files located?

Project Progress Report: This can be found in the root directory of the project within a file labeled: Project Progress Report.pdf

Source Code:
The source code for the core functionality can be found at: /metapy/src/nltk_additions.py
The respective test code for the core functionality can be found at: /metapy/src/nltk_test.py

Documentation: Detailed documentation can be found in the root directory of the project within a file labeled: Documentation.pdf

Software Tutorial Link:

Software Tutorial Presentation Link:
70 changes: 70 additions & 0 deletions metapy/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Top-level build script for the metapy extension module.
cmake_minimum_required(VERSION 3.2.0)

project(metapy)

# Project-wide C++ defaults: C++14 is mandatory and compiler-specific
# extensions are disabled. These are set before add_subdirectory() below, so
# the deps/meta subdirectory inherits them as well.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Everything is compiled as position-independent code because the final
# product is a shared library.
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# Ask the generator to write compile_commands.json for tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS 1)

# User-settable cache entry; an empty value means "no particular version".
set(METAPY_PYTHON_VERSION
    ""
    CACHE STRING "Python version to use for compiling the extension")

# EXCLUDE_FROM_ALL: targets from deps/meta are only built when something in
# this project depends on them.
add_subdirectory(deps/meta EXCLUDE_FROM_ALL)

# Locate the Python development files, unless PYTHON_INCLUDE_DIRS was already
# provided by the caller, in which case the search is skipped entirely.
if (NOT PYTHON_INCLUDE_DIRS)
  # The expansion is quoted on purpose: unquoted, an empty
  # METAPY_PYTHON_VERSION vanishes from the argument list and the condition
  # degenerates to `if(NOT STREQUAL "")`, which only behaves by accident.
  if (NOT "${METAPY_PYTHON_VERSION}" STREQUAL "")
    list(APPEND Python_ADDITIONAL_VERSIONS ${METAPY_PYTHON_VERSION})

    # First try for exactly the requested version; if that fails, retry
    # without EXACT and make the second attempt mandatory.
    find_package(PythonLibs ${METAPY_PYTHON_VERSION} EXACT)
    if (NOT PythonLibs_FOUND)
      find_package(PythonLibs ${METAPY_PYTHON_VERSION} REQUIRED)
    endif()
  else()
    # No version requested: accept whatever PythonLibs finds.
    find_package(PythonLibs REQUIRED)
  endif()
else()
  # STATUS supplies the "-- " prefix that used to be hand-written here.
  message(STATUS "Using manual Python include dirs: ${PYTHON_INCLUDE_DIRS}")
endif()

# The extension module itself: one shared library built from the binding
# sources under src/.
add_library(metapy SHARED
  src/metapy_analyzers.cpp
  src/metapy_classify.cpp
  src/metapy_embeddings.cpp
  src/metapy_index.cpp
  src/metapy_learn.cpp
  src/metapy_sequence.cpp
  src/metapy_stats.cpp
  src/metapy_parser.cpp
  src/metapy_topics.cpp
  src/metapy.cpp)

# Header search paths are attached to the target instead of the directory so
# they cannot leak into any other target. The order matches the previous
# include_directories() calls: Python, bundled pybind11, project headers.
target_include_directories(metapy PRIVATE
  ${PYTHON_INCLUDE_DIRS}
  "${CMAKE_CURRENT_SOURCE_DIR}/deps/pybind11/include"
  "${CMAKE_CURRENT_SOURCE_DIR}/include")

# NOTE: the keyword-less signature is kept deliberately. The WIN32 section of
# this file links ${PYTHON_LIBRARY} with the plain signature too, and CMake
# rejects mixing plain and keyword signatures on the same target.
target_link_libraries(metapy
  meta-index
  meta-classify
  meta-ranker
  meta-sequence
  meta-sequence-analyzers
  meta-greedy-tagger
  meta-parser
  meta-parser-analyzers
  meta-embeddings
  meta-topics)

# don't add a "lib" prefix to the metapy shared library
set_target_properties(metapy PROPERTIES PREFIX "")

if (APPLE)
  # OS X specific settings
  # (see http://pybind11.readthedocs.org/en/latest/cmake.html)
  # Python symbols are left undefined at link time (-undefined
  # dynamic_lookup) and the module is given a ".so" suffix.
  set_target_properties(metapy PROPERTIES
    MACOSX_RPATH "."
    LINK_FLAGS "-undefined dynamic_lookup "
    SUFFIX ".so")
endif()

if (WIN32)
  # On Windows the module gets a ".pyd" suffix and links against the Python
  # library explicitly.
  set_target_properties(metapy PROPERTIES SUFFIX ".pyd")
  target_link_libraries(metapy ${PYTHON_LIBRARY})

  # MS_WIN64 must only be defined for 64-bit builds; defining it on a 32-bit
  # build would make the Python headers assume 64-bit type sizes.
  if (CMAKE_SIZEOF_VOID_P EQUAL 8)
    target_compile_definitions(metapy PUBLIC -DMS_WIN64)
  endif()

  # fix for std::_hypot has not been declared
  target_compile_definitions(metapy PUBLIC -D_hypot=hypot)
endif()

# The built module is installed into dist/metapy inside the source tree.
# NOTE(review): presumably the Python packaging step picks it up from there
# (MANIFEST.in lists dist/metapy) -- confirm before relocating.
install(TARGETS metapy DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/dist/metapy)
18 changes: 18 additions & 0 deletions metapy/LICENSE.mit
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Copyright (c) 2016 Sean Massung, Chase Geigle

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 changes: 29 additions & 0 deletions metapy/LICENSE.ncsa
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
Copyright (c) 2016 Sean Massung, Chase Geigle
All rights reserved.

Developed by: MeTA Team
University of Illinois at Urbana-Champaign
https://meta-toolkit.org

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal with
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimers. Redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimers in the documentation and/or other materials provided with
the distribution. Neither the names of MeTA, University of Illinois, nor the
names of its contributors may be used to endorse or promote products derived
from this Software without specific prior written permission.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
SOFTWARE.
16 changes: 16 additions & 0 deletions metapy/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# File list for the metapy package: licenses, the CMake build script, the
# bundled dependencies, the binding sources/headers and the dist/metapy tree.
# NOTE(review): presumably consumed by setuptools when building the source
# distribution -- confirm against setup.py, which is not visible here.

# Licenses
include LICENSE.mit
include LICENSE.ncsa

# Build system
include CMakeLists.txt

# Dependencies
recursive-include deps **

# metapy source code
recursive-include src **
recursive-include include **

# metapy package
recursive-include dist/metapy **
61 changes: 61 additions & 0 deletions metapy/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# metapy: (experimental) Python bindings for [MeTA][meta]

[![Build Status](https://travis-ci.org/meta-toolkit/metapy.svg?branch=master)](https://travis-ci.org/meta-toolkit/metapy)

[![Windows Build Status](https://ci.appveyor.com/api/projects/status/github/meta-toolkit/metapy?svg=true&branch=master)](https://ci.appveyor.com/project/skystrife/metapy)

This project provides Python (2.7 and 3.x are supported) bindings for the
MeTA toolkit. They are still very much under construction, but the goal is
to make it seamless to use MeTA's components within any Python application
(e.g., a Django or Flask web app).

This project is made possible by the excellent [pybind11][pybind11]
library.

## Getting Started (the easy way)

```bash
# Ensure your pip is up to date
pip install --upgrade pip

# install metapy!
pip install metapy
```

This should work on Linux, OS X, and Windows with pretty much any recent
Python version >= 2.7. On Linux, make sure to update your `pip` to version
8.1 or newer so you can install from a binary package---this will save you
a lot of time.

## Getting Started (the hard way)

You will, of course, need Python installed. You will also need its headers,
so look for a `python-dev` or similar package for your system. Beyond that,
you'll need to satisfy the requirements for
[building MeTA itself][build-guide].

This repository should have everything you need to get started. You should
ensure that you've fetched all of the submodules first, though:

```bash
git submodule update --init --recursive
```

Once that's done, you should be able to build the library like so:

```bash
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release
make
```

You can force building against a specific version of Python if you happen
to have multiple versions installed by specifying
`-DMETAPY_PYTHON_VERSION=x.y` when invoking `cmake`.

The module should be written to `metapy.so` (`metapy.pyd` on Windows) in the build directory.

[meta]: https://meta-toolkit.org
[pybind11]: https://github.com/pybind/pybind11
[build-guide]: https://meta-toolkit.org/setup-guide.html
55 changes: 55 additions & 0 deletions metapy/examples/query_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""
Mimics MeTA's query-runner program.
"""

import math
import sys
import time

import metapy

def _log2(value):
    """Return the base-2 logarithm of ``value``.

    Uses ``math.log2`` when the interpreter provides it and falls back to
    ``math.log(value, 2)`` otherwise (``math.log2`` is missing on Python 2.7).
    """
    if hasattr(math, "log2"):
        return math.log2(value)
    return math.log(value, 2)


class PL2Ranker(metapy.index.RankingFunction):
    """
    Create a new ranking function in Python that can be used in MeTA.

    :param c_param: value stored as ``self.c`` and used as the multiplier of
        ``avg_dl / doc_size`` when normalizing the term frequency
        (default 0.5)
    """
    def __init__(self, c_param=0.5):
        self.c = c_param
        super(PL2Ranker, self).__init__()

    def score_one(self, sd):
        """
        Score a single (query term, document) pair.

        :param sd: score data object; this method reads ``num_docs``,
            ``corpus_term_count``, ``doc_term_count``, ``avg_dl``,
            ``doc_size`` and ``query_term_weight`` from it
        :return: ``0.0`` when ``lda < 1`` or ``tfn <= 0``, otherwise the
            weighted score as a float
        """
        # float() forces true division; on Python 2 two integer counts would
        # otherwise be floor-divided and distort the `lda < 1` check below.
        lda = float(sd.num_docs) / sd.corpus_term_count
        tfn = sd.doc_term_count * _log2(1.0 + self.c * sd.avg_dl /
                                        sd.doc_size)
        if lda < 1 or tfn <= 0:
            return 0.0
        numerator = tfn * _log2(tfn * lda) \
            + _log2(math.e) * (1.0 / lda - tfn) \
            + 0.5 * _log2(2.0 * math.pi * tfn)
        return sd.query_term_weight * numerator / (tfn + 1.0)

if __name__ == '__main__':

    argv = sys.argv
    if len(argv) != 4:
        print("Usage: {} config.toml queries.txt start_query".format(argv[0]))
        sys.exit(1)

    # The index is created from the config path before the remaining
    # arguments are parsed.
    idx = metapy.index.make_inverted_index(argv[1])

    query_path = argv[2]
    query_num = int(argv[3])
    start_time = time.time()
    with open(query_path) as query_file:
        ranker = PL2Ranker()
        # One query per line of the input file; the query number increases
        # by one for every line read.
        for raw_line in query_file:
            query = metapy.index.Document()
            query.content(raw_line.strip())
            ranked = ranker.score(idx, query, 1000)
            # Result ranks are 1-based.
            for res_num, doc in enumerate(ranked, 1):
                docno = idx.metadata(doc[0]).get('name')
                row = "{}\t_\t{}\t{}\t{}\tMeTA".format(query_num, docno,
                                                       res_num, doc[1])
                print(row)
            query_num += 1

    elapsed = round(time.time() - start_time, 4)
    print("Elapsed: {} seconds".format(elapsed))
45 changes: 45 additions & 0 deletions metapy/get-release.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env python
from __future__ import print_function

from clint.textui import progress
import requests
import sys

# Usage: get-release.py <release-tag>
# Looks up the given release tag through the GitHub API and downloads every
# asset attached to it into the local dist/ directory.
if len(sys.argv) != 2:
    print("Usage: {} release-tag".format(sys.argv[0]))
    sys.exit(1)

baseurl = 'https://api.github.com/repos/meta-toolkit/metapy/releases/tags'

r = requests.get('{}/{}'.format(baseurl, sys.argv[1]))

if r.status_code != 200:
    print("Error: {}".format(r.status_code))
    print(r.text)
    sys.exit(1)

json = r.json()

print("Found release {} tagged by {}".format(json['tag_name'],
                                             json['author']['login']))

# Size of each piece requested from the streamed response body.
chunk_size = 4096

for asset in json['assets']:
    url = asset['browser_download_url']
    name = asset['name']
    print("Fetching {}...".format(name))

    r = requests.get(url, stream=True)
    if r.status_code != 200:
        print("Error fetching {}: {}".format(name, r.status_code))
        print(r.text)
        sys.exit(1)

    with open('dist/{}'.format(name), 'wb') as f:
        chunks = r.iter_content(chunk_size=chunk_size)

        # The content-length header may be absent; only show a progress bar
        # when the total size is known instead of crashing on int(None).
        content_length = r.headers.get('content-length')
        if content_length is not None:
            # Floor division keeps expected_size an integer on Python 3.
            expected = int(content_length) // chunk_size + 1
            chunks = progress.bar(chunks, expected_size=expected)

        for chunk in chunks:
            if chunk:
                f.write(chunk)
                f.flush()

print("Done!")
13 changes: 13 additions & 0 deletions metapy/include/metapy_analyzers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
* @file metapy_analyzers.h
* @author Chase Geigle
*/

#ifndef METAPY_ANALYZERS_H_
#define METAPY_ANALYZERS_H_

#include <cmath>
#include <pybind11/pybind11.h>

/**
 * Declaration only; the definition is not visible in this header.
 * NOTE(review): presumably registers the analyzer bindings on the given
 * module -- confirm against src/metapy_analyzers.cpp.
 *
 * @param m the pybind11 module supplied by the caller
 */
void metapy_bind_analyzers(pybind11::module& m);
#endif
14 changes: 14 additions & 0 deletions metapy/include/metapy_classify.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
* @file metapy_classify.h
* @author Chase Geigle
*/

#ifndef METAPY_CLASSIFY_H_
#define METAPY_CLASSIFY_H_

#include <cmath>
#include <pybind11/pybind11.h>

/**
 * Declaration only; the definition is not visible in this header.
 * NOTE(review): presumably registers the classification bindings on the
 * given module -- confirm against src/metapy_classify.cpp.
 *
 * @param m the pybind11 module supplied by the caller
 */
void metapy_bind_classify(pybind11::module& m);

#endif
14 changes: 14 additions & 0 deletions metapy/include/metapy_embeddings.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
* @file metapy_embeddings.h
* @author Chase Geigle
*/

#ifndef METAPY_EMBEDDINGS_H_
#define METAPY_EMBEDDINGS_H_

#include <cmath>
#include <pybind11/pybind11.h>

/**
 * Declaration only; the definition is not visible in this header.
 * NOTE(review): presumably registers the embeddings bindings on the given
 * module -- confirm against src/metapy_embeddings.cpp.
 *
 * @param m the pybind11 module supplied by the caller
 */
void metapy_bind_embeddings(pybind11::module& m);

#endif
Loading