Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

python interaction #46

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,7 @@ build/*
.Rhistory
*.tar.gz
xlearn/*

# Python-package
*.egg-info
*.pyc
10 changes: 8 additions & 2 deletions python-package/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
# Copy Python file
FILE(COPY "${CMAKE_CURRENT_SOURCE_DIR}/xlearn/base.py"
DESTINATION ${PROJECT_BINARY_DIR}/python-package/xlearn)
FILE(COPY "${CMAKE_CURRENT_SOURCE_DIR}/xlearn/libpath.py"
FILE(COPY "${CMAKE_CURRENT_SOURCE_DIR}/xlearn/core.py"
DESTINATION ${PROJECT_BINARY_DIR}/python-package/xlearn)
FILE(COPY "${CMAKE_CURRENT_SOURCE_DIR}/xlearn/compat.py"
DESTINATION ${PROJECT_BINARY_DIR}/python-package/xlearn)
FILE(COPY "${CMAKE_CURRENT_SOURCE_DIR}/xlearn/libpath.py"
DESTINATION ${PROJECT_BINARY_DIR}/python-package/xlearn)
FILE(COPY "${CMAKE_CURRENT_SOURCE_DIR}/xlearn/xlearn.py"
DESTINATION ${PROJECT_BINARY_DIR}/python-package/xlearn)
Expand All @@ -15,5 +19,7 @@ FILE(COPY "${CMAKE_CURRENT_SOURCE_DIR}/MANIFEST.in"
DESTINATION ${PROJECT_BINARY_DIR}/python-package)
FILE(COPY "${CMAKE_CURRENT_SOURCE_DIR}/test_python.py"
DESTINATION ${PROJECT_BINARY_DIR}/python-package/test)
FILE(COPY "${CMAKE_CURRENT_SOURCE_DIR}/install-python.sh"
FILE(COPY "${CMAKE_CURRENT_SOURCE_DIR}/test_python_module.py"
DESTINATION ${PROJECT_BINARY_DIR}/python-package/)
FILE(COPY "${CMAKE_CURRENT_SOURCE_DIR}/install-python.sh"
DESTINATION ${PROJECT_BINARY_DIR}/python-package)
6 changes: 5 additions & 1 deletion python-package/setup_pip.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ def silent_call(cmd, raise_error=False, error_msg=''):
with open(os.devnull, 'w') as shut_up:
subprocess.check_output(cmd, stderr=shut_up)
return 0
except OSError:
if raise_error:
raise Exception("open devnull error")
return 1
except Exception:
if raise_error:
raise Exception(error_msg);
Expand Down Expand Up @@ -81,7 +85,7 @@ def run(self):

if __name__ == "__main__":
setup(name='xlearn',
version="0.20.a1",
version=open(os.path.join(CURRENT_DIR, 'xlearn/VERSION')).read().strip(),
description="xLearn Python Package",
maintainer='Chao Ma',
maintainer_email='[email protected]',
Expand Down
57 changes: 57 additions & 0 deletions python-package/test_python_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/python
# coding: utf-8
# This file tests the xlearn python package.
# We create a ffm model for binary classification problem.
# The dataset comes from the criteo CTR.
from __future__ import absolute_import
import numpy as np
import pandas as pd
import xlearn as xl

from scipy.sparse import csr_matrix
from sklearn.datasets import load_svmlight_file

# Set hyper-parameters shared by every model trained in this script.
param = {'task': 'binary',
         'lr': 0.2,
         'lambda': 0.002,
         'metric': 'acc'}

# Test DMatrix built from the objects returned by load_svmlight_file.
X, Y = load_svmlight_file("./test_dmatrix.txt")
print(type(X), type(Y))
print(Y.dtype)
tmp_dmatrix = xl.DMatrix(X, Y)
fm_model = xl.create_fm()
fm_model.setTrain(tmp_dmatrix)
fm_model.setValidate(tmp_dmatrix)
fm_model.fit(param, "fm_model.out")

# Test DMatrix built from a pandas DataFrame.
# The frame must be referenced by the same name it was assigned to
# (the previous ``df_x`` / ``df_X`` mismatch raised NameError here).
df_x = pd.DataFrame(X.todense())
tmp_dmatrix = xl.DMatrix(df_x, Y)
fm_model = xl.create_fm()
fm_model.setTrain(tmp_dmatrix)
fm_model.setValidate(tmp_dmatrix)
fm_model.fit(param, "fm_model.out")


# Create field-aware factorization machine
ffm_model = xl.create_ffm()

# Set training data and validation data.
# setTrain / setValidate also accept a file path directly, e.g.
# ffm_model.setTrain("./small_train.txt").
dtrain = xl.DMatrix("./small_train.txt")
dtest = xl.DMatrix("./small_test.txt")
ffm_model.setTrain(dtrain)
ffm_model.setValidate(dtest)

# Train model
ffm_model.fit(param, "model.out")

# Predict (setTest also accepts a file path such as "./small_test.txt").
ffm_model.setTest(dtest)
ffm_model.predict("model.out", "output")


2 changes: 1 addition & 1 deletion python-package/xlearn/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.2.0
0.20a1
1 change: 1 addition & 0 deletions python-package/xlearn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import absolute_import
import os
from .xlearn import *
from .core import DMatrix

VERSION_FILE = os.path.join(os.path.dirname(__file__), 'VERSION')
with open(VERSION_FILE) as f:
Expand Down
14 changes: 14 additions & 0 deletions python-package/xlearn/compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# coding: utf-8

from __future__ import absolute_import

import sys

# True when the running interpreter's major version is 3.
PY3 = sys.version_info[0] == 3

# Type to hand to isinstance() when checking whether a value is a string.
# ``basestring`` is only evaluated when PY3 is false, so the name is never
# looked up on Python 3.
STRING_TYPES = str if PY3 else basestring  # noqa: F821

80 changes: 80 additions & 0 deletions python-package/xlearn/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# coding: utf-8

from __future__ import absolute_import

import ctypes
import numpy as np
import scipy.sparse

from .base import _LIB, _check_call, c_str
from .compat import STRING_TYPES

def c_array(ctype, values):
    """Convert a Python sequence or numpy array into a ctypes array.

    Parameters
    ----------
    ctype : ctypes scalar type
        Element type of the resulting array (e.g. ``ctypes.c_float``).
    values : numpy.ndarray or sequence
        Source values; ``len(values)`` determines the array length.

    Returns
    -------
    ctypes array
        A new ``ctype * len(values)`` array holding a copy of ``values``.
    """
    if isinstance(values, np.ndarray):
        # Convert by value to the dtype matching ``ctype`` before copying the
        # raw buffer. Comparing only item sizes would reinterpret the bits of
        # e.g. an int32 array as float32. ascontiguousarray also makes strided
        # views acceptable to from_buffer_copy.
        buf = np.ascontiguousarray(values, dtype=np.dtype(ctype))
        return (ctype * len(values)).from_buffer_copy(buf)
    return (ctype * len(values))(*values)

class DMatrix(object):
    """Data Matrix used in xlearn.

    Holds a native handle created by the xlearn shared library.

    Parameters
    ----------
    data : str, scipy.sparse.csr_matrix, scipy.sparse.csc_matrix or other
        A string is passed to the native file loader. CSR / CSC matrices are
        handed over directly. Anything else is first converted with
        ``scipy.sparse.csr_matrix``. ``None`` creates an empty wrapper whose
        ``handle`` is ``None``.
    label : sequence of numbers, optional
        Passed to the native library as a float array when given.
    field : optional
        Field information; its ``.data`` attribute is passed to the native
        library as unsigned ints.
        NOTE(review): presumably per-entry field ids for FFM -- confirm.
    silent : int
        Forwarded to the native loader when ``data`` is a file path.
    feature_names : optional
        Accepted but not used by this constructor.

    Raises
    ------
    TypeError
        If ``data`` is not a supported type and the CSR fallback fails.
    """

    _feature_names = None
    _field_names = None

    def __init__(self, data, label=None, field=None, silent=1,
                 feature_names=None):
        if data is None:
            self.handle = None
            return

        if isinstance(data, STRING_TYPES):
            self.handle = ctypes.c_void_p()
            _check_call(_LIB.XLDMatrixCreateFromFile(c_str(data),
                                                     ctypes.c_int(silent),
                                                     ctypes.byref(self.handle)))
        elif isinstance(data, scipy.sparse.csr_matrix):
            self._init_from_csr(data, field)
        elif isinstance(data, scipy.sparse.csc_matrix):
            self._init_from_csc(data, field)
        else:
            try:
                csr = scipy.sparse.csr_matrix(data)
                # Test for presence explicitly: ``if field`` raises
                # "truth value is ambiguous" for multi-element numpy arrays.
                # An empty field is still treated as "no field".
                has_field = field is not None and np.size(field) > 0
                # NOTE(review): csr_matrix drops zero entries of a dense
                # input, so a field id of 0 would not survive this
                # conversion -- confirm that is intended.
                csr_field = scipy.sparse.csr_matrix(field) if has_field else None
                self._init_from_csr(csr, csr_field)
            except Exception:
                # ``Exception`` (not a bare except) so that KeyboardInterrupt
                # and SystemExit are not turned into a TypeError.
                raise TypeError('can not initialize DMatrix from {}'.format(type(data).__name__))
        if label is not None:
            _check_call(_LIB.XLDMatrixSetLabel(ctypes.byref(self.handle),
                                               c_array(ctypes.c_float, label),
                                               ctypes.c_size_t(len(label))))

    def _init_from_csr(self, csr, field):
        """Create the native handle from a CSR matrix.

        Raises ValueError when ``csr.indices`` and ``csr.data`` differ in
        length. ``field`` may be ``None``; otherwise ``field.data`` is sent
        along and the "has field" flag is set.
        """
        if len(csr.indices) != len(csr.data):
            raise ValueError('length mismatch: {} vs {}'.format(len(csr.indices), len(csr.data)))
        self.handle = ctypes.c_void_p()
        _check_call(_LIB.XLDMatrixCreateFromCSREx(c_array(ctypes.c_size_t, csr.indptr),
                                                  c_array(ctypes.c_uint, csr.indices),
                                                  c_array(ctypes.c_float, csr.data),
                                                  c_array(ctypes.c_uint,
                                                          field.data if field is not None else []),
                                                  ctypes.c_size_t(len(csr.indptr) - 1),
                                                  ctypes.c_size_t(len(csr.data)),
                                                  ctypes.c_size_t(csr.shape[1]),
                                                  ctypes.c_bool(field is not None),
                                                  ctypes.byref(self.handle)))

    def _init_from_csc(self, csc, field):
        """Create the native handle from a CSC matrix.

        Raises ValueError when ``csc.indices`` and ``csc.data`` differ in
        length. ``field`` may be ``None``; otherwise ``field.data`` is sent
        along and the "has field" flag is set.
        """
        if len(csc.indices) != len(csc.data):
            raise ValueError('length mismatch: {} vs {}'.format(len(csc.indices), len(csc.data)))
        self.handle = ctypes.c_void_p()
        _check_call(_LIB.XLDMatrixCreateFromCSCEx(c_array(ctypes.c_size_t, csc.indptr),
                                                  c_array(ctypes.c_uint, csc.indices),
                                                  c_array(ctypes.c_float, csc.data),
                                                  c_array(ctypes.c_uint,
                                                          field.data if field is not None else []),
                                                  ctypes.c_size_t(len(csc.indptr) - 1),
                                                  ctypes.c_size_t(len(csc.data)),
                                                  ctypes.c_size_t(csc.shape[0]),
                                                  ctypes.c_bool(field is not None),
                                                  ctypes.byref(self.handle)))

39 changes: 28 additions & 11 deletions python-package/xlearn/xlearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import ctypes
from .base import _LIB, XLearnHandle
from .base import _check_call, c_str
from .compat import STRING_TYPES
from .core import DMatrix

class XLearn(object):
"""XLearn is the core interface used by python API."""
Expand Down Expand Up @@ -83,35 +85,50 @@ def show(self):
"""
_check_call(_LIB.XLearnShow(ctypes.byref(self.handle)))

def setTrain(self, train_path):
"""Set file path of training data.
def setTrain(self, train_data):
"""Set file path of training data / DMatrix of training

Parameters
----------
train_path : str
the path of training data
train_path : str / DMatrix
the path of training data / DMatrix of training
"""
_check_call(_LIB.XLearnSetTrain(ctypes.byref(self.handle), c_str(train_path)))
if isinstance(train_data, STRING_TYPES):
_check_call(_LIB.XLearnSetTrain(ctypes.byref(self.handle), c_str(train_data)))
elif isinstance(train_data, DMatrix):
_check_call(_LIB.XLearnSetTrainDMatrix(ctypes.byref(self.handle), ctypes.byref(train_data.handle)))
else:
raise Exception("Unkown Type")

def setTest(self, test_path):
"""Set file path of test data.
def setTest(self, test_data):
"""Set file path of test data / DMatrix

Parameters
----------
test_path : str
the path of test data.
"""
_check_call(_LIB.XLearnSetTest(ctypes.byref(self.handle), c_str(test_path)))

def setValidate(self, val_path):
if isinstance(test_data, STRING_TYPES):
_check_call(_LIB.XLearnSetTest(ctypes.byref(self.handle), c_str(test_data)))
elif isinstance(test_data, DMatrix):
_check_call(_LIB.XLearnSetTestDMatrix(ctypes.byref(self.handle), ctypes.byref(test_data.handle)))
else:
raise Exception("Unkown Type")

def setValidate(self, val_data):
"""Set file path of validation data.

Parameters
----------
val_path : str
the path of validation data.
"""
_check_call(_LIB.XLearnSetValidate(ctypes.byref(self.handle), c_str(val_path)))
if isinstance(val_data, STRING_TYPES):
_check_call(_LIB.XLearnSetValidate(ctypes.byref(self.handle), c_str(val_data)))
elif isinstance(val_data, DMatrix):
_check_call(_LIB.XLearnSetValidateDMatrix(ctypes.byref(self.handle), ctypes.byref(val_data.handle)))
else:
raise Exception("Unkown Type")

def setQuiet(self):
"""Set xlearn to quiet model"""
Expand Down
2 changes: 1 addition & 1 deletion src/base/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ programming convenient.
} while (0)

//------------------------------------------------------------------------------
// This marcro is used to disallow copy constructor and assign operator in
// This macro is used to disallow copy constructor and assign operator in
// class definition. For more details, please refer to Google coding style
// document
// [http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
Expand Down
Loading