This project has not been maintained since March 2019.
TensorFlow 2.0 provides feature column support for tf.keras.
Keras model for supporting categorical features in neural networks.
Inspired by the paper "Entity Embeddings of Categorical Variables".
pip install git+https://github.com/manuel-calzolari/keras-cat.git
- Python >= 2.7
- Keras >= 2.0.0
Download data from "Amazon.com - Employee Access Challenge".
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from input_categorical import InputCategorical
# Load the training set: 'ACTION' is the target, every other column is a feature.
df = pd.read_csv('train.csv')
y = df['ACTION'].values
X = df.drop('ACTION', axis=1)

# Replace each feature column (all 9 are categorical) with ordinal codes.
# The encoder is re-fitted on every column, so codes are assigned per column.
le = LabelEncoder()
for col in X.columns:
    X[col] = le.fit_transform(X[col])

# Continue with a plain NumPy array for the features, like the target.
X = X.values
# Derive the feature count from the data instead of hard-coding it, so the
# example keeps working if the number of columns changes.
n_features = X.shape[1]

# Build the network: the categorical input layer first, then a small dense
# head ending in a single sigmoid unit for the binary target.
model = Sequential()
model.add(InputCategorical(
    input_dim=n_features,
    # Every column is categorical, so pass all column indices.
    categorical=list(range(n_features)),
    # Largest ordinal code found in each column (column-wise maximum of X).
    max_level=list(X.max(axis=0)),
    # NOTE(review): presumably derives each embedding size from the 4th root
    # of the number of levels -- confirm against InputCategorical.
    embedding_dim='4throot'
))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])

# Hold out 20% of the rows for evaluation, train on the rest, then score the
# fitted model on the held-out rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model.fit(X_train, y_train)
score = model.evaluate(X_test, y_test)