Update docs & fix bug
fix bug in MLP layer when using an advanced activation layer
add linear term to FNN
shenweichen committed Nov 26, 2018
1 parent 9e28448 commit d15a956
Showing 9 changed files with 62 additions and 35 deletions.
1 change: 1 addition & 0 deletions deepctr/__init__.py
@@ -1,3 +1,4 @@
from .import activations
from .import layers
from .import sequence
from .import models
3 changes: 2 additions & 1 deletion deepctr/activations.py
@@ -26,6 +26,7 @@ def __init__(self, axis=-1, epsilon=1e-9, **kwargs):
self.epsilon = epsilon
super(Dice, self).__init__(**kwargs)


def build(self, input_shape):
self.alphas = self.add_weight(shape=(input_shape[-1],), initializer=Zeros(
), dtype=tf.float32, name=self.name+'dice_alpha') # name='alpha_'+self.name
@@ -34,7 +35,7 @@ def build(self, input_shape):
def call(self, inputs, **kwargs):

inputs_normed = BatchNormalization(
axis=self.axis, epsilon=self.epsilon, center=False, scale=False,name=self.name+"bn")(inputs)
axis=self.axis, epsilon=self.epsilon, center=False, scale=False)(inputs)
x_p = tf.sigmoid(inputs_normed)
return self.alphas * (1.0 - x_p) * inputs + x_p * inputs
def get_config(self,):
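For readers skimming the diff, the Dice activation above reduces to a data-adaptive gate between the raw input and its scaled negative part: x_p = sigmoid(BN(x)), output = alpha*(1-x_p)*x + x_p*x. A minimal NumPy sketch of the forward pass, using plain batch standardization as a stand-in for the non-affine BatchNormalization (function and variable names are illustrative, not part of the library):

import numpy as np

def dice_forward(x, alpha, epsilon=1e-9):
    # x: (batch, units) layer input; alpha: (units,) learnable slopes (the `alphas` weight).
    # Standardize each unit over the batch, mimicking BatchNormalization(center=False, scale=False).
    x_normed = (x - x.mean(axis=0)) / np.sqrt(x.var(axis=0) + epsilon)
    x_p = 1.0 / (1.0 + np.exp(-x_normed))       # x_p = sigmoid(BN(x))
    return alpha * (1.0 - x_p) * x + x_p * x    # same expression as Dice.call()

x = np.random.randn(8, 4).astype("float32")
alpha = np.zeros(4, dtype="float32")            # Zeros initializer, as in Dice.build()
print(dice_forward(x, alpha).shape)             # (8, 4)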
10 changes: 5 additions & 5 deletions deepctr/layers.py
@@ -3,7 +3,6 @@
from tensorflow.python.keras.initializers import RandomNormal,Zeros,glorot_normal,glorot_uniform
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.activations import softmax
from .activations import Dice
import tensorflow as tf


@@ -302,9 +301,10 @@ def call(self, inputs,**kwargs):

if isinstance(self.activation,str):
fc = Activation(self.activation)(fc)
elif issubclass(self.activation,Layer):
fc = self.activation()(fc)
else:
fc = self.activation(fc,name=self.name+"act"+str(l))

raise ValueError("Invalid activation of MLP,found %s.You should use a str or a Activation Layer Class."%(self.activation))
fc = Dropout(1 - self.keep_prob)(fc)

deep_input = fc
@@ -636,9 +636,9 @@ def call(self, inputs,**kwargs):
keys_len = keys.get_shape()[1]
queries = K.repeat_elements(query,keys_len,1)

att_input = K.concatenate([queries, keys, queries - keys, queries * keys], axis=-1)
att_input = tf.concat([queries, keys, queries - keys, queries * keys], axis=-1)
att_input = BatchNormalization()(att_input)
att_out = MLP(self.hidden_size, self.activation, self.l2_reg, self.keep_prob, self.use_bn, seed=self.seed,name=self.name+"mlp")(att_input)
att_out = MLP(self.hidden_size, self.activation, self.l2_reg, self.keep_prob, self.use_bn, seed=self.seed)(att_input)
attention_score = Dense(1, 'linear')(att_out)

return attention_score
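The MLP change above is the activation-layer bug fix from the commit message: the old fallback `self.activation(fc, name=...)` failed when an advanced-activation class such as Dice was passed, so the layer now dispatches on the type of `activation` and raises otherwise. A standalone restatement of that dispatch, assuming the TF 1.x Keras imports used elsewhere in the repo (the helper name and the extra isinstance(type) guard, which avoids a TypeError for inputs that are neither strings nor classes, are mine):

from tensorflow.python.keras.layers import Activation, Layer

def apply_activation(activation, tensor):
    # `activation` is either a string name ('relu', 'sigmoid', ...) or an
    # advanced-activation Layer *class* (e.g. Dice, PReLU) instantiated per hidden layer.
    if isinstance(activation, str):
        return Activation(activation)(tensor)
    elif isinstance(activation, type) and issubclass(activation, Layer):
        return activation()(tensor)             # a class is passed, not an instance
    raise ValueError(
        "Invalid activation of MLP, found %s. "
        "You should use a str or an Activation Layer Class." % (activation,))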
6 changes: 3 additions & 3 deletions deepctr/models/din.py
@@ -30,7 +30,7 @@ def get_input(feature_dim_dict, seq_feature_list, seq_max_len):


def DIN(feature_dim_dict, seq_feature_list, embedding_size=4, hist_len_max=16,
use_din=True, use_bn=True, hidden_size=[200, 80], activation=Dice(), att_hidden_size=[80, 40], att_activation='sigmoid', att_weight_normalization=True,
use_din=True, use_bn=False, hidden_size=[200, 80], activation=Dice, att_hidden_size=[80, 40], att_activation='sigmoid', att_weight_normalization=True,
l2_reg_deep=5e-5, l2_reg_embedding=0, final_activation='sigmoid', keep_prob=1, init_std=0.0001, seed=1024, ):
"""Instantiates the Deep Interest Network architecture.
@@ -39,7 +39,7 @@ def DIN(feature_dim_dict, seq_feature_list, embedding_size=4, hist_len_max=16,
:param embedding_size: positive integer,sparse feature embedding_size.
:param hist_len_max: positive int, to indicate the max length of seq input
:param use_din: bool, whether use din pooling or not.If set to ``False``,use **sum pooling**
:param use_bn: bool. Whether use BatchNormalization before activation or not.in deep net
:param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
:param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net
:param activation: Activation function to use in deep net
:param att_hidden_size: list,list of positive integer , the layer number and units in each layer of attention net
@@ -93,7 +93,7 @@ def DIN(feature_dim_dict, seq_feature_list, embedding_size=4, hist_len_max=16,

deep_input_emb = Concatenate()([deep_input_emb, hist])
output = MLP(hidden_size, activation, l2_reg_deep,
keep_prob, use_bn, seed,)(deep_input_emb)
keep_prob, use_bn, seed)(deep_input_emb)
output = Dense(1, final_activation)(output)
output = Reshape([1])(output)
model_input_list = list(sparse_input.values(
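One practical consequence of the signature change above: `activation` should now receive the Dice class itself rather than an instance, since each hidden layer instantiates its own copy. A hypothetical usage sketch; the feature names, vocabulary sizes, and import paths below are assumptions for illustration, following the feature_dim_dict format documented in fnn.py:

from deepctr.models import DIN
from deepctr.activations import Dice

feature_dim_dict = {"sparse": {"user": 3, "gender": 2, "item": 4, "item_gender": 2},
                    "dense": []}
behavior_feature_list = ["item", "item_gender"]   # sparse features that also come with a history sequence

model = DIN(feature_dim_dict, behavior_feature_list,
            embedding_size=8, hist_len_max=16,
            hidden_size=[200, 80], activation=Dice,   # pass the class, not Dice()
            att_hidden_size=[80, 40], att_activation='sigmoid')
model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy'])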
50 changes: 37 additions & 13 deletions deepctr/models/fnn.py
@@ -7,7 +7,7 @@
[1] Zhang, Weinan, Tianming Du, and Jun Wang. "Deep learning over multi-field categorical data." European conference on information retrieval. Springer, Cham, 2016.(https://arxiv.org/pdf/1601.02376.pdf)
"""

from tensorflow.python.keras.layers import Dense, Embedding, Concatenate, Reshape
from tensorflow.python.keras.layers import Dense, Embedding, Concatenate, Reshape,add
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.initializers import RandomNormal
from tensorflow.python.keras.regularizers import l2
@@ -18,7 +18,7 @@

def FNN(feature_dim_dict, embedding_size=4,
hidden_size=[32],
l2_reg_embedding=1e-5, l2_reg_deep=0,
l2_reg_embedding=1e-5, l2_reg_linear=1e-5,l2_reg_deep=0,
init_std=0.0001, seed=1024, keep_prob=0.5,
activation='relu', final_activation='sigmoid', ):
"""Instantiates the Factorization-supported Neural Network architecture.
@@ -27,6 +27,7 @@ def FNN(feature_dim_dict, embedding_size=4,
:param embedding_size: positive integer,sparse feature embedding_size
:param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net
:param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
:param l2_reg_linear: float. L2 regularizer strength applied to linear weight
:param l2_reg_deep: float . L2 regularizer strength applied to deep net
:param init_std: float,to use as the initialize std of embedding vector
:param seed: integer ,to use as random seed.
@@ -41,15 +42,23 @@
"feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}")

sparse_input, dense_input = get_input(feature_dim_dict, None)
sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size,
embeddings_initializer=RandomNormal(
mean=0.0, stddev=init_std, seed=seed),
embeddings_regularizer=l2(
l2_reg_embedding),
name='sparse_emb_' + str(i) + '-' + feat) for i, feat in
enumerate(feature_dim_dict["sparse"])]
#sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size,
# embeddings_initializer=RandomNormal( mean=0.0, stddev=init_std, seed=seed),
# embeddings_regularizer=l2( l2_reg_embedding),name='sparse_emb_' + str(i) + '-' + feat) for i, feat in
# enumerate(feature_dim_dict["sparse"])]
sparse_embedding, linear_embedding, = get_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_reg_deep,
l2_reg_linear)

embed_list = [sparse_embedding[i](sparse_input[i])
for i in range(len(feature_dim_dict["sparse"]))]

linear_term = [linear_embedding[i](sparse_input[i]) for i in range(len(sparse_input))]
if len(linear_term) > 1:
linear_term = add(linear_term)
elif len(linear_term) >0:
linear_term = linear_term[0]
else:
linear_term = 0
#linear_term = add([linear_embedding[i](sparse_input[i]) for i in range(len(feature_dim_dict["sparse"]))])
if len(dense_input) > 0:
continuous_embedding_list = list(
@@ -59,18 +68,33 @@
map(Reshape((1, embedding_size)), continuous_embedding_list))
embed_list += continuous_embedding_list

#dense_input_ = dense_input[0] if len(dense_input) == 1 else Concatenate()(dense_input)
#linear_dense_logit = Dense(1,activation=None,use_bias=False,kernel_regularizer=l2(l2_reg_linear))(dense_input_)
#linear_term = linear_dense_logit
dense_input_ = dense_input[0] if len(dense_input) == 1 else Concatenate()(dense_input)
linear_dense_logit = Dense(1,activation=None,use_bias=False,kernel_regularizer=l2(l2_reg_linear))(dense_input_)
linear_term = add([linear_dense_logit, linear_term])

num_inputs = len(dense_input) + len(sparse_input)
deep_input = Reshape([num_inputs*embedding_size]
)(Concatenate()(embed_list))
deep_out = MLP(hidden_size, activation, l2_reg_deep,
keep_prob, False, seed)(deep_input)
deep_logit = Dense(1, use_bias=False, activation=None)(deep_out)
final_logit = deep_logit
final_logit = add([deep_logit,linear_term])
output = PredictionLayer(final_activation)(final_logit)
model = Model(inputs=sparse_input + dense_input,
outputs=output)
return model

def get_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_rev_V, l2_reg_w):
sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size,
embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed),
embeddings_regularizer=l2(l2_rev_V),
name='sparse_emb_' + str(i) + '-' + feat) for i, feat in
enumerate(feature_dim_dict["sparse"])]
linear_embedding = [Embedding(feature_dim_dict["sparse"][feat], 1,
embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std,
seed=seed)
, embeddings_regularizer=l2(l2_reg_w),
name='linear_emb_' + str(i) + '-' + feat) for
i, feat in enumerate(feature_dim_dict["sparse"])]

return sparse_embedding, linear_embedding
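
Conceptually, the linear term added here is the w·x part of a logistic regression: `get_embeddings` gives every sparse field a second, 1-dimensional embedding table, the per-sample lookups are summed with `add`, a Dense(1) over the concatenated dense inputs contributes the continuous part, and the result is added to the deep logit before the final activation. A toy NumPy sketch of the sparse part (shapes and values are illustrative only):

import numpy as np

vocab_sizes = [4, 3, 2]                                                 # three sparse fields
linear_tables = [np.random.randn(v, 1) * 1e-4 for v in vocab_sizes]     # Embedding(v, 1) weight tables

sample = [2, 0, 1]                                                      # one category index per field
linear_logit = sum(linear_tables[i][idx] for i, idx in enumerate(sample))

# In the model: final_logit = deep_logit + linear_logit, then PredictionLayer(sigmoid).
print(linear_logit.shape)                                               # (1,)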
17 changes: 9 additions & 8 deletions deepctr/sequence.py
@@ -1,5 +1,4 @@
from tensorflow.python.keras.layers import Layer
from tensorflow.python.keras import backend as K
from .layers import LocalActivationUnit
from .activations import Dice
import tensorflow as tf
@@ -16,7 +15,7 @@ class SequencePoolingLayer(Layer):
- seq_len is a 2D tensor with shape : ``(batch_size, 1)``,indicate valid length of each sequence.
Output shape
- 3D tensor with shape: ``(batch_size, 1, embedding_size)``.
- 3D tensor with shape: `(batch_size, 1, embedding_size)`.
Arguments
- **seq_len_max**:Positive integer indicates that the max length of all the sequence feature,usually same as T.
@@ -42,14 +41,15 @@ def call(self, seq_value_len_list, **kwargs):
mask = tf.sequence_mask(user_behavior_length,
self.seq_len_max, dtype=tf.float32)

mask = K.permute_dimensions(mask, [0, 2, 1])
mask = tf.transpose(mask,(0,2,1))

mask = tf.tile(mask, [1, 1, embedding_size])
uiseq_embed_list *= mask
hist = uiseq_embed_list
if self.mode == "max":
return K.max(hist, 1, keepdims=True)
return tf.reduce_max(hist, 1, keep_dims=True)

hist = K.sum(hist, 1, keepdims=False)
hist = tf.reduce_sum(hist, 1, keep_dims=False)
if self.mode == "mean":

hist = tf.div(hist, user_behavior_length)
@@ -78,7 +78,7 @@ class AttentionSequencePoolingLayer(Layer):
- keys_length is a 2D tensor with shape: ``(batch_size, 1)``
Output shape
- 3D tensor with shape: ``(batch_size, 1, embedding_size)``.
-3D tensor with shape: ``(batch_size, 1, embedding_size)``.
Arguments
- **hidden_size**:list of positive integer, the attention net layer number and units in each layer.
@@ -124,7 +124,8 @@ def call(self, inputs, **kwargs):
attention_score = LocalActivationUnit(
self.hidden_size, self.activation, 0, 1, False, 1024,)([queries, keys])

outputs = K.permute_dimensions(attention_score, (0, 2, 1))
outputs = tf.transpose(attention_score,(0,2,1))

key_masks = tf.sequence_mask(keys_length, hist_len)

if self.weight_normalization:
@@ -135,7 +136,7 @@ def call(self, inputs, **kwargs):
outputs = tf.where(key_masks, outputs, paddings)

if self.weight_normalization:
outputs = K.softmax(outputs)
outputs = tf.nn.softmax(outputs)

outputs = tf.matmul(outputs, keys)

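The sequence.py edits swap Keras-backend ops for their TensorFlow equivalents without changing behaviour. A minimal sanity check, assuming a TF 1.4-1.6 graph-mode session (note that `keep_dims` is the pre-1.7 spelling of `keepdims`):

import tensorflow as tf

# K.permute_dimensions(x, (0, 2, 1)) -> tf.transpose(x, (0, 2, 1))
# K.max(x, 1, keepdims=True)         -> tf.reduce_max(x, 1, keep_dims=True)
# K.sum(x, 1, keepdims=False)        -> tf.reduce_sum(x, 1, keep_dims=False)
# K.softmax(x)                       -> tf.nn.softmax(x)
x = tf.random_normal([2, 1, 3])
y = tf.transpose(x, (0, 2, 1))              # (2, 3, 1)
m = tf.reduce_max(x, 1, keep_dims=True)     # (2, 1, 3)
s = tf.nn.softmax(tf.reduce_sum(x, 1))      # (2, 3)
with tf.Session() as sess:
    print(*[t.shape for t in sess.run([y, m, s])])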
4 changes: 2 additions & 2 deletions docs/source/Features.rst
@@ -178,8 +178,8 @@ Layers
The models of deepctr are modular,
so you can use different modules to build your own models.

The module is a class that inherits from ``Keras.layers.Layer``,it has
the same properties and methods as keras Layers like ``keras.layers.Dense()`` etc
The module is a class that inherits from ``tf.keras.layers.Layer``,it has
the same properties and methods as keras Layers like ``tf.keras.layers.Dense()`` etc

You can see layers API in `layers <./deepctr.layers.html>`_

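As a concrete (hypothetical) illustration of that point, a deepctr layer such as MLP can be dropped into an ordinary Keras functional model; the arguments are passed positionally here in the same order the models in this commit use them (hidden_size, activation, l2_reg, keep_prob, use_bn, seed), since the exact keyword names are not shown in the diff:

import tensorflow as tf
from deepctr.layers import MLP

inputs = tf.keras.layers.Input(shape=(16,))
hidden = MLP([32, 8], 'relu', 0, 1.0, False, 1024)(inputs)    # deepctr layer used like any Keras layer
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(hidden)
model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
model.summary()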
2 changes: 1 addition & 1 deletion docs/source/Quick-Start.rst
@@ -3,7 +3,7 @@ Quick-Start

Installation Guide
----------------------
Install deepctr package is through ``pip`` .You must make sure that you have already installed 1.4.0<=tensorflow<1.7.0 on your local machine: ::
Install deepctr package is through ``pip`` .You must make sure that you have already installed ``1.4.0<=tensorflow<1.7.0`` on your local machine: ::

pip install deepctr

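Because the supported range is ``1.4.0<=tensorflow<1.7.0``, a quick pre-flight check such as the following (a suggestion, not part of the package) can catch an unsupported TensorFlow before importing deepctr:

from distutils.version import LooseVersion
import tensorflow as tf

# Fail fast if the installed TensorFlow is outside the range the docs require.
assert LooseVersion("1.4.0") <= LooseVersion(tf.__version__) < LooseVersion("1.7.0"), \
    "deepctr expects 1.4.0 <= tensorflow < 1.7.0, found %s" % tf.__version__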
4 changes: 2 additions & 2 deletions docs/source/index.rst
@@ -8,7 +8,7 @@ Welcome to DeepCTR's documentation!

DeepCTR is a **Easy-to-use** , **Modular** and **Extendible** package of deep-learning based CTR models along with lots of core components layer which can be used to build your own custom model easily.You can use any complex model with ``model.fit()`` and ``model.predict()`` just like any other keras model.And the layers are compatible with tensorflow.

Through ``pip install deepctr`` get the package and [**Get Started!**](https://deepctr-doc.readthedocs.io/en/latest/Quick-Start.html)
Through ``pip install deepctr`` get the package and `Get Started! <./Quick-Start.html>`_

You can read the source code at https://github.com/shenweichen/DeepCTR

@@ -22,7 +22,7 @@ You can read the source code at https://github.com/shenweichen/DeepCTR

.. toctree::
:maxdepth: 3
:caption: APIs:
:caption: API:

Models API<Models-API>
Layers API<deepctr.layers.rst>
