forked from ZiyaoGeng/RecLearn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.py
115 lines (103 loc) · 5.13 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
"""
Created on Nov 18, 2020
model: Personalized Top-N Sequential Recommendation via Convolutional Sequence Embedding
@author: Ziyao Geng
"""
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import Embedding, Input, Conv1D, GlobalMaxPooling1D, Dense, Dropout
class Caser(Model):
    def __init__(self, feature_columns, maxlen=40, hor_n=2, hor_h=8, ver_n=8, dropout=0.5, activation='relu', embed_reg=1e-6):
        """
        Caser — Convolutional Sequence Embedding Recommendation Model.

        Combines horizontal and vertical convolutions over the embedded item
        sequence with a user embedding to score a candidate item.

        :param feature_columns: A feature columns list: [user feature dict, item/seq feature dict].
        :param maxlen: A scalar. In the paper, maxlen is L, the number of latest items.
        :param hor_n: A scalar. The number of horizontal filters.
        :param hor_h: A scalar. Height of horizontal filters (clamped to maxlen below).
        :param ver_n: A scalar. The number of vertical filters.
        :param dropout: A scalar. Dropout rate applied to the sequence representation.
        :param activation: A string. 'relu', 'sigmoid' or 'tanh'.
        :param embed_reg: A scalar. The L2 regularizer coefficient for all embeddings.
        """
        super(Caser, self).__init__()
        # maxlen
        self.maxlen = maxlen
        # feature columns: user dict and item dict
        self.user_fea_col, self.item_fea_col = feature_columns
        # embed_dim (d in the paper)
        self.embed_dim = self.item_fea_col['embed_dim']
        # total number of item set
        self.total_item = self.item_fea_col['feat_num']
        # horizontal filters; a kernel taller than the sequence would be invalid,
        # so the height is clamped to maxlen
        self.hor_n = hor_n
        self.hor_h = hor_h if hor_h <= self.maxlen else self.maxlen
        # vertical filters: width fixed to 1 so each filter spans one embedding dim
        self.ver_n = ver_n
        self.ver_w = 1
        # user embedding: maps user id -> (user embed_dim,)
        self.user_embedding = Embedding(input_dim=self.user_fea_col['feat_num'],
                                        input_length=1,
                                        output_dim=self.user_fea_col['embed_dim'],
                                        mask_zero=False,
                                        embeddings_initializer='random_normal',
                                        embeddings_regularizer=l2(embed_reg))
        # item embedding for the input sequence; mask_zero=True treats id 0 as padding
        # NOTE(review): the mask produced here propagates into Conv1D in call();
        # confirm the installed TF version tolerates a mask on Conv1D inputs.
        self.item_embedding = Embedding(input_dim=self.item_fea_col['feat_num'],
                                        input_length=1,
                                        output_dim=self.item_fea_col['embed_dim'],
                                        mask_zero=True,
                                        embeddings_initializer='random_normal',
                                        embeddings_regularizer=l2(embed_reg))
        # second item embedding (2*d wide) for the candidate item, sized to match
        # the concatenation of sequence info and user embedding in call()
        self.item2_embedding = Embedding(input_dim=self.item_fea_col['feat_num'],
                                         input_length=1,
                                         output_dim=self.item_fea_col['embed_dim'] * 2,
                                         mask_zero=True,
                                         embeddings_initializer='random_normal',
                                         embeddings_regularizer=l2(embed_reg))
        # horizontal conv: slides over time steps
        self.hor_conv = Conv1D(filters=self.hor_n, kernel_size=self.hor_h)
        # vertical conv: input is transposed in call() so it slides over embedding dims
        self.ver_conv = Conv1D(filters=self.ver_n, kernel_size=self.ver_w)
        # max_pooling over the time axis of the horizontal conv output
        self.pooling = GlobalMaxPooling1D()
        # dense projection of concatenated conv features back to embed_dim
        self.dense = Dense(self.embed_dim, activation=activation)
        self.dropout = Dropout(dropout)

    def call(self, inputs):
        """
        Score one candidate item per (user, sequence) pair.

        :param inputs: (user_inputs (None, 1), seq_inputs (None, maxlen),
                        item_inputs (None, 1)) — integer id tensors.
        :return: (None, 1) sigmoid scores in (0, 1).
        """
        # input
        user_inputs, seq_inputs, item_inputs = inputs
        # user info; squeeze drops the length-1 axis so the embedding is (None, dim)
        user_embed = self.user_embedding(tf.squeeze(user_inputs, axis=-1))  # (None, dim)
        # seq info
        seq_embed = self.item_embedding(seq_inputs)  # (None, maxlen, dim)
        # horizontal conv (None, (maxlen - kernel_size + 2 * pad) / stride + 1, hor_n)
        hor_info = self.hor_conv(seq_embed)
        hor_info = self.pooling(hor_info)  # (None, hor_n)
        # vertical conv (None, (dim - 1 + 2 * pad) / stride + 1, ver_n);
        # transpose makes the embedding dims the "time" axis
        ver_info = self.ver_conv(tf.transpose(seq_embed, perm=(0, 2, 1)))
        # flatten all vertical conv outputs into one feature vector
        ver_info = tf.reshape(ver_info, shape=(-1, ver_info.shape[1] * ver_info.shape[2]))  # (None, ?)
        # info: project concatenated conv features to d, then apply dropout
        seq_info = self.dense(tf.concat([hor_info, ver_info], axis=-1))  # (None, d)
        seq_info = self.dropout(seq_info)
        # concat sequence representation with user embedding
        info = tf.concat([seq_info, user_embed], axis=-1)  # (None, 2 * d)
        # item info: candidate item embedding (2*d wide to match `info`)
        item_embed = self.item2_embedding(tf.squeeze(item_inputs, axis=-1))  # (None, 2 * dim)
        # predict: sigmoid of the inner product between `info` and the candidate embedding
        outputs = tf.nn.sigmoid(tf.reduce_sum(tf.multiply(info, item_embed), axis=1, keepdims=True))
        return outputs

    def summary(self):
        """Build a functional wrapper around call() and print its Keras summary."""
        seq_inputs = Input(shape=(self.maxlen,), dtype=tf.int32)
        user_inputs = Input(shape=(1, ), dtype=tf.int32)
        item_inputs = Input(shape=(1,), dtype=tf.int32)
        Model(inputs=[user_inputs, seq_inputs, item_inputs],
              outputs=self.call([user_inputs, seq_inputs, item_inputs])).summary()
def test_model():
    """Smoke test: build a tiny Caser model and print its layer summary."""
    columns = [
        {'feat': 'user_id', 'feat_num': 100, 'embed_dim': 8},
        {'feat': 'item_id', 'feat_num': 100, 'embed_dim': 8},
    ]
    Caser(columns).summary()


# test_model()