From 25c03a2e178971a94fc718bcd5f086ad8c0737e8 Mon Sep 17 00:00:00 2001
From: XiaoLongtaoo <90383205+XiaoLongtaoo@users.noreply.github.com>
Date: Tue, 5 Nov 2024 21:30:36 +0800
Subject: [PATCH] Add the WuKong model (#106)

* the implementation of WuKong by XiaoLongtao
---
 model_zoo/WuKong/README.md                  |   3 +
 model_zoo/WuKong/config/dataset_config.yaml |   7 +
 model_zoo/WuKong/config/model_config.yaml   |  65 ++++++++
 model_zoo/WuKong/fuxictr_version.py         |   3 +
 model_zoo/WuKong/run_expid.py               |  87 +++++++++++
 model_zoo/WuKong/src/WuKong.py              | 155 ++++++++++++++++++++
 model_zoo/WuKong/src/__init__.py            |   1 +
 7 files changed, 321 insertions(+)
 create mode 100644 model_zoo/WuKong/README.md
 create mode 100644 model_zoo/WuKong/config/dataset_config.yaml
 create mode 100644 model_zoo/WuKong/config/model_config.yaml
 create mode 100644 model_zoo/WuKong/fuxictr_version.py
 create mode 100644 model_zoo/WuKong/run_expid.py
 create mode 100644 model_zoo/WuKong/src/WuKong.py
 create mode 100644 model_zoo/WuKong/src/__init__.py

diff --git a/model_zoo/WuKong/README.md b/model_zoo/WuKong/README.md
new file mode 100644
index 0000000..4c5df72
--- /dev/null
+++ b/model_zoo/WuKong/README.md
@@ -0,0 +1,3 @@
+# WuKong
+
+> Buyun Zhang, Liang Luo, Yuxin Chen, Jade Nie, Xi Liu, Daifeng Guo, Yanli Zhao, Shen Li, Yuchen Hao, Yantao Yao, Guna Lakshminarayanan, Ellie Dingqiao Wen, Jongsoo Park, Maxim Naumov, Wenlin Chen. [Wukong: Towards a Scaling Law for Large-Scale Recommendation](https://arxiv.org/abs/2403.02545), in arXiv 2024.
\ No newline at end of file
diff --git a/model_zoo/WuKong/config/dataset_config.yaml b/model_zoo/WuKong/config/dataset_config.yaml
new file mode 100644
index 0000000..a1dbabf
--- /dev/null
+++ b/model_zoo/WuKong/config/dataset_config.yaml
@@ -0,0 +1,7 @@
+### Tiny data for tests only
+tiny_npz:
+    data_root: ../../data/
+    data_format: npz
+    train_data: ../../data/tiny_npz/train.npz
+    valid_data: ../../data/tiny_npz/valid.npz
+    test_data: ../../data/tiny_npz/test.npz
\ No newline at end of file
diff --git a/model_zoo/WuKong/config/model_config.yaml b/model_zoo/WuKong/config/model_config.yaml
new file mode 100644
index 0000000..e85c00f
--- /dev/null
+++ b/model_zoo/WuKong/config/model_config.yaml
@@ -0,0 +1,65 @@
+Base:
+    model_root: './checkpoints/'
+    num_workers: 12
+    verbose: 1
+    early_stop_patience: 2
+    pickle_feature_encoder: True
+    save_best_only: True
+    eval_steps: null
+    debug_mode: False
+    group_id: null
+    use_features: null
+    feature_specs: null
+    feature_config: null
+
+WuKong_default: # This is a config template
+    model: WuKong
+    dataset_id: TBD
+    loss: 'binary_crossentropy'
+    metrics: ['logloss', 'AUC']
+    task: binary_classification
+    optimizer: adam
+    learning_rate: 1.0e-3
+    embedding_regularizer: 0
+    net_regularizer: 0
+    batch_size: 10000
+    embedding_dim: 64
+    num_layers: 8
+    compression_dim: 40
+    fmb_units: [200,200]
+    fmb_dim: 40
+    project_dim: 8
+    dropout_rate: 0.2
+    hidden_activations: relu
+    mlp_hidden_units: [32,32]
+    epochs: 100
+    shuffle: True
+    seed: 2024
+    monitor: {'AUC': 1, 'logloss': -1}
+    monitor_mode: 'max'
+
+WuKong_test:
+    model: WuKong
+    dataset_id: tiny_npz
+    loss: 'binary_crossentropy'
+    metrics: ['logloss', 'AUC']
+    task: binary_classification
+    optimizer: adam
+    learning_rate: 1.0e-3
+    embedding_regularizer: 0
+    net_regularizer: 0
+    batch_size: 2048
+    embedding_dim: 64
+    num_layers: 4
+    compression_dim: 32
+    fmb_units: [128,128,128]
+    fmb_dim: 32
+    project_dim: 24
+    dropout_rate: 0.2
+    hidden_activations: relu
+    mlp_hidden_units: [64]
+    epochs: 5
+    shuffle: True
+    seed: 2024
+    monitor: 'AUC'
+    monitor_mode: 'max'
\ No newline at end of file
diff --git a/model_zoo/WuKong/fuxictr_version.py b/model_zoo/WuKong/fuxictr_version.py
new file mode 100644
index 0000000..c8f6192
--- /dev/null
+++ b/model_zoo/WuKong/fuxictr_version.py
@@ -0,0 +1,3 @@
+# pip install -U fuxictr
+import fuxictr
+assert fuxictr.__version__ >= "2.3.2"
diff --git a/model_zoo/WuKong/run_expid.py b/model_zoo/WuKong/run_expid.py
new file mode 100644
index 0000000..effe553
--- /dev/null
+++ b/model_zoo/WuKong/run_expid.py
@@ -0,0 +1,87 @@
+# =========================================================================
+# Copyright (C) 2024. The FuxiCTR Library. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =========================================================================


+import os
+os.chdir(os.path.dirname(os.path.realpath(__file__)))
+import sys
+import logging
+import fuxictr_version
+from fuxictr import datasets
+from datetime import datetime
+from fuxictr.utils import load_config, set_logger, print_to_json, print_to_list
+from fuxictr.features import FeatureMap
+from fuxictr.pytorch.dataloaders import RankDataLoader
+from fuxictr.pytorch.torch_utils import seed_everything
+from fuxictr.preprocess import FeatureProcessor, build_dataset
+import src
+import gc
+import argparse
+import os
+from pathlib import Path


+if __name__ == '__main__':
+    ''' Usage: python run_expid.py --config {config_dir} --expid {experiment_id} --gpu {gpu_device_id}
+    '''
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--config', type=str, default='./config/', help='The config directory.')
+    parser.add_argument('--expid', type=str, default='WuKong_test', help='The experiment id to run.')
+    parser.add_argument('--gpu', type=int, default=-1, help='The gpu index, -1 for cpu')
+    args = vars(parser.parse_args())

+    experiment_id = args['expid']
+    params = load_config(args['config'], experiment_id)
+    params['gpu'] = args['gpu']
+    set_logger(params)
+    logging.info("Params: " + print_to_json(params))
+    seed_everything(seed=params['seed'])

+    data_dir = os.path.join(params['data_root'], params['dataset_id'])
+    feature_map_json = os.path.join(data_dir, "feature_map.json")
+    if params["data_format"] == "csv":
+        # Build feature_map and transform data
+        feature_encoder = FeatureProcessor(**params)
+        params["train_data"], params["valid_data"], params["test_data"] = \
+            build_dataset(feature_encoder, **params)
+    feature_map = FeatureMap(params['dataset_id'], data_dir)
+    feature_map.load(feature_map_json, params)
+    logging.info("Feature specs: " + print_to_json(feature_map.features))

+    model_class = getattr(src, params['model'])
+    model = model_class(feature_map, **params)
+    model.count_parameters() # print number of parameters used in model

+    train_gen, valid_gen = RankDataLoader(feature_map, stage='train', **params).make_iterator()
+    model.fit(train_gen, validation_data=valid_gen, **params)

+    logging.info('****** Validation evaluation ******')
+    valid_result = model.evaluate(valid_gen)
+    del train_gen, valid_gen
+    gc.collect()

+    test_result = {}
+    if params["test_data"]:
+        logging.info('******** Test evaluation ********')
+        test_gen = RankDataLoader(feature_map, stage='test', **params).make_iterator()
+        test_result = model.evaluate(test_gen)

+    result_filename = Path(args['config']).name.replace(".yaml", "") + '.csv'
+    with open(result_filename, 'a+') as fw:
+        fw.write(' {},[command] python {},[exp_id] {},[dataset_id] {},[train] {},[val] {},[test] {}\n' \
+            .format(datetime.now().strftime('%Y%m%d-%H%M%S'),
+                    ' '.join(sys.argv), experiment_id, params['dataset_id'],
+                    "N.A.", print_to_list(valid_result), print_to_list(test_result)))
diff --git a/model_zoo/WuKong/src/WuKong.py b/model_zoo/WuKong/src/WuKong.py
new file mode 100644
index 0000000..ad42bbc
--- /dev/null
+++ b/model_zoo/WuKong/src/WuKong.py
@@ -0,0 +1,155 @@
+# =========================================================================
+# Copyright (C) 2024. XiaoLongtao. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =========================================================================
+""" This model implements the paper: Zhang et al., Wukong: Towards a Scaling Law for
+    Large-Scale Recommendation, arXiv 2024.
+    [PDF] https://arxiv.org/abs/2403.02545
+"""

+import torch
+from torch import nn
+import torch.nn.functional as F
+import numpy as np
+from fuxictr.pytorch.models import BaseModel
+from fuxictr.pytorch.layers import FeatureEmbedding, MLP_Block


+class WuKong(BaseModel):
+    """
+    The WuKong model class, which implements a factorization machine-based architecture built from stacked WuKong layers.

+    Args:
+        feature_map: A FeatureMap instance used to store feature specs (e.g., vocab_size).
+        model_id: Equivalent to model class name by default, which is used in config to determine
+            which model to call.
+        gpu: gpu device used to load model. -1 means cpu (default=-1).
+        learning_rate: learning rate for training (default=1e-3).
+        embedding_dim: embedding dimension of features (default=64).
+        num_layers: number of WuKong layers (default=3).
+        compression_dim: dimension of compressed features in LCB (default=40).
+        mlp_hidden_units: hidden units of MLP on top of WuKong (default=[32,32]).
+        fmb_units: hidden units of FMB (default=[32,32]).
+        fmb_dim: dimension of FMB output (default=40).
+        project_dim: dimension of projection matrix in FMB (default=8).
+        dropout_rate: dropout rate used in LCB (default=0.2).
+        embedding_regularizer: regularization term used for embedding parameters (default=0).
+        net_regularizer: regularization term used for network parameters (default=0).
+    """
+    def __init__(self,
+                 feature_map,
+                 model_id="WuKong",
+                 gpu=-1,
+                 learning_rate=1e-3,
+                 embedding_dim=64,
+                 num_layers=3,
+                 compression_dim=40,
+                 mlp_hidden_units=[32,32],
+                 fmb_units=[32,32],
+                 fmb_dim=40,
+                 project_dim=8,
+                 dropout_rate=0.2,
+                 embedding_regularizer=None,
+                 net_regularizer=None,
+                 **kwargs):
+        super(WuKong, self).__init__(feature_map,
+                                     model_id=model_id,
+                                     gpu=gpu,
+                                     embedding_regularizer=embedding_regularizer,
+                                     net_regularizer=net_regularizer,
+                                     **kwargs)
+        self.feature_map = feature_map
+        self.embedding_dim = embedding_dim
+        self.embedding_layer = FeatureEmbedding(feature_map, embedding_dim)
+        self.interaction_layers = nn.ModuleList([
+            WuKongLayer(feature_map.num_fields, embedding_dim, project_dim, fmb_units, fmb_dim, compression_dim, dropout_rate) for _ in range(num_layers)
+        ])
+        self.final_mlp = MLP_Block(input_dim=feature_map.num_fields*embedding_dim,
+                                   output_dim=1,
+                                   hidden_units=mlp_hidden_units,
+                                   hidden_activations='relu',
+                                   output_activation=None)
+        self.compile(kwargs["optimizer"], kwargs["loss"], learning_rate)
+        self.reset_parameters()
+        self.model_to_device()

+    def forward(self, inputs):
+        X = self.get_inputs(inputs)
+        feature_emb = self.embedding_layer(X)  # (batch_size, num_fields, embedding_dim)
+        for layer in self.interaction_layers:
+            feature_emb = layer(feature_emb)  # each layer returns a flattened (batch_size, num_fields * embedding_dim) tensor
+        y_pred = self.final_mlp(feature_emb)
+        y_pred = self.output_activation(y_pred)
+        return_dict = {"y_pred": y_pred}
+        return return_dict

+class FactorizationMachineBlock(nn.Module):
+    def __init__(self, num_features=14, embedding_dim=16, project_dim=8):
+        super(FactorizationMachineBlock, self).__init__()
+        self.embedding_dim = embedding_dim
+        self.project_dim = project_dim
+        self.num_features = num_features
+        self.projection_matrix = nn.Parameter(torch.randn(self.num_features, self.project_dim))

+    def forward(self, x):
+        batch_size = x.size(0)
+        x_fm = x.view(batch_size, self.num_features, self.embedding_dim)  # (B, F, D)
+        projected = torch.matmul(x_fm.transpose(1, 2), self.projection_matrix)  # (B, D, F) x (F, k) -> (B, D, k)
+        fm_matrix = torch.matmul(x_fm, projected)  # (B, F, D) x (B, D, k) -> (B, F, k) compressed pairwise interactions
+        return fm_matrix.view(batch_size, -1)  # (B, F*k)

+class FMB(nn.Module):
+    def __init__(self, num_features=14, embedding_dim=16, fmb_units=[32,32], fmb_dim=40, project_dim=8):
+        super(FMB, self).__init__()
+        self.fm_block = FactorizationMachineBlock(num_features, embedding_dim, project_dim)
+        self.layer_norm = nn.LayerNorm(num_features * project_dim)
+        model_layers = [nn.Linear(num_features * project_dim, fmb_units[0]), nn.ReLU()]
+        for i in range(1, len(fmb_units)):
+            model_layers.append(nn.Linear(fmb_units[i-1], fmb_units[i]))
+            model_layers.append(nn.ReLU())
+        model_layers.append(nn.Linear(fmb_units[-1], fmb_dim))
+        self.mlp = nn.Sequential(*model_layers)

+    def forward(self, x):
+        y = self.fm_block(x)
+        y = self.layer_norm(y)
+        y = self.mlp(y)
+        y = F.relu(y)
+        return y

+# Linear Compression Block (LCB)
+class LinearCompressionBlock(nn.Module):
+    def __init__(self, num_features=14, embedding_dim=16, compressed_dim=8, dropout_rate=0.2):
+        super(LinearCompressionBlock, self).__init__()
+        self.linear = nn.Linear(num_features * embedding_dim, compressed_dim)
+        self.dropout = nn.Dropout(p=dropout_rate)
+    def forward(self, x):
+        return self.dropout(self.linear(x.view(x.size(0), -1)))

+# WuKong Layer
+class WuKongLayer(nn.Module):
+    def __init__(self, num_features=14, embedding_dim=16, project_dim=4, fmb_units=[40,40,40], fmb_dim=40, compressed_dim=40, dropout_rate=0.2):
+        super(WuKongLayer, self).__init__()
+        self.fmb = FMB(num_features, embedding_dim, fmb_units, fmb_dim, project_dim)
+        self.lcb = LinearCompressionBlock(num_features, embedding_dim, compressed_dim, dropout_rate)
+        self.layer_norm = nn.LayerNorm(num_features * embedding_dim)
+        self.transform = nn.Linear(fmb_dim + compressed_dim, num_features * embedding_dim)

+    def forward(self, x):
+        fmb_out = self.fmb(x)
+        lcb_out = self.lcb(x)
+        concat_out = torch.cat([fmb_out, lcb_out], dim=1)
+        concat_out = self.transform(concat_out)
+        add_norm_out = self.layer_norm(concat_out + x.view(x.size(0), -1))
+        return add_norm_out
\ No newline at end of file
diff --git a/model_zoo/WuKong/src/__init__.py b/model_zoo/WuKong/src/__init__.py
new file mode 100644
index 0000000..a153b43
--- /dev/null
+++ b/model_zoo/WuKong/src/__init__.py
@@ -0,0 +1 @@
+from .WuKong import *
\ No newline at end of file
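
Not part of the patch: a minimal sketch of a local shape check for the new layer, assuming fuxictr>=2.3.2 is installed and the snippet is run from model_zoo/WuKong/ so that src/WuKong.py imports cleanly. It illustrates that a WuKongLayer maps a (batch, fields, dim) embedding tensor to a flattened (batch, fields * dim) tensor via the residual + LayerNorm, which is what allows the layers to be stacked and fed into the final MLP.

import torch
from src.WuKong import WuKongLayer

batch_size, num_fields, embedding_dim = 4, 14, 16
layer = WuKongLayer(num_features=num_fields, embedding_dim=embedding_dim,
                    project_dim=4, fmb_units=[40, 40], fmb_dim=40,
                    compressed_dim=40, dropout_rate=0.2)
x = torch.randn(batch_size, num_fields, embedding_dim)  # mock feature embeddings
out = layer(x)
# The residual connection keeps the flattened embedding shape across layers.
assert out.shape == (batch_size, num_fields * embedding_dim)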