From 876bb978bb3d4458c2f7733f576debd67b522411 Mon Sep 17 00:00:00 2001
From: gyzhou2000 <gyzhou@bupt.edu.cn>
Date: Tue, 16 Jul 2024 12:10:11 +0000
Subject: [PATCH] update

---
 gammagl/mpops/torch_ext/cpu/bspmm_sum_cpu.cpp |  4 +-
 gammagl/ops/segment/cpu/segment_csr.cpp       |  2 +-
 .../complete_test/mp_cpu/bspmm_sum_cpu.py     | 67 +++++++++++++++++++
 setup.py                                      |  2 +-
 4 files changed, 71 insertions(+), 4 deletions(-)
 create mode 100644 profiler/mpops/complete_test/mp_cpu/bspmm_sum_cpu.py

diff --git a/gammagl/mpops/torch_ext/cpu/bspmm_sum_cpu.cpp b/gammagl/mpops/torch_ext/cpu/bspmm_sum_cpu.cpp
index 75710641..925fcc81 100644
--- a/gammagl/mpops/torch_ext/cpu/bspmm_sum_cpu.cpp
+++ b/gammagl/mpops/torch_ext/cpu/bspmm_sum_cpu.cpp
@@ -13,7 +13,7 @@ torch::Tensor bspmm_sum_cpu_forward(torch::Tensor &index, torch::Tensor &weight,
         index = index.contiguous();
     }
 
-    int num_nodes = x.size(0);
+    // int num_nodes = x.size(0);
     int heads = x.size(1);
     int out_channels = x.size(2);
 
@@ -62,7 +62,7 @@ std::tuple<torch::Tensor, torch::Tensor> bspmm_sum_cpu_backward(torch::Tensor &i
         index = index.contiguous();
     }
 
-    int num_nodes = grad.size(0);
+    // int num_nodes = grad.size(0);
     int heads = grad.size(1);
     int out_channels = grad.size(2);
 
diff --git a/gammagl/ops/segment/cpu/segment_csr.cpp b/gammagl/ops/segment/cpu/segment_csr.cpp
index 72ae5037..b92d7dc0 100644
--- a/gammagl/ops/segment/cpu/segment_csr.cpp
+++ b/gammagl/ops/segment/cpu/segment_csr.cpp
@@ -105,7 +105,7 @@ py::list segment_csr_cpu(
 
   //    auto indptr_info = getTensorInfo<int64_t>(indptr);
   //    auto stride = indptr_info.strides[indptr_info.dims - 1];
-  auto stride = 1;
+  // auto stride = 1;
   std::vector<int64_t> args(K);
 
   auto src_data = src.unchecked();
diff --git a/profiler/mpops/complete_test/mp_cpu/bspmm_sum_cpu.py b/profiler/mpops/complete_test/mp_cpu/bspmm_sum_cpu.py
new file mode 100644
index 00000000..bc700cc4
--- /dev/null
+++ b/profiler/mpops/complete_test/mp_cpu/bspmm_sum_cpu.py
@@ -0,0 +1,67 @@
+import os
+# Set before importing tensorlayerx (presumably read at import time): torch backend, GPUs hidden.
+os.environ['TL_BACKEND'] = 'torch'
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+import numpy as np
+import tensorlayerx as tlx
+from gammagl.mpops import *
+import time
+# Resolve <repo>/profiler/mpops/edge_index/ from this script's location rather than a hard-coded home directory.
+relative_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', 'edge_index', '')
+file_name = ['cora.npy', 'pubmed.npy', 'ogbn-arxiv.npy']
+embedding = [16, 64, 256]
+heads = [8, 16, 32, 64]
+num_iters = 100  # timed repetitions per configuration (renamed from `iter`, which shadowed the builtin)
+
+# For each graph, head count and embedding size: time bspmm, then the tlx.gather + unsorted_segment_sum path.
+with open('test_results.txt', 'w') as result_file:
+    for name in file_name:
+        path = relative_path + name
+        info = f"Loading data from {path}\n"
+        result_file.write(info)
+        print(info)
+
+        edge_index = np.load(path)
+
+        num_nodes = np.max(edge_index) + 1
+        src = tlx.convert_to_tensor(edge_index[0, :], tlx.int64)
+        dst = tlx.convert_to_tensor(edge_index[1, :], tlx.int64)
+        edge_index = tlx.convert_to_tensor(edge_index)
+
+        for head in heads:
+            # One all-ones weight per (edge, head); built via tlx because `torch` is never imported explicitly here.
+            weight = tlx.ones((edge_index.shape[1], head), dtype=tlx.float32)
+
+            for embedding_dim in embedding:
+                info = f"**********embedding_dim={embedding_dim}  head={head}**********\n"
+                result_file.write(info)
+                print(info)
+                x = tlx.convert_to_tensor(np.random.randn(num_nodes, head, embedding_dim), dtype=tlx.float32)
+
+                start = time.perf_counter()
+                for _ in range(num_iters):
+                    bspmm(edge_index, weight=weight, x=x, reduce='sum')
+                end = time.perf_counter()
+                info = "bspmm_sum:{:.3f}\n".format(end-start)
+                result_file.write(info)
+                print(info)
+
+                start = time.perf_counter()
+                for _ in range(num_iters):
+                    msg = tlx.gather(x, src)
+                    edge_weight = tlx.expand_dims(weight, -1)
+                    msg = msg * edge_weight
+                    unsorted_segment_sum(msg, dst, num_nodes)
+                end = time.perf_counter()
+                info = "segment_sum:{:.3f}\n".format(end-start)
+                result_file.write(info)
+                print(info)
+
+                # Closing banner for this configuration.
+                info = f"**********embedding_dim={embedding_dim}  head={head}**********\n"
+                result_file.write(info)
+                print(info)
+
+        info = f"Data tensors are on device: {x.device}\n"
+        result_file.write(info)
+        print(info)
diff --git a/setup.py b/setup.py
index cbdce38a..e1aba1a7 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
 # WITH_CUDA = True
 
 cuda_macro = ('COMPILE_WITH_CUDA', True)
-omp_macro = ('COMPILE_WITH_OMP', True)  # Note: OpenMP needs gcc>4.2.0
+omp_macro = ('COMPLIE_WITH_OMP', True)  # Note: OpenMP needs gcc>4.2.0. NOTE(review): 'COMPLIE' looks like a misspelling of 'COMPILE' -- confirm it matches the #ifdef name used in the C++ sources.
 compile_args = {
     'cxx': ['-fopenmp', '-std=c++17']
 }