From 876bb978bb3d4458c2f7733f576debd67b522411 Mon Sep 17 00:00:00 2001 From: gyzhou2000 Date: Tue, 16 Jul 2024 12:10:11 +0000 Subject: [PATCH] Comment out unused locals in CPU ops, add bspmm_sum CPU profiler script, rename OpenMP macro in setup.py --- gammagl/mpops/torch_ext/cpu/bspmm_sum_cpu.cpp | 4 +- gammagl/ops/segment/cpu/segment_csr.cpp | 2 +- .../complete_test/mp_cpu/bspmm_sum_cpu.py | 67 +++++++++++++++++++ setup.py | 2 +- 4 files changed, 71 insertions(+), 4 deletions(-) create mode 100644 profiler/mpops/complete_test/mp_cpu/bspmm_sum_cpu.py diff --git a/gammagl/mpops/torch_ext/cpu/bspmm_sum_cpu.cpp b/gammagl/mpops/torch_ext/cpu/bspmm_sum_cpu.cpp index 75710641..925fcc81 100644 --- a/gammagl/mpops/torch_ext/cpu/bspmm_sum_cpu.cpp +++ b/gammagl/mpops/torch_ext/cpu/bspmm_sum_cpu.cpp @@ -13,7 +13,7 @@ torch::Tensor bspmm_sum_cpu_forward(torch::Tensor &index, torch::Tensor &weight, index = index.contiguous(); } - int num_nodes = x.size(0); + // int num_nodes = x.size(0); int heads = x.size(1); int out_channels = x.size(2); @@ -62,7 +62,7 @@ std::tuple bspmm_sum_cpu_backward(torch::Tensor &i index = index.contiguous(); } - int num_nodes = grad.size(0); + // int num_nodes = grad.size(0); int heads = grad.size(1); int out_channels = grad.size(2); diff --git a/gammagl/ops/segment/cpu/segment_csr.cpp b/gammagl/ops/segment/cpu/segment_csr.cpp index 72ae5037..b92d7dc0 100644 --- a/gammagl/ops/segment/cpu/segment_csr.cpp +++ b/gammagl/ops/segment/cpu/segment_csr.cpp @@ -105,7 +105,7 @@ py::list segment_csr_cpu( // auto indptr_info = getTensorInfo(indptr); // auto stride = indptr_info.strides[indptr_info.dims - 1]; - auto stride = 1; + // auto stride = 1; std::vector args(K); auto src_data = src.unchecked(); diff --git a/profiler/mpops/complete_test/mp_cpu/bspmm_sum_cpu.py b/profiler/mpops/complete_test/mp_cpu/bspmm_sum_cpu.py new file mode 100644 index 00000000..bc700cc4 --- /dev/null +++ b/profiler/mpops/complete_test/mp_cpu/bspmm_sum_cpu.py @@ -0,0 +1,67 @@ +import os + +os.environ['TL_BACKEND'] = 'torch' +os.environ["CUDA_VISIBLE_DEVICES"] = "-1" +import numpy 
as np +import tensorlayerx as tlx +from gammagl.mpops import * +import time + +relative_path = '/home/zgy/review/zgy/GammaGL/profiler/mpops/edge_index/' +file_name = ['cora.npy', 'pubmed.npy', 'ogbn-arxiv.npy'] +embedding = [16, 64, 256] +heads = [8, 16, 32, 64] +iter = 100 + + +with open('test_results.txt', 'w') as result_file: + for name in file_name: + path = relative_path + name + info = f"Loading data from {path}\n" + result_file.write(info) + print(info) + + edge_index = np.load(path) + + num_nodes = np.max(edge_index) + 1 + src = tlx.convert_to_tensor(edge_index[0, :], tlx.int64) + dst = tlx.convert_to_tensor(edge_index[1, :], tlx.int64) + edge_index = tlx.convert_to_tensor(edge_index) + + for head in heads: + + weight = torch.ones((edge_index.shape[1], head), dtype=tlx.float32) + + for embedding_dim in embedding: + info = f"**********embedding_dim={embedding_dim} head={head}**********\n" + result_file.write(info) + print(info) + x = tlx.convert_to_tensor(np.random.randn(num_nodes, head, embedding_dim), dtype=tlx.float32) + + start = time.time() + for j in range(iter): + bspmm(edge_index, weight=weight, x=x, reduce='sum') + end = time.time() + info = "bspmm_sum:{:.3f}\n".format(end-start) + result_file.write(info) + print(info) + + start = time.time() + for j in range(iter): + msg = tlx.gather(x, src) + edge_weight = tlx.expand_dims(weight, -1) + msg = msg * edge_weight + unsorted_segment_sum(msg, dst, num_nodes) + end = time.time() + info = "segment_sum:{:.3f}\n".format(end-start) + result_file.write(info) + print(info) + + + info = f"**********embedding_dim={embedding_dim} head={head}**********\n" + result_file.write(info) + print(info) + + info = f"Data tensors are on device: {x.device}\n" + result_file.write(info) + print(info) diff --git a/setup.py b/setup.py index cbdce38a..e1aba1a7 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ # WITH_CUDA = True cuda_macro = ('COMPILE_WITH_CUDA', True) -omp_macro = ('COMPILE_WITH_OMP', True) # Note: OpenMP 
needs gcc>4.2.0 +omp_macro = ('COMPLIE_WITH_OMP', True) # Note: OpenMP needs gcc>4.2.0 compile_args = { 'cxx': ['-fopenmp', '-std=c++17'] }