diff --git a/benchmarks/benchmarks/multigpu/bench_multigpu_rgcn.py b/benchmarks/benchmarks/multigpu/bench_multigpu_rgcn.py index 3dbc9e4c02c2..ec66a2d4f114 100644 --- a/benchmarks/benchmarks/multigpu/bench_multigpu_rgcn.py +++ b/benchmarks/benchmarks/multigpu/bench_multigpu_rgcn.py @@ -48,7 +48,6 @@ def __init__( num_hidden_layers=1, dropout=0, use_self_loop=False, - low_mem=True, layer_norm=False, ): super(EntityClassify, self).__init__() @@ -61,7 +60,6 @@ def __init__( self.num_hidden_layers = num_hidden_layers self.dropout = dropout self.use_self_loop = use_self_loop - self.low_mem = low_mem self.layer_norm = layer_norm self.layers = nn.ModuleList() @@ -75,7 +73,6 @@ def __init__( self.num_bases, activation=F.relu, self_loop=self.use_self_loop, - low_mem=self.low_mem, dropout=self.dropout, layer_norm=layer_norm, ) @@ -91,7 +88,6 @@ def __init__( self.num_bases, activation=F.relu, self_loop=self.use_self_loop, - low_mem=self.low_mem, dropout=self.dropout, layer_norm=layer_norm, ) @@ -106,7 +102,6 @@ def __init__( self.num_bases, activation=None, self_loop=self.use_self_loop, - low_mem=self.low_mem, layer_norm=layer_norm, ) ) @@ -236,7 +231,6 @@ def run(proc_id, n_gpus, n_cpus, args, devices, dataset, split, queue=None): num_hidden_layers=args.n_layers - 2, dropout=args.dropout, use_self_loop=args.use_self_loop, - low_mem=args.low_mem, layer_norm=args.layer_norm, ) @@ -373,14 +367,12 @@ def run(proc_id, n_gpus, n_cpus, args, devices, dataset, split, queue=None): @utils.skip_if_not_4gpu() @utils.benchmark("time", timeout=600) @utils.parametrize("data", ["am", "ogbn-mag"]) -@utils.parametrize("low_mem", [True, False]) @utils.parametrize("dgl_sparse", [True, False]) -def track_time(data, low_mem, dgl_sparse): +def track_time(data, dgl_sparse): # load graph data dataset = utils.process_data(data) args = config() devices = [0, 1, 2, 3] - args.low_mem = low_mem args.dgl_sparse = dgl_sparse args.dataset = dataset ogb_dataset = False @@ -572,49 +564,8 @@ def config(): node_feats=False, num_workers=0, dgl_sparse=False, - low_mem=False, ) - # parser.add_argument("--dropout", type=float, default=0, - # help="dropout probability") - # parser.add_argument("--n-hidden", type=int, default=16, - # help="number of hidden units") - # parser.add_argument("--gpu", type=str, default='0', - # help="gpu") - # parser.add_argument("--lr", type=float, default=1e-2, - # help="learning rate") - # parser.add_argument("--sparse-lr", type=float, default=2e-2, - # help="sparse embedding learning rate") - # parser.add_argument("--n-bases", type=int, default=-1, - # help="number of filter weight matrices, default: -1 [use all]") - # parser.add_argument("--n-layers", type=int, default=2, - # help="number of propagation rounds") - # parser.add_argument("-e", "--n-epochs", type=int, default=50, - # help="number of training epochs") - # parser.add_argument("-d", "--dataset", type=str, required=True, - # help="dataset to use") - # parser.add_argument("--l2norm", type=float, default=0, - # help="l2 norm coef") - # parser.add_argument("--fanout", type=str, default="4, 4", - # help="Fan-out of neighbor sampling.") - # parser.add_argument("--use-self-loop", default=False, action='store_true', - # help="include self feature as a special relation") - # fp = parser.add_mutually_exclusive_group(required=False) - # parser.add_argument("--batch-size", type=int, default=100, - # help="Mini-batch size. ") - # parser.add_argument("--eval-batch-size", type=int, default=32, - # help="Mini-batch size. ") - # parser.add_argument("--num-workers", type=int, default=0, - # help="Number of workers for dataloader.") - # parser.add_argument("--low-mem", default=False, action='store_true', - # help="Whether use low mem RelGraphCov") - # parser.add_argument("--dgl-sparse", default=False, action='store_true', - # help='Use sparse embedding for node embeddings.') - # parser.add_argument('--node-feats', default=False, action='store_true', - # help='Whether use node features') - # parser.add_argument('--layer-norm', default=False, action='store_true', - # help='Use layer norm') - # parser.set_defaults(validation=True) - # args = parser.parse_args() + return args diff --git a/benchmarks/benchmarks/utils.py b/benchmarks/benchmarks/utils.py index 7e20608869e7..b4e8159db155 100644 --- a/benchmarks/benchmarks/utils.py +++ b/benchmarks/benchmarks/utils.py @@ -534,7 +534,7 @@ def skip_if_not_4gpu(): """skip if DGL_BENCH_DEVICE is gpu""" def _wrapper(func): - if GPU_COUNT != 4: + if GPU_COUNT < 4: # skip if not enabled print("Skip {}".format(func.__name__)) func.benchmark_name = "skip_" + func.__name__ diff --git a/benchmarks/scripts/build_dgl_asv.sh b/benchmarks/scripts/build_dgl_asv.sh index 47162dcf0fb1..28fb008c2725 100644 --- a/benchmarks/scripts/build_dgl_asv.sh +++ b/benchmarks/scripts/build_dgl_asv.sh @@ -10,7 +10,7 @@ pip install -r /asv/torch_gpu_pip.txt # build CMAKE_VARS="-DUSE_OPENMP=ON -DBUILD_TORCH=ON -DBUILD_SPARSE=ON -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda" if [[ $DEVICE == "gpu" ]]; then - CMAKE_VARS="-DUSE_CUDA=ON $CMAKE_VARS" + CMAKE_VARS="-DUSE_CUDA=ON -DUSE_NCCL=ON $CMAKE_VARS" fi arch=`uname -m` if [[ $arch == *"x86"* ]]; then diff --git a/benchmarks/scripts/publish.sh b/benchmarks/scripts/publish.sh index 833df473b4ea..484208a9c8cb 100644 --- a/benchmarks/scripts/publish.sh +++ b/benchmarks/scripts/publish.sh @@ -26,7 +26,7 @@ else fi WS_ROOT=/asv/dgl -docker pull public.ecr.aws/s1o7b3d9/benchmark_test:cu116 +docker pull public.ecr.aws/s1o7b3d9/benchmark_test:cu116_v230110 if [ -z "$DGL_REG_CONF" ]; then DOCKER_ENV_OPT="$DOCKER_ENV_OPT" else @@ -56,14 +56,14 @@ if [[ $DEVICE == "cpu" ]]; then $DOCKER_MOUNT_OPT \ $DOCKER_ENV_OPT \ --shm-size="16g" \ - --hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmark_test:cu116 /bin/bash + --hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmark_test:cu116_v230110 /bin/bash else docker run --name dgl-reg \ --rm --gpus all \ $DOCKER_MOUNT_OPT \ $DOCKER_ENV_OPT \ --shm-size="16g" \ - --hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmark_test:cu116 /bin/bash + --hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmark_test:cu116_v230110 /bin/bash fi pwd diff --git a/docker/Dockerfile.ci_benchmark b/docker/Dockerfile.ci_benchmark index af4af27a080e..1b36808f844d 100644 --- a/docker/Dockerfile.ci_benchmark +++ b/docker/Dockerfile.ci_benchmark @@ -27,5 +27,4 @@ ENV CPLUS_INCLUDE_PATH=/usr/local/cuda/include:${CPLUS_INCLUDE_PATH} ENV C_INCLUDE_PATH=/usr/local/cuda/include:${C_INCLUDE_PATH} ENV LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LIBRARY_PATH} ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH} -ENV CUDA_VISIBLE_DEVICES=0 ENV TF_FORCE_GPU_ALLOW_GROWTH=true \ No newline at end of file