change convert_partition.py
Ubuntu committed Sep 6, 2024
Merge commit a5e478b (parents d03a323 and f91e8c2)
Showing 141 changed files with 5,323 additions and 4,209 deletions.
10 changes: 0 additions & 10 deletions CMakeLists.txt
@@ -20,7 +20,6 @@ dgl_option(BUILD_TYPE "Type of the build: dev, dogfood or release" "dev")
message(STATUS "Build for ${BUILD_TYPE}")

dgl_option(USE_CUDA "Build with CUDA" OFF)
- dgl_option(USE_LIBURING "Build with liburing" ON)
dgl_option(TORCH_PYTHON_INTERPS "Python interpreter for building sub-components" python3)

# Conda build related options.
@@ -512,10 +511,6 @@ if(BUILD_GRAPHBOLT)
string(REPLACE ";" "\\;" CUDA_ARCHITECTURES_ESCAPED "${CUDA_ARCHITECTURES}")
file(TO_NATIVE_PATH ${CMAKE_CURRENT_BINARY_DIR} BINDIR)
file(TO_NATIVE_PATH ${CMAKE_COMMAND} CMAKE_CMD)
- if(USE_CUDA)
- get_target_property(GPU_CACHE_INCLUDE_DIRS gpu_cache INCLUDE_DIRECTORIES)
- endif(USE_CUDA)
- string(REPLACE ";" "\\;" GPU_CACHE_INCLUDE_DIRS_ESCAPED "${GPU_CACHE_INCLUDE_DIRS}")
if(MSVC)
file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/graphbolt/build.bat BUILD_SCRIPT)
add_custom_target(
@@ -526,7 +521,6 @@ if(BUILD_GRAPHBOLT)
CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}
USE_CUDA=${USE_CUDA}
BINDIR=${BINDIR}
- GPU_CACHE_INCLUDE_DIRS="${GPU_CACHE_INCLUDE_DIRS_ESCAPED}"
CFLAGS=${CMAKE_C_FLAGS}
CXXFLAGS=${CMAKE_CXX_FLAGS}
CUDAARCHS="${CUDA_ARCHITECTURES_ESCAPED}"
@@ -545,7 +539,6 @@ if(BUILD_GRAPHBOLT)
USE_CUDA=${USE_CUDA}
USE_LIBURING=${USE_LIBURING}
BINDIR=${CMAKE_CURRENT_BINARY_DIR}
- GPU_CACHE_INCLUDE_DIRS="${GPU_CACHE_INCLUDE_DIRS_ESCAPED}"
CFLAGS=${CMAKE_C_FLAGS}
CXXFLAGS=${CMAKE_CXX_FLAGS}
CUDAARCHS="${CUDA_ARCHITECTURES_ESCAPED}"
@@ -554,7 +547,4 @@ if(BUILD_GRAPHBOLT)
DEPENDS ${BUILD_SCRIPT}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/graphbolt)
endif(MSVC)
- if(USE_CUDA)
- add_dependencies(graphbolt gpu_cache)
- endif(USE_CUDA)
endif(BUILD_GRAPHBOLT)
2 changes: 1 addition & 1 deletion Jenkinsfile
@@ -162,7 +162,7 @@ def is_authorized(name) {
'nv-dlasalle', 'yaox12', 'chang-l', 'Kh4L', 'VibhuJawa', 'kkranen',
'TristonC', 'mfbalin',
'bgawrych', 'itaraban', 'daniil-sizov', 'anko-intel', 'Kacper-Pietkun',
- 'hankaj', 'agrabows', 'DominikaJedynak', 'RafLit',
+ 'hankaj', 'agrabows', 'DominikaJedynak', 'RafLit', 'CfromBU',
// Emeritus:
'VoVAllen',
]
2 changes: 1 addition & 1 deletion conda/dgl/meta.yaml
@@ -1,6 +1,6 @@
package:
name: dgl{{ environ.get('DGL_PACKAGE_SUFFIX', '') }}
- version: 2.4{{ environ.get('DGL_VERSION_SUFFIX', '') }}
+ version: 2.5{{ environ.get('DGL_VERSION_SUFFIX', '') }}

source:
git_rev: {{ environ.get('DGL_RELEASE_BRANCH', 'master') }}
2 changes: 0 additions & 2 deletions docs/source/api/python/dgl.dataloading.rst
@@ -26,8 +26,6 @@ DataLoaders

DataLoader
GraphDataLoader
- DistNodeDataLoader
- DistEdgeDataLoader

.. _api-dataloading-neighbor-sampling:

4 changes: 4 additions & 0 deletions docs/source/api/python/dgl.distributed.rst
@@ -70,6 +70,10 @@ Distributed DataLoader

.. autoclass:: DistDataLoader

+ .. autoclass:: DistNodeDataLoader

+ .. autoclass:: DistEdgeDataLoader

.. _api-distributed-sampling-ops:
Distributed Graph Sampling Operators
```````````````````````````````````````
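The pair of doc edits above records the loaders' new home: the classes are now documented (and exposed) under dgl.distributed. A one-line migration sketch; the from-import form is an assumption based on the autoclass entries, and only the module path changes:

```python
# Previously documented under dgl.dataloading (entries removed above):
# from dgl.dataloading import DistNodeDataLoader, DistEdgeDataLoader

# Now documented under dgl.distributed:
from dgl.distributed import DistNodeDataLoader, DistEdgeDataLoader
```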
5 changes: 4 additions & 1 deletion docs/source/api/python/dgl.graphbolt.rst
@@ -184,6 +184,8 @@ Utilities
:toctree: ../../generated/
:nosignatures:

+ cpu_cached_feature
+ gpu_cached_feature
fused_csc_sampling_graph
load_from_shared_memory
from_dglgraph
@@ -193,9 +195,10 @@ Utilities
seed
index_select
expand_indptr
+ indptr_edge_ids
add_reverse_edges
exclude_seed_edges
compact_csc_format
unique_and_compact
unique_and_compact_csc_formats

numpy_save_aligned
5 changes: 4 additions & 1 deletion docs/source/conf.py
@@ -81,7 +81,10 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
- exclude_patterns = []
+ exclude_patterns = [
+ "tutorials/**/*.ipynb",
+ "tutorials/**/*.py",
+ ]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None
6 changes: 3 additions & 3 deletions docs/source/guide/distributed-apis.rst
@@ -275,14 +275,14 @@ difference is that users need to use :func:`dgl.distributed.sample_neighbors` an
The high-level sampling APIs (:class:`~dgl.dataloading.NodeDataLoader` and
:class:`~dgl.dataloading.EdgeDataLoader`) have distributed counterparts
- (:class:`~dgl.dataloading.DistNodeDataLoader` and
- :class:`~dgl.dataloading.DistEdgeDataLoader`). The code is exactly the same as
+ (:class:`~dgl.distributed.DistNodeDataLoader` and
+ :class:`~dgl.distributed.DistEdgeDataLoader`). The code is exactly the same as
single-process sampling otherwise.

.. code:: python
sampler = dgl.dataloading.MultiLayerNeighborSampler([10, 25])
- dataloader = dgl.sampling.DistNodeDataLoader(g, train_nid, sampler,
+ dataloader = dgl.distributed.DistNodeDataLoader(g, train_nid, sampler,
batch_size=batch_size, shuffle=True)
for batch in dataloader:
...
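For completeness, a minimal sketch of the edge-side counterpart under the corrected path, assuming an initialized dgl.distributed.DistGraph g and a tensor of training edge IDs train_eid (both hypothetical names here); the batch structure follows the node example above:

```python
import dgl
import dgl.distributed

# Two-layer neighbor sampling, mirroring the node example above.
sampler = dgl.dataloading.MultiLayerNeighborSampler([10, 25])
# DistEdgeDataLoader lives in dgl.distributed, like DistNodeDataLoader.
dataloader = dgl.distributed.DistEdgeDataLoader(
    g, train_eid, sampler, batch_size=1024, shuffle=True
)
for batch in dataloader:
    ...
```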
5 changes: 3 additions & 2 deletions examples/distributed/graphsage/node_classification.py
@@ -3,6 +3,7 @@
import time

import dgl
+ import dgl.distributed
import dgl.nn.pytorch as dglnn
import numpy as np
import torch as th
@@ -109,7 +110,7 @@ def inference(self, g, x, batch_size, device):
# `-1` indicates all inbound edges will be included, namely, full
# neighbor sampling.
sampler = dgl.dataloading.NeighborSampler([-1])
- dataloader = dgl.dataloading.DistNodeDataLoader(
+ dataloader = dgl.distributed.DistNodeDataLoader(
g,
nodes,
sampler,
@@ -212,7 +213,7 @@ def run(args, device, data):
sampler = dgl.dataloading.NeighborSampler(
[int(fanout) for fanout in args.fan_out.split(",")]
)
- dataloader = dgl.dataloading.DistNodeDataLoader(
+ dataloader = dgl.distributed.DistNodeDataLoader(
g,
train_nid,
sampler,
@@ -3,6 +3,7 @@
from contextlib import contextmanager

import dgl
+ import dgl.distributed
import dgl.function as fn
import dgl.nn.pytorch as dglnn

@@ -79,7 +80,7 @@ def inference(self, g, x, batch_size, device):
# Create sampler
sampler = dgl.dataloading.NeighborSampler([-1])
# Create dataloader
- dataloader = dgl.dataloading.DistNodeDataLoader(
+ dataloader = dgl.distributed.DistNodeDataLoader(
g,
nodes,
sampler,
@@ -203,7 +204,7 @@ def run(args, device, data):
# Create dataloader
exclude = "reverse_id" if args.remove_edge else None
reverse_eids = th.arange(g.num_edges()) if args.remove_edge else None
- dataloader = dgl.dataloading.DistEdgeDataLoader(
+ dataloader = dgl.distributed.DistEdgeDataLoader(
g,
train_eids,
sampler,
8 changes: 5 additions & 3 deletions examples/distributed/rgcn/node_classification.py
@@ -6,6 +6,7 @@
* l2norm applied to all weights
* remove nodes that won't be touched
"""

import argparse
import gc, os
import itertools
@@ -18,6 +19,7 @@
from functools import partial

import dgl
+ import dgl.distributed
import torch as th
import torch.multiprocessing as mp
import torch.nn as nn
@@ -459,7 +461,7 @@ def run(args, device, data):
val_fanouts = [int(fanout) for fanout in args.validation_fanout.split(",")]

sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts)
- dataloader = dgl.dataloading.DistNodeDataLoader(
+ dataloader = dgl.distributed.DistNodeDataLoader(
g,
{"paper": train_nid},
sampler,
@@ -469,7 +471,7 @@
)

valid_sampler = dgl.dataloading.MultiLayerNeighborSampler(val_fanouts)
- valid_dataloader = dgl.dataloading.DistNodeDataLoader(
+ valid_dataloader = dgl.distributed.DistNodeDataLoader(
g,
{"paper": val_nid},
valid_sampler,
@@ -479,7 +481,7 @@
)

test_sampler = dgl.dataloading.MultiLayerNeighborSampler(val_fanouts)
- test_dataloader = dgl.dataloading.DistNodeDataLoader(
+ test_dataloader = dgl.distributed.DistNodeDataLoader(
g,
{"paper": test_nid},
test_sampler,
30 changes: 15 additions & 15 deletions examples/graphbolt/disk_based_feature/node_classification.py
@@ -115,7 +115,10 @@ def create_dataloader(
else {}
)
datapipe = getattr(datapipe, args.sample_mode)(
- graph, fanout if job != "infer" else [-1], **kwargs
+ graph,
+ fanout if job != "infer" else [-1],
+ overlap_fetch=args.overlap_graph_fetch,
+ **kwargs,
)
# Copy the data to the specified device.
if args.feature_device != "cpu":
@@ -130,11 +133,7 @@
if args.feature_device == "cpu":
datapipe = datapipe.copy_to(device=device)
# Create and return a DataLoader to handle data loading.
- return gb.DataLoader(
- datapipe,
- num_workers=args.num_workers,
- overlap_graph_fetch=args.overlap_graph_fetch,
- )
+ return gb.DataLoader(datapipe, num_workers=args.num_workers)


def train_step(minibatch, optimizer, model, loss_fn):
@@ -337,9 +336,10 @@ def parse_args():
"ogbn-arxiv",
"ogbn-products",
"ogbn-papers100M",
"reddit",
"yelp",
"flickr",
"igb-hom-tiny",
"igb-hom-small",
"igb-hom-medium",
"igb-hom-large",
],
)
parser.add_argument("--root", type=str, default="datasets")
@@ -377,13 +377,13 @@ def parse_args():
"--cpu-cache-size-in-gigabytes",
type=float,
default=0,
help="The capacity of the CPU cache, the number of features to store.",
help="The capacity of the CPU cache in GiB.",
)
parser.add_argument(
"--gpu-cache-size-in-gigabytes",
type=float,
default=0,
help="The capacity of the GPU cache, the number of features to store.",
help="The capacity of the GPU cache in GiB.",
)
parser.add_argument("--early-stopping-patience", type=int, default=25)
parser.add_argument(
@@ -458,14 +458,14 @@ def main():
if args.cpu_cache_size_in_gigabytes > 0 and isinstance(
features[("node", None, "feat")], gb.DiskBasedFeature
):
features[("node", None, "feat")] = gb.CPUCachedFeature(
features[("node", None, "feat")] = gb.cpu_cached_feature(
features[("node", None, "feat")],
int(args.cpu_cache_size_in_gigabytes * 1024 * 1024 * 1024),
args.cpu_feature_cache_policy,
args.feature_device == "pinned",
)
cpu_cached_feature = features[("node", None, "feat")]
- cpu_cache_miss_rate_fn = lambda: cpu_cached_feature._feature.miss_rate
+ cpu_cache_miss_rate_fn = lambda: cpu_cached_feature.miss_rate
else:
cpu_cache_miss_rate_fn = lambda: 1

@@ -475,12 +475,12 @@
host-to-device copy operations for this feature.
"""
if args.gpu_cache_size_in_gigabytes > 0 and args.feature_device != "cuda":
features[("node", None, "feat")] = gb.GPUCachedFeature(
features[("node", None, "feat")] = gb.gpu_cached_feature(
features[("node", None, "feat")],
int(args.gpu_cache_size_in_gigabytes * 1024 * 1024 * 1024),
)
gpu_cached_feature = features[("node", None, "feat")]
- gpu_cache_miss_rate_fn = lambda: gpu_cached_feature._feature.miss_rate
+ gpu_cache_miss_rate_fn = lambda: gpu_cached_feature.miss_rate
else:
gpu_cache_miss_rate_fn = lambda: 1

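Taken together, the cache changes in this file swap the CPUCachedFeature and GPUCachedFeature constructors for the cpu_cached_feature and gpu_cached_feature factories, and read miss_rate off the wrapper instead of the private _feature attribute. A hedged sketch of the new pattern, assuming features comes from a loaded GraphBolt dataset and cache sizes are given in GiB as the updated flags describe:

```python
import dgl.graphbolt as gb

feat_key = ("node", None, "feat")

# Layer a 4 GiB CPU cache over the (disk-based) feature. The policy and
# pin-memory arguments mirror this script's CLI flags; "sieve" is an
# assumed policy choice.
features[feat_key] = gb.cpu_cached_feature(
    features[feat_key],
    int(4 * 1024 * 1024 * 1024),
    "sieve",
    False,
)
cpu_cached = features[feat_key]

# Optionally layer a 1 GiB GPU cache on top.
features[feat_key] = gb.gpu_cached_feature(
    features[feat_key],
    int(1 * 1024 * 1024 * 1024),
)

# The miss rate is now exposed directly on the wrapper.
print(cpu_cached.miss_rate)
```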
10 changes: 7 additions & 3 deletions examples/graphbolt/link_prediction.py
@@ -182,7 +182,10 @@ def create_dataloader(args, graph, features, itemset, is_train=True):
# Initialize a neighbor sampler for sampling the neighborhoods of nodes.
############################################################################
datapipe = datapipe.sample_neighbor(
- graph, args.fanout if is_train else [-1]
+ graph,
+ args.fanout if is_train else [-1],
+ overlap_fetch=args.storage_device == "pinned",
+ asynchronous=args.storage_device != "cpu",
)

############################################################################
@@ -199,8 +202,9 @@
# the negative samples.
############################################################################
if is_train and args.exclude_edges:
- datapipe = datapipe.transform(
- partial(gb.exclude_seed_edges, include_reverse_edges=True)
+ datapipe = datapipe.exclude_seed_edges(
+ include_reverse_edges=True,
+ asynchronous=args.storage_device != "cpu",
)

############################################################################
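The exclusion change above promotes gb.exclude_seed_edges from a transform(partial(...)) call to a first-class datapipe stage with its own asynchronous flag. A minimal sketch of the resulting training pipeline, assuming a FusedCSCSamplingGraph graph and an edge ItemSet itemset as in create_dataloader; the fanouts and batch size are placeholders:

```python
import dgl.graphbolt as gb

datapipe = gb.ItemSampler(itemset, batch_size=512, shuffle=True)
# Neighbor sampling now takes the overlap/async options directly.
datapipe = datapipe.sample_neighbor(
    graph,
    [15, 10],
    overlap_fetch=False,  # True when the graph is in pinned memory
    asynchronous=False,   # True when the storage device is not "cpu"
)
# Seed-edge exclusion is a datapipe method rather than a transform() wrapper.
datapipe = datapipe.exclude_seed_edges(
    include_reverse_edges=True,
    asynchronous=False,
)
```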
24 changes: 16 additions & 8 deletions examples/graphbolt/node_classification.py
@@ -117,7 +117,10 @@ def create_dataloader(
# Initialize a neighbor sampler for sampling the neighborhoods of nodes.
############################################################################
datapipe = getattr(datapipe, args.sample_mode)(
- graph, fanout if job != "infer" else [-1]
+ graph,
+ fanout if job != "infer" else [-1],
+ overlap_fetch=args.storage_device == "pinned",
+ asynchronous=args.storage_device != "cpu",
)

############################################################################
@@ -156,11 +159,7 @@
# [Role]:
# Initialize a multi-process dataloader to load the data in parallel.
############################################################################
- dataloader = gb.DataLoader(
- datapipe,
- num_workers=num_workers,
- overlap_graph_fetch=args.storage_device == "pinned",
- )
+ dataloader = gb.DataLoader(datapipe, num_workers=num_workers)

# Return the fully-initialized DataLoader object.
return dataloader
@@ -364,9 +363,18 @@ def parse_args():
"--dataset",
type=str,
default="ogbn-products",
choices=["ogbn-arxiv", "ogbn-products", "ogbn-papers100M"],
choices=[
"ogbn-arxiv",
"ogbn-products",
"ogbn-papers100M",
"igb-hom-tiny",
"igb-hom-small",
"igb-hom-medium",
"igb-hom-large",
],
help="The dataset we can use for node classification example. Currently"
" ogbn-products, ogbn-arxiv, ogbn-papers100M datasets are supported.",
" ogbn-products, ogbn-arxiv, ogbn-papers100M and"
" igb-hom-[tiny|small|medium] datasets are supported.",
)
parser.add_argument(
"--mode",
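Putting the loader changes in this file together: overlap_fetch and asynchronous now ride on the sampling stage, and gb.DataLoader no longer accepts overlap_graph_fetch. A hedged end-to-end sketch, assuming graph, features, and itemset come from a loaded GraphBolt dataset and the graph is stored in pinned memory:

```python
import dgl.graphbolt as gb

storage_device = "pinned"
datapipe = gb.ItemSampler(itemset, batch_size=1024, shuffle=True)
datapipe = datapipe.sample_neighbor(
    graph,
    [10, 10, 10],
    overlap_fetch=storage_device == "pinned",
    asynchronous=storage_device != "cpu",
)
datapipe = datapipe.fetch_feature(features, node_feature_keys=["feat"])
datapipe = datapipe.copy_to(device="cuda")
# The overlap option has moved off the DataLoader entirely.
dataloader = gb.DataLoader(datapipe, num_workers=0)
```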