change convert_partition.py
Ubuntu committed Sep 6, 2024
Merge commit a5e478b (parents d03a323 and f91e8c2)
Showing 141 changed files with 5,323 additions and 4,209 deletions.
10 changes: 0 additions & 10 deletions CMakeLists.txt
@@ -20,7 +20,6 @@ dgl_option(BUILD_TYPE "Type of the build: dev, dogfood or release" "dev")
message(STATUS "Build for ${BUILD_TYPE}")

dgl_option(USE_CUDA "Build with CUDA" OFF)
- dgl_option(USE_LIBURING "Build with liburing" ON)
dgl_option(TORCH_PYTHON_INTERPS "Python interpreter for building sub-components" python3)

# Conda build related options.
@@ -512,10 +511,6 @@ if(BUILD_GRAPHBOLT)
string(REPLACE ";" "\\;" CUDA_ARCHITECTURES_ESCAPED "${CUDA_ARCHITECTURES}")
file(TO_NATIVE_PATH ${CMAKE_CURRENT_BINARY_DIR} BINDIR)
file(TO_NATIVE_PATH ${CMAKE_COMMAND} CMAKE_CMD)
- if(USE_CUDA)
- get_target_property(GPU_CACHE_INCLUDE_DIRS gpu_cache INCLUDE_DIRECTORIES)
- endif(USE_CUDA)
- string(REPLACE ";" "\\;" GPU_CACHE_INCLUDE_DIRS_ESCAPED "${GPU_CACHE_INCLUDE_DIRS}")
if(MSVC)
file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/graphbolt/build.bat BUILD_SCRIPT)
add_custom_target(
@@ -526,7 +521,6 @@ if(BUILD_GRAPHBOLT)
CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}
USE_CUDA=${USE_CUDA}
BINDIR=${BINDIR}
- GPU_CACHE_INCLUDE_DIRS="${GPU_CACHE_INCLUDE_DIRS_ESCAPED}"
CFLAGS=${CMAKE_C_FLAGS}
CXXFLAGS=${CMAKE_CXX_FLAGS}
CUDAARCHS="${CUDA_ARCHITECTURES_ESCAPED}"
@@ -545,7 +539,6 @@ if(BUILD_GRAPHBOLT)
USE_CUDA=${USE_CUDA}
USE_LIBURING=${USE_LIBURING}
BINDIR=${CMAKE_CURRENT_BINARY_DIR}
- GPU_CACHE_INCLUDE_DIRS="${GPU_CACHE_INCLUDE_DIRS_ESCAPED}"
CFLAGS=${CMAKE_C_FLAGS}
CXXFLAGS=${CMAKE_CXX_FLAGS}
CUDAARCHS="${CUDA_ARCHITECTURES_ESCAPED}"
@@ -554,7 +547,4 @@ if(BUILD_GRAPHBOLT)
DEPENDS ${BUILD_SCRIPT}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/graphbolt)
endif(MSVC)
- if(USE_CUDA)
- add_dependencies(graphbolt gpu_cache)
- endif(USE_CUDA)
endif(BUILD_GRAPHBOLT)
2 changes: 1 addition & 1 deletion Jenkinsfile
@@ -162,7 +162,7 @@ def is_authorized(name) {
'nv-dlasalle', 'yaox12', 'chang-l', 'Kh4L', 'VibhuJawa', 'kkranen',
'TristonC', 'mfbalin',
'bgawrych', 'itaraban', 'daniil-sizov', 'anko-intel', 'Kacper-Pietkun',
- 'hankaj', 'agrabows', 'DominikaJedynak', 'RafLit',
+ 'hankaj', 'agrabows', 'DominikaJedynak', 'RafLit', 'CfromBU',
// Emeritus:
'VoVAllen',
]
2 changes: 1 addition & 1 deletion conda/dgl/meta.yaml
@@ -1,6 +1,6 @@
package:
name: dgl{{ environ.get('DGL_PACKAGE_SUFFIX', '') }}
- version: 2.4{{ environ.get('DGL_VERSION_SUFFIX', '') }}
+ version: 2.5{{ environ.get('DGL_VERSION_SUFFIX', '') }}

source:
git_rev: {{ environ.get('DGL_RELEASE_BRANCH', 'master') }}
2 changes: 0 additions & 2 deletions docs/source/api/python/dgl.dataloading.rst
@@ -26,8 +26,6 @@ DataLoaders

DataLoader
GraphDataLoader
- DistNodeDataLoader
- DistEdgeDataLoader

.. _api-dataloading-neighbor-sampling:

4 changes: 4 additions & 0 deletions docs/source/api/python/dgl.distributed.rst
@@ -70,6 +70,10 @@ Distributed DataLoader

.. autoclass:: DistDataLoader

+ .. autoclass:: DistNodeDataLoader

+ .. autoclass:: DistEdgeDataLoader

.. _api-distributed-sampling-ops:
Distributed Graph Sampling Operators
```````````````````````````````````````
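The pair of doc edits above records the loaders' new home: the classes are now documented (and exposed) under dgl.distributed. A one-line migration sketch; the from-import form is an assumption based on the autoclass entries, and only the module path changes:

```python
# Previously documented under dgl.dataloading (entries removed above):
# from dgl.dataloading import DistNodeDataLoader, DistEdgeDataLoader

# Now documented under dgl.distributed:
from dgl.distributed import DistNodeDataLoader, DistEdgeDataLoader
```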
5 changes: 4 additions & 1 deletion docs/source/api/python/dgl.graphbolt.rst
@@ -184,6 +184,8 @@ Utilities
:toctree: ../../generated/
:nosignatures:

+ cpu_cached_feature
+ gpu_cached_feature
fused_csc_sampling_graph
load_from_shared_memory
from_dglgraph
@@ -193,9 +195,10 @@ Utilities
seed
index_select
expand_indptr
+ indptr_edge_ids
add_reverse_edges
exclude_seed_edges
compact_csc_format
unique_and_compact
unique_and_compact_csc_formats

numpy_save_aligned
5 changes: 4 additions & 1 deletion docs/source/conf.py
@@ -81,7 +81,10 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
- exclude_patterns = []
+ exclude_patterns = [
+ "tutorials/**/*.ipynb",
+ "tutorials/**/*.py",
+ ]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None
6 changes: 3 additions & 3 deletions docs/source/guide/distributed-apis.rst
@@ -275,14 +275,14 @@ difference is that users need to use :func:`dgl.distributed.sample_neighbors` an
The high-level sampling APIs (:class:`~dgl.dataloading.NodeDataLoader` and
:class:`~dgl.dataloading.EdgeDataLoader`) have distributed counterparts
- (:class:`~dgl.dataloading.DistNodeDataLoader` and
- :class:`~dgl.dataloading.DistEdgeDataLoader`). The code is exactly the same as
+ (:class:`~dgl.distributed.DistNodeDataLoader` and
+ :class:`~dgl.distributed.DistEdgeDataLoader`). The code is exactly the same as
single-process sampling otherwise.

.. code:: python
sampler = dgl.dataloading.MultiLayerNeighborSampler([10, 25])
- dataloader = dgl.sampling.DistNodeDataLoader(g, train_nid, sampler,
+ dataloader = dgl.distributed.DistNodeDataLoader(g, train_nid, sampler,
batch_size=batch_size, shuffle=True)
for batch in dataloader:
...
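For completeness, a minimal sketch of the edge-side counterpart under the corrected path, assuming an initialized dgl.distributed.DistGraph g and a tensor of training edge IDs train_eid (both hypothetical names here); the batch structure follows the node example above:

```python
import dgl
import dgl.distributed

# Two-layer neighbor sampling, mirroring the node example above.
sampler = dgl.dataloading.MultiLayerNeighborSampler([10, 25])
# DistEdgeDataLoader lives in dgl.distributed, like DistNodeDataLoader.
dataloader = dgl.distributed.DistEdgeDataLoader(
    g, train_eid, sampler, batch_size=1024, shuffle=True
)
for batch in dataloader:
    ...
```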
5 changes: 3 additions & 2 deletions examples/distributed/graphsage/node_classification.py
@@ -3,6 +3,7 @@
import time

import dgl
+ import dgl.distributed
import dgl.nn.pytorch as dglnn
import numpy as np
import torch as th
@@ -109,7 +110,7 @@ def inference(self, g, x, batch_size, device):
# `-1` indicates all inbound edges will be included, namely, full
# neighbor sampling.
sampler = dgl.dataloading.NeighborSampler([-1])
- dataloader = dgl.dataloading.DistNodeDataLoader(
+ dataloader = dgl.distributed.DistNodeDataLoader(
g,
nodes,
sampler,
@@ -212,7 +213,7 @@ def run(args, device, data):
sampler = dgl.dataloading.NeighborSampler(
[int(fanout) for fanout in args.fan_out.split(",")]
)
- dataloader = dgl.dataloading.DistNodeDataLoader(
+ dataloader = dgl.distributed.DistNodeDataLoader(
g,
train_nid,
sampler,
@@ -3,6 +3,7 @@
from contextlib import contextmanager

import dgl
+ import dgl.distributed
import dgl.function as fn
import dgl.nn.pytorch as dglnn

@@ -79,7 +80,7 @@ def inference(self, g, x, batch_size, device):
# Create sampler
sampler = dgl.dataloading.NeighborSampler([-1])
# Create dataloader
- dataloader = dgl.dataloading.DistNodeDataLoader(
+ dataloader = dgl.distributed.DistNodeDataLoader(
g,
nodes,
sampler,
@@ -203,7 +204,7 @@ def run(args, device, data):
# Create dataloader
exclude = "reverse_id" if args.remove_edge else None
reverse_eids = th.arange(g.num_edges()) if args.remove_edge else None
- dataloader = dgl.dataloading.DistEdgeDataLoader(
+ dataloader = dgl.distributed.DistEdgeDataLoader(
g,
train_eids,
sampler,
8 changes: 5 additions & 3 deletions examples/distributed/rgcn/node_classification.py
@@ -6,6 +6,7 @@
* l2norm applied to all weights
* remove nodes that won't be touched
"""

import argparse
import gc, os
import itertools
@@ -18,6 +19,7 @@
from functools import partial

import dgl
+ import dgl.distributed
import torch as th
import torch.multiprocessing as mp
import torch.nn as nn
@@ -459,7 +461,7 @@ def run(args, device, data):
val_fanouts = [int(fanout) for fanout in args.validation_fanout.split(",")]

sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts)
- dataloader = dgl.dataloading.DistNodeDataLoader(
+ dataloader = dgl.distributed.DistNodeDataLoader(
g,
{"paper": train_nid},
sampler,
@@ -469,7 +471,7 @@
)

valid_sampler = dgl.dataloading.MultiLayerNeighborSampler(val_fanouts)
- valid_dataloader = dgl.dataloading.DistNodeDataLoader(
+ valid_dataloader = dgl.distributed.DistNodeDataLoader(
g,
{"paper": val_nid},
valid_sampler,
@@ -479,7 +481,7 @@
)

test_sampler = dgl.dataloading.MultiLayerNeighborSampler(val_fanouts)
- test_dataloader = dgl.dataloading.DistNodeDataLoader(
+ test_dataloader = dgl.distributed.DistNodeDataLoader(
g,
{"paper": test_nid},
test_sampler,
30 changes: 15 additions & 15 deletions examples/graphbolt/disk_based_feature/node_classification.py
@@ -115,7 +115,10 @@ def create_dataloader(
else {}
)
datapipe = getattr(datapipe, args.sample_mode)(
- graph, fanout if job != "infer" else [-1], **kwargs
+ graph,
+ fanout if job != "infer" else [-1],
+ overlap_fetch=args.overlap_graph_fetch,
+ **kwargs,
)
# Copy the data to the specified device.
if args.feature_device != "cpu":
@@ -130,11 +133,7 @@
if args.feature_device == "cpu":
datapipe = datapipe.copy_to(device=device)
# Create and return a DataLoader to handle data loading.
- return gb.DataLoader(
- datapipe,
- num_workers=args.num_workers,
- overlap_graph_fetch=args.overlap_graph_fetch,
- )
+ return gb.DataLoader(datapipe, num_workers=args.num_workers)


def train_step(minibatch, optimizer, model, loss_fn):
@@ -337,9 +336,10 @@ def parse_args():
"ogbn-arxiv",
"ogbn-products",
"ogbn-papers100M",
"reddit",
"yelp",
"flickr",
"igb-hom-tiny",
"igb-hom-small",
"igb-hom-medium",
"igb-hom-large",
],
)
parser.add_argument("--root", type=str, default="datasets")
@@ -377,13 +377,13 @@ def parse_args():
"--cpu-cache-size-in-gigabytes",
type=float,
default=0,
help="The capacity of the CPU cache, the number of features to store.",
help="The capacity of the CPU cache in GiB.",
)
parser.add_argument(
"--gpu-cache-size-in-gigabytes",
type=float,
default=0,
help="The capacity of the GPU cache, the number of features to store.",
help="The capacity of the GPU cache in GiB.",
)
parser.add_argument("--early-stopping-patience", type=int, default=25)
parser.add_argument(
@@ -458,14 +458,14 @@ def main():
if args.cpu_cache_size_in_gigabytes > 0 and isinstance(
features[("node", None, "feat")], gb.DiskBasedFeature
):
features[("node", None, "feat")] = gb.CPUCachedFeature(
features[("node", None, "feat")] = gb.cpu_cached_feature(
features[("node", None, "feat")],
int(args.cpu_cache_size_in_gigabytes * 1024 * 1024 * 1024),
args.cpu_feature_cache_policy,
args.feature_device == "pinned",
)
cpu_cached_feature = features[("node", None, "feat")]
- cpu_cache_miss_rate_fn = lambda: cpu_cached_feature._feature.miss_rate
+ cpu_cache_miss_rate_fn = lambda: cpu_cached_feature.miss_rate
else:
cpu_cache_miss_rate_fn = lambda: 1

@@ -475,12 +475,12 @@
host-to-device copy operations for this feature.
"""
if args.gpu_cache_size_in_gigabytes > 0 and args.feature_device != "cuda":
features[("node", None, "feat")] = gb.GPUCachedFeature(
features[("node", None, "feat")] = gb.gpu_cached_feature(
features[("node", None, "feat")],
int(args.gpu_cache_size_in_gigabytes * 1024 * 1024 * 1024),
)
gpu_cached_feature = features[("node", None, "feat")]
- gpu_cache_miss_rate_fn = lambda: gpu_cached_feature._feature.miss_rate
+ gpu_cache_miss_rate_fn = lambda: gpu_cached_feature.miss_rate
else:
gpu_cache_miss_rate_fn = lambda: 1

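Taken together, the cache changes in this file swap the CPUCachedFeature and GPUCachedFeature constructors for the cpu_cached_feature and gpu_cached_feature factories, and read miss_rate off the wrapper instead of the private _feature attribute. A hedged sketch of the new pattern, assuming features comes from a loaded GraphBolt dataset and cache sizes are given in GiB as the updated flags describe:

```python
import dgl.graphbolt as gb

feat_key = ("node", None, "feat")

# Layer a 4 GiB CPU cache over the (disk-based) feature. The policy and
# pin-memory arguments mirror this script's CLI flags; "sieve" is an
# assumed policy choice.
features[feat_key] = gb.cpu_cached_feature(
    features[feat_key],
    int(4 * 1024 * 1024 * 1024),
    "sieve",
    False,
)
cpu_cached = features[feat_key]

# Optionally layer a 1 GiB GPU cache on top.
features[feat_key] = gb.gpu_cached_feature(
    features[feat_key],
    int(1 * 1024 * 1024 * 1024),
)

# The miss rate is now exposed directly on the wrapper.
print(cpu_cached.miss_rate)
```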
10 changes: 7 additions & 3 deletions examples/graphbolt/link_prediction.py
@@ -182,7 +182,10 @@ def create_dataloader(args, graph, features, itemset, is_train=True):
# Initialize a neighbor sampler for sampling the neighborhoods of nodes.
############################################################################
datapipe = datapipe.sample_neighbor(
- graph, args.fanout if is_train else [-1]
+ graph,
+ args.fanout if is_train else [-1],
+ overlap_fetch=args.storage_device == "pinned",
+ asynchronous=args.storage_device != "cpu",
)

############################################################################
@@ -199,8 +202,9 @@
# the negative samples.
############################################################################
if is_train and args.exclude_edges:
- datapipe = datapipe.transform(
- partial(gb.exclude_seed_edges, include_reverse_edges=True)
+ datapipe = datapipe.exclude_seed_edges(
+ include_reverse_edges=True,
+ asynchronous=args.storage_device != "cpu",
)

############################################################################
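The exclusion change above promotes gb.exclude_seed_edges from a transform(partial(...)) call to a first-class datapipe stage with its own asynchronous flag. A minimal sketch of the resulting training pipeline, assuming a FusedCSCSamplingGraph graph and an edge ItemSet itemset as in create_dataloader; the fanouts and batch size are placeholders:

```python
import dgl.graphbolt as gb

datapipe = gb.ItemSampler(itemset, batch_size=512, shuffle=True)
# Neighbor sampling now takes the overlap/async options directly.
datapipe = datapipe.sample_neighbor(
    graph,
    [15, 10],
    overlap_fetch=False,  # True when the graph is in pinned memory
    asynchronous=False,   # True when the storage device is not "cpu"
)
# Seed-edge exclusion is a datapipe method rather than a transform() wrapper.
datapipe = datapipe.exclude_seed_edges(
    include_reverse_edges=True,
    asynchronous=False,
)
```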
24 changes: 16 additions & 8 deletions examples/graphbolt/node_classification.py
@@ -117,7 +117,10 @@ def create_dataloader(
# Initialize a neighbor sampler for sampling the neighborhoods of nodes.
############################################################################
datapipe = getattr(datapipe, args.sample_mode)(
- graph, fanout if job != "infer" else [-1]
+ graph,
+ fanout if job != "infer" else [-1],
+ overlap_fetch=args.storage_device == "pinned",
+ asynchronous=args.storage_device != "cpu",
)

############################################################################
@@ -156,11 +159,7 @@
# [Role]:
# Initialize a multi-process dataloader to load the data in parallel.
############################################################################
- dataloader = gb.DataLoader(
- datapipe,
- num_workers=num_workers,
- overlap_graph_fetch=args.storage_device == "pinned",
- )
+ dataloader = gb.DataLoader(datapipe, num_workers=num_workers)

# Return the fully-initialized DataLoader object.
return dataloader
@@ -364,9 +363,18 @@ def parse_args():
"--dataset",
type=str,
default="ogbn-products",
choices=["ogbn-arxiv", "ogbn-products", "ogbn-papers100M"],
choices=[
"ogbn-arxiv",
"ogbn-products",
"ogbn-papers100M",
"igb-hom-tiny",
"igb-hom-small",
"igb-hom-medium",
"igb-hom-large",
],
help="The dataset we can use for node classification example. Currently"
" ogbn-products, ogbn-arxiv, ogbn-papers100M datasets are supported.",
" ogbn-products, ogbn-arxiv, ogbn-papers100M and"
" igb-hom-[tiny|small|medium] datasets are supported.",
)
parser.add_argument(
"--mode",
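Putting the loader changes in this file together: overlap_fetch and asynchronous now ride on the sampling stage, and gb.DataLoader no longer accepts overlap_graph_fetch. A hedged end-to-end sketch, assuming graph, features, and itemset come from a loaded GraphBolt dataset and the graph is stored in pinned memory:

```python
import dgl.graphbolt as gb

storage_device = "pinned"
datapipe = gb.ItemSampler(itemset, batch_size=1024, shuffle=True)
datapipe = datapipe.sample_neighbor(
    graph,
    [10, 10, 10],
    overlap_fetch=storage_device == "pinned",
    asynchronous=storage_device != "cpu",
)
datapipe = datapipe.fetch_feature(features, node_feature_keys=["feat"])
datapipe = datapipe.copy_to(device="cuda")
# The overlap option has moved off the DataLoader entirely.
dataloader = gb.DataLoader(datapipe, num_workers=0)
```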