support torch 2.1.0 and cuda 12.1 (NVIDIAGameWorks#765)

Signed-off-by: Clement Fuji Tsang <[email protected]>
muses0229 · Nov 7, 2023 · e7c97f7 · e7c97f7
1 parent 70c6d20
commit e7c97f7
Showing 10 changed files with 162 additions and 147 deletions.
diff --git a/ci/gitlab_jenkins_templates/core_ci.jenkins b/ci/gitlab_jenkins_templates/core_ci.jenkins
@@ -9,14 +9,24 @@ gitlabCommitStatus("launch all builds") {
 // Configs for build from pytorch docker images
 // (See: https://hub.docker.com/r/pytorch/pytorch/tags)
 def ubuntu_from_pytorch_configs = [
+    [
+        // python: 3.7
+        'cudaVer': '11.3', 'cudnnVer': '8', 'torchVer': '1.12.1',
+        'archsToTest': 'MULTI'
+    ],
     [
         // python: 3.7
         'cudaVer': '11.6', 'cudnnVer': '8', 'torchVer': '1.13.1',
         'archsToTest': 'MULTI'
     ],
+    [
+        // python: 3.10
+        'cudaVer': '12.1', 'cudnnVer': '8', 'torchVer': '2.1.0',
+        'archsToTest': 'MULTI'
+    ],
     [
         // python: 3.10
-        'cudaVer': '11.7', 'cudnnVer': '8', 'torchVer': '2.0.1',
+        'cudaVer': '11.8', 'cudnnVer': '8', 'torchVer': '2.1.0',
         'archsToTest': 'MULTI'
     ]
 ]
@@ -25,7 +35,7 @@ def ubuntu_from_pytorch_configs = [
 // (See: https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags)
 def ubuntu_from_nvcr_configs = [
     [
-        'baseImageTag': '22.10-py3',
+        'baseImageTag': '23.10-py3',
         'archsToTest': 'MULTI'
     ],
 ]
@@ -40,8 +50,8 @@ def ubuntu_from_cuda_configs = [
         'archsToTest': 'MULTI'
     ],
     [
-        'cudaVer': '11.8.0', 'cudnnVer': '8',
-        'pythonVer': '3.10', 'torchVer': '2.0.1',
+        'cudaVer': '12.1.0', 'cudnnVer': '8',
+        'pythonVer': '3.10', 'torchVer': '2.1.0',
         'archsToTest': 'MULTI'
     ],
 ]

diff --git a/ci/gitlab_jenkins_templates/ubuntu_build_CI.jenkins b/ci/gitlab_jenkins_templates/ubuntu_build_CI.jenkins
@@ -16,7 +16,7 @@ kind: Pod
 spec:
   containers:
   - name: docker
-    image: docker:19.03.1
+    image: docker:20.10.23
     command:
     - sleep
     args:
@@ -25,7 +25,7 @@ spec:
       - name: DOCKER_HOST
         value: tcp://localhost:2375
   - name: docker-daemon
-    image: docker:19.03.1-dind
+    image: docker:20.10.23-dind
     securityContext:
       privileged: true
     env:
@@ -41,6 +41,8 @@ spec:
 ''') {
   node(POD_LABEL) {
     container("docker") {
+      // Give the docker-daemon container time to finish initializing.
+      sleep 10
       try {
         stage("Checkout") {
           checkout([

diff --git a/ci/gitlab_jenkins_templates/windows_build_CI.jenkins b/ci/gitlab_jenkins_templates/windows_build_CI.jenkins
@@ -36,7 +36,7 @@ spec:
       claimName: 'kaolin-pvc'
   containers:
   - name: jnlp
-    image: jenkins/jnlp-agent:latest-windows
+    image: urm.nvidia.com/sw-ipp-blossom-sre-docker-local/jnlp-agent:jdk11-windows
     env: 
     - name: JENKINS_AGENT_WORKDIR
       value: C:/Jenkins/agent

diff --git a/kaolin/csrc/ops/conversions/unbatched_mcube/unbatched_mcube_cuda.cu b/kaolin/csrc/ops/conversions/unbatched_mcube/unbatched_mcube_cuda.cu
diff --git a/kaolin/csrc/ops/spc/convolution_cuda.cu b/kaolin/csrc/ops/spc/convolution_cuda.cu
@@ -17,10 +17,6 @@
 #include "../../utils.h"
 #include "convolution.cuh"
 
-#define CUB_NS_PREFIX namespace kaolin {
-#define CUB_NS_POSTFIX }
-#define CUB_NS_QUALIFIER ::kaolin::cub
-
 #include <ATen/cuda/CUDAContext.h>
 #include <cub/device/device_scan.cuh>
 

diff --git a/kaolin/csrc/spc_utils.cuh b/kaolin/csrc/spc_utils.cuh
@@ -16,10 +16,6 @@
 #ifndef KAOLIN_SPC_UTILS_CUH_
 #define KAOLIN_SPC_UTILS_CUH_
 
-#define CUB_NS_PREFIX namespace kaolin {
-#define CUB_NS_POSTFIX }
-#define CUB_NS_QUALIFIER ::kaolin::cub
-
 #include <cub/device/device_scan.cuh>
 #include "spc_math.h"
 

diff --git a/setup.py b/setup.py
@@ -14,7 +14,7 @@
 import warnings
 
 TORCH_MIN_VER = '1.6.0'
-TORCH_MAX_VER = '2.0.1'
+TORCH_MAX_VER = '2.1.0'
 CYTHON_MIN_VER = '0.29.20'
 IGNORE_TORCH_VER = os.getenv('IGNORE_TORCH_VER') is not None
 
@@ -134,6 +134,8 @@ def get_cuda_bare_metal_version(cuda_dir):
                     os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0;8.6"
                 else:
                     os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0;8.6;9.0"
+            elif int(bare_metal_major) == 12:
+                os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0;8.6;9.0"
             else:
                 os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5"
         print(f'TORCH_CUDA_ARCH_LIST: {os.environ["TORCH_CUDA_ARCH_LIST"]}')

diff --git a/tests/python/kaolin/io/test_materials.py b/tests/python/kaolin/io/test_materials.py
@@ -277,7 +277,7 @@ def test_cuda(self, material_values, material_textures, device, non_blocking):
                 assert cuda_val is None
             else:
                 assert torch.equal(cuda_val, val.cuda())
-                assert val.is_cpu
+                assert not val.is_cuda
 
         for param_name in _misc_attributes:
             assert getattr(mat, param_name) == getattr(cuda_mat, param_name)

diff --git a/tests/python/kaolin/ops/mesh/test_trianglemesh.py b/tests/python/kaolin/ops/mesh/test_trianglemesh.py
@@ -58,7 +58,7 @@ def test_packed_face_areas(self, device, dtype):
         output = kaolin.ops.mesh.packed_face_areas(vertices, first_idx_vertices,
                                         faces, num_faces_per_mesh)
         expected_output = torch.tensor([0.5, 1., math.sqrt(2.)], device=device, dtype=dtype)
-        assert torch.allclose(output, expected_output)
+        check_allclose(output, expected_output)
 
 @pytest.mark.parametrize("device,dtype", FLOAT_TYPES)
 class TestSamplePoints:
@@ -130,10 +130,10 @@ def test_sample_points(self, vertices, faces, face_features,
 
         v0_p = points - face_vertices_choices[:, :, 0]  # batch_size x num_points x 3
         len_v0_p = torch.sqrt(torch.sum(v0_p ** 2, dim=-1))
-        cos_a = torch.matmul(v0_p.reshape(-1, 1, 3),
-                             face_normals.reshape(-1, 3, 1)).reshape(
-            batch_size, num_samples) / len_v0_p
-        point_to_face_dist = len_v0_p * cos_a
+        point_to_face_dist = torch.matmul(
+            v0_p.reshape(-1, 1, 3),
+            face_normals.reshape(-1, 3, 1)
+        ).reshape(batch_size, num_samples)
 
         if dtype == torch.half:
             atol = 1e-2
@@ -143,10 +143,9 @@ def test_sample_points(self, vertices, faces, face_features,
             rtol = 1e-5
 
         # check that the point is close to the plan
-        assert torch.allclose(point_to_face_dist,
-                              torch.zeros((batch_size, num_samples),
-                                          device=device, dtype=dtype),
-                              atol=atol, rtol=rtol)
+        check_allclose(point_to_face_dist,
+                       torch.zeros((batch_size, num_samples), device=device, dtype=dtype),
+                       atol=atol, rtol=rtol)
 
         # check that the point lie in the triangle
         edges0 = face_vertices_choices[:, :, 1] - face_vertices_choices[:, :, 0]
@@ -201,15 +200,15 @@ def test_sample_points(self, vertices, faces, face_features,
 
             gt_points = torch.sum(
                 face_vertices_choices * weights.unsqueeze(-1), dim=-2)
-            assert torch.allclose(points, gt_points, atol=atol, rtol=rtol)
+            check_allclose(points, gt_points, atol=atol, rtol=rtol)
 
             _face_choices = face_choices[..., None, None].repeat(1, 1, 3, feat_dim)
             face_features_choices = torch.gather(face_features, 1, _face_choices)
 
             gt_interpolated_features = torch.sum(
                 face_features_choices * weights.unsqueeze(-1), dim=-2)
-            assert torch.allclose(interpolated_features, gt_interpolated_features,
-                                  atol=atol, rtol=rtol)
+            check_allclose(interpolated_features, gt_interpolated_features,
+                           atol=atol, rtol=rtol)
 
     def test_sample_points_with_areas(self, vertices, faces, dtype, device):
         num_samples = 1000
@@ -218,7 +217,7 @@ def test_sample_points_with_areas(self, vertices, faces, dtype, device):
             kaolin.ops.mesh.sample_points)(vertices, faces, num_samples, face_areas)
         points2, face_choices2 = with_seed(1234)(
             kaolin.ops.mesh.sample_points)(vertices, faces, num_samples)
-        assert torch.allclose(points1, points2)
+        check_allclose(points1, points2)
         assert torch.equal(face_choices1, face_choices2)
 
     def test_sample_points_with_areas_with_features(self, vertices, faces,
@@ -231,9 +230,9 @@ def test_sample_points_with_areas_with_features(self, vertices, faces,
         points2, face_choices2, interpolated_features2 = with_seed(1234)(
             kaolin.ops.mesh.sample_points)(vertices, faces, num_samples,
                                 face_features=face_features)
-        assert torch.allclose(points1, points2)
+        check_allclose(points1, points2)
         assert torch.equal(face_choices1, face_choices2)
-        assert torch.allclose(interpolated_features1, interpolated_features2)
+        check_allclose(interpolated_features1, interpolated_features2)
 
     def test_diff_sample_points(self, vertices, faces, device, dtype):
         num_samples = 1000
@@ -308,10 +307,10 @@ def test_packed_sample_points(self, packed_vertices_info, packed_faces_info,
         face_normals = kaolin.ops.mesh.face_normals(face_vertices_choices, unit=True)
         v0_p = points - face_vertices_choices[:, :, 0]  # batch_size x num_points x 3
         len_v0_p = torch.sqrt(torch.sum(v0_p ** 2, dim=-1))
-        cos_a = torch.matmul(v0_p.reshape(-1, 1, 3),
-                             face_normals.reshape(-1, 3, 1)).reshape(
-            batch_size, num_samples) / len_v0_p
-        point_to_face_dist = len_v0_p * cos_a
+        point_to_face_dist = torch.matmul(
+            v0_p.reshape(-1, 1, 3),
+            face_normals.reshape(-1, 3, 1)
+        ).reshape(batch_size, num_samples)
 
         if dtype == torch.half:
             atol = 1e-2
@@ -321,10 +320,9 @@ def test_packed_sample_points(self, packed_vertices_info, packed_faces_info,
             rtol = 1e-5
 
         # check that the point is close to the plan
-        assert torch.allclose(point_to_face_dist,
-                              torch.zeros((batch_size, num_samples),
-                                          device=device, dtype=dtype),
-                              atol=atol, rtol=rtol)
+        check_allclose(point_to_face_dist,
+                       torch.zeros((batch_size, num_samples), device=device, dtype=dtype),
+                       atol=atol, rtol=rtol)
 
         # check that the point lie in the triangle
         edges0 = face_vertices_choices[:, :, 1] - face_vertices_choices[:, :, 0]
@@ -363,7 +361,7 @@ def test_packed_sample_points_with_areas(self, packed_vertices_info, packed_face
         points2, face_choices2 = with_seed(1234)(kaolin.ops.mesh.packed_sample_points)(
             vertices, first_idx_vertices, faces, num_faces_per_mesh, num_samples)
 
-        assert torch.allclose(points1, points2)
+        check_allclose(points1, points2)
         assert torch.equal(face_choices1, face_choices2)
 
     def test_diff_packed_sample_points(self, packed_vertices_info, packed_faces_info,
@@ -811,21 +809,21 @@ def expected_faces_icosahedron_1_iter(self, device):
     def test_subdivide_trianglemesh_1_iter_default_alpha(self, vertices_icosahedron, faces_icosahedron, expected_vertices_default_alpha, expected_faces_icosahedron_1_iter):
         new_vertices, new_faces = kaolin.ops.mesh.subdivide_trianglemesh(
             vertices_icosahedron, faces_icosahedron, 1)
-        assert torch.allclose(new_vertices, expected_vertices_default_alpha, atol=1e-04)
+        check_allclose(new_vertices, expected_vertices_default_alpha, atol=1e-04)
         assert torch.equal(new_faces, expected_faces_icosahedron_1_iter)
 
     def test_subdivide_trianglemesh_1_iter_zero_alpha(self, vertices_icosahedron, faces_icosahedron, expected_vertices_zero_alpha, expected_faces_icosahedron_1_iter):
         alpha = torch.zeros_like(vertices_icosahedron[..., 0])
         new_vertices, new_faces = kaolin.ops.mesh.subdivide_trianglemesh(
             vertices_icosahedron, faces_icosahedron, 1, alpha)
-        assert torch.allclose(new_vertices, expected_vertices_zero_alpha, atol=1e-04)
+        check_allclose(new_vertices, expected_vertices_zero_alpha, atol=1e-04)
         assert torch.equal(new_faces, expected_faces_icosahedron_1_iter)
 
     def test_subdivide_trianglemesh_5_iter(self, vertices_icosahedron, faces_icosahedron):
         new_vertices, new_faces = kaolin.ops.mesh.subdivide_trianglemesh(
             vertices_icosahedron, faces_icosahedron, 5)
         # check total area of all faces
-        assert torch.allclose(
+        check_allclose(
             kaolin.ops.mesh.face_areas(new_vertices, new_faces).sum(),
             torch.tensor([6.2005], dtype=new_vertices.dtype, device=new_faces.device),
             atol=1e-4)

diff --git a/version.txt b/version.txt
@@ -1 +1 @@
-0.14.0
+0.15.0a0