Factor out CUDA code

facebookresearch · Jul 13, 2018 · de3743f · de3743f
1 parent f0407b3
commit de3743f
Show file tree

Hide file tree

Showing 96 changed files with 5,850 additions and 8,346 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,5 @@
 SparseConvNetTorch/build/
-*.t7
-t7/
+*.pth
 *.o
 *.a
 *.so
@@ -11,3 +10,5 @@ pickle
 PyTorch/sparseconvnet.egg-info/
 PyTorch/sparseconvnet/SCN/__init__.py
 sparseconvnet.egg-info
+*.zip
+*.rar
diff --git a/build.sh b/build.sh
@@ -1,3 +1,3 @@
 #!/bin/bash
-rm -rf build/ sparseconvnet.egg-info sparseconvnet_SCN*.so
+rm -rf build/ dist/ sparseconvnet.egg-info sparseconvnet_SCN*.so
 python setup.py install
diff --git a/examples/3d_segmentation/fully_convolutional.py b/examples/3d_segmentation/fully_convolutional.py
@@ -47,7 +47,7 @@ def forward(self,x):
 p['lr_decay'] = 4e-2
 p['weight_decay'] = 1e-4
 p['momentum'] = 0.9
-p['check_point'] = True
+p['check_point'] = False
 p['use_cuda'] = torch.cuda.is_available()
 dtype = 'torch.cuda.FloatTensor' if p['use_cuda'] else 'torch.FloatTensor'
 dtypei = 'torch.cuda.LongTensor' if p['use_cuda'] else 'torch.LongTensor'

diff --git a/examples/3d_segmentation/unet.py b/examples/3d_segmentation/unet.py
@@ -47,7 +47,7 @@ def forward(self,x):
 p['lr_decay'] = 4e-2
 p['weight_decay'] = 1e-4
 p['momentum'] = 0.9
-p['check_point'] = True
+p['check_point'] = False
 p['use_cuda'] = torch.cuda.is_available()
 dtype = 'torch.cuda.FloatTensor' if p['use_cuda'] else 'torch.FloatTensor'
 dtypei = 'torch.cuda.LongTensor' if p['use_cuda'] else 'torch.LongTensor'

diff --git a/examples/Assamese_handwriting/data.py b/examples/Assamese_handwriting/data.py
@@ -4,8 +4,7 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 
-import torch
-import torchnet
+import torch, torch.utils.data
 import sparseconvnet as scn
 import pickle
 import math

diff --git a/examples/Assamese_handwriting/process.sh b/examples/Assamese_handwriting/process.sh
@@ -4,6 +4,7 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 #!/bin/bash
+set -e
 wget https://archive.ics.uci.edu/ml/machine-learning-databases/00208/Online%20Handwritten%20Assamese%20Characters%20Dataset.rar
 unrar e -cl -y "Online Handwritten Assamese Characters Dataset.rar"
 mkdir tmp

diff --git a/setup.py b/setup.py
@@ -24,12 +24,13 @@
     packages=['sparseconvnet','sparseconvnet.SCN'],
     ext_modules=[
       CUDAExtension('sparseconvnet_SCN',
-        ['sparseconvnet/SCN/pybind_cuda.cpp', 'sparseconvnet/SCN/instantiate_cpu.cpp', 'sparseconvnet/SCN/instantiate_cuda.cu'],
+        [
+         'sparseconvnet/SCN/cuda.cu', 'sparseconvnet/SCN/sparseconvnet_cuda.cpp', 'sparseconvnet/SCN/pybind.cpp'],
         include_dirs=[conda_include_dir, this_dir+'/sparseconvnet/SCN/'],
         extra_compile_args=extra)
       if torch.cuda.is_available()  else
       CppExtension('sparseconvnet_SCN',
-        ['sparseconvnet/SCN/pybind_cpu.cpp', 'sparseconvnet/SCN/instantiate_cpu.cpp'],
+        ['sparseconvnet/SCN/pybind.cpp', 'sparseconvnet/SCN/sparseconvnet_cpu.cpp'],
         include_dirs=[conda_include_dir, this_dir+'/sparseconvnet/SCN/'],
         extra_compile_args=extra['cxx'])],
     cmdclass={'build_ext': BuildExtension},

diff --git a/sparseconvnet/SCN/CPU/ActivePooling.cpp b/sparseconvnet/SCN/CPU/ActivePooling.cpp
@@ -4,7 +4,39 @@
 // This source code is licensed under the license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include "ActivePooling.h"
+// Assume output_features and d_input_features have been zeroed
+// Sums (or averages, when `average` is set) the input rows listed for each
+// output row into output_features. For output row s, rules[0] holds a record
+// of (maxActive + 1) entries starting at s * (maxActive + 1): a count followed
+// by that many input row indices. Results are accumulated with +=, so the
+// destination rows must already hold the values to add onto.
+template <typename T>
+void ActivePooling_ForwardPass(T *input_features, T *output_features,
+                               Int batchSize, Int maxActive, Int nPlanes,
+                               RuleBook &rules, bool average) {
+  for (Int sample = 0; sample < batchSize; ++sample) {
+    Int *record = &rules[0][sample * (maxActive + 1)];
+    const Int count = record[0];
+    const Int *sites = record + 1;
+    // An empty record keeps a scale of 1 so no division by zero occurs.
+    T scale = 1.0f;
+    if (average && count > 0)
+      scale = 1.0f / count;
+    T *dst = output_features + sample * nPlanes;
+    for (Int k = 0; k < count; ++k) {
+      const T *src = input_features + sites[k] * nPlanes;
+      for (Int p = 0; p < nPlanes; ++p)
+        dst[p] += src[p] * scale;
+    }
+  }
+}
+// Copies each output-row gradient back to every input row listed for it,
+// scaled by 1 / count when `average` is set and the count is positive.
+// rules[0] uses the same layout as the forward pass: per output row, a count
+// followed by input row indices, (maxActive + 1) entries per record.
+// NOTE(review): the gradient is assigned (=), not accumulated; presumably each
+// input row appears in at most one record -- confirm against the rule builder.
+template <typename T>
+void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
+                                Int batchSize, Int maxActive, Int nPlanes,
+                                RuleBook &rules, bool average) {
+  for (Int sample = 0; sample < batchSize; ++sample) {
+    Int *record = &rules[0][sample * (maxActive + 1)];
+    const Int count = record[0];
+    const Int *sites = record + 1;
+    // An empty record keeps a scale of 1 so no division by zero occurs.
+    T scale = 1.0f;
+    if (average && count > 0)
+      scale = 1.0f / count;
+    const T *grad_out = d_output_features + sample * nPlanes;
+    for (Int k = 0; k < count; ++k) {
+      T *grad_in = d_input_features + sites[k] * nPlanes;
+      for (Int p = 0; p < nPlanes; ++p)
+        grad_in[p] = grad_out[p] * scale;
+    }
+  }
+}
 
 template <typename T, Int Dimension>
 void cpu_ActivePooling_updateOutput(

diff --git a/sparseconvnet/SCN/CPU/ActivePooling.h b/sparseconvnet/SCN/CPU/ActivePooling.h
diff --git a/sparseconvnet/SCN/CPU/AffineReluTrivialConvolution.cpp b/sparseconvnet/SCN/CPU/AffineReluTrivialConvolution.cpp
@@ -4,7 +4,68 @@
 // This source code is licensed under the license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include "AffineReluTrivialConvolution.h"
+#include <cstring>
+
+// For every active row r and output plane c, writes
+//   output[r][c] = sum_k max(0, input[r][k] * affineWeight[k] + affineBias[k])
+//                        * convWeight[k * output_nPlanes + c]
+// Consecutive rows of input/output are input_stride/output_stride elements
+// apart. Output entries are overwritten, not accumulated.
+template <typename T>
+void AffineReluTrivialConvolution_ForwardPass(
+    T *input_features, Int input_nPlanes, Int input_stride, T *output_features,
+    Int output_nPlanes, Int output_stride, T *affineWeight, T *affineBias,
+    T *convWeight, Int nActive) {
+
+  for (Int r = 0; r < nActive; ++r) {
+    const T *in_row = input_features + r * input_stride;
+    T *out_row = output_features + r * output_stride;
+    for (Int c = 0; c < output_nPlanes; ++c) {
+      T acc = 0;
+      for (Int k = 0; k < input_nPlanes; ++k) {
+        // Per-plane affine transform followed by ReLU.
+        const T pre = in_row[k] * affineWeight[k] + affineBias[k];
+        const T act = (pre > 0) ? pre : 0;
+        acc += act * convWeight[k * output_nPlanes + c];
+      }
+      out_row[c] = acc;
+    }
+  }
+}
+
+// Backward pass for AffineReluTrivialConvolution_ForwardPass, where
+//   a[r][k]      = max(0, input[r][k] * affineWeight[k] + affineBias[k])
+//   output[r][c] = sum_k a[r][k] * convWeight[k * output_nPlanes + c].
+//
+// Accumulates (+=) into dConvWeight, dAffineWeight and dAffineBias.
+// d_input_features is accumulated when additiveGrad is true and overwritten
+// otherwise. Rows of input/d_input are input_stride elements apart; rows of
+// d_output are output_stride elements apart.
+template <typename T>
+void AffineReluTrivialConvolution_BackwardPass(
+    T *input_features, T *d_input_features, Int input_nPlanes, Int input_stride,
+    T *d_output_features, Int output_nPlanes, Int output_stride,
+    T *affineWeight, T *dAffineWeight, T *affineBias, T *dAffineBias,
+    T *convWeight, T *dConvWeight, Int nActive, bool additiveGrad) {
+
+  // dConvWeight[k][c] += sum_r a[r][k] * d_output[r][c]
+  for (Int row = 0; row < input_nPlanes; row++) {
+    for (Int column = 0; column < output_nPlanes; column++) {
+      T sum = 0;
+      for (Int j = 0; j < nActive; j++) {
+        T i = input_features[j * input_stride + row] * affineWeight[row] +
+              affineBias[row];
+        i = (i > 0) ? i : 0;
+        sum += i * d_output_features[j * output_stride + column];
+      }
+      dConvWeight[row * output_nPlanes + column] += sum;
+    }
+  }
+  // Gradients w.r.t. the affine parameters and the input features.
+  for (Int row = 0; row < nActive; row++) {
+    for (Int column = 0; column < input_nPlanes; column++) {
+      // Gradient arriving at the ReLU output a[row][column].
+      T sum = 0;
+      for (Int j = 0; j < output_nPlanes; j++) {
+        sum += d_output_features[row * output_stride + j] *
+               convWeight[column * output_nPlanes + j];
+      }
+      const T x = input_features[row * input_stride + column];
+      T i = x * affineWeight[column] + affineBias[column];
+      if (i <= 0) // d_ReLU
+        sum = 0;
+      // d(x * w + b) / dw is the raw input x, not the affine output i.
+      dAffineWeight[column] += sum * x;
+      dAffineBias[column] += sum;
+      // d(x * w + b) / dx is the affine weight.
+      sum *= affineWeight[column];
+      if (additiveGrad)
+        d_input_features[row * input_stride + column] += sum;
+      else
+        d_input_features[row * input_stride + column] = sum;
+    }
+  }
+}
 
 template <typename T>
 double cpu_AffineReluTrivialConvolution_updateOutput(

diff --git a/sparseconvnet/SCN/CPU/AffineReluTrivialConvolution.h b/sparseconvnet/SCN/CPU/AffineReluTrivialConvolution.h
diff --git a/sparseconvnet/SCN/CPU/AveragePooling.cpp b/sparseconvnet/SCN/CPU/AveragePooling.cpp
@@ -4,7 +4,31 @@
 // This source code is licensed under the license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include "AveragePooling.h"
+// Applies nHot (input row, output row) pairs from `rules`: rules[2 * k] is
+// the input row and rules[2 * k + 1] the output row of pair k. For each pair,
+// input / filterVolume is added to the output row, plane by plane. Rows are
+// input_stride / output_stride elements apart.
+template <typename T>
+void AveragePooling_ForwardPass(T *input_features, T *output_features,
+                                Int nPlanes, Int input_stride,
+                                Int output_stride, Int *rules, Int nHot,
+                                Int filterVolume) {
+  for (Int k = 0; k < nHot; ++k) {
+    const T *src = input_features + rules[2 * k] * input_stride;
+    T *dst = output_features + rules[2 * k + 1] * output_stride;
+    for (Int p = 0; p < nPlanes; ++p)
+      dst[p] += src[p] / filterVolume;
+  }
+}
+// Reverses AveragePooling_ForwardPass over the same (input row, output row)
+// pairs: for pair k, d_output / filterVolume of the output row
+// rules[2 * k + 1] is added to the input-gradient row rules[2 * k].
+template <typename T>
+void AveragePooling_BackwardPass(T *d_input_features, T *d_output_features,
+                                 Int nPlanes, Int input_stride,
+                                 Int output_stride, Int *rules, Int nHot,
+                                 Int filterVolume) {
+  for (Int k = 0; k < nHot; ++k) {
+    T *grad_in = d_input_features + rules[2 * k] * input_stride;
+    const T *grad_out = d_output_features + rules[2 * k + 1] * output_stride;
+    for (Int p = 0; p < nPlanes; ++p)
+      grad_in[p] += grad_out[p] / filterVolume;
+  }
+}
 
 template <typename T, Int Dimension>
 void cpu_AveragePooling_updateOutput(

diff --git a/sparseconvnet/SCN/CPU/AveragePooling.h b/sparseconvnet/SCN/CPU/AveragePooling.h