Cleanup hints and warnings (#421)
* declaredButNotUsed and deprecated cleanup

* more warning and hints cleanup

* No need to split testing suite in 6 parts anymore in Nim 1.0+

* Appveyor to download 1.0.6

* einsum eqIdent instead of using deprecated .ident (see the sketch after the change summary below)

* imports cleanup

* Use the new pragma ordering for the autograd Gates

* Another cleanup pass

* K-Means clustering doesn't depend on LAPACK

* do the clustering tests at all times
mratsim authored Mar 5, 2020
1 parent 8acfd68 commit a0bdf4e
Showing 66 changed files with 132 additions and 171 deletions.
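
Note on the "einsum eqIdent" item in the commit message: that change is not among the hunks shown below. A minimal, hypothetical sketch of the pattern it refers to (the proc name and the "axis" string are made-up placeholders, not the actual einsum code):

import macros

# The deprecated style compared the .ident field of a NimNode directly,
# e.g. `node.ident == !"axis"`. eqIdent performs the same identifier comparison
# without the deprecated field and ignores casing/underscore style differences.
proc refersToAxis(node: NimNode): bool =
  node.eqIdent("axis")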
12 changes: 6 additions & 6 deletions .appveyor.yml
@@ -5,17 +5,17 @@ matrix:

environment:
matrix:
- - NIM_ARCHIVE: nim-0.20.0_x64.zip
- NIM_DIR: nim-0.20.0
- NIM_URL: https://nim-lang.org/download/nim-0.20.0_x64.zip
+ - NIM_ARCHIVE: nim-1.0.6_x64.zip
+ NIM_DIR: nim-1.0.6
+ NIM_URL: https://nim-lang.org/download/nim-1.0.6_x64.zip
MINGW_PATH: C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin
LAPACK_URL: http://icl.cs.utk.edu/lapack-for-windows/libraries/VisualStudio/3.7.0/Dynamic-MINGW/Win64/liblapack.dll
BLAS_PLATFORM: x64
platform: x64

- - NIM_ARCHIVE: nim-0.20.0_x32.zip
- NIM_DIR: nim-0.20.0
- NIM_URL: https://nim-lang.org/download/nim-0.20.0_x32.zip
+ - NIM_ARCHIVE: nim-1.0.6_x32.zip
+ NIM_DIR: nim-1.0.6
+ NIM_URL: https://nim-lang.org/download/nim-1.0.6_x32.zip
MINGW_PATH: C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin
LAPACK_URL: http://icl.cs.utk.edu/lapack-for-windows/libraries/VisualStudio/3.7.0/Dynamic-MINGW/Win32/liblapack.dll
BLAS_PLATFORM: win32
35 changes: 22 additions & 13 deletions arraymancer.nimble
@@ -5,7 +5,7 @@ description = "A n-dimensional tensor (ndarray) library"
license = "Apache License 2.0"

### Dependencies
requires "nim >= 0.20.0",
requires "nim >= 1.0.0",
"nimblas >= 0.2.2",
"nimlapack >= 0.1.1",
"nimcuda >= 0.1.4",
@@ -149,22 +149,31 @@ task all_tests, "Run all tests - Intel MKL + Cuda + OpenCL + OpenMP":
switches.cuda_mkl_openmp()
test "full_test_suite", switches, split=false, lang="cpp"

+ # Split tests are unnecessary after 1.0.0 (no more 3GB+ memory used when compiling)
+ #
+ # task test, "Run all tests - Default BLAS & Lapack":
+ # test "tests_tensor_part01", "", split = true
+ # test "tests_tensor_part02", "", split = true
+ # test "tests_tensor_part03", "", split = true
+ # test "tests_tensor_part04", "", split = true
+ # test "tests_tensor_part05", "", split = true
+ # test "tests_cpu_remainder", "", split = true
+ #
+ # task test_no_lapack, "Run all tests - Default BLAS without lapack":
+ # let switch = " -d:no_lapack"
+ # test "tests_tensor_part01", switch, split = true
+ # test "tests_tensor_part02", switch, split = true
+ # test "tests_tensor_part03", switch, split = true
+ # test "tests_tensor_part04", switch, split = true
+ # test "tests_tensor_part05", switch, split = true
+ # test "tests_cpu_remainder", switch, split = true
+
task test, "Run all tests - Default BLAS & Lapack":
test "tests_tensor_part01", "", split = true
test "tests_tensor_part02", "", split = true
test "tests_tensor_part03", "", split = true
test "tests_tensor_part04", "", split = true
test "tests_tensor_part05", "", split = true
test "tests_cpu_remainder", "", split = true
test "tests_cpu", "", split = false

task test_no_lapack, "Run all tests - Default BLAS without lapack":
let switch = " -d:no_lapack"
test "tests_tensor_part01", switch, split = true
test "tests_tensor_part02", switch, split = true
test "tests_tensor_part03", switch, split = true
test "tests_tensor_part04", switch, split = true
test "tests_tensor_part05", switch, split = true
test "tests_cpu_remainder", switch, split = true
test "tests_cpu", switch, split = false

task test_cpp, "Run all tests - Cpp codegen":
test "tests_cpu", "", split = false, "cpp"
4 changes: 2 additions & 2 deletions src/autograd/gates_basic.nim
@@ -18,7 +18,7 @@
import ../tensor/tensor,
./autograd_common

- type AddGate* {.final.} [TT] = ref object of Gate[TT]
+ type AddGate*[TT] {.final.} = ref object of Gate[TT]

proc add_backward_ag[TT](self: AddGate[TT], payload: Payload[TT]): SmallDiffs[TT] =
let gradient = payload.variable.grad
@@ -57,7 +57,7 @@ proc `+`*[TT](a, b: Variable[TT]): Variable[TT] =
if a.is_grad_needed or b.is_grad_needed:
result.add_cache(a, b)

- type SubGate* {.final.} [TT] = ref object of Gate[TT]
+ type SubGate*[TT] {.final.} = ref object of Gate[TT]

proc sub_backward_ag[TT](self: SubGate[TT], payload: Payload[TT]): SmallDiffs[TT] =
let gradient = payload.variable.grad
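
The Gate type changes in this commit all follow the pattern shown above: Nim 1.0 expects type pragmas after the generic parameter list, and the old placement triggers a deprecation warning. A minimal sketch with a made-up MyGate type (not taken from the repository):

# Deprecated ordering (pragma before the generic parameters):
#   type MyGate* {.final.} [TT] = ref object of RootObj
# Current ordering (pragma after the generic parameters):
type MyGate*[TT] {.final.} = ref object of RootObj
  cached_input: TT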
2 changes: 1 addition & 1 deletion src/autograd/gates_blas.nim
@@ -15,7 +15,7 @@
import ../tensor/tensor,
./autograd_common

- type MatMulGate* {.final.} [TT] = ref object of Gate[TT]
+ type MatMulGate*[TT] {.final.} = ref object of Gate[TT]
## TODO: generalize to C <- alpha AB + C
a: Variable[TT]
b: Variable[TT]
2 changes: 1 addition & 1 deletion src/autograd/gates_hadamard.nim
@@ -15,7 +15,7 @@
import ../tensor/tensor,
./autograd_common

- type HadamardGate* {.final.} [TT] = ref object of Gate[TT]
+ type HadamardGate*[TT]{.final.} = ref object of Gate[TT]
a: Variable[TT]
b: Variable[TT]

4 changes: 2 additions & 2 deletions src/autograd/gates_reduce.nim
@@ -17,7 +17,7 @@ import ../private/ast_utils,
./autograd_common,
sequtils

- type MeanGate* {.final.} [TT] = ref object of Gate[TT]
+ type MeanGate*[TT] {.final.} = ref object of Gate[TT]
## TODO: generalize to C <- alpha AB + C
cached_input_shape: MetadataArray
axis: int
@@ -101,7 +101,7 @@ proc mean*[TT](a: Variable[TT], axis: Natural): Variable[TT] =
if a.is_grad_needed:
result.mean_cache(a, axis)

- type SumGate* {.final.} [TT] = ref object of Gate[TT]
+ type SumGate*[TT]{.final.}= ref object of Gate[TT]
## TODO: generalize to C <- alpha AB + C
cached_input_shape: MetadataArray

4 changes: 2 additions & 2 deletions src/autograd/gates_shapeshifting_concat_split.nim
@@ -17,7 +17,7 @@ import ../tensor/tensor,
./autograd_common,
sequtils

- type StackGate{.final.}[TT] = ref object of Gate[TT]
+ type StackGate[TT] {.final.} = ref object of Gate[TT]
## TODO support unlimited stacking
axis: int
nb_grads: int
@@ -80,7 +80,7 @@ proc stack*[TT](variables: varargs[Variable[TT]], axis = 0): Variable[TT] =

# ###########################################################

- type ChunkSplitGate*{.final.}[TT] = ref object of Gate[TT]
+ type ChunkSplitGate*[TT] {.final.} = ref object of Gate[TT]
axis: int

proc chunk_inference[TT](result: var seq[Variable[TT]], x: Variable[TT], nb_chunks: Positive, axis: int) =
4 changes: 2 additions & 2 deletions src/autograd/gates_shapeshifting_views.nim
@@ -53,7 +53,7 @@ template `[]`*[TT](v: Variable[TT], args: varargs[untyped]): Variable[TT] =

# #############################################

- type ReshapeGate*{.final.}[TT] = ref object of Gate[TT]
+ type ReshapeGate*[TT] {.final.} = ref object of Gate[TT]
cached_input_shape: MetadataArray

proc reshape_backward_ag[TT](self: ReshapeGate[TT], payload: Payload[TT]): SmallDiffs[TT] =
@@ -115,7 +115,7 @@ proc flatten*[TT](a: Variable[TT]): Variable[TT] =

template squeezeUnsqueeze(GateName, forward_proc, backward_proc: untyped): untyped =

- type GateName{.final.}[TT] = ref object of Gate[TT]
+ type GateName[TT] {.final.} = ref object of Gate[TT]
axis: int

proc `forward_proc _ backward _ ag`[TT](self: GateName[TT], payload: Payload[TT]): SmallDiffs[TT] =
2 changes: 1 addition & 1 deletion src/datasets/imdb.nim
@@ -13,7 +13,7 @@
# limitations under the License.


- import httpclient, zip/gzipfiles, strformat, os, strutils,
+ import httpclient, strformat, os, strutils,
./util, ../tensor/tensor,
untar

2 changes: 1 addition & 1 deletion src/datasets/mnist.nim
@@ -49,7 +49,7 @@
# The labels values are 0 to 9.


- import streams, endians, os, httpClient, strformat, sugar, sequtils, ospaths,
+ import streams, os, httpClient, strformat, sugar, sequtils,
../tensor/tensor, ../io/io_stream_readers, ./util,
zip/gzipfiles

2 changes: 1 addition & 1 deletion src/io/io_hdf5.nim
@@ -5,7 +5,7 @@
import
../tensor/tensor,
../tensor/private/p_init_cpu,
- strutils, strformat, ospaths, options,
+ strutils, strformat, os, options,
nimhdf5

# constant which we use to keep track of how many tensors are stored
5 changes: 2 additions & 3 deletions src/laser/compiler_optim_hints.nim
@@ -72,7 +72,7 @@ template prefetch*[T](
else:
discard

- template pragma_ivdep() =
+ template pragma_ivdep() {.used.} =
## Tell the compiler to ignore unproven loop dependencies
## such as "a[i] = a[i + k] * c;" if k is unknown, as it introduces a loop
## dependency if it's negative
@@ -86,7 +86,7 @@ template pragma_ivdep() =
else: # Supported on ICC and Cray
{.emit: "pragma ivdep".}

- template withCompilerFunctionHints() =
+ template withCompilerFunctionHints() {.used.} =
## Not exposed, Nim codegen will declare them as normal C function.
## This messes up with N_NIMCALL, N_LIB_PRIVATE, N_INLINE and also
## creates duplicate symbols when one function called by a hot or pure function
@@ -146,4 +146,3 @@ template withCompilerFunctionHints() =
#
# This wouldn't be needed with fast-math because compiler would consider FP addition associative
# and create intermediate variables as needed to exploit this through put.
-
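
For reference, the {.used.} pragma added above marks a symbol as intentionally kept even when nothing in the current build references it, which silences the XDeclaredButNotUsed hint this commit is cleaning up. A minimal sketch with a hypothetical helper name (not from this file):

# Without {.used.}, an unreferenced helper emits an XDeclaredButNotUsed hint.
template platformSpecificHelper() {.used.} =
  echo "only referenced in certain build configurations"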
2 changes: 1 addition & 1 deletion src/laser/cpuinfo_x86.nim
@@ -14,7 +14,7 @@ proc cpuidX86(eaxi, ecxi: int32): tuple[eax, ebx, ecx, edx: int32] =
:"a"(`eaxi`), "c"(`ecxi`)"""
(eaxr, ebxr, ecxr, edxr)

- proc cpuNameX86(): string =
+ proc cpuNameX86(): string {.used.} =
var leaves {.global.} = cast[array[48, char]]([
cpuidX86(eaxi = 0x80000002'i32, ecxi = 0),
cpuidX86(eaxi = 0x80000003'i32, ecxi = 0),
2 changes: 1 addition & 1 deletion src/laser/openmp.nim
@@ -122,7 +122,7 @@ template omp_parallel*(body: untyped): untyped =
block: body

template omp_parallel_if*(condition: bool, body: untyped) =
- let predicate = condition # Make symbol valid and ensure it's lvalue
+ let predicate{.used.} = condition # Make symbol valid and ensure it's lvalue
# New line intentional: https://github.com/mratsim/Arraymancer/issues/407
{.emit: ["""
#pragma omp parallel if (", predicate, ")"""].}
1 change: 0 additions & 1 deletion src/laser/primitives/matrix_multiplication/gemm.nim
@@ -69,7 +69,6 @@ proc gebp_mkernel*[T; ukernel: static MicroKernel](
const
MR = ukernel.extract_mr
NR = ukernel.extract_nr
- PT = ukernel.extract_pt

# #####################################
# 4. for jr = 0,...,nc−1 in steps of nr
3 changes: 1 addition & 2 deletions src/laser/primitives/matrix_multiplication/gemm_tiling.nim
@@ -30,10 +30,9 @@
# - https://github.com/numforge/laser/wiki/GEMM-optimization-resources

import
- # ../../cpuinfo,
../../compiler_optim_hints,
../../private/[memory, align_unroller],
- typetraits, macros,
+ macros,
./gemm_utils

# ############################################################
@@ -5,7 +5,6 @@

import
../../compiler_optim_hints,
- ../../simd,
./gemm_tiling, ./gemm_utils,
./gemm_ukernel_generic,
macros
@@ -36,7 +35,7 @@ template ukernel_simd_proc(ukernel_name, epilogue_name: NimNode, edge: bool) {.d
`simd_setZero`, `simd_load_aligned`, `simd_broadcast_value`, `simd_fma`
)
const
- is_c_unit_stride = ukernel.extract_c_unit_stride()
+ # is_c_unit_stride = ukernel.extract_c_unit_stride()
MR = ukernel.extract_mr()
NR = ukernel.extract_nr()

@@ -56,7 +55,7 @@ template ukernel_simd_proc(ukernel_name, epilogue_name: NimNode, edge: bool) {.d
`simd_setZero`, `simd_load_aligned`, `simd_broadcast_value`, `simd_fma`
)
const
- is_c_unit_stride = ukernel.extract_c_unit_stride()
+ # is_c_unit_stride = ukernel.extract_c_unit_stride()
MR = ukernel.extract_mr()
NR = ukernel.extract_nr()

@@ -228,7 +227,7 @@ macro ukernel_simd_impl*(
bcast_fma.add quote do:
# At the edge: `i`+1 = MR so equivalent to loading A[(k+1)*MR]
`a_next` = `simd_broadcast_value`(`A`[`k`*`MR`+(`i`+1)])

# load current
let a = rA[i mod NbVecs]

@@ -23,7 +23,6 @@ template ukernel_generic_impl*(){.dirty.} =
const
MR = ukernel.extract_mr()
NR = ukernel.extract_nr()
- simd = ukernel.extract_cpu_simd

var AB{.align_variable.}: array[MR, array[NR, T]]
var A {.restrict.} = assume_aligned packedA # [kc, mc] by chunks of mr
@@ -86,7 +85,6 @@ proc gebb_ukernel_fallback*[T; ukernel: static MicroKernel](
) =
ukernel_generic_impl()

- const is_c_unit_stride = ukernel.extract_c_unit_stride
gebb_ukernel_epilogue_fallback(alpha, to_ptr(AB, MR, NR, T), beta, vC)

# ############################################################
3 changes: 1 addition & 2 deletions src/linear_algebra/helpers/auxiliary_lapack.nim
@@ -5,8 +5,7 @@
import
nimlapack,
./overload,
- ../../tensor/tensor,
- ../../private/sequninit
+ ../../tensor/tensor

# Auxiliary functions from Lapack
# ----------------------------------
4 changes: 2 additions & 2 deletions src/linear_algebra/helpers/least_squares_lapack.nim
@@ -105,9 +105,9 @@ proc gelsd*[T: SomeFloat](

if rank == n and m > n:
residuals = (b2[n .. _, _].squeeze(axis = 1)).fold_axis_inline(Tensor[T], fold_axis = 0):
- x = y .^ 2f # initial value
+ x = y ^. 2f # initial value
do:
- x += y .^ 2f # core loop
+ x += y ^. 2f # core loop
do:
x += y # values were stored in a temporary array of size == nb of cores
# to avoid multithreading issues and must be reduced one last time
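
The .^ to ^. change above (and ./ to /. in the k-means hunks below) reflects Arraymancer's renaming of its broadcasted element-wise operators to the dot-suffix spelling. A hedged usage sketch, assuming a recent Arraymancer version:

import arraymancer

let v = [1.0, 2.0, 3.0].toTensor
echo v ^. 2.0     # element-wise power: [1.0, 4.0, 9.0]
echo v /. v.sum   # element-wise division by a scalar, normalizing to sum 1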
7 changes: 3 additions & 4 deletions src/ml/clustering/kmeans.nim
@@ -3,8 +3,7 @@
# This file may not be copied, modified, or distributed except according to those terms.
import math, random, tables

- import
- ../../tensor/tensor, ../../linear_algebra/linear_algebra
+ import ../../tensor/tensor

proc euclidean_distance[T: SomeFloat](u: Tensor[T], v: Tensor[T], squared: bool = false): T {.noInit.} =
## Calculates the euclidean distance
@@ -60,7 +59,7 @@ proc get_closest_centroid[T: SomeFloat](x: Tensor[T], centroids: Tensor[T], cid:

proc get_candidates[T: SomeFloat](n: int, distances: Tensor[T]): Tensor[int] {.noInit.} =
## Sample candidates with probability weighted by the distances
- let probs = cumsum(distances ./ distances.sum)
+ let probs = cumsum(distances /. distances.sum)
result = newTensor[int](n)
for t in 0..<n:
block sampling:
@@ -201,7 +200,7 @@ proc assign_labels[T: SomeFloat](x: Tensor[T], n_clusters = 10, tol: float = 0.0
# Avoid NaNs
if counts[i] > 0:
var count = @[counts[i]].toTensor.astype(T)
- centroids[i, _] = (totals[i] ./ count).reshape(1, n_cols)
+ centroids[i, _] = (totals[i] /. count).reshape(1, n_cols)

return (labels: labels, centroids: centroids, inertia: inertia)

12 changes: 6 additions & 6 deletions src/ml/ml.nim
@@ -13,13 +13,13 @@
# limitations under the License.

import ./metrics/accuracy_score,
- ./metrics/common_error_functions
+ ./metrics/common_error_functions,
+ ./clustering/kmeans

export accuracy_score,
- common_error_functions
+ common_error_functions,
+ kmeans

when not defined(no_lapack):
- import ./dimensionality_reduction/pca,
- ./clustering/kmeans
- export pca,
- kmeans
+ import ./dimensionality_reduction/pca
+ export pca
6 changes: 1 addition & 5 deletions src/nn/activation/relu.nim
@@ -16,7 +16,7 @@ import ../../tensor/tensor,
../../nn_primitives/nn_primitives,
../../autograd/autograd

- type ReluActivation* {.final.} [TT] = ref object of Gate[TT]
+ type ReluActivation*[TT] {.final.} = ref object of Gate[TT]
cache: TT

proc relu_backward_ag[TT](self: ReluActivation[TT], payload: Payload[TT]): SmallDiffs[TT] =
@@ -55,7 +55,3 @@ proc relu*[TT](a: Variable[TT]): Variable[TT] =
# Caching for backprop
if a.is_grad_needed:
result.relu_cache(a)
-
-
-
-
