Make masked_fill work with openArray values (#624)

mratsim · Feb 19, 2024 · 52cc834 · 52cc834
1 parent 800b0f5
commit 52cc834
Show file tree

Hide file tree

Showing 2 changed files with 92 additions and 38 deletions.
diff --git a/src/arraymancer/tensor/selectors.nim b/src/arraymancer/tensor/selectors.nim
@@ -180,25 +180,17 @@ proc masked_fill*[T](t: var Tensor[T], mask: openArray, value: T) =
     return
   t.masked_fill(mask.toTensor(), value)
 
-proc masked_fill*[T](t: var Tensor[T], mask: Tensor[bool], value: Tensor[T]) =
-  ## For each element ``t[index]`` of the input tensor ``t`` with index ``index``,
-  ## check if ``mask[index]`` is true. If so fill it with the _next_
-  ## element from the ``value`` tensor. Otherwise leave it untouched.
-  ##
-  ## Note that this does _not_ fill ``t[index]`` with ``value[index]``, but
-  ## with the n-th element of ``value`` where n is the number of true elements
-  ## in the mask before and including the index-th mask element.
-  ## Because of this, the value tensor must have at least as many elements as
-  ## the number of true elements in the mask. If that is not the case an
-  ## IndexDefect exception will be raised at runtime. The ``value`` tensor
-  ## can have even more values which will simply be ignored.
-  ##
-  ## Example:
-  ##
-  ##   t.masked_fill(t > 0, [3, 4, -1].toTensor)
-  ##
-  ## In this version of this procedure the boolean mask is a ``Tensor[bool]``
-  ## with the same size as the input tensor ``t``.
+template masked_fill_impl[T](t: var Tensor[T], mask: Tensor[bool], value: Tensor[T] | openArray[T]) =
+  ## Implementation of masked_fill for both openArray and Tensor value
+  ##
+  ## It should have been possible to use a regular procedure to implement
+  ## masked_fill both for openArrays and tensors. However, as of nim 2.0.2
+  ## there are some limitations / bugs with implicit type conversions when
+  ## applied to `or` typeclasses that contain openArrays. Because of that we've
+  ## had to encapsulate the implementation of masked_fill in a template, and
+  ## create 2 separate versions of the masked_fill procedure (one taking a
+  ## tensor value and another taking an openArray value) which call this
+  ## implementation template. Somehow this seems to work around the issue.
 
   if t.size == 0 or mask.size == 0:
     return
@@ -220,27 +212,27 @@ proc masked_fill*[T](t: var Tensor[T], mask: Tensor[bool], value: Tensor[T]) =
   var lock: Lock
   initLock(lock)
   var too_few_values = false
-
+  let value_size = value.len
   omp_parallel_blocks(block_offset, block_size, t.size):
     var n = block_offset
     for tElem, maskElem in mzip(t, mask, block_offset, block_size):
       if maskElem:
-        if n >= value.size:
+        if n >= value_size:
           withLock(lock):
             # The lock protection is technically unnecessary but it is good form
             too_few_values = true
           break
         tElem = value[n]
         inc n
   if too_few_values:
-    let error_msg = "masked_fill error: the size of the value tensor (" & $value.size &
+    let error_msg = "masked_fill error: the size of the value tensor (" & $value_size &
       ") is smaller than the number of true elements in the mask"
     when not(compileOption("mm", "arc") or compileOption("mm", "orc")):
       # Other memory management modes crash without showing the exception message
       echo error_msg
     raise newException(IndexDefect, error_msg)
 
-proc masked_fill*[T](t: var Tensor[T], mask: openArray, value: Tensor[T]) =
+proc masked_fill*[T](t: var Tensor[T], mask: Tensor[bool], value: Tensor[T]) =
   ## For each element ``t[index]`` of the input tensor ``t`` with index ``index``,
   ## check if ``mask[index]`` is true. If so fill it with the _next_
   ## element from the ``value`` tensor. Otherwise leave it untouched.
@@ -255,17 +247,48 @@ proc masked_fill*[T](t: var Tensor[T], mask: openArray, value: Tensor[T]) =
   ##
   ## Example:
   ##
-  ##   t.masked_fill([true, false, true, true], [3, 4, -1].toTensor)
+  ##   t.masked_fill(t > 0, [3, 4, -1].toTensor)
   ##
-  ## In this version of this procedure the boolean mask, which must have the
-  ## same size as the input tensor ``t``, is an openArray of bools, i.e.:
-  ##   - an array or sequence of bools
-  ##   - an array of arrays of bools,
-  ##   - ...
+  ## In this version of this procedure the boolean mask is a ``Tensor[bool]``
+  ## with the same size as the input tensor ``t``.
+  masked_fill_impl(t, mask, value)
+
+proc masked_fill*[T](t: var Tensor[T], mask: Tensor[bool], value: openArray[T]) =
+  ## Version of `masked_fill` that takes an openArray as the value
+  ##
+  ## For each element ``t[index]`` of the input tensor ``t`` with index ``index``,
+  ## check if ``mask[index]`` is true. If so fill it with the _next_
+  ## element from the ``value`` openArray. Otherwise leave it untouched.
+  ##
+  ## Note that this does _not_ fill ``t[index]`` with ``value[index]``, but
+  ## with the n-th element of ``value`` where n is the number of true elements
+  ## in the mask before and including the index-th mask element.
+  ## Because of this, the value openArray must have at least as many elements as
+  ## the number of true elements in the mask. If that is not the case an
+  ## IndexDefect exception will be raised at runtime. The ``value`` openArray
+  ## can have even more values which will simply be ignored.
+  ##
+  ## Example:
+  ##
+  ##   t.masked_fill(t > 0, [3, 4, -1])
+  ##
+  ## In this version of this procedure the boolean mask is a ``Tensor[bool]``
+  ## with the same size as the input tensor ``t``.
+  masked_fill_impl(t, mask, value)
 
+proc masked_fill*[T](t: var Tensor[T], mask: openArray, value: Tensor[T]) =
+  ## Version of masked_fill that takes an openArray[bool] as the mask (it is
+  ## converted with ``toTensor`` before filling) and a tensor as the value
   if t.size == 0 or mask.len == 0:
     return
-  t.masked_fill(mask.toTensor(), value)
+  masked_fill_impl(t, mask.toTensor(), value)
+
+proc masked_fill*[T](t: var Tensor[T], mask: openArray, value: openArray[T]) =
+  ## Version of masked_fill that takes an openArray[bool] as the mask (it is
+  ## converted with ``toTensor`` before filling) and an openArray as the value
+  if t.size == 0 or mask.len == 0:  # empty tensor or mask: nothing to fill
+    return
+  masked_fill_impl(t, mask.toTensor(), value)
 
 # Mask axis
 # --------------------------------------------------------------------------------------------

diff --git a/tests/tensor/test_selectors.nim b/tests/tensor/test_selectors.nim
@@ -163,7 +163,7 @@ proc main() =
         let expected = [1.0, 2.0, 3.0].toTensor()
         check: r == expected
 
-    test "Masked_fill":
+    test "Masked_fill with single value":
       # Numpy reference doc
       # https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#boolean-array-indexing
       # select non NaN
@@ -174,6 +174,7 @@ proc main() =
       let t = [[1.0, 2.0],
         [NaN, 3.0],
         [NaN, NaN]].toTensor
+
       block: # Single value masked fill
         var x = t.clone()
 
@@ -182,12 +183,34 @@ proc main() =
         let expected = [[1.0, 2.0], [-1.0, 3.0], [-1.0, -1.0]].toTensor()
         check: x == expected
 
-      block: # Multiple value masked fill
+      block: # Fill array/sequence mask with scalar value
         var x = t.clone()
 
+        x.masked_fill(
+          [[false,  false],
+          [true, false],
+          [true, true]],
+          -1.0
+        )
+
+        let expected = [[1.0, 2.0], [-1.0, 3.0], [-1.0, -1.0]].toTensor()
+        check: x == expected
+
+    test "Masked_fill with multiple values (Tensor or openArray)":
+      let t = [[1.0, 2.0],
+        [NaN, 3.0],
+        [NaN, NaN]].toTensor
+      let expected = [[1.0, 2.0], [-10.0, 3.0], [-20.0, -30.0]].toTensor()
+
+      block: # Tensor mask
+        # Fill with tensor
+        var x = t.clone()
         x.masked_fill(x.isNaN, [-10.0, -20.0, -30.0].toTensor())
+        check: x == expected
 
-        let expected = [[1.0, 2.0], [-10.0, 3.0], [-20.0, -30.0]].toTensor()
+        # Fill with openArray
+        x = t.clone()
+        x.masked_fill(x.isNaN, [-10.0, -20.0, -30.0])
         check: x == expected
 
         when compileOption("mm", "arc") or compileOption("mm", "orc"):
@@ -202,17 +225,25 @@ proc main() =
             exception_thrown_when_true_element_mask_count_exceeds_value_tensor_size = true
           check: exception_thrown_when_true_element_mask_count_exceeds_value_tensor_size
 
-      block: # Fill with regular arrays/sequences
+      block: # openArray mask
+        # Fill with tensor
         var x = t.clone()
-
         x.masked_fill(
           [[false,  false],
           [true, false],
           [true, true]],
-          -1.0
+          [-10.0, -20.0, -30.0].toTensor()
         )
+        check: x == expected
 
-        let expected = [[1.0, 2.0], [-1.0, 3.0], [-1.0, -1.0]].toTensor()
+        # Fill with openArray
+        x = t.clone()
+        x.masked_fill(
+          [[false,  false],
+          [true, false],
+          [true, true]],
+          [-10.0, -20.0, -30.0]
+        )
         check: x == expected
 
     test "Masked_axis_select":
@@ -255,7 +286,7 @@ proc main() =
                         [1, 1]].toTensor
         check r == expected
 
-    test "Masked_axis_fill with value":
+    test "Masked_axis_fill with single value":
       block: # Numpy
             # Fill all columns which sum up to greater than 1
             # with -10