Skip to content

Commit

Permalink
Make masked_fill work with openArray values (#624)
Browse files Browse the repository at this point in the history
  • Loading branch information
AngelEzquerra authored Feb 19, 2024
1 parent 800b0f5 commit 52cc834
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 38 deletions.
83 changes: 53 additions & 30 deletions src/arraymancer/tensor/selectors.nim
Original file line number Diff line number Diff line change
Expand Up @@ -180,25 +180,17 @@ proc masked_fill*[T](t: var Tensor[T], mask: openArray, value: T) =
return
t.masked_fill(mask.toTensor(), value)

proc masked_fill*[T](t: var Tensor[T], mask: Tensor[bool], value: Tensor[T]) =
## For each element ``t[index]`` of the input tensor ``t`` with index ``index``,
## check if ``mask[index]`` is true. If so fill it with the _next_
## element from the ``value`` tensor. Otherwise leave it untouched.
##
## Note that this does _not_ fill ``t[index]`` with ``value[index]``, but
## with the n-th element of ``value`` where n is the number of true elements
## in the mask before and including the index-th mask element.
## Because of this, the value tensor must have at least as many elements as
## the number of true elements in the mask. If that is not the case an
## IndexDefect exception will be raised at runtime. The ``value`` tensor
## can have even more values which will simply be ignored.
##
## Example:
##
## t.masked_fill(t > 0, [3, 4, -1].toTensor)
##
## In this version of this procedure the boolean mask is a ``Tensor[bool]``
## with the same size as the input tensor ``t``.
template masked_fill_impl[T](t: var Tensor[T], mask: Tensor[bool], value: Tensor[T] | openArray[T]) =
## Implementation of masked_fill for both openArray and Tensor value
##
## It should have been possible to use a regular procedure to implement
## masked_fill both for openArrays and tensors. However, as of Nim 2.0.2
## there are some limitations / bugs with implicit type conversions when
## applied to `or` typeclasses that contain openArrays. Because of that we've
## had to encapsulate the implementation of masked_fill in a template, and
## create 2 separate versions of the masked_fill procedure (one taking a
## tensor value and another taking an openArray value) which call this
## implementation template. Somehow this seems to work around the issue.

if t.size == 0 or mask.size == 0:
return
Expand All @@ -220,27 +212,27 @@ proc masked_fill*[T](t: var Tensor[T], mask: Tensor[bool], value: Tensor[T]) =
var lock: Lock
initLock(lock)
var too_few_values = false

let value_size = value.len
omp_parallel_blocks(block_offset, block_size, t.size):
var n = block_offset
for tElem, maskElem in mzip(t, mask, block_offset, block_size):
if maskElem:
if n >= value.size:
if n >= value_size:
withLock(lock):
# The lock protection is technically unnecessary but it is good form
too_few_values = true
break
tElem = value[n]
inc n
if too_few_values:
let error_msg = "masked_fill error: the size of the value tensor (" & $value.size &
let error_msg = "masked_fill error: the size of the value tensor (" & $value_size &
") is smaller than the number of true elements in the mask"
when not(compileOption("mm", "arc") or compileOption("mm", "orc")):
# Other memory management modes crash without showing the exception message
echo error_msg
raise newException(IndexDefect, error_msg)

proc masked_fill*[T](t: var Tensor[T], mask: openArray, value: Tensor[T]) =
proc masked_fill*[T](t: var Tensor[T], mask: Tensor[bool], value: Tensor[T]) =
## For each element ``t[index]`` of the input tensor ``t`` with index ``index``,
## check if ``mask[index]`` is true. If so fill it with the _next_
## element from the ``value`` tensor. Otherwise leave it untouched.
Expand All @@ -255,17 +247,48 @@ proc masked_fill*[T](t: var Tensor[T], mask: openArray, value: Tensor[T]) =
##
## Example:
##
## t.masked_fill([true, false, true, true], [3, 4, -1].toTensor)
## t.masked_fill(t > 0, [3, 4, -1].toTensor)
##
## In this version of this procedure the boolean mask, which must have the
## same size as the input tensor ``t``, is an openArray of bools, i.e.:
## - an array or sequence of bools
## - an array of arrays of bools,
## - ...
## In this version of this procedure the boolean mask is a ``Tensor[bool]``
## with the same size as the input tensor ``t``.
masked_fill_impl(t, mask, value)

proc masked_fill*[T](t: var Tensor[T], mask: Tensor[bool], value: openArray[T]) =
  ## Version of `masked_fill` that takes an openArray as the value
  ##
  ## For each element ``t[index]`` of the input tensor ``t`` with index ``index``,
  ## check if ``mask[index]`` is true. If so fill it with the _next_
  ## element from the ``value`` openArray. Otherwise leave it untouched.
  ##
  ## Note that this does _not_ fill ``t[index]`` with ``value[index]``, but
  ## with the n-th element of ``value`` where n is the number of true elements
  ## in the mask before and including the index-th mask element.
  ## Because of this, the value openArray must have at least as many elements as
  ## the number of true elements in the mask. If that is not the case an
  ## IndexDefect exception will be raised at runtime. The ``value`` openArray
  ## can have even more values which will simply be ignored.
  ##
  ## Example:
  ##
  ##     t.masked_fill(t > 0, [3, 4, -1])
  ##
  ## In this version of this procedure the boolean mask is a ``Tensor[bool]``
  ## with the same size as the input tensor ``t``.
  # All the work is delegated to the shared implementation template, which
  # accepts the value either as a Tensor or as an openArray.
  masked_fill_impl(t, mask, value)

proc masked_fill*[T](t: var Tensor[T], mask: openArray, value: Tensor[T]) =
## Version of masked_fill that takes an openArray[bool] as the mask
## and a tensor as the value
if t.size == 0 or mask.len == 0:
return
t.masked_fill(mask.toTensor(), value)
masked_fill_impl(t, mask.toTensor(), value)

proc masked_fill*[T](t: var Tensor[T], mask: openArray, value: openArray[T]) =
  ## Overload of ``masked_fill`` where both the mask and the replacement
  ## values are given as openArrays.
  ##
  ## The mask is converted with ``toTensor`` and the filling itself is
  ## delegated to ``masked_fill_impl``. Nothing is done when either the
  ## input tensor ``t`` or the ``mask`` is empty.
  if t.size != 0 and mask.len != 0:
    masked_fill_impl(t, mask.toTensor(), value)

# Mask axis
# --------------------------------------------------------------------------------------------
Expand Down
47 changes: 39 additions & 8 deletions tests/tensor/test_selectors.nim
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ proc main() =
let expected = [1.0, 2.0, 3.0].toTensor()
check: r == expected

test "Masked_fill":
test "Masked_fill with single value":
# Numpy reference doc
# https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#boolean-array-indexing
# select non NaN
Expand All @@ -174,6 +174,7 @@ proc main() =
let t = [[1.0, 2.0],
[NaN, 3.0],
[NaN, NaN]].toTensor

block: # Single value masked fill
var x = t.clone()

Expand All @@ -182,12 +183,34 @@ proc main() =
let expected = [[1.0, 2.0], [-1.0, 3.0], [-1.0, -1.0]].toTensor()
check: x == expected

block: # Multiple value masked fill
block: # Fill array/sequence mask with scalar value
var x = t.clone()

x.masked_fill(
[[false, false],
[true, false],
[true, true]],
-1.0
)

let expected = [[1.0, 2.0], [-1.0, 3.0], [-1.0, -1.0]].toTensor()
check: x == expected

test "Masked_fill with multiple values (Tensor or openArray)":
let t = [[1.0, 2.0],
[NaN, 3.0],
[NaN, NaN]].toTensor
let expected = [[1.0, 2.0], [-10.0, 3.0], [-20.0, -30.0]].toTensor()

block: # Tensor mask
# Fill with tensor
var x = t.clone()
x.masked_fill(x.isNaN, [-10.0, -20.0, -30.0].toTensor())
check: x == expected

let expected = [[1.0, 2.0], [-10.0, 3.0], [-20.0, -30.0]].toTensor()
# Fill with openArray
x = t.clone()
x.masked_fill(x.isNaN, [-10.0, -20.0, -30.0])
check: x == expected

when compileOption("mm", "arc") or compileOption("mm", "orc"):
Expand All @@ -202,17 +225,25 @@ proc main() =
exception_thrown_when_true_element_mask_count_exceeds_value_tensor_size = true
check: exception_thrown_when_true_element_mask_count_exceeds_value_tensor_size

block: # Fill with regular arrays/sequences
block: # openArray mask
# Fill with tensor
var x = t.clone()

x.masked_fill(
[[false, false],
[true, false],
[true, true]],
-1.0
[-10.0, -20.0, -30.0].toTensor()
)
check: x == expected

let expected = [[1.0, 2.0], [-1.0, 3.0], [-1.0, -1.0]].toTensor()
# Fill with openArray
x = t.clone()
x.masked_fill(
[[false, false],
[true, false],
[true, true]],
[-10.0, -20.0, -30.0]
)
check: x == expected

test "Masked_axis_select":
Expand Down Expand Up @@ -255,7 +286,7 @@ proc main() =
[1, 1]].toTensor
check r == expected

test "Masked_axis_fill with value":
test "Masked_axis_fill with single value":
block: # Numpy
# Fill all columns which sum up to greater than 1
# with -10
Expand Down

0 comments on commit 52cc834

Please sign in to comment.