Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

k-d tree performance improvements & KDE adjust fix #587

Merged
merged 13 commits into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
Arraymancer v0.7.x
=====================================================

Arraymancer v0.7.22 Sep. 12 2023
=====================================================

- performance improvements to the k-d tree implementation by avoiding
`pow` and `sqrt` calls if unnecessary and providing a custom code
path for euclidean distances
- fix an issue in `kde` such that the `adjust` argument actually takes effect

Arraymancer v0.7.21 Aug. 31 2023
=====================================================

Expand Down
2 changes: 1 addition & 1 deletion src/arraymancer/laser/openmp.nim
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# Compile-time name mangling for OpenMP thresholds
# Workaround https://github.com/nim-lang/Nim/issues/9365
# and https://github.com/nim-lang/Nim/issues/9366
import random
import std / random
from strutils import toHex

var mangling_rng {.compileTime.} = initRand(0x1337DEADBEEF)
Expand Down
2 changes: 1 addition & 1 deletion src/arraymancer/ml/clustering/kmeans.nim
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2018 Mamy André-Ratsimbazafy and the Arraymancer contributors
# Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
# This file may not be copied, modified, or distributed except according to those terms.
import math, random, tables
import std / [math, random, tables]

import ../../tensor
import ../../spatial/distances
Expand Down
43 changes: 34 additions & 9 deletions src/arraymancer/spatial/distances.nim
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,30 @@ proc distance*(metric: typedesc[Euclidean], v, w: Tensor[float], squared: static
assert v.squeeze.rank == 1
assert w.squeeze.rank == 1
# Note: possibly faster by writing `let uv = u -. v; dot(uv, uv);` ?
result = Minkowski.distance(v, w, p = 2.0, squared = squared)
#result = Minkowski.distance(v, w, p = 2.0, squared = squared)
## NOTE: this is the branch used in the kd-tree. It's very performance critical there,
## hence we use this simple manual code (benchmarked to be more than 2x faster than
## via a 'higher order' approach).
## DBSCAN clustering test (11,000 points)
## - debug mode, old branch: 98.5s
## - debug mode, this branch: 50s
## - danger mode, old branch: 6.3s
## - danger mode, this branch: 2.8s
when squared:
if v.is_C_contiguous and w.is_C_contiguous:
result = 0.0
var tmp = 0.0
let vBuf = v.toUnsafeView()
let wBuf = w.toUnsafeView()
for idx in 0 ..< v.size:
# Index the raw contiguous buffer directly; this also works for rank 2 tensors with `[1, N]` size, as this is
# what we get from `pairwiseDistance` due to not squeezing the dimensions anymore.
tmp = vBuf[idx] - wBuf[idx] # no need for abs, as we square
result += tmp*tmp
else: # Fall back to broadcasting implementation which handles non contiguous data
result = sum( abs(v -. w).map_inline(x * x) )
else:
result = sqrt( sum( abs(v -. w).map_inline(x * x) ) )

proc distance*(metric: typedesc[Jaccard], v, w: Tensor[float]): float =
## Computes the Jaccard distance between points `v` and `w`. Both need to
Expand Down Expand Up @@ -107,6 +130,7 @@ proc pairwiseDistances*(metric: typedesc[AnyMetric],
## `[1, n_dimensions]`. In this case all distances between this point and
## all in the other input will be computed so that the result is always of
## shape `[n_observations]`.
## If one input has only shape `[n_dimensions]` it is unsqueezed to `[1, n_dimensions]`.
##
## The first argument is the metric to compute the distance under. If the Minkowski metric
## is selected the power `p` is used.
Expand All @@ -121,29 +145,30 @@ proc pairwiseDistances*(metric: typedesc[AnyMetric],
if x.rank == y.rank and x.shape[0] == y.shape[0]:
for idx in 0 ..< n_obs:
when metric is Minkowski:
result[idx] = Minkowski.distance(x[idx, _].squeeze, y[idx, _].squeeze,
result[idx] = Minkowski.distance(x[idx, _], y[idx, _],
p = p, squared = squared)
elif metric is Euclidean:
result[idx] = Euclidean.distance(x[idx, _].squeeze, y[idx, _].squeeze,
result[idx] = Euclidean.distance(x[idx, _], y[idx, _],
squared = squared)
else:
result[idx] = metric.distance(x[idx, _].squeeze, y[idx, _].squeeze)
result[idx] = metric.distance(x[idx, _], y[idx, _])
else:
# determine which one is 1 along n_observations
let nx = if x.rank == 2 and x.shape[0] == n_obs: x else: y
let ny = if x.rank == 2 and x.shape[0] == n_obs: y else: x
var ny = if x.rank == 2 and x.shape[0] == n_obs: y else: x
# in this case compute distance between all `nx` and single `ny`

if ny.rank == 1: # unsqueeze to have both rank 2
ny = ny.unsqueeze(0)
var idx = 0
for ax in axis(nx, 0):
when metric is Minkowski:
result[idx] = Minkowski.distance(ax.squeeze, ny.squeeze,
result[idx] = Minkowski.distance(ax, ny,
p = p, squared = squared)
elif metric is Euclidean:
result[idx] = Euclidean.distance(ax.squeeze, ny.squeeze,
result[idx] = Euclidean.distance(ax, ny,
squared = squared)
else:
result[idx] = metric.distance(ax.squeeze, ny.squeeze)
result[idx] = metric.distance(ax, ny)
inc idx

proc distanceMatrix*(metric: typedesc[AnyMetric],
Expand Down
48 changes: 37 additions & 11 deletions src/arraymancer/spatial/kdtree.nim
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import math, heapqueue, typetraits
import math, typetraits

import ../tensor
import ./distances

import std / heapqueue

#[

This module implements a k-d tree.
Expand Down Expand Up @@ -43,6 +45,8 @@ type
tree*: Node[T] ## the root node of the tree
size*: int ## number of nodes in the tree

proc isSquared(p: float): bool = abs(p - 2.0) < 1e-6

proc clone*[T](n: Node[T]): Node[T] =
result = Node[T](level: n.level,
id: n.id,
Expand Down Expand Up @@ -216,7 +220,7 @@ proc toTensorTuple[T, U](q: var HeapQueue[T],
retType: typedesc[U],
p = Inf): tuple[dist: Tensor[U],
idx: Tensor[int]] =
## Helper proc to convert the contents of the HeapQueue to a tuple of
## Helper proc to convert the contents of the `HeapQueue` to a tuple of
## two tensors.
##
## The heap queue here is used to accumulate neighbors in the `query` proc. It
Expand All @@ -227,18 +231,26 @@ proc toTensorTuple[T, U](q: var HeapQueue[T],
var vals = newTensorUninit[U](q.len)
var idxs = newTensorUninit[int](q.len)
var i = 0
let squared = isSquared(p)
if classify(p) == fcInf:
while q.len > 0:
let (val, idx) = q.pop
vals[i] = -val
idxs[i] = idx
inc i
else:
while q.len > 0:
let (val, idx) = q.pop
vals[i] = pow(-val, 1.0 / p)
idxs[i] = idx
inc i
if squared:
while q.len > 0:
let (val, idx) = q.pop
vals[i] = sqrt(-val)
idxs[i] = idx
inc i
else:
while q.len > 0:
let (val, idx) = q.pop
vals[i] = pow(-val, 1.0 / p)
idxs[i] = idx
inc i
result = (vals, idxs)

import ./tensor_compare_helper
Expand All @@ -257,14 +269,19 @@ proc queryImpl[T](
## and the static `yieldNumber` arguments it returns:
## - the `k` neighbors around `x` within a maximum `radius` (`yieldNumber = true`)
## - all points around `x` within `radius` (`yieldNumber = false`)
let squared = isSquared(p)

var side_distances = map2_inline(x -. tree.maxes,
tree.mins -. x):
max(0, max(x, y))

var min_distance: T
var distanceUpperBound = radius
if classify(p) != fcInf:
side_distances = side_distances.map_inline(pow(x, p))
if squared:
side_distances = side_distances.map_inline(x*x)
else:
side_distances = side_distances.map_inline(pow(x, p))
min_distance = sum(side_distances)
else:
min_distance = max(side_distances)
Expand All @@ -276,7 +293,6 @@ proc queryImpl[T](
bind tensor_compare_helper.`<`
var q = initHeapQueue[(T, Tensor[T], Node[T])]()
q.push (min_distance, side_distances.clone, tree.tree)

# priority queue for nearest neighbors, i.e. our result
# - (- distance ** p) from input `x` to current point
# - index of point in `KDTree's` data
Expand All @@ -288,12 +304,18 @@ proc queryImpl[T](
epsfac = 1.T
elif classify(p) == fcInf:
epsfac = T(1 / (1 + eps))
elif squared:
let tmp = 1 + eps
epsfac = T(1 / (tmp*tmp))
else:
epsfac = T(1 / pow(1 + eps, p))

# normalize the radius to the correct power
if classify(p) != fcInf and classify(distanceUpperBound) != fcInf:
distanceUpperBound = pow(distanceUpperBound, p)
if squared:
distanceUpperBound = distanceUpperBound*distanceUpperBound
else:
distanceUpperBound = pow(distanceUpperBound, p)

var node: Node[T]
while q.len > 0:
Expand Down Expand Up @@ -334,7 +356,11 @@ proc queryImpl[T](
sd[node.split_dim] = abs(node.split - x[node.split_dim])
min_distance = min_distance - side_distances[node.split_dim] + sd[node.split_dim]
else:
sd[node.split_dim] = pow(abs(node.split - x[node.split_dim]), p)
if squared:
let tmp = node.split - x[node.split_dim]
sd[node.split_dim] = tmp*tmp
else:
sd[node.split_dim] = pow(abs(node.split - x[node.split_dim]), p)
min_distance = min_distance - side_distances[node.split_dim] + sd[node.split_dim]

if min_distance <= distanceUpperBound * epsfac:
Expand Down
4 changes: 2 additions & 2 deletions src/arraymancer/spatial/neighbors.nim
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ proc nearestNeighbors*[T](X: Tensor[T], eps: float, metric: typedesc[AnyMetric],
let kd = kdTree(X)
result = newSeq[Tensor[int]](X.shape[0])
var idx = 0
for v in axis(X, 0):
let (dist, idxs) = kd.query_ball_point(v.squeeze, radius = eps, metric = metric)
for i in 0 ..< X.shape[0]:
let (dist, idxs) = kd.query_ball_point(X[i, _].squeeze, radius = eps, metric = metric)
result[idx] = idxs
inc idx
else:
Expand Down
9 changes: 4 additions & 5 deletions src/arraymancer/stats/kde.nim
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,8 @@ proc kde*[T: SomeNumber; U: int | Tensor[SomeNumber] | openArray[SomeNumber]](
var t = t.asType(float)
let A = min(std(t),
iqr(t) / 1.34)
let bwAct = if classify(bw) != fcNan: bw
else: 0.9 * A * pow(N.float, -1.0/5.0)

var bwAct = if classify(bw) == fcNormal: bw
else: adjust * (0.9 * A * pow(N.float, -1.0/5.0))
var weights = weights.asType(float)
if weights.size > 0:
doAssert weights.size == t.size
Expand All @@ -142,10 +141,10 @@ proc kde*[T: SomeNumber; U: int | Tensor[SomeNumber] | openArray[SomeNumber]](
let nsamples = samples
elif U is seq | array:
let x = toTensor(@samples).asType(float)
let nsamples = x.size
let nsamples = x.size.int
else:
let x = samples.asType(float)
let nsamples = x.size
let nsamples = x.size.int
result = newTensor[float](nsamples)
let norm = 1.0 / (N.float * bwAct)
var
Expand Down
28 changes: 21 additions & 7 deletions src/arraymancer/tensor/aggregate.nim
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,8 @@ proc cumprod*[T](arg: Tensor[T], axis: int = 0): Tensor[T] = # from hugogranstro
else:
temp[_] = result.atAxisIndex(axis, i-1) *. tAxis

when (NimMajor, NimMinor, NimPatch) > (1, 6, 0):
import std/atomics
proc nonzero*[T](arg: Tensor[T]): Tensor[int] =
## Returns the indices, which are non zero as a `Tensor[int]`.
##
Expand Down Expand Up @@ -368,15 +370,27 @@ proc nonzero*[T](arg: Tensor[T]): Tensor[int] =
## # - 1 -> 4 in col 1
## # - 0 -> 5 in col 0
## # - 1 -> 6 in col 1
var count = 0 # number of non zero elements
let mask = map_inline(arg):
block:
let cond = x != 0.T
if cond:
when (NimMajor, NimMinor, NimPatch) > (1, 6, 0):
## Use an `Atomic` counter, as otherwise the code breaks if compiled with `-d:openmp`!
var count: Atomic[int]
count.store(0)
let mask = map_inline(arg):
block:
let cond = x != 0.T
if cond:
atomicInc count
cond

result = newTensor[int]([arg.shape.len, count.load])
else:
let mask = map_inline(arg): # generate the mask
x != 0.T
var count = 0 # and count non zero elements (avoid openmp issues)
for x in mask:
if x:
inc count
cond
result = newTensor[int]([arg.shape.len, count])

result = newTensor[int]([arg.shape.len, count])
var ax = 0 # current axis
var k = 0 # counter for indices in one axis
for idx, x in mask:
Expand Down
2 changes: 1 addition & 1 deletion src/arraymancer/tensor/init_cpu.nim
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import
../laser/strided_iteration/foreach,
./data_structure,
# Standard library
random,
std / random,
math

export initialization
Expand Down
Loading