Skip to content

Commit

Permalink
Merge branch 'IntelPython:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
adarshyoga authored Apr 4, 2024
2 parents 61b6a45 + bece9a4 commit c186af9
Show file tree
Hide file tree
Showing 17 changed files with 69 additions and 53 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/build_and_run.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:

env:
# sycl is not included. Add it manually if you need it.
WORKLOADS: python,numpy,dpnp,numba_n,numba_np,numba_npr,numba_dpex_k,numba_dpex_n,numba_dpex_p,numba_mlir_k,numba_mlir_n,numba_mlir_p
WORKLOADS: python,numpy,dpnp,numba_n,numba_np,numba_npr,numba_dpex_k,numba_dpex_n,numba_dpex_p
PYTHONIOENCODING: 'utf-8'

jobs:
Expand Down Expand Up @@ -68,7 +68,6 @@ jobs:
shell: bash -l {0}
run: |
find ./environments -type f | xargs sed -i 's/intel::numpy/numpy/'
find ./environments -type f | xargs sed -i '/numba-mlir/d'
find ./environments -type f | xargs sed -i 's/setuptools>=42,<64/setuptools/'
- name: Setup miniconda
Expand Down Expand Up @@ -168,8 +167,9 @@ jobs:
- name: Run benchmarks
run: dpbench -i ${{env.WORKLOADS}} run -r2 --no-print-results --precision=${{matrix.precision}} || exit 1

- name: Run rodinia benchmarks
run: dpbench -i ${{env.WORKLOADS}} --last-run run -r2 --no-print-results --rodinia --no-dpbench --precision=${{matrix.precision}} || exit 1
# TODO: Re-enable rodinia benchmarks in CI once the workloads have been migrated to the new numba-dpex API
# - name: Run rodinia benchmarks
# run: dpbench -i ${{env.WORKLOADS}} --last-run run -r2 --no-print-results --rodinia --no-dpbench --precision=${{matrix.precision}} || exit 1

- name: Generate report
run: dpbench -i ${{env.WORKLOADS}} report || exit 1
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from math import erf, exp, log, sqrt

import numba_dpex.experimental as dpex
import numba_dpex as dpex
from numba_dpex import kernel_api as kapi


Expand Down
2 changes: 1 addition & 1 deletion dpbench/benchmarks/default/dbscan/dbscan_numba_dpex_k.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import dpnp as np
import numba as nb
import numba_dpex.experimental as dpex
import numba_dpex as dpex
import numpy
from numba_dpex import kernel_api as kapi

Expand Down
5 changes: 2 additions & 3 deletions dpbench/benchmarks/default/gpairs/gpairs_numba_dpex_k.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,12 @@
# SPDX-License-Identifier: Apache-2.0

import numba_dpex as dpex
import numba_dpex.experimental as dpexexp
from numba_dpex import kernel_api as kapi

# This implementation is numba dpex kernel version with atomics.


@dpexexp.kernel
@dpex.kernel
def count_weighted_pairs_3d_intel_no_slm_ker(
nd_item: kapi.NdItem,
n,
Expand Down Expand Up @@ -151,7 +150,7 @@ def gpairs(
ceiling_quotient(nbins, private_hist_size) * private_hist_size
)

dpexexp.call_kernel(
dpex.call_kernel(
count_weighted_pairs_3d_intel_no_slm_ker,
kapi.NdRange(dpex.Range(*gwsRange), dpex.Range(*lwsRange)),
nopt,
Expand Down
2 changes: 1 addition & 1 deletion dpbench/benchmarks/default/kmeans/kmeans_initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def initialize(npoints, niters, seed, ndims, ncentroids, types_dict):
arrayP = default_rng.uniform(XL, XH, (npoints, ndims)).astype(f_dtype)
arrayPclusters = np.ones(npoints, dtype=i_dtype)
arrayC = np.empty((ncentroids, ndims), dtype=f_dtype)
arrayCnumpoint = np.ones(ncentroids, dtype=i_dtype)
arrayCnumpoint = np.ones(ncentroids, dtype=np.int64)

arrayC[:] = arrayP[:ncentroids]

Expand Down
55 changes: 35 additions & 20 deletions dpbench/benchmarks/default/kmeans/kmeans_numba_dpex_k.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from math import sqrt

import numba_dpex as dpex
import numba_dpex.experimental as dpexexp
from dpctl import tensor as dpt
from numba_dpex import kernel_api as kapi

Expand All @@ -23,9 +22,7 @@ def Align(value, base):
def getGroupByCluster( # noqa: C901
dims, num_centroids, dtyp, WorkPI, local_size_
):
local_copies = min(4, max(1, DivUp(local_size_, num_centroids)))

@dpexexp.kernel
@dpex.kernel
def groupByCluster(
nd_item: kapi.NdItem,
arrayP,
Expand All @@ -34,15 +31,12 @@ def groupByCluster(
NewCentroids,
NewCount,
last,
local_copies,
localCentroids,
localNewCentroids,
localNewCount,
):
numpoints = arrayP.shape[0]
localCentroids = dpex.local.array((dims, num_centroids), dtype=dtyp)
localNewCentroids = dpex.local.array(
(local_copies, dims, num_centroids), dtype=dtyp
)
localNewCount = dpex.local.array(
(local_copies, num_centroids), dtype=dpt.int32
)

grid = nd_item.get_group().get_group_id(0)
lid = nd_item.get_local_id(0)
Expand Down Expand Up @@ -121,20 +115,19 @@ def groupByCluster(

@lru_cache(maxsize=1)
def getUpdateCentroids(dims, num_centroids, dtyp, local_size_):
@dpexexp.kernel
@dpex.kernel
def updateCentroids(
nd_item: kapi.NdItem,
diff,
arrayC,
arrayCnumpoint,
NewCentroids,
NewCount,
local_distance,
):
lid = nd_item.get_local_id(0)
local_size = nd_item.get_local_range(0)

local_distance = dpex.local.array(local_size_, dtype=dtyp)

max_distance = dtyp.type(0)
for c in range(lid, num_centroids, local_size):
numpoints = NewCount[c]
Expand Down Expand Up @@ -168,10 +161,11 @@ def updateCentroids(

@lru_cache(maxsize=1)
def getUpdateLabels(dims, num_centroids, dtyp, WorkPI):
@dpexexp.kernel
def updateLabels(nd_item: kapi.NdItem, arrayP, arrayPcluster, arrayC):
@dpex.kernel
def updateLabels(
nd_item: kapi.NdItem, arrayP, arrayPcluster, arrayC, localCentroids
):
numpoints = arrayP.shape[0]
localCentroids = dpex.local.array((dims, num_centroids), dtype=dtyp)

grid = nd_item.get_group().get_group_id(0)
lid = nd_item.get_local_id(0)
Expand Down Expand Up @@ -245,16 +239,31 @@ def kmeans_kernel(
for i in range(niters):
last = i == (niters - 1)
if diff_host < tolerance:
dpexexp.call_kernel(
localCentroids = kapi.LocalAccessor(
(dims, num_centroids), dtype=arrayP.dtype
)

dpex.call_kernel(
updateLabels,
kapi.NdRange((global_size,), (local_size,)),
arrayP,
arrayPcluster,
arrayC,
localCentroids,
)
break

dpexexp.call_kernel(
local_copies = min(4, max(1, DivUp(local_size, num_centroids)))
localCentroids = kapi.LocalAccessor(
(dims, num_centroids), dtype=arrayP.dtype
)
localNewCentroids = kapi.LocalAccessor(
(local_copies, dims, num_centroids), dtype=arrayP.dtype
)
localNewCount = kapi.LocalAccessor(
(local_copies, num_centroids), dtype=dpt.int64
)
dpex.call_kernel(
groupByCluster,
kapi.NdRange((global_size,), (local_size,)),
arrayP,
Expand All @@ -263,17 +272,23 @@ def kmeans_kernel(
NewCentroids,
NewCount,
last,
local_copies,
localCentroids,
localNewCentroids,
localNewCount,
)

local_distance = kapi.LocalAccessor(local_size, dtype=arrayP.dtype)
update_centroid_size = min(num_centroids, local_size)
dpexexp.call_kernel(
dpex.call_kernel(
updateCentroids,
kapi.NdRange((update_centroid_size,), (update_centroid_size,)),
diff,
arrayC,
arrayCnumpoint,
NewCentroids,
NewCount,
local_distance,
)
diff_host = dpt.asnumpy(diff)[0]

Expand Down
25 changes: 18 additions & 7 deletions dpbench/benchmarks/default/kmeans/kmeans_numba_dpex_p.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import dpnp as np
import numba as nb
import numba_dpex as dpex
from numba_dpex import kernel_api as kapi


# determine the euclidean distance from the cluster center to each point
Expand Down Expand Up @@ -40,12 +41,17 @@ def calCentroidsSum(


@dpex.kernel
def calCentroidsSum2(arrayP, arrayPcluster, arrayCsum, arrayCnumpoint):
i = dpex.get_global_id(0)
def calCentroidsSum2(
item: kapi.Item, arrayP, arrayPcluster, arrayCsum, arrayCnumpoint
):
i = item.get_id(0)
ci = arrayPcluster[i]
dpex.atomic.add(arrayCsum, (ci, 0), arrayP[i, 0])
dpex.atomic.add(arrayCsum, (ci, 1), arrayP[i, 1])
dpex.atomic.add(arrayCnumpoint, ci, 1)
arrayCsum_aref = kapi.AtomicRef(arrayCsum, index=(ci, 0))
arrayCsum_aref.fetch_add(arrayP[i, 0])
arrayCsum_aref = kapi.AtomicRef(arrayCsum, index=(ci, 1))
arrayCsum_aref.fetch_add(arrayP[i, 1])
arrayCnumpoint_aref = kapi.AtomicRef(arrayCnumpoint, index=ci)
arrayCnumpoint_aref.fetch_add(1)


# update the centroids array after computation
Expand Down Expand Up @@ -86,8 +92,13 @@ def kmeans_numba(arrayP, arrayPcluster, arrayC, arrayCnumpoint, niters):
num_centroids,
)

calCentroidsSum2[dpex.Range(num_points)](
arrayP, arrayPcluster, arrayCsum, arrayCnumpoint
dpex.call_kernel(
calCentroidsSum2,
kapi.Range(num_points),
arrayP,
arrayPcluster,
arrayCsum,
arrayCnumpoint,
)

# TODO: get rid of it once prange supports dtype
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,6 @@ void kmeans_sync(dpctl::tensor::usm_ndarray arrayP,
throw std::runtime_error("All arrays must have the same precision");
}

if (arrayPclusters.get_typenum() != arrayCnumpoint.get_typenum()) {
throw std::runtime_error("All arrays must have the same precision");
}

auto npoints = arrayP.get_shape(0);
auto ncentroids = arrayC.get_shape(0);
auto ndims = arrayC.get_shape(1);
Expand Down
5 changes: 2 additions & 3 deletions dpbench/benchmarks/default/knn/knn_numba_dpex_k.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@
from math import sqrt

import numba_dpex as dpex
import numba_dpex.experimental as dpexexp
import numpy as np
from numba_dpex import kernel_api as kapi


@dpexexp.kernel
@dpex.kernel
def _knn_kernel( # noqa: C901: TODO: can we simplify logic?
item: kapi.Item,
train,
Expand Down Expand Up @@ -109,7 +108,7 @@ def knn(
votes_to_classes,
data_dim,
):
dpexexp.call_kernel(
dpex.call_kernel(
_knn_kernel,
kapi.Range(test_size),
x_train,
Expand Down
2 changes: 1 addition & 1 deletion dpbench/benchmarks/default/l2_norm/l2_norm_numba_dpex_k.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import math

import numba_dpex.experimental as dpex
import numba_dpex as dpex
from numba_dpex import kernel_api as kapi


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# SPDX-License-Identifier: Apache-2.0

import dpnp as np
import numba_dpex.experimental as dpex
import numba_dpex as dpex
from numba_dpex import kernel_api as kapi


Expand Down
2 changes: 1 addition & 1 deletion dpbench/benchmarks/default/rambo/rambo_numba_dpex_k.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from math import cos, log, pi, sin, sqrt

import numba_dpex.experimental as dpex
import numba_dpex as dpex
from numba_dpex import kernel_api as kapi


Expand Down
2 changes: 1 addition & 1 deletion dpbench/configs/bench_info/kmeans.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ output_args = [
]
# TODO: remove once fixed. Fails randomly
# remove numba_dpex_k once atomics on SLM are implemented
expected_failure_implementations = ["numba_mlir_k", "numba_dpex_k"]
expected_failure_implementations = ["numba_mlir_k", "numba_dpex_k", "sycl"]

[benchmark.parameters.S]
npoints = 4096
Expand Down
3 changes: 1 addition & 2 deletions dpbench/configs/bench_info/knn.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ output_args = [
"predictions",
]

# `sycl` fails just on Windows
expected_failure_implementations = ["numba_dpex_p", "sycl"]
expected_failure_implementations = ["numba_dpex_p"]

[benchmark.parameters.S]
test_size = 1024
Expand Down
1 change: 0 additions & 1 deletion environments/conda-linux-sycl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ dependencies:
- dpctl
- dpnp
- numba-dpex
- numba-mlir
# TODO: fix issues on conda-forge build
- intel::dpcpp_linux-64==2024.0.0
- intel::dpcpp-cpp-rt==2024.0.0
Expand Down
1 change: 0 additions & 1 deletion environments/conda-win-sycl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ dependencies:
- dpctl
- dpnp
- numba-dpex
- numba-mlir
# TODO: switch to conda-forge, but it results in broken OpenCL rt (see below)
# - conda-forge::dpcpp_win-64
- intel::dpcpp_win-64==2024.0.0
Expand Down
1 change: 0 additions & 1 deletion environments/conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,3 @@ dependencies:
- dpctl
- dpnp
- numba-dpex
- numba-mlir

0 comments on commit c186af9

Please sign in to comment.