diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 7342d0b8..ca6fd778 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -156,10 +156,10 @@ jobs: # Needed to be able to run conda index - name: Install conda-build - run: mamba install conda-build + run: mamba install conda-build conda-index - name: Create conda channel - run: conda index ${{ env.CHANNEL_PATH }} + python -m conda_index ${{ env.CHANNEL_PATH }} - name: Test conda channel run: | diff --git a/dpbench/benchmarks/black_scholes/black_scholes_cupy.py b/dpbench/benchmarks/black_scholes/black_scholes_cupy.py index 927d71cb..8cf4650e 100644 --- a/dpbench/benchmarks/black_scholes/black_scholes_cupy.py +++ b/dpbench/benchmarks/black_scholes/black_scholes_cupy.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -import cupy as np +import cupy as cp from scipy.special import erf @@ -14,12 +14,12 @@ def black_scholes(nopt, price, strike, t, rate, volatility, call, put): S = strike T = t - a = np.log(P / S) + a = cp.log(P / S) b = T * mr z = T * sig_sig_two c = 0.25 * z - y = np.true_divide(1.0, np.sqrt(z)) + y = cp.true_divide(1.0, cp.sqrt(z)) w1 = (a - b + c) * y w2 = (a - b - c) * y @@ -27,7 +27,9 @@ def black_scholes(nopt, price, strike, t, rate, volatility, call, put): d1 = 0.5 + 0.5 * erf(w1) d2 = 0.5 + 0.5 * erf(w2) - Se = np.exp(b) * S + Se = cp.exp(b) * S call[:] = P * d1 - Se * d2 put[:] = call - P + Se + + cp.cuda.stream.get_current_stream().synchronize() diff --git a/dpbench/benchmarks/black_scholes/black_scholes_numba_dpex_k.py b/dpbench/benchmarks/black_scholes/black_scholes_numba_dpex_k.py index 502b6a2f..dc4ab77c 100644 --- a/dpbench/benchmarks/black_scholes/black_scholes_numba_dpex_k.py +++ b/dpbench/benchmarks/black_scholes/black_scholes_numba_dpex_k.py @@ -4,16 +4,19 @@ from math import erf, exp, log, sqrt -import numba_dpex as dpex +import numba_dpex.experimental as dpex +from numba_dpex import kernel_api as kapi @dpex.kernel -def _black_scholes_kernel(nopt, price, strike, t, rate, volatility, call, put): +def _black_scholes_kernel( + item: kapi.Item, nopt, price, strike, t, rate, volatility, call, put +): dtype = price.dtype mr = -rate sig_sig_two = volatility * volatility * dtype.type(2) - i = dpex.get_global_id(0) + i = item.get_id(0) P = price[i] S = strike[i] @@ -40,6 +43,15 @@ def _black_scholes_kernel(nopt, price, strike, t, rate, volatility, call, put): def black_scholes(nopt, price, strike, t, rate, volatility, call, put): - _black_scholes_kernel[dpex.Range(nopt)]( - nopt, price, strike, t, rate, volatility, call, put + dpex.call_kernel( + _black_scholes_kernel, + kapi.Range(nopt), + nopt, + price, + strike, + t, + rate, + volatility, + call, + put, ) diff --git a/dpbench/benchmarks/dbscan/dbscan_numba_dpex_k.py b/dpbench/benchmarks/dbscan/dbscan_numba_dpex_k.py index 66e7396c..730a94f2 100644 --- a/dpbench/benchmarks/dbscan/dbscan_numba_dpex_k.py +++ b/dpbench/benchmarks/dbscan/dbscan_numba_dpex_k.py @@ -4,8 +4,9 @@ import dpnp as np import numba as nb -import numba_dpex as dpex +import numba_dpex.experimental as dpex import numpy +from numba_dpex import kernel_api as kapi NOISE = -1 UNDEFINED = -2 @@ -50,8 +51,10 @@ def _queue_empty(head, tail): @dpex.kernel -def get_neighborhood(n, dim, data, eps, ind_lst, sz_lst, block_size, nblocks): - i = dpex.get_global_id(0) +def get_neighborhood( + item: kapi.Item, n, dim, data, eps, ind_lst, sz_lst, block_size, nblocks +): + i = item.get_id(0) start = i * block_size stop = n if i + 1 == nblocks else start + block_size @@ -130,7 +133,9 @@ def dbscan(n_samples, n_features, data, eps, min_pts): ) sizes = np.zeros_like(data, shape=n_samples, dtype=np.int64) - get_neighborhood[dpex.Range(n_samples)]( + dpex.call_kernel( + get_neighborhood, + kapi.Range(n_samples), n_samples, n_features, data, diff --git a/dpbench/benchmarks/gpairs/gpairs_cupy.py b/dpbench/benchmarks/gpairs/gpairs_cupy.py index 8e26f2cb..f0a58074 100644 --- a/dpbench/benchmarks/gpairs/gpairs_cupy.py +++ b/dpbench/benchmarks/gpairs/gpairs_cupy.py @@ -2,19 +2,23 @@ # # SPDX-License-Identifier: Apache-2.0 -import cupy as np +import cupy as cp def _gpairs_impl(x1, y1, z1, w1, x2, y2, z2, w2, rbins): dm = ( - np.square(x2 - x1[:, None]) - + np.square(y2 - y1[:, None]) - + np.square(z2 - z1[:, None]) + cp.square(x2 - x1[:, None]) + + cp.square(y2 - y1[:, None]) + + cp.square(z2 - z1[:, None]) ) - return np.array( - [np.outer(w1, w2)[dm <= rbins[k]].sum() for k in range(len(rbins))] + ret_arr = cp.array( + [cp.outer(w1, w2)[dm <= rbins[k]].sum() for k in range(len(rbins))] ) + cp.cuda.stream.get_current_stream().synchronize() + + return ret_arr + def gpairs(nopt, nbins, x1, y1, z1, w1, x2, y2, z2, w2, rbins, results): results[:] = _gpairs_impl(x1, y1, z1, w1, x2, y2, z2, w2, rbins) diff --git a/dpbench/benchmarks/gpairs/gpairs_numba_dpex_k.py b/dpbench/benchmarks/gpairs/gpairs_numba_dpex_k.py index 9635d224..65f088fa 100644 --- a/dpbench/benchmarks/gpairs/gpairs_numba_dpex_k.py +++ b/dpbench/benchmarks/gpairs/gpairs_numba_dpex_k.py @@ -3,12 +3,15 @@ # SPDX-License-Identifier: Apache-2.0 import numba_dpex as dpex +import numba_dpex.experimental as dpexexp +from numba_dpex import kernel_api as kapi # This implementation is numba dpex kernel version with atomics. -@dpex.kernel +@dpexexp.kernel def count_weighted_pairs_3d_intel_no_slm_ker( + nd_item: kapi.NdItem, n, nbins, slm_hist_size, @@ -25,14 +28,14 @@ def count_weighted_pairs_3d_intel_no_slm_ker( result, ): dtype = x0.dtype - lid0 = dpex.get_local_id(0) - gr0 = dpex.get_group_id(0) + lid0 = nd_item.get_local_id(0) + gr0 = nd_item.get_group().get_group_id(0) - lid1 = dpex.get_local_id(1) - gr1 = dpex.get_group_id(1) + lid1 = nd_item.get_local_id(1) + gr1 = nd_item.get_group().get_group_id(1) - lws0 = dpex.get_local_size(0) - lws1 = dpex.get_local_size(1) + lws0 = nd_item.get_local_range(0) + lws1 = nd_item.get_local_range(1) n_wi = 20 @@ -107,7 +110,8 @@ def count_weighted_pairs_3d_intel_no_slm_ker( pk = k for p in range(private_hist_size): - dpex.atomic.add(result, pk, private_hist[p]) + result_aref = kapi.AtomicRef(result, index=pk) + result_aref.fetch_add(private_hist[p]) pk += 1 @@ -147,7 +151,9 @@ def gpairs( ceiling_quotient(nbins, private_hist_size) * private_hist_size ) - count_weighted_pairs_3d_intel_no_slm_ker[dpex.NdRange(gwsRange, lwsRange)]( + dpexexp.call_kernel( + count_weighted_pairs_3d_intel_no_slm_ker, + kapi.NdRange(dpex.Range(*gwsRange), dpex.Range(*lwsRange)), nopt, nbins, slm_hist_size, diff --git a/dpbench/benchmarks/kmeans/kmeans_numba_dpex_k.py b/dpbench/benchmarks/kmeans/kmeans_numba_dpex_k.py index b2363256..a66193e8 100644 --- a/dpbench/benchmarks/kmeans/kmeans_numba_dpex_k.py +++ b/dpbench/benchmarks/kmeans/kmeans_numba_dpex_k.py @@ -6,9 +6,9 @@ from math import sqrt import numba_dpex as dpex -import numpy +import numba_dpex.experimental as dpexexp from dpctl import tensor as dpt -from numba_dpex import NdRange +from numba_dpex import kernel_api as kapi def DivUp(numerator, denominator): @@ -25,9 +25,15 @@ def getGroupByCluster( # noqa: C901 ): local_copies = min(4, max(1, DivUp(local_size_, num_centroids))) - @dpex.kernel + @dpexexp.kernel def groupByCluster( - arrayP, arrayPcluster, arrayC, NewCentroids, NewCount, last + nd_item: kapi.NdItem, + arrayP, + arrayPcluster, + arrayC, + NewCentroids, + NewCount, + last, ): numpoints = arrayP.shape[0] localCentroids = dpex.local.array((dims, num_centroids), dtype=dtyp) @@ -38,9 +44,9 @@ def groupByCluster( (local_copies, num_centroids), dtype=dpt.int32 ) - grid = dpex.get_group_id(0) - lid = dpex.get_local_id(0) - local_size = dpex.get_local_size(0) + grid = nd_item.get_group().get_group_id(0) + lid = nd_item.get_local_id(0) + local_size = nd_item.get_local_range(0) for i in range(lid, num_centroids * dims, local_size): localCentroids[i % dims, i // dims] = arrayC[i // dims, i % dims] @@ -51,7 +57,7 @@ def groupByCluster( for lc in range(local_copies): localNewCount[lc, c] = 0 - dpex.barrier(dpex.LOCAL_MEM_FENCE) + kapi.group_barrier(nd_item.get_group()) for i in range(WorkPI): point_id = grid * WorkPI * local_size + i * local_size + lid @@ -73,44 +79,59 @@ def groupByCluster( lc = lid % local_copies for d in range(dims): - dpex.atomic.add( - localNewCentroids, (lc, d, nearest_centroid), localP[d] + localNewCentroids_aref = kapi.AtomicRef( + localNewCentroids, + index=(lc, d, nearest_centroid), + address_space=kapi.AddressSpace.LOCAL, ) + localNewCentroids_aref.fetch_add(localP[d]) - dpex.atomic.add(localNewCount, (lc, nearest_centroid), 1) + localNewCount_aref = kapi.AtomicRef( + localNewCount, + index=(lc, nearest_centroid), + address_space=kapi.AddressSpace.LOCAL, + ) + localNewCount_aref.fetch_add(1) if last: arrayPcluster[point_id] = nearest_centroid - dpex.barrier(dpex.LOCAL_MEM_FENCE) + kapi.group_barrier(nd_item.get_group()) for i in range(lid, num_centroids * dims, local_size): local_centroid_d = dtyp.type(0) for lc in range(local_copies): local_centroid_d += localNewCentroids[lc, i % dims, i // dims] - dpex.atomic.add( - NewCentroids, - (i // dims, i % dims), - local_centroid_d, + NewCentroids_aref = kapi.AtomicRef( + NewCentroids, index=(i // dims, i % dims) ) + NewCentroids_aref.fetch_add(local_centroid_d) for c in range(lid, num_centroids, local_size): local_centroid_npoints = dpt.int32.type(0) for lc in range(local_copies): local_centroid_npoints += localNewCount[lc, c] - dpex.atomic.add(NewCount, c, local_centroid_npoints) + NewCount_aref = kapi.AtomicRef(NewCount, index=c) + NewCount_aref.fetch_add(local_centroid_npoints) return groupByCluster @lru_cache(maxsize=1) def getUpdateCentroids(dims, num_centroids, dtyp, local_size_): - @dpex.kernel - def updateCentroids(diff, arrayC, arrayCnumpoint, NewCentroids, NewCount): - lid = dpex.get_local_id(0) - local_size = dpex.get_local_size(0) + @dpexexp.kernel + def updateCentroids( + nd_item: kapi.NdItem, + diff, + arrayC, + arrayCnumpoint, + NewCentroids, + NewCount, + ): + lid = nd_item.get_local_id(0) + local_size = nd_item.get_local_range(0) local_distance = dpex.local.array(local_size_, dtype=dtyp) @@ -134,7 +155,7 @@ def updateCentroids(diff, arrayC, arrayCnumpoint, NewCentroids, NewCount): max_distance = max(max_distance, distance) local_distance[c] = max_distance - dpex.barrier(dpex.LOCAL_MEM_FENCE) + kapi.group_barrier(nd_item.get_group()) if lid == 0: for c in range(local_size): @@ -147,19 +168,19 @@ def updateCentroids(diff, arrayC, arrayCnumpoint, NewCentroids, NewCount): @lru_cache(maxsize=1) def getUpdateLabels(dims, num_centroids, dtyp, WorkPI): - @dpex.kernel - def updateLabels(arrayP, arrayPcluster, arrayC): + @dpexexp.kernel + def updateLabels(nd_item: kapi.NdItem, arrayP, arrayPcluster, arrayC): numpoints = arrayP.shape[0] localCentroids = dpex.local.array((dims, num_centroids), dtype=dtyp) - grid = dpex.get_group_id(0) - lid = dpex.get_local_id(0) - local_size = dpex.get_local_size(0) + grid = nd_item.get_group().get_group_id(0) + lid = nd_item.get_local_id(0) + local_size = nd_item.get_local_range(0) for i in range(lid, num_centroids * dims, local_size): localCentroids[i % dims, i // dims] = arrayC[i // dims, i % dims] - dpex.barrier(dpex.LOCAL_MEM_FENCE) + kapi.group_barrier(nd_item.get_group()) for i in range(WorkPI): point_id = grid * WorkPI * local_size + i * local_size + lid @@ -224,19 +245,36 @@ def kmeans_kernel( for i in range(niters): last = i == (niters - 1) if diff_host < tolerance: - updateLabels[NdRange((global_size,), (local_size,))]( - arrayP, arrayPcluster, arrayC + dpexexp.call_kernel( + updateLabels, + kapi.NdRange((global_size,), (local_size,)), + arrayP, + arrayPcluster, + arrayC, ) break - groupByCluster[NdRange((global_size,), (local_size,))]( - arrayP, arrayPcluster, arrayC, NewCentroids, NewCount, last + dpexexp.call_kernel( + groupByCluster, + kapi.NdRange((global_size,), (local_size,)), + arrayP, + arrayPcluster, + arrayC, + NewCentroids, + NewCount, + last, ) update_centroid_size = min(num_centroids, local_size) - updateCentroids[ - NdRange((update_centroid_size,), (update_centroid_size,)) - ](diff, arrayC, arrayCnumpoint, NewCentroids, NewCount) + dpexexp.call_kernel( + updateCentroids, + kapi.NdRange((update_centroid_size,), (update_centroid_size,)), + diff, + arrayC, + arrayCnumpoint, + NewCentroids, + NewCount, + ) diff_host = dpt.asnumpy(diff)[0] diff --git a/dpbench/benchmarks/knn/knn_numba_dpex_k.py b/dpbench/benchmarks/knn/knn_numba_dpex_k.py index f5e3935c..640a94cd 100644 --- a/dpbench/benchmarks/knn/knn_numba_dpex_k.py +++ b/dpbench/benchmarks/knn/knn_numba_dpex_k.py @@ -5,11 +5,14 @@ from math import sqrt import numba_dpex as dpex +import numba_dpex.experimental as dpexexp import numpy as np +from numba_dpex import kernel_api as kapi -@dpex.kernel +@dpexexp.kernel def _knn_kernel( # noqa: C901: TODO: can we simplify logic? + item: kapi.Item, train, train_labels, test, @@ -21,7 +24,7 @@ def _knn_kernel( # noqa: C901: TODO: can we simplify logic? data_dim, ): dtype = train.dtype - i = dpex.get_global_id(0) + i = item.get_id(0) # here k has to be 5 in order to match with numpy queue_neighbors = dpex.private.array(shape=(5, 2), dtype=dtype) @@ -106,7 +109,9 @@ def knn( votes_to_classes, data_dim, ): - _knn_kernel[dpex.Range(test_size)]( + dpexexp.call_kernel( + _knn_kernel, + kapi.Range(test_size), x_train, y_train, x_test, diff --git a/dpbench/benchmarks/l2_norm/l2_norm_cupy.py b/dpbench/benchmarks/l2_norm/l2_norm_cupy.py index a0a979c5..95582772 100644 --- a/dpbench/benchmarks/l2_norm/l2_norm_cupy.py +++ b/dpbench/benchmarks/l2_norm/l2_norm_cupy.py @@ -2,10 +2,12 @@ # # SPDX-License-Identifier: Apache-2.0 -import cupy as np +import cupy as cp def l2_norm(a, d): - sq = np.square(a) + sq = cp.square(a) sum = sq.sum(axis=1) - d[:] = np.sqrt(sum) + d[:] = cp.sqrt(sum) + + cp.cuda.stream.get_current_stream().synchronize() diff --git a/dpbench/benchmarks/l2_norm/l2_norm_numba_dpex_k.py b/dpbench/benchmarks/l2_norm/l2_norm_numba_dpex_k.py index cbc7fe37..eb95183b 100644 --- a/dpbench/benchmarks/l2_norm/l2_norm_numba_dpex_k.py +++ b/dpbench/benchmarks/l2_norm/l2_norm_numba_dpex_k.py @@ -4,12 +4,13 @@ import math -import numba_dpex as dpex +import numba_dpex.experimental as dpex +from numba_dpex import kernel_api as kapi @dpex.kernel -def l2_norm_kernel(a, d): - i = dpex.get_global_id(0) +def l2_norm_kernel(item: kapi.Item, a, d): + i = item.get_id(0) a_rows = a.shape[1] d[i] = 0.0 for k in range(a_rows): @@ -18,4 +19,4 @@ def l2_norm_kernel(a, d): def l2_norm(a, d): - l2_norm_kernel[dpex.Range(a.shape[0])](a, d) + dpex.call_kernel(l2_norm_kernel, kapi.Range(a.shape[0]), a, d) diff --git a/dpbench/benchmarks/pairwise_distance/pairwise_distance_cupy.py b/dpbench/benchmarks/pairwise_distance/pairwise_distance_cupy.py index d4fc1110..0c534215 100644 --- a/dpbench/benchmarks/pairwise_distance/pairwise_distance_cupy.py +++ b/dpbench/benchmarks/pairwise_distance/pairwise_distance_cupy.py @@ -2,15 +2,17 @@ # # SPDX-License-Identifier: Apache-2.0 -import cupy as np +import cupy as cp def pairwise_distance(X1, X2, D): - x1 = np.sum(np.square(X1), axis=1) - x2 = np.sum(np.square(X2), axis=1) - np.dot(X1, X2.T, D) + x1 = cp.sum(cp.square(X1), axis=1) + x2 = cp.sum(cp.square(X2), axis=1) + cp.dot(X1, X2.T, D) D *= -2 x3 = x1.reshape(x1.size, 1) - np.add(D, x3, D) - np.add(D, x2, D) - np.sqrt(D, D) + cp.add(D, x3, D) + cp.add(D, x2, D) + cp.sqrt(D, D) + + cp.cuda.stream.get_current_stream().synchronize() diff --git a/dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_dpex_k.py b/dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_dpex_k.py index e0fa9982..c3dea96b 100644 --- a/dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_dpex_k.py +++ b/dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_dpex_k.py @@ -3,13 +3,14 @@ # SPDX-License-Identifier: Apache-2.0 import dpnp as np -import numba_dpex as dpex +import numba_dpex.experimental as dpex +from numba_dpex import kernel_api as kapi @dpex.kernel -def _pairwise_distance_kernel(X1, X2, D): - i = dpex.get_global_id(1) - j = dpex.get_global_id(0) +def _pairwise_distance_kernel(item: kapi.Item, X1, X2, D): + i = item.get_id(1) + j = item.get_id(0) X1_cols = X1.shape[1] @@ -21,4 +22,10 @@ def _pairwise_distance_kernel(X1, X2, D): def pairwise_distance(X1, X2, D): - _pairwise_distance_kernel[dpex.Range(X2.shape[0], X1.shape[0])](X1, X2, D) + dpex.call_kernel( + _pairwise_distance_kernel, + kapi.Range(X2.shape[0], X1.shape[0]), + X1, + X2, + D, + ) diff --git a/dpbench/benchmarks/pca/pca_cupy.py b/dpbench/benchmarks/pca/pca_cupy.py index 4bfb9631..f439117e 100644 --- a/dpbench/benchmarks/pca/pca_cupy.py +++ b/dpbench/benchmarks/pca/pca_cupy.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -import cupy as np +import cupy as cp def pca(data, dims_rescaled_data=2): @@ -10,13 +10,13 @@ def pca(data, dims_rescaled_data=2): data -= data.mean(axis=0) # calculate the covariance matrix - v = np.cov(data, rowvar=False, dtype=data.dtype) + v = cp.cov(data, rowvar=False, dtype=data.dtype) # calculate eigenvectors & eigenvalues of the covariance matrix - evalues, evectors = np.linalg.eigh(v) + evalues, evectors = cp.linalg.eigh(v) # sort eigenvalues and eigenvectors in decreasing order - idx = np.argsort(evalues)[::-1] + idx = cp.argsort(evalues)[::-1] evectors = evectors[:, idx] evalues = evalues[idx] @@ -25,7 +25,10 @@ def pca(data, dims_rescaled_data=2): evectors = evectors[:, :dims_rescaled_data] # carry out the transformation on the data using eigenvectors - tdata = np.dot(evectors.T, data.T).T + tdata = cp.dot(evectors.T, data.T).T + + cp.cuda.stream.get_current_stream().synchronize() # return the transformed data, eigenvalues, and eigenvectors + return tdata, evalues, evectors diff --git a/dpbench/benchmarks/rambo/rambo_cupy.py b/dpbench/benchmarks/rambo/rambo_cupy.py index 1ffb1b96..87b7b4b0 100644 --- a/dpbench/benchmarks/rambo/rambo_cupy.py +++ b/dpbench/benchmarks/rambo/rambo_cupy.py @@ -2,16 +2,18 @@ # # SPDX-License-Identifier: Apache-2.0 -import cupy as np +import cupy as cp def rambo(nevts, nout, C1, F1, Q1, output): C = 2.0 * C1 - 1.0 - S = np.sqrt(1 - np.square(C)) - F = 2.0 * np.pi * F1 - Q = -np.log(Q1) + S = cp.sqrt(1 - cp.square(C)) + F = 2.0 * cp.pi * F1 + Q = -cp.log(Q1) output[:, :, 0] = Q - output[:, :, 1] = Q * S * np.sin(F) - output[:, :, 2] = Q * S * np.cos(F) + output[:, :, 1] = Q * S * cp.sin(F) + output[:, :, 2] = Q * S * cp.cos(F) output[:, :, 3] = Q * C + + cp.cuda.stream.get_current_stream().synchronize() diff --git a/dpbench/benchmarks/rambo/rambo_numba_dpex_k.py b/dpbench/benchmarks/rambo/rambo_numba_dpex_k.py index 3937a442..43644d06 100644 --- a/dpbench/benchmarks/rambo/rambo_numba_dpex_k.py +++ b/dpbench/benchmarks/rambo/rambo_numba_dpex_k.py @@ -4,13 +4,14 @@ from math import cos, log, pi, sin, sqrt -import numba_dpex as dpex +import numba_dpex.experimental as dpex +from numba_dpex import kernel_api as kapi @dpex.kernel -def _rambo(C1, F1, Q1, nout, output): +def _rambo(item: kapi.Item, C1, F1, Q1, nout, output): dtype = C1.dtype - i = dpex.get_global_id(0) + i = item.get_id(0) for j in range(nout): C = dtype.type(2.0) * C1[i, j] - dtype.type(1.0) S = sqrt(dtype.type(1) - C * C) @@ -24,10 +25,4 @@ def _rambo(C1, F1, Q1, nout, output): def rambo(nevts, nout, C1, F1, Q1, output): - _rambo[dpex.Range(nevts)]( - C1, - F1, - Q1, - nout, - output, - ) + dpex.call_kernel(_rambo, kapi.Range(nevts), C1, F1, Q1, nout, output) diff --git a/dpbench/configs/bench_info/kmeans.toml b/dpbench/configs/bench_info/kmeans.toml index 1fea6d20..e2020b23 100644 --- a/dpbench/configs/bench_info/kmeans.toml +++ b/dpbench/configs/bench_info/kmeans.toml @@ -29,7 +29,8 @@ output_args = [ "arrayCnumpoint", ] # TODO: remove once fixed. Fails randomly -expected_failure_implementations = ["numba_mlir_k"] +# remove numba_dpex_k once atomics on SLM is implemented +expected_failure_implementations = ["numba_mlir_k", "numba_dpex_k"] [benchmark.parameters.S] npoints = 4096 diff --git a/dpbench/configs/bench_info/pairwise_distance.toml b/dpbench/configs/bench_info/pairwise_distance.toml index ac133a08..b4eece56 100644 --- a/dpbench/configs/bench_info/pairwise_distance.toml +++ b/dpbench/configs/bench_info/pairwise_distance.toml @@ -41,7 +41,7 @@ dims = 3 seed = 7777777 [benchmark.parameters.L] -npoints = 65536 +npoints = 44032 dims = 3 seed = 7777777 diff --git a/dpbench/infrastructure/benchmark_validation.py b/dpbench/infrastructure/benchmark_validation.py index 65d42a19..8db81345 100644 --- a/dpbench/infrastructure/benchmark_validation.py +++ b/dpbench/infrastructure/benchmark_validation.py @@ -91,7 +91,7 @@ def relative_error( Returns: relative error. """ ref_norm = np.linalg.norm(ref) - if ref_norm: + if ref_norm == 0: val_norm = np.linalg.norm(val) if val_norm == 0: return 0.0