Merge branch 'IntelPython:main' into main

adarshyoga · Feb 23, 2024 · 21f3c4f · 21f3c4f
2 parents 32ccc67 + 6120680
commit 21f3c4f
Show file tree

Hide file tree

Showing 18 changed files with 196 additions and 111 deletions.
diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
@@ -156,10 +156,10 @@ jobs:
 
       # Needed to be able to run conda index
       - name: Install conda-build
-        run: mamba install conda-build
+        run: mamba install conda-build conda-index
 
       - name: Create conda channel
-        run: conda index ${{ env.CHANNEL_PATH }}
+        run: python -m conda_index ${{ env.CHANNEL_PATH }}
 
       - name: Test conda channel
         run: |

diff --git a/dpbench/benchmarks/black_scholes/black_scholes_cupy.py b/dpbench/benchmarks/black_scholes/black_scholes_cupy.py
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-import cupy as np
+import cupy as cp
 from scipy.special import erf
 
 
@@ -14,20 +14,22 @@ def black_scholes(nopt, price, strike, t, rate, volatility, call, put):
     S = strike
     T = t
 
-    a = np.log(P / S)
+    a = cp.log(P / S)
     b = T * mr
 
     z = T * sig_sig_two
     c = 0.25 * z
-    y = np.true_divide(1.0, np.sqrt(z))
+    y = cp.true_divide(1.0, cp.sqrt(z))
 
     w1 = (a - b + c) * y
     w2 = (a - b - c) * y
 
     d1 = 0.5 + 0.5 * erf(w1)
     d2 = 0.5 + 0.5 * erf(w2)
 
-    Se = np.exp(b) * S
+    Se = cp.exp(b) * S
 
     call[:] = P * d1 - Se * d2
     put[:] = call - P + Se
+
+    cp.cuda.stream.get_current_stream().synchronize()
diff --git a/dpbench/benchmarks/black_scholes/black_scholes_numba_dpex_k.py b/dpbench/benchmarks/black_scholes/black_scholes_numba_dpex_k.py
@@ -4,16 +4,19 @@
 
 from math import erf, exp, log, sqrt
 
-import numba_dpex as dpex
+import numba_dpex.experimental as dpex
+from numba_dpex import kernel_api as kapi
 
 
 @dpex.kernel
-def _black_scholes_kernel(nopt, price, strike, t, rate, volatility, call, put):
+def _black_scholes_kernel(
+    item: kapi.Item, nopt, price, strike, t, rate, volatility, call, put
+):
     dtype = price.dtype
     mr = -rate
     sig_sig_two = volatility * volatility * dtype.type(2)
 
-    i = dpex.get_global_id(0)
+    i = item.get_id(0)
 
     P = price[i]
     S = strike[i]
@@ -40,6 +43,15 @@ def _black_scholes_kernel(nopt, price, strike, t, rate, volatility, call, put):
 
 
 def black_scholes(nopt, price, strike, t, rate, volatility, call, put):
-    _black_scholes_kernel[dpex.Range(nopt)](
-        nopt, price, strike, t, rate, volatility, call, put
+    dpex.call_kernel(
+        _black_scholes_kernel,
+        kapi.Range(nopt),
+        nopt,
+        price,
+        strike,
+        t,
+        rate,
+        volatility,
+        call,
+        put,
     )
diff --git a/dpbench/benchmarks/dbscan/dbscan_numba_dpex_k.py b/dpbench/benchmarks/dbscan/dbscan_numba_dpex_k.py
@@ -4,8 +4,9 @@
 
 import dpnp as np
 import numba as nb
-import numba_dpex as dpex
+import numba_dpex.experimental as dpex
 import numpy
+from numba_dpex import kernel_api as kapi
 
 NOISE = -1
 UNDEFINED = -2
@@ -50,8 +51,10 @@ def _queue_empty(head, tail):
 
 
 @dpex.kernel
-def get_neighborhood(n, dim, data, eps, ind_lst, sz_lst, block_size, nblocks):
-    i = dpex.get_global_id(0)
+def get_neighborhood(
+    item: kapi.Item, n, dim, data, eps, ind_lst, sz_lst, block_size, nblocks
+):
+    i = item.get_id(0)
 
     start = i * block_size
     stop = n if i + 1 == nblocks else start + block_size
@@ -130,7 +133,9 @@ def dbscan(n_samples, n_features, data, eps, min_pts):
     )
     sizes = np.zeros_like(data, shape=n_samples, dtype=np.int64)
 
-    get_neighborhood[dpex.Range(n_samples)](
+    dpex.call_kernel(
+        get_neighborhood,
+        kapi.Range(n_samples),
         n_samples,
         n_features,
         data,

diff --git a/dpbench/benchmarks/gpairs/gpairs_cupy.py b/dpbench/benchmarks/gpairs/gpairs_cupy.py
@@ -2,19 +2,23 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-import cupy as np
+import cupy as cp
 
 
 def _gpairs_impl(x1, y1, z1, w1, x2, y2, z2, w2, rbins):
     dm = (
-        np.square(x2 - x1[:, None])
-        + np.square(y2 - y1[:, None])
-        + np.square(z2 - z1[:, None])
+        cp.square(x2 - x1[:, None])
+        + cp.square(y2 - y1[:, None])
+        + cp.square(z2 - z1[:, None])
     )
-    return np.array(
-        [np.outer(w1, w2)[dm <= rbins[k]].sum() for k in range(len(rbins))]
+    ret_arr = cp.array(
+        [cp.outer(w1, w2)[dm <= rbins[k]].sum() for k in range(len(rbins))]
     )
 
+    cp.cuda.stream.get_current_stream().synchronize()
+
+    return ret_arr
+
 
 def gpairs(nopt, nbins, x1, y1, z1, w1, x2, y2, z2, w2, rbins, results):
     results[:] = _gpairs_impl(x1, y1, z1, w1, x2, y2, z2, w2, rbins)
diff --git a/dpbench/benchmarks/gpairs/gpairs_numba_dpex_k.py b/dpbench/benchmarks/gpairs/gpairs_numba_dpex_k.py
@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import numba_dpex as dpex
+import numba_dpex.experimental as dpexexp
+from numba_dpex import kernel_api as kapi
 
 # This implementation is numba dpex kernel version with atomics.
 
 
-@dpex.kernel
+@dpexexp.kernel
 def count_weighted_pairs_3d_intel_no_slm_ker(
+    nd_item: kapi.NdItem,
     n,
     nbins,
     slm_hist_size,
@@ -25,14 +28,14 @@ def count_weighted_pairs_3d_intel_no_slm_ker(
     result,
 ):
     dtype = x0.dtype
-    lid0 = dpex.get_local_id(0)
-    gr0 = dpex.get_group_id(0)
+    lid0 = nd_item.get_local_id(0)
+    gr0 = nd_item.get_group().get_group_id(0)
 
-    lid1 = dpex.get_local_id(1)
-    gr1 = dpex.get_group_id(1)
+    lid1 = nd_item.get_local_id(1)
+    gr1 = nd_item.get_group().get_group_id(1)
 
-    lws0 = dpex.get_local_size(0)
-    lws1 = dpex.get_local_size(1)
+    lws0 = nd_item.get_local_range(0)
+    lws1 = nd_item.get_local_range(1)
 
     n_wi = 20
 
@@ -107,7 +110,8 @@ def count_weighted_pairs_3d_intel_no_slm_ker(
 
         pk = k
         for p in range(private_hist_size):
-            dpex.atomic.add(result, pk, private_hist[p])
+            result_aref = kapi.AtomicRef(result, index=pk)
+            result_aref.fetch_add(private_hist[p])
             pk += 1
 
 
@@ -147,7 +151,9 @@ def gpairs(
         ceiling_quotient(nbins, private_hist_size) * private_hist_size
     )
 
-    count_weighted_pairs_3d_intel_no_slm_ker[dpex.NdRange(gwsRange, lwsRange)](
+    dpexexp.call_kernel(
+        count_weighted_pairs_3d_intel_no_slm_ker,
+        kapi.NdRange(dpex.Range(*gwsRange), dpex.Range(*lwsRange)),
         nopt,
         nbins,
         slm_hist_size,