Skip to content

Commit 0646f1e

Browse files
authored
Merge pull request #335 from IntelPython/cupy_sync_add
Adding synchronize to cupy implementations
2 parents 8972feb + ba7c7a6 commit 0646f1e

File tree

6 files changed

+46
-31
lines changed

6 files changed

+46
-31
lines changed

dpbench/benchmarks/black_scholes/black_scholes_cupy.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
import cupy as np
5+
import cupy as cp
66
from scipy.special import erf
77

88

@@ -14,20 +14,22 @@ def black_scholes(nopt, price, strike, t, rate, volatility, call, put):
1414
S = strike
1515
T = t
1616

17-
a = np.log(P / S)
17+
a = cp.log(P / S)
1818
b = T * mr
1919

2020
z = T * sig_sig_two
2121
c = 0.25 * z
22-
y = np.true_divide(1.0, np.sqrt(z))
22+
y = cp.true_divide(1.0, cp.sqrt(z))
2323

2424
w1 = (a - b + c) * y
2525
w2 = (a - b - c) * y
2626

2727
d1 = 0.5 + 0.5 * erf(w1)
2828
d2 = 0.5 + 0.5 * erf(w2)
2929

30-
Se = np.exp(b) * S
30+
Se = cp.exp(b) * S
3131

3232
call[:] = P * d1 - Se * d2
3333
put[:] = call - P + Se
34+
35+
cp.cuda.stream.get_current_stream().synchronize()

dpbench/benchmarks/gpairs/gpairs_cupy.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,19 +2,23 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
import cupy as np
5+
import cupy as cp
66

77

88
def _gpairs_impl(x1, y1, z1, w1, x2, y2, z2, w2, rbins):
99
dm = (
10-
np.square(x2 - x1[:, None])
11-
+ np.square(y2 - y1[:, None])
12-
+ np.square(z2 - z1[:, None])
10+
cp.square(x2 - x1[:, None])
11+
+ cp.square(y2 - y1[:, None])
12+
+ cp.square(z2 - z1[:, None])
1313
)
14-
return np.array(
15-
[np.outer(w1, w2)[dm <= rbins[k]].sum() for k in range(len(rbins))]
14+
ret_arr = cp.array(
15+
[cp.outer(w1, w2)[dm <= rbins[k]].sum() for k in range(len(rbins))]
1616
)
1717

18+
cp.cuda.stream.get_current_stream().synchronize()
19+
20+
return ret_arr
21+
1822

1923
def gpairs(nopt, nbins, x1, y1, z1, w1, x2, y2, z2, w2, rbins, results):
2024
results[:] = _gpairs_impl(x1, y1, z1, w1, x2, y2, z2, w2, rbins)

dpbench/benchmarks/l2_norm/l2_norm_cupy.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,12 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
import cupy as np
5+
import cupy as cp
66

77

88
def l2_norm(a, d):
9-
sq = np.square(a)
9+
sq = cp.square(a)
1010
sum = sq.sum(axis=1)
11-
d[:] = np.sqrt(sum)
11+
d[:] = cp.sqrt(sum)
12+
13+
cp.cuda.stream.get_current_stream().synchronize()

dpbench/benchmarks/pairwise_distance/pairwise_distance_cupy.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2,15 +2,17 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
import cupy as np
5+
import cupy as cp
66

77

88
def pairwise_distance(X1, X2, D):
9-
x1 = np.sum(np.square(X1), axis=1)
10-
x2 = np.sum(np.square(X2), axis=1)
11-
np.dot(X1, X2.T, D)
9+
x1 = cp.sum(cp.square(X1), axis=1)
10+
x2 = cp.sum(cp.square(X2), axis=1)
11+
cp.dot(X1, X2.T, D)
1212
D *= -2
1313
x3 = x1.reshape(x1.size, 1)
14-
np.add(D, x3, D)
15-
np.add(D, x2, D)
16-
np.sqrt(D, D)
14+
cp.add(D, x3, D)
15+
cp.add(D, x2, D)
16+
cp.sqrt(D, D)
17+
18+
cp.cuda.stream.get_current_stream().synchronize()

dpbench/benchmarks/pca/pca_cupy.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,21 +2,21 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
import cupy as np
5+
import cupy as cp
66

77

88
def pca(data, dims_rescaled_data=2):
99
# mean center the data
1010
data -= data.mean(axis=0)
1111

1212
# calculate the covariance matrix
13-
v = np.cov(data, rowvar=False, dtype=data.dtype)
13+
v = cp.cov(data, rowvar=False, dtype=data.dtype)
1414

1515
# calculate eigenvectors & eigenvalues of the covariance matrix
16-
evalues, evectors = np.linalg.eigh(v)
16+
evalues, evectors = cp.linalg.eigh(v)
1717

1818
# sort eigenvalues and eigenvectors in decreasing order
19-
idx = np.argsort(evalues)[::-1]
19+
idx = cp.argsort(evalues)[::-1]
2020
evectors = evectors[:, idx]
2121
evalues = evalues[idx]
2222

@@ -25,7 +25,10 @@ def pca(data, dims_rescaled_data=2):
2525
evectors = evectors[:, :dims_rescaled_data]
2626

2727
# carry out the transformation on the data using eigenvectors
28-
tdata = np.dot(evectors.T, data.T).T
28+
tdata = cp.dot(evectors.T, data.T).T
29+
30+
cp.cuda.stream.get_current_stream().synchronize()
2931

3032
# return the transformed data, eigenvalues, and eigenvectors
33+
3134
return tdata, evalues, evectors

dpbench/benchmarks/rambo/rambo_cupy.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,16 +2,18 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
import cupy as np
5+
import cupy as cp
66

77

88
def rambo(nevts, nout, C1, F1, Q1, output):
99
C = 2.0 * C1 - 1.0
10-
S = np.sqrt(1 - np.square(C))
11-
F = 2.0 * np.pi * F1
12-
Q = -np.log(Q1)
10+
S = cp.sqrt(1 - cp.square(C))
11+
F = 2.0 * cp.pi * F1
12+
Q = -cp.log(Q1)
1313

1414
output[:, :, 0] = Q
15-
output[:, :, 1] = Q * S * np.sin(F)
16-
output[:, :, 2] = Q * S * np.cos(F)
15+
output[:, :, 1] = Q * S * cp.sin(F)
16+
output[:, :, 2] = Q * S * cp.cos(F)
1717
output[:, :, 3] = Q * C
18+
19+
cp.cuda.stream.get_current_stream().synchronize()

0 commit comments

Comments
 (0)