Skip to content

Commit a1f7f26

Browse files
author
roxx30198
committed
Running pathfinder with rodinia
1 parent eec58be commit a1f7f26

File tree

17 files changed

+501
-2
lines changed

17 files changed

+501
-2
lines changed

dpbench/benchmarks/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ add_subdirectory(kmeans)
1010
add_subdirectory(knn)
1111
add_subdirectory(gpairs)
1212
add_subdirectory(dbscan)
13+
add_subdirectory(rodinia)
1314

1415
# generate dpcpp version into config
1516
set(FILE ${CMAKE_SOURCE_DIR}/dpbench/configs/framework_info/dpcpp.toml)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
add_subdirectory(pathfinder)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
add_subdirectory(pathfinder_sycl_native_ext)
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
"""
6+
7+
Pathfinder Computation
8+
9+
This algorithm finds the shortest path from the first row to the last.
10+
11+
Input
12+
---------
13+
rows<int_64> : Indicates the number of rows
14+
15+
cols<int_64> : Indicates the number of cols
16+
17+
pyramid height<int_64> : Indicates pyramid height
18+
19+
Output
20+
21+
--------
22+
23+
result<array<int_64>> : Indicates the minimum distance from first row to last
24+
25+
Method:
26+
27+
The elements are fed to the kernel row-wise and the minimum distance is computed based
28+
on the minimum weight of the neighbors above.
29+
This is repeated for every row down to the last one, and the accumulated distances of the last row are returned as the result.
30+
31+
32+
"""
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Bounds of the half-open interval [LOW, HIGH) the wall weights are drawn from.
LOW = 0
HIGH = 10.0
# Fixed seed so every run generates the same wall.
SEED = 9


def initialize(rows, cols, pyramid_height, types_dict=None):
    """Create the inputs for the pathfinder benchmark.

    Args:
        rows: number of rows of the wall.
        cols: number of columns of the wall.
        pyramid_height: passed through to the returned tuple unchanged.
        types_dict: accepted but not used by this function.

    Returns:
        A tuple ``(data, rows, cols, pyramid_height, result)`` where ``data``
        is a flat int64 array of ``rows * cols`` random weights and ``result``
        is an uninitialised int64 array of length ``cols``.
    """
    import numpy as np

    # Re-seed the global NumPy generator to make the wall reproducible.
    np.random.seed(SEED)

    wall = np.random.randint(LOW, HIGH, (rows * cols), dtype=np.int64)
    output = np.empty(cols, dtype=np.int64)

    return (wall, rows, cols, pyramid_height, output)
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
import dpnp
6+
import numba_dpex
7+
8+
9+
def MIN(a, b):
    """Return the smaller of ``a`` and ``b``; ``a`` is returned on a tie."""
    if a <= b:
        return a
    return b
11+
12+
13+
@numba_dpex.func
def min_dpex(a, b):
    """Device function: return the smaller of ``a`` and ``b`` (``a`` on a tie)."""
    return a if a <= b else b
17+
18+
19+
# NOTE: the kernel is deliberately compiled without ``debug=True``; a debug
# build is not representative for benchmark timings.
@numba_dpex.kernel
def _pathfinder_kernel(device_src, device_dest, cols):
    """Add to each cell of the current row the cheapest reachable cell above.

    One work-item handles one column.

    Args:
        device_src: accumulated costs of the previous row (read only).
        device_dest: weights of the current row; updated in place so that
            each entry becomes its weight plus the minimum of the three
            neighbouring entries of ``device_src``.
        cols: number of columns; used to clamp the right neighbour index.
    """
    current_element = numba_dpex.get_global_id(0)

    # Neighbour columns in the previous row, clamped to the valid range so
    # that the first and last columns reuse themselves as the missing side.
    left_ind = current_element - 1 if current_element >= 1 else 0
    right_ind = current_element + 1 if current_element < cols - 1 else cols - 1
    up_ind = current_element

    left = device_src[left_ind]
    up = device_src[up_ind]
    right = device_src[right_ind]

    shortest = min_dpex(left, up)
    shortest = min_dpex(shortest, right)

    device_dest[current_element] += shortest
34+
35+
36+
def pathfinder(data, rows, cols, pyramid_height, result):
    """Compute the minimum accumulated cost of reaching each last-row column.

    Args:
        data: flat, row-major sequence of wall weights; row ``t`` is the slice
            ``data[t * cols : (t + 1) * cols]``.
        rows: number of rows in ``data``.
        cols: number of columns in ``data``.
        pyramid_height: not used by this implementation.
        result: indexable output of at least ``cols`` entries; entry ``i``
            receives the accumulated cost for column ``i`` of the last row.
    """
    # The accumulated costs start out as a copy of the first row.
    device_dest = dpnp.array(data[:cols], dtype=dpnp.int64)

    for t in range(1, rows):
        # The costs accumulated so far become the "row above" for row t.
        device_src = device_dest
        device_dest = dpnp.array(
            data[t * cols : (t + 1) * cols], dtype=dpnp.int64
        )
        # One work-item per column adds the cheapest neighbour from above.
        _pathfinder_kernel[numba_dpex.Range(cols)](
            device_src, device_dest, cols
        )

    # Copy the final row of accumulated costs into the caller's buffer.
    for i in range(cols):
        result[i] = device_dest[i]
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
import dpnp
6+
7+
8+
def min_dpex(a, b):
    """Return the smaller of ``a`` and ``b``; ``a`` is returned on a tie."""
    if a <= b:
        return a
    return b
10+
11+
12+
def _pathfinder_kernel(device_src, device_dest, cols, current_element):
13+
left_ind = current_element - 1 if current_element >= 1 else 0
14+
right_ind = current_element + 1 if current_element < cols - 1 else cols - 1
15+
up_ind = current_element
16+
17+
left = device_src[left_ind]
18+
up = device_src[up_ind]
19+
right = device_src[right_ind]
20+
shortest = min_dpex(left, up)
21+
shortest = min_dpex(shortest, right)
22+
23+
device_dest[current_element] += shortest
24+
25+
26+
def pathfinder(data, rows, cols, pyramid_height, result):
    """Compute the minimum accumulated cost of reaching each last-row column.

    Args:
        data: flat, row-major sequence of wall weights; row ``t`` is the slice
            ``data[t * cols : (t + 1) * cols]``.
        rows: number of rows in ``data``.
        cols: number of columns in ``data``.
        pyramid_height: not used by this implementation.
        result: indexable output of at least ``cols`` entries; entry ``i``
            receives the accumulated cost for column ``i`` of the last row.
    """
    # The accumulated costs start out as a copy of the first row.
    device_dest = dpnp.array(data[:cols], dtype=dpnp.int64)

    for t in range(1, rows):
        # The costs accumulated so far become the "row above" for row t.
        device_src = device_dest
        device_dest = dpnp.array(
            data[t * cols : (t + 1) * cols], dtype=dpnp.int64
        )

        # Update every column of row t in turn.
        for i in range(cols):
            _pathfinder_kernel(device_src, device_dest, cols, i)

    # Copy the final row of accumulated costs into the caller's buffer.
    for i in range(cols):
        result[i] = device_dest[i]
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Base name shared by the source sub-directory and the extension target.
set(module_name pathfinder_sycl)
6+
# The target/file name is the base name with a leading underscore.
set(py_module_name _${module_name})
7+
# Build a Python extension module from <module_name>/<py_module_name>.cpp.
python_add_library(${py_module_name} MODULE ${module_name}/${py_module_name}.cpp)
8+
# Mark the same source for SYCL compilation. add_sycl_to_target is not
# defined in this file; presumably provided by the SYCL CMake package - confirm.
add_sycl_to_target(TARGET ${py_module_name} SOURCES ${module_name}/${py_module_name}.cpp)
9+
# Make the dpctl headers visible to this target only.
target_include_directories(${py_module_name} PRIVATE ${Dpctl_INCLUDE_DIRS})
10+
11+
# Install destination mirrors this directory's path relative to the project root.
file(RELATIVE_PATH py_module_dest ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
12+
install(TARGETS ${py_module_name}
13+
DESTINATION ${py_module_dest}/${module_name}
14+
)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from .pathfinder_sycl._pathfinder_sycl import pathfinder as pathfinder_sycl
6+
7+
__all__ = ["pathfinder_sycl"]
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
// SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
#include <CL/sycl.hpp>
6+
7+
using namespace sycl;
8+
9+
// Number of columns a work-group loads per call of pathfinder_impl.
#define BLOCK_SIZE 128
// STR_SIZE and DEVICE are not referenced by the code visible in this file.
#define STR_SIZE 128
#define DEVICE 0
// Columns lost on each side of a block per processed row.
#define HALO 1

// True when min <= x <= max (both bounds inclusive).
#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max))
// Clamps the lvalue x into [min, max] in place. Every argument and the whole
// expansion are parenthesised so the macro composes safely inside larger
// expressions.
#define CLAMP_RANGE(x, min, max)                                               \
    ((x) = ((x) < (min)) ? (min) : (((x) > (max)) ? (max) : (x)))
// Smaller of a and b; a is chosen on a tie. Arguments may be evaluated twice.
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
17+
18+
void pathfinder_impl(int iteration,
19+
int *gpuWall,
20+
int *gpuSrc,
21+
int *gpuResults,
22+
int cols,
23+
int rows,
24+
int startStep,
25+
int border,
26+
sycl::nd_item<3> item_ct1,
27+
int *prev,
28+
int *result)
29+
{
30+
31+
int bx = item_ct1.get_group(2);
32+
int tx = item_ct1.get_local_id(2);
33+
34+
// each block finally computes result for a small block
35+
// after N iterations.
36+
// it is the non-overlapping small blocks that cover
37+
// all the input data
38+
39+
// calculate the small block size
40+
int small_block_cols = BLOCK_SIZE - iteration * HALO * 2;
41+
42+
// calculate the boundary for the block according to
43+
// the boundary of its small block
44+
int blkX = small_block_cols * bx - border;
45+
int blkXmax = blkX + BLOCK_SIZE - 1;
46+
47+
// calculate the global thread coordination
48+
int xidx = blkX + tx;
49+
50+
// effective range within this block that falls within
51+
// the valid range of the input data
52+
// used to rule out computation outside the boundary.
53+
int validXmin = (blkX < 0) ? -blkX : 0;
54+
int validXmax = (blkXmax > cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - cols + 1)
55+
: BLOCK_SIZE - 1;
56+
57+
int W = tx - 1;
58+
int E = tx + 1;
59+
60+
W = (W < validXmin) ? validXmin : W;
61+
E = (E > validXmax) ? validXmax : E;
62+
63+
bool isValid = IN_RANGE(tx, validXmin, validXmax);
64+
65+
if (IN_RANGE(xidx, 0, cols - 1)) {
66+
prev[tx] = gpuSrc[xidx];
67+
}
68+
item_ct1.barrier(); // [Ronny] Added sync to avoid race on prev Aug. 14 2012
69+
bool computed;
70+
for (int i = 0; i < iteration; i++) {
71+
computed = false;
72+
if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && isValid) {
73+
computed = true;
74+
int left = prev[W];
75+
int up = prev[tx];
76+
int right = prev[E];
77+
int shortest = MIN(left, up);
78+
shortest = MIN(shortest, right);
79+
int index = cols * (startStep + i) + xidx;
80+
result[tx] = shortest + gpuWall[index];
81+
}
82+
item_ct1.barrier();
83+
if (i == iteration - 1)
84+
break;
85+
if (computed) // Assign the computation range
86+
prev[tx] = result[tx];
87+
item_ct1
88+
.barrier(); // [Ronny] Added sync to avoid race on prev Aug. 14 2012
89+
}
90+
91+
// update the global memory
92+
// after the last iteration, only threads coordinated within the
93+
// small block perform the calculation and switch on ``computed''
94+
if (computed) {
95+
gpuResults[xidx] = result[tx];
96+
}
97+
}

0 commit comments

Comments
 (0)