Skip to content

Commit 53de4a8

Browse files
author
roxx30198
committed
Added pathfinder sycl/ndpx impl
1 parent 6764112 commit 53de4a8

File tree

10 files changed

+603
-0
lines changed

10 files changed

+603
-0
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Descend into pathfinder_sycl_native_ext and process the CMakeLists.txt found there.
add_subdirectory(pathfinder_sycl_native_ext)
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
"""Pathfinder Implementation.
6+
7+
This pathfinding algorithm finds the shortest path from the first row to the last.
8+
9+
Input
10+
---------
11+
rows<int_64> : Indicates the number of rows.
12+
13+
cols<int_64> : Indicates the number of cols.
14+
15+
pyramid_height<int_64> : Indicates the maximum number of rows processed per kernel launch.
16+
17+
block_size<int_64> : Indicates block size for parallel computation.
18+
19+
Output
20+
21+
--------
22+
23+
result<array<int_64>> : For each column of the last row, the minimum accumulated distance from the first row.
24+
25+
Method:
26+
27+
The elements are fed to the kernel row-wise and the minimum distance is computed based
28+
on the minimum weight of the neighbors above.
29+
This is repeated for every row down to the last one, and the accumulated distances of the last row are returned.
30+
31+
32+
"""
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
"""Pathfinder initialization."""
5+
LOW = 0
HIGH = 10.0
SEED = 9


def initialize(rows, cols, types_dict=None):
    """Create the seeded input weights and the output buffer for pathfinder.

    Args:
        rows: number of rows.
        cols: number of cols.
        types_dict: data type of operand; accepted but not read here.

    Returns:
        data: flat int64 array holding ``rows * cols`` random weights drawn
            from ``[LOW, HIGH)``.
        result: uninitialised int64 array of length ``cols``.
    """
    import numpy as np

    # Reseed NumPy's global generator so that every call produces the same
    # weights.
    np.random.seed(SEED)

    weights = np.random.randint(LOW, HIGH, (rows * cols), dtype=np.int64)
    output = np.empty(cols, dtype=np.int64)
    return (weights, output)
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
"""Numba-dpex implementation for Pathfinder."""
5+
6+
import dpnp
7+
import numba_dpex
8+
9+
10+
@numba_dpex.func
def IN_RANGE(x, min, max):
    """Tell whether x lies inside the closed interval [min, max].

    Args:
        x: Element to be checked.
        min: Lower bound (inclusive).
        max: Upper bound (inclusive).

    Returns:
        True when ``min <= x <= max``, otherwise False.
    """
    return min <= x <= max
23+
24+
25+
@numba_dpex.func
def min_dpex(a, b):
    """Return the smaller of two values.

    Args:
        a: first element.
        b: second element.

    Returns:
        ``a`` when ``a <= b``, otherwise ``b``.
    """
    if a <= b:
        return a
    return b
38+
39+
40+
@numba_dpex.kernel
def _pathfinder_kernel(
    iteration,
    gpuWall,
    gpuSrc,
    gpuResults,
    cols,
    rows,
    startStep,
    border,
    block_size,
):
    """Advance the accumulated path costs by ``iteration`` rows.

    Each work-item handles one column of its block. The previous row of
    accumulated costs is staged in local memory, then for every step the
    work-item adds the wall weight to the smallest of its left, upper and
    right predecessors.

    Args:
        iteration: number of rows to process in this launch.
        gpuWall: flattened weights, read at ``cols * (startStep + i) + column``.
        gpuSrc: accumulated costs of the row processed last.
        gpuResults: receives the accumulated costs after the final step.
        cols: number of columns.
        rows: number of rows (not read by the kernel body).
        startStep: index of the first wall row handled by this launch.
        border: number of halo columns on each side of a block.
        block_size: number of work-items per block.
    """
    # Scratch rows indexed by local id. NOTE(review): the size is fixed at
    # 256, so block_size presumably must not exceed 256 - confirm with callers.
    prev_row = numba_dpex.local.array(256, dtype=dpnp.int64)
    next_row = numba_dpex.local.array(256, dtype=dpnp.int64)

    group = numba_dpex.get_group_id(2)
    lane = numba_dpex.get_local_id(2)

    # Each step invalidates one column on either side of the block.
    usable_cols = block_size - iteration * 2

    # Global column range covered by this block, halo included.
    first_col = usable_cols * group - border
    last_col = first_col + block_size - 1

    # Global column handled by this work-item.
    col = first_col + lane

    # Local-id range of this block that maps onto real columns [0, cols - 1].
    lo = 0
    if first_col < 0:
        lo = -first_col
    hi = block_size - 1
    if last_col > cols - 1:
        hi = block_size - 1 - (last_col - cols + 1)

    # Neighbour indices, clamped to the valid local-id range.
    west = lane - 1
    if west < lo:
        west = lo
    east = lane + 1
    if east > hi:
        east = hi

    in_bounds = IN_RANGE(lane, lo, hi)

    if IN_RANGE(col, 0, cols - 1):
        prev_row[lane] = gpuSrc[col]

    # Every lane must have staged its source value before neighbours read it.
    numba_dpex.barrier(numba_dpex.LOCAL_MEM_FENCE)

    for step in range(iteration):
        updated = False
        if IN_RANGE(lane, step + 1, block_size - step - 2) and in_bounds:
            updated = True
            best = min_dpex(
                min_dpex(prev_row[west], prev_row[lane]), prev_row[east]
            )
            next_row[lane] = best + gpuWall[cols * (startStep + step) + col]

        numba_dpex.barrier(numba_dpex.LOCAL_MEM_FENCE)
        # The last step's values stay in next_row and are written out below.
        if step == iteration - 1:
            break
        if updated:
            prev_row[lane] = next_row[lane]
        numba_dpex.barrier(numba_dpex.LOCAL_MEM_FENCE)

    # Only lanes that computed a value in the final step publish a result.
    if updated:
        gpuResults[col] = next_row[lane]
119+
120+
121+
def pathfinder(data, rows, cols, pyramid_height, block_size, result):
    """Compute smallest distance from top row to bottom.

    The rows are processed in chunks of at most ``pyramid_height`` rows per
    kernel launch, ping-ponging between the two rows of a ``(2, cols)``
    buffer of accumulated costs.

    Args:
        data: flattened data matrix; the first ``cols`` entries are the first
            row, the remainder is handed to the kernel as the wall.
        rows: number of rows.
        cols: number of cols.
        pyramid_height: maximum number of rows processed per kernel launch.
        block_size: number of work-items per block.
        result: receives the ``cols`` accumulated costs of the last row.

    Raises:
        ValueError: if ``block_size`` exceeds the kernel's 256-entry local
            arrays, or does not exceed ``2 * pyramid_height``.
    """
    # The kernel indexes two 256-entry local arrays by local id, so a larger
    # block would read and write past their end.
    max_block_size = 256
    if block_size > max_block_size:
        raise ValueError("block_size must not exceed 256")

    # Each block loses pyramid_height halo columns on either side.
    small_block_cols = block_size - pyramid_height * 2
    if small_block_cols <= 0:
        raise ValueError("block_size must be greater than 2 * pyramid_height")

    # Integer ceiling division avoids the float round trip of `cols / n`.
    block_count = int((cols + small_block_cols - 1) // small_block_cols)

    local_range = numba_dpex.Range(1, 1, block_size)
    global_range = numba_dpex.Range(1, 1, block_count * block_size)
    launch_range = numba_dpex.NdRange(global_range, local_range)

    # Row 0 starts out as the first input row; the two rows then alternate
    # between source and destination.
    gpu_result = dpnp.zeros((2, cols), dtype=dpnp.int64)
    gpu_result[0] = data[:cols]
    gpu_wall = data[cols:]

    src, dst = 1, 0
    for t in range(0, rows - 1, pyramid_height):
        src, dst = dst, src

        # The final chunk may hold fewer than pyramid_height rows.
        iteration = min(pyramid_height, rows - t - 1)

        _pathfinder_kernel[launch_range](
            iteration,
            gpu_wall,
            gpu_result[src],
            gpu_result[dst],
            cols,
            rows,
            t,
            pyramid_height,
            block_size,
        )

    for k, value in enumerate(gpu_result[dst]):
        result[k] = value
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
"""Pathfinder python serial implementation."""
5+
6+
7+
def _pathfinder_kernel(device_src, device_dest, cols, current_element):
    """Compute shortest distance per column element.

    Adds to ``device_dest[current_element]`` the smallest of the three
    entries of ``device_src`` directly above it (left, up, right), with the
    neighbour indices clamped to ``[0, cols - 1]``.

    Args:
        device_src: src row.
        device_dest: destination row, updated in place.
        cols: number of cols.
        current_element: Current column element.
    """
    left_ind = max(current_element - 1, 0)
    right_ind = min(current_element + 1, cols - 1)

    shortest = min(
        device_src[left_ind],
        device_src[current_element],
        device_src[right_ind],
    )

    device_dest[current_element] += shortest


def pathfinder(data, rows, cols, pyramid_height, block_size, result):
    """Compute smallest distance from top row to bottom.

    Args:
        data: flattened data matrix; row ``t`` is
            ``data[t * cols : (t + 1) * cols]``. It is never modified.
        rows: number of rows.
        cols: number of cols.
        pyramid_height: pyramid height (not used by the serial version).
        block_size: block size for parallel 2d-kernel (not used by the
            serial version).
        result: receives the ``cols`` accumulated costs of the last row.
    """
    # Copy every row before accumulating into it: slicing a NumPy array
    # yields a view, so updating the slice in place would overwrite the
    # caller's input and change the outcome of any later call.
    device_dest = list(data[:cols])  # first row

    for t in range(1, rows):
        device_src = device_dest
        device_dest = list(data[t * cols : (t + 1) * cols])

        for i in range(cols):
            _pathfinder_kernel(device_src, device_dest, cols, i)

    for i in range(cols):
        result[i] = device_dest[i]
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# The Python extension target is named after the module with a leading
# underscore (_pathfinder_sycl); its source is <module_name>/<target>.cpp.
set(module_name pathfinder_sycl)
set(py_module_name _${module_name})
# Declare the extension as a MODULE library built from that single source.
python_add_library(${py_module_name} MODULE ${module_name}/${py_module_name}.cpp)
# NOTE(review): add_sycl_to_target is defined outside this file; presumably it
# enables SYCL compilation for the listed sources - confirm.
add_sycl_to_target(TARGET ${py_module_name} SOURCES ${module_name}/${py_module_name}.cpp)
target_include_directories(${py_module_name} PRIVATE ${Dpctl_INCLUDE_DIRS})

# Install the built module under the same path, relative to the top-level
# source directory, that this directory has in the source tree.
file(RELATIVE_PATH py_module_dest ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
install(TARGETS ${py_module_name}
  DESTINATION ${py_module_dest}/${module_name}
)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
"""Sycl implementation for Pathfinder."""
5+
6+
from .pathfinder_sycl._pathfinder_sycl import pathfinder as pathfinder_sycl
7+
8+
__all__ = ["pathfinder_sycl"]

0 commit comments

Comments
 (0)