Skip to content

Commit

Permalink
Running pathfinder with rodinia
Browse files Browse the repository at this point in the history
  • Loading branch information
roxx30198 committed Oct 11, 2023
1 parent eec58be commit 08452c3
Show file tree
Hide file tree
Showing 17 changed files with 509 additions and 9 deletions.
17 changes: 9 additions & 8 deletions dpbench/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
#
# SPDX-License-Identifier: Apache-2.0

# Every benchmark that ships a native extension must be registered here.
# Adding the Rodinia suite must not disable the existing benchmarks, so the
# new entry is appended rather than replacing the others.
add_subdirectory(black_scholes)
add_subdirectory(pairwise_distance)
add_subdirectory(l2_norm)
add_subdirectory(rambo)
add_subdirectory(kmeans)
add_subdirectory(knn)
add_subdirectory(gpairs)
add_subdirectory(dbscan)
add_subdirectory(rodinia)

# generate dpcpp version into config
set(FILE ${CMAKE_SOURCE_DIR}/dpbench/configs/framework_info/dpcpp.toml)
Expand Down
5 changes: 5 additions & 0 deletions dpbench/benchmarks/rodinia/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Descend into the pathfinder benchmark, which carries its own build rules.
add_subdirectory(pathfinder)
5 changes: 5 additions & 0 deletions dpbench/benchmarks/rodinia/pathfinder/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Descend into the directory holding pathfinder's native SYCL extension.
add_subdirectory(pathfinder_sycl_native_ext)
32 changes: 32 additions & 0 deletions dpbench/benchmarks/rodinia/pathfinder/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""
Pathfinder Computation
This algorithm finds the shortest path from the first row to the last.
Input
---------
rows<int_64> : Indicates the number of rows
cols<int_64> : Indicates the number of cols
pyramid height<int_64> : Indicates pyramid height
Output
--------
result<array<int_64>> : For each column, the minimum accumulated distance from the first row to the last
Method:
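The elements are fed to the kernel row by row. For each element, the minimum
distance is computed by adding the smallest weight among its neighbors (left, up, right)
in the accumulated row above.
This is repeated for every row down to the last one, and the accumulated last row is returned as the result.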
"""
20 changes: 20 additions & 0 deletions dpbench/benchmarks/rodinia/pathfinder/pathfinder_initialize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Half-open value range [LOW, HIGH) for the generated weights, and the fixed
# seed that makes every run produce the same grid.
LOW = 0
HIGH = 10.0
SEED = 9


def initialize(rows, cols, pyramid_height, types_dict=None):
    """Build the inputs for the pathfinder benchmark.

    Args:
        rows: Number of grid rows.
        cols: Number of grid columns.
        pyramid_height: Passed through unchanged to the caller.
        types_dict: Accepted but not used by this initializer.

    Returns:
        A tuple ``(data, rows, cols, pyramid_height, result)`` where ``data``
        is a flat int64 array of ``rows * cols`` random weights and ``result``
        is an uninitialized int64 array of ``cols`` elements to be filled by
        the benchmark.
    """
    import numpy as np

    # Seeding NumPy's global generator keeps the data identical across runs.
    np.random.seed(SEED)

    data = np.random.randint(LOW, HIGH, rows * cols, dtype=np.int64)
    result = np.empty(cols, dtype=np.int64)

    return (data, rows, cols, pyramid_height, result)
56 changes: 56 additions & 0 deletions dpbench/benchmarks/rodinia/pathfinder/pathfinder_numba_dpex_k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

import dpnp
import numba_dpex


def MIN(a, b):
    """Return the smaller of ``a`` and ``b``; ``a`` wins when they compare equal."""
    if a <= b:
        return a
    return b


@numba_dpex.func
def min_dpex(a, b):
    """Device helper: return the smaller of ``a`` and ``b`` (``a`` on ties)."""
    return a if a <= b else b


# NOTE: the kernel is compiled without ``debug=True``; a debug build is not
# representative for benchmark timings.
@numba_dpex.kernel
def _pathfinder_kernel(device_src, device_dest, cols):
    """Accumulate one grid row; each work-item handles one column.

    Args:
        device_src: Accumulated minimum distances of the previous row.
        device_dest: Weights of the current row; updated in place so that each
            entry becomes its weight plus the smallest of the (up to three)
            neighbouring entries of ``device_src``.
        cols: Number of columns, used to clamp the right neighbour index.
    """
    current_element = numba_dpex.get_global_id(0)

    # Clamp the neighbour indices at the row edges so that the first and last
    # columns simply reuse an in-range entry instead of reading out of bounds.
    left_ind = current_element - 1 if current_element >= 1 else 0
    right_ind = current_element + 1 if current_element < cols - 1 else cols - 1
    up_ind = current_element

    left = device_src[left_ind]
    up = device_src[up_ind]
    right = device_src[right_ind]
    shortest = min_dpex(left, up)
    shortest = min_dpex(shortest, right)

    device_dest[current_element] += shortest


def pathfinder(data, rows, cols, pyramid_height, result):
    """Run the pathfinder benchmark with the numba_dpex kernel.

    Args:
        data: Flat, row-major grid of ``rows * cols`` weights (row ``t`` is
            ``data[t * cols : (t + 1) * cols]``).
        rows: Number of grid rows.
        cols: Number of grid columns; also the kernel launch range.
        pyramid_height: Not used by this implementation.
        result: Output buffer; its first ``cols`` entries receive the
            accumulated minimum distance of every column of the last row.
    """
    # Work on a copy of the first row: the kernel accumulates in place and
    # must not modify ``data``.
    device_dest = dpnp.array(data[:cols], dtype=dpnp.int64)

    for t in range(1, rows):
        # The row accumulated so far becomes the source for the next row.
        device_src = device_dest
        device_dest = dpnp.array(
            data[t * cols : (t + 1) * cols], dtype=dpnp.int64
        )
        _pathfinder_kernel[numba_dpex.Range(cols)](
            device_src, device_dest, cols
        )

    # Copy the final row back with one bulk transfer instead of reading the
    # device array element by element.
    result[:cols] = dpnp.asnumpy(device_dest)
45 changes: 45 additions & 0 deletions dpbench/benchmarks/rodinia/pathfinder/pathfinder_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

import dpnp


def min_dpex(a, b):
    """Return the smaller of ``a`` and ``b``; ``a`` is returned on ties."""
    if a <= b:
        return a
    return b


def _pathfinder_kernel(device_src, device_dest, cols, current_element):
    """Accumulate a single column of the current row.

    Adds to ``device_dest[current_element]`` the smallest of the left, upper
    and right neighbours taken from ``device_src``; neighbour indices are
    clamped to ``[0, cols - 1]`` at the row edges.
    """
    last_col = cols - 1
    left_ind = max(current_element - 1, 0)
    right_ind = min(current_element + 1, last_col)

    shortest = min_dpex(
        min_dpex(device_src[left_ind], device_src[current_element]),
        device_src[right_ind],
    )

    device_dest[current_element] += shortest


def pathfinder(data, rows, cols, pyramid_height, result):
    """Reference pathfinder: process the grid row by row in plain Python loops.

    Args:
        data: Flat, row-major grid of ``rows * cols`` weights (row ``t`` is
            ``data[t * cols : (t + 1) * cols]``).
        rows: Number of grid rows.
        cols: Number of grid columns.
        pyramid_height: Not used by this implementation.
        result: Output buffer; its first ``cols`` entries receive the
            accumulated minimum distance of every column of the last row.
    """
    # Work on a copy of the first row: rows are accumulated in place and
    # ``data`` must stay untouched.
    device_dest = dpnp.array(data[:cols], dtype=dpnp.int64)

    for t in range(1, rows):
        # The row accumulated so far becomes the source for the next row.
        device_src = device_dest
        device_dest = dpnp.array(
            data[t * cols : (t + 1) * cols], dtype=dpnp.int64
        )

        for i in range(cols):
            _pathfinder_kernel(device_src, device_dest, cols, i)

    # Copy the final row back with one bulk transfer instead of reading the
    # array element by element.
    result[:cols] = dpnp.asnumpy(device_dest)
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# The Python package directory is ${module_name}; the compiled extension
# inside it carries the same name with a leading underscore.
set(module_name pathfinder_sycl)
set(py_module_name _${module_name})

# Build the extension from its single source file and enable SYCL for it.
python_add_library(
    ${py_module_name}
    MODULE
    ${module_name}/${py_module_name}.cpp
)
add_sycl_to_target(
    TARGET ${py_module_name}
    SOURCES ${module_name}/${py_module_name}.cpp
)
target_include_directories(
    ${py_module_name}
    PRIVATE ${Dpctl_INCLUDE_DIRS}
)

# Install next to the Python sources, mirroring the layout of the source tree.
file(
    RELATIVE_PATH py_module_dest
    ${CMAKE_SOURCE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}
)
install(
    TARGETS ${py_module_name}
    DESTINATION ${py_module_dest}/${module_name}
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

from .pathfinder_sycl._pathfinder_sycl import pathfinder as pathfinder_sycl

__all__ = ["pathfinder_sycl"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0

#include <CL/sycl.hpp>

using namespace sycl;

// Number of work-items per block; also the width of the per-block scratch
// rows indexed by the local id in pathfinder_impl.
#define BLOCK_SIZE 128
// STR_SIZE and DEVICE are not referenced in this file; kept for any other
// translation unit that may rely on them.
#define STR_SIZE 128
#define DEVICE 0
// Width of the halo lost on each side of a block per iteration.
#define HALO 1

// True when min <= x <= max (both bounds inclusive).
#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max))
// Clamp the lvalue x into [min, max] in place. Every argument is
// parenthesized so that compound expressions expand safely; note that the
// arguments are still evaluated more than once.
#define CLAMP_RANGE(x, min, max)                                              \
    ((x) = ((x) < (min)) ? (min) : (((x) > (max)) ? (max) : (x)))
// Smaller of a and b; a is chosen on ties.
#define MIN(a, b) ((a) <= (b) ? (a) : (b))

/**
 * Device-side body of the pathfinder kernel: advances one block of columns
 * by `iteration` rows and writes the accumulated minimum distances.
 *
 * @param iteration  Number of rows processed by this call.
 * @param gpuWall    Grid weights; read at cols * (startStep + i) + column.
 * @param gpuSrc     Accumulated distances of the row preceding this call.
 * @param gpuResults Output row; written at the global column index.
 * @param cols       Number of grid columns.
 * @param rows       Number of grid rows (not used in this function).
 * @param startStep  Row offset into gpuWall for the first iteration.
 * @param border     Halo width subtracted from the block's start column.
 * @param item_ct1   Work-item handle; dimension 2 carries the group and
 *                   local ids used here.
 * @param prev       Scratch row indexed by local id (previous values).
 *                   NOTE(review): presumably work-group local memory of
 *                   BLOCK_SIZE elements - confirm at the call site.
 * @param result     Scratch row indexed by local id (new values); same
 *                   assumption as for prev.
 */
void pathfinder_impl(int iteration,
                     int64_t *gpuWall,
                     int64_t *gpuSrc,
                     int64_t *gpuResults,
                     int cols,
                     int rows,
                     int startStep,
                     int border,
                     sycl::nd_item<3> item_ct1,
                     int64_t *prev,
                     int64_t *result)
{

    int bx = item_ct1.get_group(2);
    int tx = item_ct1.get_local_id(2);

    // each block finally computes result for a small block
    // after N iterations.
    // it is the non-overlapping small blocks that cover
    // all the input data

    // calculate the small block size
    int small_block_cols = BLOCK_SIZE - iteration * HALO * 2;

    // calculate the boundary for the block according to
    // the boundary of its small block
    int blkX = small_block_cols * bx - border;
    int blkXmax = blkX + BLOCK_SIZE - 1;

    // calculate the global thread coordination
    int xidx = blkX + tx;

    // effective range within this block that falls within
    // the valid range of the input data
    // used to rule out computation outside the boundary.
    int validXmin = (blkX < 0) ? -blkX : 0;
    int validXmax = (blkXmax > cols - 1)
                        ? BLOCK_SIZE - 1 - (blkXmax - cols + 1)
                        : BLOCK_SIZE - 1;

    // local indices of the west/east neighbours, clamped to the valid range
    int W = tx - 1;
    int E = tx + 1;

    W = (W < validXmin) ? validXmin : W;
    E = (E > validXmax) ? validXmax : E;

    bool isValid = IN_RANGE(tx, validXmin, validXmax);

    if (IN_RANGE(xidx, 0, cols - 1)) {
        prev[tx] = gpuSrc[xidx];
    }
    item_ct1.barrier(); // [Ronny] Added sync to avoid race on prev Aug. 14 2012

    // Initialized so that the final check is well defined even when the loop
    // body never runs (iteration <= 0).
    bool computed = false;
    for (int i = 0; i < iteration; i++) {
        computed = false;
        // the computable range shrinks by one column per side per iteration
        if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && isValid) {
            computed = true;
            int64_t left = prev[W];
            int64_t up = prev[tx];
            int64_t right = prev[E];
            int64_t shortest = MIN(left, up);
            shortest = MIN(shortest, right);
            // 64-bit index: cols * row overflows a 32-bit int on large grids
            int64_t index =
                static_cast<int64_t>(cols) * (startStep + i) + xidx;
            result[tx] = shortest + gpuWall[index];
        }
        item_ct1.barrier();
        if (i == iteration - 1)
            break;
        if (computed) // Assign the computation range
            prev[tx] = result[tx];
        item_ct1
            .barrier(); // [Ronny] Added sync to avoid race on prev Aug. 14 2012
    }

    // update the global memory
    // after the last iteration, only threads coordinated within the
    // small block perform the calculation and switch on ``computed''
    if (computed) {
        gpuResults[xidx] = result[tx];
    }
}
Loading

0 comments on commit 08452c3

Please sign in to comment.