Skip to content

Commit

Permalink
Merge pull request #307 from roxx30198/gaussian
Browse files Browse the repository at this point in the history
rodinia/gaussian sycl and ndpx implementation
  • Loading branch information
ZzEeKkAa authored Nov 4, 2023
2 parents 3e7a9b2 + 8da8d99 commit 07f59c9
Show file tree
Hide file tree
Showing 22 changed files with 571 additions and 12 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/build_and_run.yml
Original file line number Diff line number Diff line change
Expand Up @@ -176,5 +176,8 @@ jobs:
- name: Run benchmarks
run: dpbench -i ${{env.WORKLOADS}} run -r2 --no-print-results || exit 1

- name: Run rodinia benchmarks
run: dpbench -i ${{env.WORKLOADS}} --last-run run -r2 --no-print-results --rodinia --no-dpbench|| exit 1

- name: Generate report
run: dpbench -i ${{env.WORKLOADS}} report || exit 1
4 changes: 4 additions & 0 deletions .github/workflows/conda-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,10 @@ jobs:
run: |
dpbench -i numpy -b azimint_hist run --npbench
- name: Run rodinia benchmark
run: |
dpbench run --rodinia --no-dpbench --no-validate -r 1
upload_anaconda:
name: Upload dppy/label/dev ['${{ matrix.os }}', python='${{ matrix.python }}']

Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ repos:
hooks:
- id: pydocstyle
# TODO: add packages one by one to enforce pydocstyle eventually
files: (^dpbench/config/|^scripts/|^dpbench/console/|^dpbench/infrastructure/benchmark_runner.py|^dpbench/infrastructure/benchmark_validation.py)
files: (^dpbench/config/|^scripts/|^dpbench/console/|^dpbench/infrastructure/benchmark_runner.py|^dpbench/infrastructure/benchmark_validation.py|^dpbench/benchmarks/rodinia)
args: ["--convention=google"]
# D417 does not work properly:
# https://github.com/PyCQA/pydocstyle/issues/459
Expand Down
1 change: 1 addition & 0 deletions dpbench/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ add_subdirectory(kmeans)
add_subdirectory(knn)
add_subdirectory(gpairs)
add_subdirectory(dbscan)
add_subdirectory(rodinia)

# generate dpcpp version into config
set(FILE ${CMAKE_SOURCE_DIR}/dpbench/configs/framework_info/dpcpp.toml)
Expand Down
5 changes: 5 additions & 0 deletions dpbench/benchmarks/rodinia/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

add_subdirectory(gaussian)
5 changes: 5 additions & 0 deletions dpbench/benchmarks/rodinia/gaussian/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

add_subdirectory(gaussian_sycl_native_ext)
19 changes: 19 additions & 0 deletions dpbench/benchmarks/rodinia/gaussian/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""Gaussian elimination implementation."""

"""This is sycl and numba-dpex implementation for gaussian elimination
Input
---------
size<int_64> : Forms an input matrix of dimensions (size x size)
Output
--------
result<array<float>> : Result of the given set of linear equations using
gaussian elimination.
Method:
The gaussian transformations are applied to the input matrix to reduce it
to an upper triangular matrix in forward elimination, and then the equations
are solved to find the result in back substitution.
"""
45 changes: 45 additions & 0 deletions dpbench/benchmarks/rodinia/gaussian/gaussian_initialize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""Initialization function for matrices for gaussian elimination."""


def initialize(size, Lambda, types_dict=None):
    """Initialize the matrices based on size and type.

    Entry (i, j) of the generated matrix equals
    ``10 * exp(Lambda * abs(i - j))``; the matrix is stored flattened in
    row-major order.

    Args:
        size: size for matrices (size x size).
        Lambda: lambda value used in the exponent of each coefficient.
        types_dict: mapping whose ``"float"`` entry is the dtype of the
            returned arrays. When ``None``, ``numpy.float64`` is used.

    Returns:
        a: actual matrix, flattened, of length ``size * size``.
        b: base matrix (column matrix), all ones, of length ``size``.
        m: multiplier matrix, all zeros, of length ``size * size``.
        result: result of operation, all zeros, of length ``size``.
    """
    import math

    import numpy as np

    # The signature advertises ``types_dict=None``; indexing None would raise
    # TypeError, so fall back to double precision in that case.
    dtype = np.float64 if types_dict is None else types_dict["float"]

    # ``2 * size - 1`` is negative for ``size == 0``; clamp so that an empty
    # problem yields empty arrays instead of an error from ``np.empty``.
    coe = np.empty(max(2 * size - 1, 0), dtype=dtype)
    a = np.empty((size * size), dtype=dtype)

    # coe holds the coefficients mirrored around index ``size - 1``:
    # coe[size - 1 + k] == coe[size - 1 - k] == 10 * exp(Lambda * k).
    for i in range(size):
        coe_i = 10 * math.exp(Lambda * i)
        coe[size - 1 + i] = coe_i
        coe[size - 1 - i] = coe_i

    # Row i of the matrix is a window of ``coe`` starting at ``size - 1 - i``.
    for i in range(size):
        for j in range(size):
            a[i * size + j] = coe[size - 1 - i + j]

    return (
        a,
        np.ones(size, dtype=dtype),
        np.zeros((size * size), dtype=dtype),
        np.zeros(size, dtype=dtype),
    )
135 changes: 135 additions & 0 deletions dpbench/benchmarks/rodinia/gaussian/gaussian_numba_dpex_k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""Numba-dpex implementation for gaussian elimination."""

import dpctl
import numba_dpex


@numba_dpex.kernel()
def gaussian_kernel_1(m, a, size, t):
    """Find the multiplier matrix.

    Each work-item handles one row below the pivot row ``t`` and stores the
    ratio of that row's entry in column ``t`` to the pivot entry.

    Args:
        m: multiplier matrix.
        a: input matrix.
        size: size of matrix.
        t: current iteration.
    """
    # Global index along dimension 2, built from group and local ids.
    gid = numba_dpex.get_group_id(2) * numba_dpex.get_local_size(
        2
    ) + numba_dpex.get_local_id(2)

    # Only the size - 1 - t rows below the pivot row need a multiplier.
    if gid >= size - 1 - t:
        return

    # Flattened index of (row gid + t + 1, column t).
    target = size * (gid + t + 1) + t
    m[target] = a[target] / a[size * t + t]


@numba_dpex.kernel()
def gaussian_kernel_2(m, a, b, size, t):
    """Perform Gaussian elimination using gaussian operations for an iteration.

    Each work-item updates one entry of ``a`` below the pivot row ``t``;
    the work-items whose dimension-1 index is zero also update ``b``.

    Args:
        m: multiplier matrix.
        a: input matrix.
        b: column matrix.
        size: size of matrices.
        t: current iteration.
    """
    # Global indices along dimensions 2 and 1.
    row_offset = numba_dpex.get_group_id(2) * numba_dpex.get_local_size(
        2
    ) + numba_dpex.get_local_id(2)
    col_offset = numba_dpex.get_group_id(1) * numba_dpex.get_local_size(
        1
    ) + numba_dpex.get_local_id(1)

    # Skip work-items that fall outside the rows below the pivot or outside
    # the columns from the pivot column onwards.
    if row_offset >= size - 1 - t or col_offset >= size - t:
        return

    row = row_offset + 1 + t
    col = col_offset + t

    a[size * row + col] -= m[size * row + t] * a[size * t + col]

    # col == t here, so this reads the same multiplier as the update above.
    if col_offset == 0:
        b[row] -= m[size * row + col] * b[t]


def gaussian(a, b, m, size, block_sizeXY, result):
    """Perform Gaussian elimination using gaussian operations.

    Runs ``size - 1`` forward-elimination iterations, each launching
    ``gaussian_kernel_1`` followed by ``gaussian_kernel_2``, and then performs
    back substitution into ``result``.

    Args:
        a: input matrix.
        b: column matrix.
        m: multiplier matrix.
        size: size of matrices.
        block_sizeXY: work-group edge length used for the 2-d kernel.
        result: result matrix.
    """
    device = dpctl.SyclDevice()
    block_size = device.max_work_group_size

    # Number of work-groups needed to cover ``size`` items (ceiling division).
    # The former expression
    #     (size / block_size) + 0 if not (size % block_size) else 1
    # parsed as ``((size / block_size) + 0) if ... else 1`` and therefore
    # produced a single work-group whenever ``size`` was not a multiple of
    # ``block_size``, leaving rows unprocessed once size exceeded block_size.
    grid_size = int((size + block_size - 1) // block_size)

    blocksize2d = block_sizeXY
    gridsize2d = int((size + blocksize2d - 1) // blocksize2d)

    global_range = numba_dpex.Range(1, 1, grid_size * block_size)
    local_range = numba_dpex.Range(1, 1, block_size)

    dim_blockXY = numba_dpex.Range(1, blocksize2d, blocksize2d)
    dim_gridXY = numba_dpex.Range(
        1, gridsize2d * blocksize2d, gridsize2d * blocksize2d
    )

    # Forward elimination: one pivot row per iteration.
    for t in range(size - 1):
        gaussian_kernel_1[numba_dpex.NdRange(global_range, local_range)](
            m, a, size, t
        )

        gaussian_kernel_2[numba_dpex.NdRange(dim_gridXY, dim_blockXY)](
            m, a, b, size, t
        )

    # Back substitution, from the last row up to the first.
    for i in range(size):
        result[size - i - 1] = b[size - i - 1]
        for j in range(i):
            result[size - i - 1] -= (
                a[size * (size - i - 1) + (size - j - 1)] * result[size - j - 1]
            )
        result[size - i - 1] = (
            result[size - i - 1] / a[size * (size - i - 1) + (size - i - 1)]
        )
35 changes: 35 additions & 0 deletions dpbench/benchmarks/rodinia/gaussian/gaussian_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""Gaussian elimination python serial implementation."""


def gaussian(a, b, m, size, block_sizeXY, result):
    """Python serial implementation for gaussian elimination.

    ``a`` and ``b`` are modified in place and the solution is written into
    ``result``. The ``m`` and ``block_sizeXY`` arguments are not used by this
    implementation.

    Args:
        a: actual matrix, flattened row by row.
        b: base matrix (column matrix).
        m: multiplier matrix.
        size: size for matrices(sizexsize).
        block_sizeXY: block size for parallel 2d-kernel.
        result: result of operation.
    """
    # Forward Elimination
    for pivot in range(size - 1):
        pivot_start = pivot * size
        for row in range(pivot + 1, size):
            row_start = row * size
            factor = a[row_start + pivot] / a[pivot_start + pivot]
            for col in range(pivot, size):
                a[row_start + col] = (
                    a[row_start + col] - factor * a[pivot_start + col]
                )
            b[row] = b[row] - factor * b[pivot]

    # Back Substitution: walk the rows from last to first, subtracting the
    # already-solved unknowns from the rightmost column inwards.
    for row in range(size - 1, -1, -1):
        row_start = size * row
        result[row] = b[row]
        for col in range(size - 1, row, -1):
            result[row] -= a[row_start + col] * result[col]
        result[row] = result[row] / a[row_start + row]
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Base name of the extension; the Python module target is the same name with
# a leading underscore (_gaussian_sycl).
set(module_name gaussian_sycl)
set(py_module_name _${module_name})
# Build the Python extension module from <module_name>/_<module_name>.cpp.
python_add_library(${py_module_name} MODULE ${module_name}/${py_module_name}.cpp)
# NOTE(review): add_sycl_to_target is defined outside this file; presumably it
# enables SYCL compilation for the listed sources - confirm.
add_sycl_to_target(TARGET ${py_module_name} SOURCES ${module_name}/${py_module_name}.cpp)
target_include_directories(${py_module_name} PRIVATE ${Dpctl_INCLUDE_DIRS})

# Install the module under the same path it has relative to the source root,
# inside the <module_name> subdirectory.
file(RELATIVE_PATH py_module_dest ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
install(TARGETS ${py_module_name}
DESTINATION ${py_module_dest}/${module_name}
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""Sycl implementation for gaussian elimination."""

from .gaussian_sycl._gaussian_sycl import gaussian as gaussian_sycl

__all__ = ["gaussian_sycl"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
#include <CL/sycl.hpp>

using namespace sycl;

/// Computes the multipliers for elimination step `t`.
///
/// Each work-item handles one row below row `t` and stores, at that row's
/// column-`t` slot of `m_device`, the ratio of the corresponding `a_device`
/// entry to the pivot entry `a_device[size * t + t]`.
///
/// @param m_device  multiplier matrix, written.
/// @param a_device  input matrix, read only.
/// @param size      matrix dimension.
/// @param t         current elimination step.
/// @param item_ct1  nd_item identifying this work-item.
template <typename FpTy>
void gaussian_kernel_1(FpTy *m_device,
                       const FpTy *a_device,
                       int size,
                       int t,
                       sycl::nd_item<3> item_ct1)
{
    // Global index along dimension 2, built from group and local ids.
    const auto gid =
        item_ct1.get_group(2) * item_ct1.get_local_range().get(2) +
        item_ct1.get_local_id(2);

    // Only the size - 1 - t rows below row t need a multiplier.
    if (gid >= size - 1 - t) {
        return;
    }

    // Flattened index of (row gid + t + 1, column t).
    const auto target = size * (gid + t + 1) + t;
    m_device[target] = a_device[target] / a_device[size * t + t];
}

/// Applies elimination step `t` to the rows below row `t`.
///
/// Each work-item updates one entry of `a_device`; work-items whose
/// dimension-1 index is zero additionally update the matching entry of
/// `b_device`.
///
/// @param m_device  multiplier matrix, read.
/// @param a_device  input matrix, updated in place.
/// @param b_device  column matrix, updated in place.
/// @param size      matrix dimension.
/// @param j1        not referenced in the body.
/// @param t         current elimination step.
/// @param item_ct1  nd_item identifying this work-item.
template <typename FpTy>
void gaussian_kernel_2(FpTy *m_device,
                       FpTy *a_device,
                       FpTy *b_device,
                       int size,
                       int j1,
                       int t,
                       sycl::nd_item<3> item_ct1)
{
    // Global indices along dimensions 2 and 1.
    const auto gid_x =
        item_ct1.get_group(2) * item_ct1.get_local_range().get(2) +
        item_ct1.get_local_id(2);
    const auto gid_y =
        item_ct1.get_group(1) * item_ct1.get_local_range().get(1) +
        item_ct1.get_local_id(1);

    // Skip work-items outside the rows below row t or outside the columns
    // from column t onwards.
    if (gid_x >= size - 1 - t || gid_y >= size - t) {
        return;
    }

    const int xidx = static_cast<int>(gid_x);
    const int yidx = static_cast<int>(gid_y);
    const int row = xidx + 1 + t;
    const int col = yidx + t;

    a_device[size * row + col] -=
        m_device[size * row + t] * a_device[size * t + col];

    // col == t when yidx == 0, so this reads the same multiplier as above.
    if (yidx == 0) {
        b_device[row] -= m_device[size * row + col] * b_device[t];
    }
}
Loading

0 comments on commit 07f59c9

Please sign in to comment.