Skip to content

Commit

Permalink
Added gaussian implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
roxx30198 committed Nov 3, 2023
1 parent 263776f commit 2d7bb7c
Show file tree
Hide file tree
Showing 11 changed files with 513 additions and 0 deletions.
2 changes: 2 additions & 0 deletions dpbench/benchmarks/rodinia/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Add the gaussian benchmark directory to the build.
add_subdirectory(gaussian)
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Add the gaussian_sycl_native_ext directory to the build.
add_subdirectory(gaussian_sycl_native_ext)
19 changes: 19 additions & 0 deletions dpbench/benchmarks/rodinia/gaussian/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""Gaussian elimination implementation."""

"""This is sycl and numba-dpex implementation for gaussian elimination
Input
---------
size<int_64> : Forms an input matrix of dimensions (size x size)
Output
--------
result<array<float>> : Result of the given set of linear equations using
gaussian elimination.
Method:
Forward elimination applies Gaussian row operations to reduce the input
matrix to upper-triangular form; back substitution then solves the
resulting equations to obtain the result.
"""
45 changes: 45 additions & 0 deletions dpbench/benchmarks/rodinia/gaussian/gaussian_initialize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""Initialization function for matrices for gaussian elimination."""


def initialize(size, Lambda, types_dict=None):
    """Build the input arrays for the gaussian elimination benchmark.

    Entry (i, j) of the coefficient matrix is ``10 * exp(Lambda * |i - j|)``;
    the matrix is stored row-major in a flat array.

    Args:
        size: size for matrices (size x size).
        Lambda: lambda value used as the exponent scale of the coefficients.
        types_dict: mapping whose "float" entry is the dtype of every
            returned array. When omitted (None), numpy's float64 is used.

    Returns:
        Tuple ``(a, b, m, result)``:
            a: actual matrix, flat array of length size * size.
            b: base matrix (column matrix), all ones, length size.
            m: multiplier matrix, all zeros, length size * size.
            result: result of operation, all zeros, length size.
    """
    import math

    import numpy as np

    # The signature advertises types_dict=None, so honour that default
    # instead of failing with a TypeError when subscripting None.
    dtype = np.float64 if types_dict is None else types_dict["float"]

    # coe holds the coefficients symmetrically around index size - 1.
    coe = np.empty((2 * size - 1), dtype=dtype)
    for i in range(size):
        coe_i = 10 * math.exp(Lambda * i)
        coe[size - 1 + i] = coe_i
        coe[size - 1 - i] = coe_i

    a = np.empty((size * size), dtype=dtype)
    for i in range(size):
        # Row i of the matrix is the length-`size` window of coe that starts
        # at offset size - 1 - i, i.e. a[i * size + j] = coe[size - 1 - i + j].
        start = size - 1 - i
        a[i * size : (i + 1) * size] = coe[start : start + size]

    return (
        a,
        np.ones(size, dtype=dtype),
        np.zeros((size * size), dtype=dtype),
        np.zeros(size, dtype=dtype),
    )
135 changes: 135 additions & 0 deletions dpbench/benchmarks/rodinia/gaussian/gaussian_numba_dpex_k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""Numba-dpex implementation for gaussian elimination."""

import dpctl
import numba_dpex


@numba_dpex.kernel()
def gaussian_kernel_1(m, a, size, t):
    """Compute the multipliers for elimination step ``t``.

    Each work-item handles one row below row ``t`` and stores the ratio of
    that row's entry in column ``t`` to the pivot ``a[size * t + t]``.

    Args:
        m: multiplier matrix (flat, indexed as ``size * row + column``).
        a: input matrix (flat, same indexing).
        size: size of matrix.
        t: current iteration.
    """
    offset = numba_dpex.get_group_id(2) * numba_dpex.get_local_size(
        2
    ) + numba_dpex.get_local_id(2)

    # Only the size - 1 - t rows below row t take part in this step.
    if offset >= size - 1 - t:
        return

    # Same flat position is written in m and read from a.
    target = size * (offset + t + 1) + t
    m[target] = a[target] / a[size * t + t]


@numba_dpex.kernel()
def gaussian_kernel_2(m, a, b, size, t):
    """Apply the row updates of elimination step ``t``.

    Each work-item updates one entry of ``a`` in a row below row ``t``; the
    work-item whose column offset is zero also updates the matching entry
    of ``b``.

    Args:
        m: multiplier matrix.
        a: input matrix.
        b: column matrix.
        size: size of matrices.
        t: current iteration.
    """
    row_off = numba_dpex.get_group_id(2) * numba_dpex.get_local_size(
        2
    ) + numba_dpex.get_local_id(2)
    col_off = numba_dpex.get_group_id(1) * numba_dpex.get_local_size(
        1
    ) + numba_dpex.get_local_id(1)

    # Rows t + 1 .. size - 1 are updated in this step.
    if row_off >= size - 1 - t:
        return

    # Columns t .. size - 1 are updated in this step.
    if col_off >= size - t:
        return

    row = row_off + 1 + t
    col = col_off + t

    a[size * row + col] -= m[size * row + t] * a[size * t + col]
    if col_off == 0:
        # Here col == t, so this reads the same multiplier as above.
        b[row] -= m[size * row + col] * b[t]


def gaussian(a, b, m, size, block_sizeXY, result):
    """Perform Gaussian elimination using gaussian operations.

    For every step ``t`` the multiplier kernel and the row-update kernel are
    launched; back substitution is then carried out on the host to fill
    ``result``.

    Args:
        a: input matrix.
        b: column matrix.
        m: multiplier matrix.
        size: size of matrices.
        block_sizeXY: work-group edge length for the 2-d kernel.
        result: result matrix.
    """
    device = dpctl.SyclDevice()
    block_size = device.max_work_group_size

    # Ceiling division so the 1-d launch covers all `size` rows. The previous
    # expression `(size / block_size) + 0 if cond else 1` parsed as
    # `(... + 0) if cond else 1` and gave a single work-group whenever size
    # was not a multiple of block_size.
    grid_size = (size + block_size - 1) // block_size

    blocksize2d = block_sizeXY
    gridsize2d = (size + blocksize2d - 1) // blocksize2d

    global_range = numba_dpex.Range(1, 1, grid_size * block_size)
    local_range = numba_dpex.Range(1, 1, block_size)

    dim_blockXY = numba_dpex.Range(1, blocksize2d, blocksize2d)
    dim_gridXY = numba_dpex.Range(
        1, gridsize2d * blocksize2d, gridsize2d * blocksize2d
    )

    # Forward elimination: one pair of kernel launches per step.
    for t in range(size - 1):
        gaussian_kernel_1[numba_dpex.NdRange(global_range, local_range)](
            m, a, size, t
        )

        gaussian_kernel_2[numba_dpex.NdRange(dim_gridXY, dim_blockXY)](
            m, a, b, size, t
        )

    # Back substitution, from the last row up to the first.
    for i in range(size):
        row = size - i - 1
        result[row] = b[row]
        for j in range(i):
            col = size - j - 1
            result[row] -= a[size * row + col] * result[col]
        result[row] = result[row] / a[size * row + row]
35 changes: 35 additions & 0 deletions dpbench/benchmarks/rodinia/gaussian/gaussian_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""Gaussian elimination python serial implementation."""


def gaussian(a, b, m, size, block_sizeXY, result):
    """Python serial implementation for gaussian elimination.

    ``a``, ``b``, ``m`` and ``result`` are modified in place; matrices are
    flat and indexed as ``size * row + column``.

    Args:
        a: actual matrix.
        b: base matrix (column matrix).
        m: multiplier matrix; entry ``i * size + t`` receives the multiplier
            applied to row ``i`` at step ``t``.
        size: size for matrices (size x size).
        block_sizeXY: block size for parallel 2d-kernel (not used here).
        result: result of operation.
    """
    # Forward Elimination
    for t in range(size - 1):
        # Row t is not modified during step t, so the pivot can be read once.
        pivot = a[t * size + t]
        for i in range(t + 1, size):
            # Use a local scalar and record it in the multiplier matrix
            # instead of rebinding the `m` parameter, which left the
            # caller's matrix untouched.
            multiplier = a[i * size + t] / pivot
            m[i * size + t] = multiplier
            for j in range(t, size):
                a[i * size + j] = a[i * size + j] - multiplier * a[t * size + j]
            b[i] = b[i] - multiplier * b[t]

    # Back Substitution
    for i in range(size):
        row = size - i - 1
        result[row] = b[row]
        for j in range(i):
            col = size - j - 1
            result[row] -= a[size * row + col] * result[col]
        result[row] = result[row] / a[size * row + row]
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# The Python extension target is the module name with a leading underscore.
set(module_name gaussian_sycl)
set(py_module_name _${module_name})
# Build the extension as a MODULE library from
# <module_name>/<py_module_name>.cpp and register that source for SYCL.
python_add_library(${py_module_name} MODULE ${module_name}/${py_module_name}.cpp)
add_sycl_to_target(TARGET ${py_module_name} SOURCES ${module_name}/${py_module_name}.cpp)
# Add the include directories listed in Dpctl_INCLUDE_DIRS to the target.
target_include_directories(${py_module_name} PRIVATE ${Dpctl_INCLUDE_DIRS})

# Install into <this directory relative to the source root>/<module_name>.
file(RELATIVE_PATH py_module_dest ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
install(TARGETS ${py_module_name}
DESTINATION ${py_module_dest}/${module_name}
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""Sycl implementation for gaussian elimination."""

from .gaussian_sycl._gaussian_sycl import gaussian as gaussian_sycl

__all__ = ["gaussian_sycl"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
#include <CL/sycl.hpp>

using namespace sycl;

/// @brief Computes the multipliers for elimination step @p t.
///
/// The calling work-item handles one row below row @p t and stores the ratio
/// of that row's entry in column @p t to the pivot a_device[size * t + t].
///
/// @param m_device  Multiplier matrix, flat, indexed as size * row + column.
/// @param a_device  Input matrix, same indexing; read only.
/// @param size      Size of the square matrix.
/// @param t         Current elimination step.
/// @param item_ct1  Identifies the calling work-item; only dimension 2 is used.
template <typename FpTy>
void gaussian_kernel_1(FpTy *m_device,
                       const FpTy *a_device,
                       int size,
                       int t,
                       sycl::nd_item<3> item_ct1)
{
    // Linear position of this work-item along dimension 2.
    const auto gid =
        item_ct1.get_group(2) * item_ct1.get_local_range().get(2) +
        item_ct1.get_local_id(2);

    // Only the size - 1 - t rows below row t take part in this step.
    if (gid >= size - 1 - t) {
        return;
    }

    // The same flat position is written in m_device and read from a_device.
    const auto idx = size * (gid + t + 1) + t;
    m_device[idx] = a_device[idx] / a_device[size * t + t];
}

/// @brief Applies the row updates of elimination step @p t.
///
/// The calling work-item updates one entry of @p a_device in a row below
/// row @p t; the work-item whose column offset is zero also updates the
/// matching entry of @p b_device.
///
/// @param m_device  Multiplier matrix, flat, indexed as size * row + column.
/// @param a_device  Input matrix, same indexing; updated in place.
/// @param b_device  Column vector; updated in place.
/// @param size      Size of the square matrix.
/// @param j1        Not referenced by this kernel.
/// @param t         Current elimination step.
/// @param item_ct1  Identifies the calling work-item; dimension 2 selects the
///                  row offset and dimension 1 the column offset.
template <typename FpTy>
void gaussian_kernel_2(FpTy *m_device,
                       FpTy *a_device,
                       FpTy *b_device,
                       int size,
                       int j1,
                       int t,
                       sycl::nd_item<3> item_ct1)
{
    const auto pos_x =
        item_ct1.get_group(2) * item_ct1.get_local_range().get(2) +
        item_ct1.get_local_id(2);
    const auto pos_y =
        item_ct1.get_group(1) * item_ct1.get_local_range().get(1) +
        item_ct1.get_local_id(1);

    // Rows t + 1 .. size - 1 are updated in this step.
    if (pos_x >= size - 1 - t) {
        return;
    }
    // Columns t .. size - 1 are updated in this step.
    if (pos_y >= size - t) {
        return;
    }

    const int col_off = static_cast<int>(pos_y);
    const int row = static_cast<int>(pos_x) + 1 + t;
    const int col = col_off + t;

    a_device[size * row + col] -=
        m_device[size * row + t] * a_device[size * t + col];

    if (col_off == 0) {
        // Here col == t, so this reads the same multiplier as above.
        b_device[row] -= m_device[size * row + col] * b_device[t];
    }
}
Loading

0 comments on commit 2d7bb7c

Please sign in to comment.