Skip to content

Commit

Permalink
Gaussian elimination impl
Browse files Browse the repository at this point in the history
  • Loading branch information
roxx30198 committed Oct 30, 2023
1 parent 77459c0 commit 1820e49
Show file tree
Hide file tree
Showing 11 changed files with 469 additions and 0 deletions.
2 changes: 2 additions & 0 deletions dpbench/benchmarks/rodinia/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Descend into the gaussian benchmark directory.
add_subdirectory(gaussian)
5 changes: 5 additions & 0 deletions dpbench/benchmarks/rodinia/gaussian/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Descend into the directory holding the SYCL native extension of this benchmark.
add_subdirectory(gaussian_sycl_native_ext)
28 changes: 28 additions & 0 deletions dpbench/benchmarks/rodinia/gaussian/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""
Gaussian elimination implementation
This is a SYCL and numba-dpex implementation of Gaussian elimination.
Input
---------
size<int_64> : Forms an input matrix of dimensions (size x size)
Output
--------
result<array<float>> : Solution of the given system of linear equations,
computed using Gaussian elimination.
Method:
Forward elimination applies Gaussian row transformations to reduce the input
matrix to upper-triangular form; back substitution then solves the resulting
equations to obtain the result.
"""
34 changes: 34 additions & 0 deletions dpbench/benchmarks/rodinia/gaussian/gaussian_initialize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Decay rate of the exponential used to generate the matrix coefficients.
LAMBDA = -0.01


def initialize(size, types_dict):
    """Build the input data for the Gaussian elimination benchmark.

    The coefficient matrix is stored flat, with element ``(i, j)`` at index
    ``i * size + j`` and value ``10 * exp(LAMBDA * abs(i - j))``.

    Args:
        size: Number of equations; the matrix has ``size * size`` elements.
        types_dict: Mapping that provides the floating point dtype under the
            ``"float"`` key.

    Returns:
        A 4-tuple ``(a, b, m, result)``:
        ``a`` is the flat coefficient matrix (``size * size`` elements),
        ``b`` is a vector of ones (``size`` elements),
        ``m`` is a zero-filled buffer (``size * size`` elements),
        ``result`` is a zero-filled vector (``size`` elements).

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    import math

    import numpy as np

    if size < 1:
        # Without this check np.empty(2 * size - 1) fails with a far less
        # helpful "negative dimensions" ValueError.
        raise ValueError(f"size must be a positive integer, got {size}")

    dtype = types_dict["float"]

    # coe is symmetric around its centre (index size - 1):
    # coe[size - 1 + d] == coe[size - 1 - d] == 10 * exp(LAMBDA * d).
    coe = np.empty(2 * size - 1, dtype=dtype)
    for i in range(size):
        coe_i = 10 * math.exp(LAMBDA * i)
        coe[size - 1 + i] = coe_i
        coe[size - 1 - i] = coe_i

    # a[i * size + j] = coe[size - 1 - i + j], gathered for all (i, j) at once
    # instead of a Python-level double loop.
    idx = np.arange(size)
    a = coe[(size - 1) - idx[:, None] + idx[None, :]].ravel()

    return (
        a,
        np.ones(size, dtype=dtype),
        np.zeros((size * size), dtype=dtype),
        np.zeros(size, dtype=dtype),
    )
107 changes: 107 additions & 0 deletions dpbench/benchmarks/rodinia/gaussian/gaussian_numba_dpex_k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

import dpctl
import numba_dpex


@numba_dpex.kernel()
def gaussian_kernel_1(m, a, size, t):
    """Compute the elimination multipliers for step ``t``.

    Each work-item handles one row below row ``t`` and writes
    ``a[size * row + t] / a[size * t + t]`` into ``m[size * row + t]``.

    Args:
        m: Flat buffer receiving the multipliers.
        a: Flat coefficient buffer, indexed as ``size * row + column``.
        size: Row stride of the flat buffers.
        t: Index of the current elimination step.
    """
    # Position of this work-item along dimension 2 of the launch range.
    gid = numba_dpex.get_local_id(2) + numba_dpex.get_group_id(
        2
    ) * numba_dpex.get_local_size(2)

    # Only the first size - 1 - t work-items have a row to process.
    if gid < size - 1 - t:
        target = size * (gid + t + 1) + t
        m[target] = a[target] / a[size * t + t]


@numba_dpex.kernel()
def gaussian_kernel_2(m, a, b, size, t):
    """Apply elimination step ``t`` to the coefficient buffer and to ``b``.

    Each work-item updates one element at ``(row, col)`` with
    ``row > t`` and ``col >= t``, subtracting the multiplier stored in ``m``
    times the matching element of row ``t``. The work-items whose index
    along dimension 1 is zero additionally update ``b[row]``.

    Args:
        m: Flat buffer holding the multipliers for step ``t``.
        a: Flat coefficient buffer, updated in place.
        b: Right-hand-side vector, updated in place.
        size: Row stride of the flat buffers.
        t: Index of the current elimination step.
    """
    xidx = numba_dpex.get_group_id(2) * numba_dpex.get_local_size(
        2
    ) + numba_dpex.get_local_id(2)
    yidx = numba_dpex.get_group_id(1) * numba_dpex.get_local_size(
        1
    ) + numba_dpex.get_local_id(1)

    # Work-items outside the (size - 1 - t) x (size - t) region do nothing.
    if xidx >= size - 1 - t or yidx >= size - t:
        return

    row = xidx + 1 + t
    col = yidx + t

    a[size * row + col] -= m[size * row + t] * a[size * t + col]

    # One work-item per row (yidx == 0, hence col == t) updates b.
    if yidx == 0:
        b[row] -= m[size * row + col] * b[t]


def gaussian(a, b, m, size, block_sizeXY, result):
    """Solve a linear system with Gaussian elimination using numba-dpex.

    Forward elimination is carried out by launching ``gaussian_kernel_1``
    and ``gaussian_kernel_2`` once per elimination step; back substitution
    is then performed by the host loop at the end of this function.

    Args:
        a: Flat coefficient buffer (``size * size`` elements), modified in
            place.
        b: Right-hand-side vector (``size`` elements), modified in place.
        m: Flat buffer (``size * size`` elements) receiving the multipliers.
        size: Number of equations.
        block_sizeXY: Edge length of the 2D work-group used for
            ``gaussian_kernel_2``.
        result: Output vector (``size`` elements) receiving the solution.
    """
    device = dpctl.SyclDevice()
    block_size = device.max_work_group_size

    # Number of work-groups needed to cover `size` items (ceiling division).
    # The previous expression, `int((size / block_size) + 0 if ... else 1)`,
    # parsed as `(... + 0) if ... else 1` and therefore produced 1 whenever
    # size was not a multiple of block_size, leaving rows past the first
    # work-group without a multiplier when size > block_size.
    grid_size = (size + block_size - 1) // block_size

    blocksize2d = block_sizeXY
    gridsize2d = (size + blocksize2d - 1) // blocksize2d

    global_range = numba_dpex.Range(1, 1, grid_size * block_size)
    local_range = numba_dpex.Range(1, 1, block_size)

    dim_blockXY = numba_dpex.Range(1, blocksize2d, blocksize2d)
    dim_gridXY = numba_dpex.Range(
        1, gridsize2d * blocksize2d, gridsize2d * blocksize2d
    )

    # Forward elimination: one pair of kernel launches per step.
    for t in range(size - 1):
        gaussian_kernel_1[numba_dpex.NdRange(global_range, local_range)](
            m, a, size, t
        )

        gaussian_kernel_2[numba_dpex.NdRange(dim_gridXY, dim_blockXY)](
            m, a, b, size, t
        )

    # Back substitution, from the last row up to the first.
    for i in range(size):
        row = size - i - 1
        result[row] = b[row]
        for j in range(i):
            col = size - j - 1
            result[row] -= a[size * row + col] * result[col]
        result[row] = result[row] / a[size * row + row]
24 changes: 24 additions & 0 deletions dpbench/benchmarks/rodinia/gaussian/gaussian_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0


def gaussian(a, b, m, size, block_sizeXY, result):
    """Solve a linear system with Gaussian elimination (reference version).

    No pivoting is performed, so every pivot ``a[t * size + t]`` must be
    non-zero.

    Args:
        a: Flat coefficient buffer with element ``(i, j)`` at
            ``i * size + j``; reduced in place during forward elimination.
        b: Right-hand-side vector (``size`` elements), modified in place.
        m: Flat buffer (``size * size`` elements); the multiplier used to
            eliminate column ``t`` from row ``i`` is stored at
            ``m[i * size + t]``, as the device kernels do.
        size: Number of equations.
        block_sizeXY: Unused here; kept so the signature matches the other
            implementations.
        result: Output vector (``size`` elements) receiving the solution.
    """
    # Forward Elimination
    for t in range(size - 1):
        pivot_row = t * size
        for i in range(t + 1, size):
            row = i * size
            # Record the multiplier in the caller's buffer instead of
            # shadowing the `m` parameter with a scalar. It must be computed
            # before the inner loop overwrites a[row + t].
            multiplier = a[row + t] / a[pivot_row + t]
            m[row + t] = multiplier
            for j in range(t, size):
                a[row + j] = a[row + j] - multiplier * a[pivot_row + j]
            b[i] = b[i] - multiplier * b[t]

    # Back Substitution
    for i in range(size):
        row = size - i - 1
        result[row] = b[row]
        for j in range(i):
            col = size - j - 1
            result[row] -= a[size * row + col] * result[col]
        result[row] = result[row] / a[size * row + row]
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Directory name of the SYCL sources; the Python extension target is the same
# name prefixed with an underscore.
set(module_name gaussian_sycl)
set(py_module_name _${module_name})
# Build the Python extension module from the single C++ source file.
python_add_library(${py_module_name} MODULE ${module_name}/${py_module_name}.cpp)
# NOTE(review): presumably enables SYCL compilation for this source; confirm
# against the definition of add_sycl_to_target.
add_sycl_to_target(TARGET ${py_module_name} SOURCES ${module_name}/${py_module_name}.cpp)
target_include_directories(${py_module_name} PRIVATE ${Dpctl_INCLUDE_DIRS})

# Install the module under the same path, relative to the source root, that
# this directory has in the source tree.
file(RELATIVE_PATH py_module_dest ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
install(TARGETS ${py_module_name}
DESTINATION ${py_module_dest}/${module_name}
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

from .gaussian_sycl._gaussian_sycl import gaussian as gaussian_sycl

__all__ = ["gaussian_sycl"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
#include <CL/sycl.hpp>

using namespace sycl;

// Computes the elimination multipliers for step t: the work-item with index
// gid along dimension 2 handles row gid + t + 1 and writes
// a_device[size * row + t] / a_device[size * t + t] to m_device[size * row + t].
template <typename FpTy>
void gaussian_kernel_1(FpTy *m_device,
                       const FpTy *a_device,
                       int size,
                       int t,
                       sycl::nd_item<3> item_ct1)
{
    // Position of this work-item along dimension 2 of the launch range.
    const auto gid =
        item_ct1.get_local_id(2) +
        item_ct1.get_group(2) * item_ct1.get_local_range().get(2);

    // Only the first size - 1 - t work-items have a row to process.
    if (gid >= size - 1 - t) {
        return;
    }

    const auto target = size * (gid + t + 1) + t;
    m_device[target] = a_device[target] / a_device[size * t + t];
}

// Applies elimination step t: each work-item updates one element (row, col)
// of a_device with row > t and col >= t, using the multiplier stored in
// m_device. Work-items with yidx == 0 also update b_device[row].
// The j1 parameter is not used by the body.
template <typename FpTy>
void gaussian_kernel_2(FpTy *m_device,
                       FpTy *a_device,
                       FpTy *b_device,
                       int size,
                       int j1,
                       int t,
                       sycl::nd_item<3> item_ct1)
{
    // Positions of this work-item along dimensions 2 and 1 of the launch range.
    const auto gid_x =
        item_ct1.get_group(2) * item_ct1.get_local_range().get(2) +
        item_ct1.get_local_id(2);
    const auto gid_y =
        item_ct1.get_group(1) * item_ct1.get_local_range().get(1) +
        item_ct1.get_local_id(1);

    // Work-items outside the (size - 1 - t) x (size - t) region do nothing.
    if (gid_x >= size - 1 - t || gid_y >= size - t) {
        return;
    }

    const int xidx = static_cast<int>(gid_x);
    const int yidx = static_cast<int>(gid_y);
    const int row = xidx + 1 + t;
    const int col = yidx + t;

    a_device[size * row + col] -=
        m_device[size * row + t] * a_device[size * t + col];

    // One work-item per row (yidx == 0, hence col == t) updates b_device.
    if (yidx == 0) {
        b_device[row] -= m_device[size * row + col] * b_device[t];
    }
}
Loading

0 comments on commit 1820e49

Please sign in to comment.