Skip to content

Commit 2d7bb7c

Browse files
author
roxx30198
committed
Added gaussian implementation
1 parent 263776f commit 2d7bb7c

File tree

11 files changed

+513
-0
lines changed

11 files changed

+513
-0
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
22
#
33
# SPDX-License-Identifier: Apache-2.0
4+
5+
add_subdirectory(gaussian)
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
22
#
33
# SPDX-License-Identifier: Apache-2.0
4+
5+
add_subdirectory(gaussian_sycl_native_ext)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
"""Gaussian elimination implementation."""
6+
7+
"""SYCL and numba-dpex implementations of Gaussian elimination.
8+
Input
9+
---------
10+
size<int_64> : Forms an input matrix of dimensions (size x size)
11+
Output
12+
--------
13+
result<array<float>> : Result of the given set of linear equations using
14+
gaussian elimination.
15+
Method:
16+
The gaussian transformations are applied to the input matrix to form the
17+
upper-triangular matrix in forward elimination, and then the equations are solved
18+
to find the result in back substitution.
19+
"""
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
"""Initialization function for matrices for gaussian elimination."""
5+
6+
7+
def initialize(size, Lambda, types_dict=None):
    """Initialize the matrices based on size and type.

    The coefficient matrix is stored flattened in row-major order. Its entry
    at row ``i``, column ``j`` is ``10 * exp(Lambda * abs(i - j))``, so every
    diagonal of the matrix holds a single value.

    Args:
        size: size for matrices (size x size).
        Lambda: lambda value used as the exponent scale of the coefficients.
        types_dict: mapping whose "float" entry is the dtype of the arrays.
            When None, ``numpy.float64`` is used.

    Returns: a: actual matrix (flattened, size * size elements).
        b: base matrix (column matrix), filled with ones.
        m: multiplier matrix (flattened, size * size elements), zeroed.
        result: result of operation, zeroed.
    """
    import math

    import numpy as np

    # The signature advertises types_dict=None, so the default must not crash.
    dtype = np.float64 if types_dict is None else types_dict["float"]

    # coe[size - 1 + k] == coe[size - 1 - k] == 10 * exp(Lambda * k)
    coe = np.empty((2 * size - 1), dtype=dtype)
    for i in range(size):
        coe_i = 10 * math.exp(Lambda * i)
        coe[size - 1 + i] = coe_i
        coe[size - 1 - i] = coe_i

    # Row i of the matrix is the window of coe starting at size - 1 - i.
    a = np.empty((size * size), dtype=dtype)
    for i in range(size):
        start = size - 1 - i
        a[i * size : (i + 1) * size] = coe[start : start + size]

    return (
        a,
        np.ones(size, dtype=dtype),
        np.zeros((size * size), dtype=dtype),
        np.zeros(size, dtype=dtype),
    )
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
"""Numba-dpex implementation for gaussian elimination."""
5+
6+
import dpctl
7+
import numba_dpex
8+
9+
10+
@numba_dpex.kernel()
def gaussian_kernel_1(m, a, size, t):
    """Compute column ``t`` of the multiplier matrix.

    Each work item handles one row below the pivot row ``t`` and stores the
    ratio of that row's entry in column ``t`` to the pivot ``a[t][t]``.

    Args:
        m: multiplier matrix (flattened, row-major).
        a: input matrix (flattened, row-major).
        size: size of matrix.
        t: current iteration.
    """
    gid = (
        numba_dpex.get_group_id(2) * numba_dpex.get_local_size(2)
        + numba_dpex.get_local_id(2)
    )

    # Only the size - 1 - t rows below the pivot row have a multiplier.
    if gid < size - 1 - t:
        target = size * (gid + t + 1) + t
        m[target] = a[target] / a[size * t + t]
49+
50+
51+
@numba_dpex.kernel()
def gaussian_kernel_2(m, a, b, size, t):
    """Apply one elimination step to the rows below pivot row ``t``.

    Dimension 2 of the launch selects the row below the pivot and dimension 1
    selects the column offset from ``t``.

    Args:
        m: multiplier matrix (flattened, row-major).
        a: input matrix (flattened, row-major), updated in place.
        b: column matrix, updated in place.
        size: size of matrices.
        t: current iteration.
    """
    row_gid = numba_dpex.get_group_id(2) * numba_dpex.get_local_size(
        2
    ) + numba_dpex.get_local_id(2)
    col_gid = numba_dpex.get_group_id(1) * numba_dpex.get_local_size(
        1
    ) + numba_dpex.get_local_id(1)

    # Work items outside the trailing sub-matrix have nothing to do.
    if row_gid >= size - 1 - t or col_gid >= size - t:
        return

    row_base = size * (row_gid + 1 + t)
    a[row_base + (col_gid + t)] -= m[row_base + t] * a[size * t + (col_gid + t)]

    # Exactly one work item per row also updates the right-hand side.
    if col_gid == 0:
        b[row_gid + 1 + t] -= m[row_base + t] * b[t]
88+
89+
90+
def gaussian(a, b, m, size, block_sizeXY, result):
    """Perform Gaussian elimination using gaussian operations.

    Forward elimination runs as two kernel launches per pivot; back
    substitution is then performed by the loop at the end of this function.

    Args:
        a: input matrix (flattened, row-major), reduced in place.
        b: column matrix, updated in place.
        m: multiplier matrix (flattened, row-major).
        size: size of matrices.
        block_sizeXY: edge length of the 2d work group used by the
            elimination kernel.
        result: result matrix, filled with the solution.
    """
    device = dpctl.SyclDevice()
    block_size = device.max_work_group_size

    # Ceil-division. The previous form `x + 0 if cond else 1` parsed as
    # `(x + 0) if cond else 1`, which yielded a single work group whenever
    # size was not a multiple of block_size and left rows unprocessed.
    grid_size = (size + block_size - 1) // block_size

    blocksize2d = block_sizeXY
    gridsize2d = (size + blocksize2d - 1) // blocksize2d

    global_range = numba_dpex.Range(1, 1, grid_size * block_size)
    local_range = numba_dpex.Range(1, 1, block_size)

    dim_blockXY = numba_dpex.Range(1, blocksize2d, blocksize2d)
    dim_gridXY = numba_dpex.Range(
        1, gridsize2d * blocksize2d, gridsize2d * blocksize2d
    )

    # Forward elimination: one multiplier column, then one update, per pivot.
    for t in range(size - 1):
        gaussian_kernel_1[numba_dpex.NdRange(global_range, local_range)](
            m, a, size, t
        )

        gaussian_kernel_2[numba_dpex.NdRange(dim_gridXY, dim_blockXY)](
            m, a, b, size, t
        )

    # Back substitution, from the last unknown up to the first.
    for i in range(size):
        row = size - i - 1
        result[row] = b[row]
        for j in range(i):
            col = size - j - 1
            result[row] -= a[size * row + col] * result[col]
        result[row] = result[row] / a[size * row + row]
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
"""Gaussian elimination python serial implementation."""
5+
6+
7+
def gaussian(a, b, m, size, block_sizeXY, result):
    """Python serial implementation for gaussian elimination.

    No pivoting is performed, so every pivot ``a[t][t]`` must be non-zero.

    Args:
        a: actual matrix (flattened, row-major), reduced in place.
        b: base matrix (column matrix), updated in place.
        m: multiplier matrix (flattened, row-major). Each multiplier is
            stored at ``m[i * size + t]``; may be None to skip recording.
        size: size for matrices(sizexsize).
        block_sizeXY: block size for parallel 2d-kernel (unused here; kept so
            the signature matches the parallel implementations).
        result: result of operation, filled with the solution.
    """
    record_multipliers = m is not None

    # Forward Elimination
    for t in range(size - 1):
        pivot_row = t * size
        for i in range(t + 1, size):
            row = i * size
            # A local is used so the multiplier-matrix argument is not shadowed.
            multiplier = a[row + t] / a[pivot_row + t]
            if record_multipliers:
                m[row + t] = multiplier
            for j in range(t, size):
                a[row + j] = a[row + j] - multiplier * a[pivot_row + j]
            b[i] = b[i] - multiplier * b[t]

    # Back Substitution
    for i in range(size):
        row = size - i - 1
        result[row] = b[row]
        for j in range(i):
            col = size - j - 1
            result[row] -= a[size * row + col] * result[col]
        result[row] = result[row] / a[size * row + row]
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Base name of the native extension; the built target carries a leading
# underscore in front of this name.
set(module_name gaussian_sycl)
6+
set(py_module_name _${module_name})
7+
# Build the extension as a MODULE library from <module_name>/<py_module_name>.cpp.
python_add_library(${py_module_name} MODULE ${module_name}/${py_module_name}.cpp)
8+
# NOTE(review): add_sycl_to_target is defined outside this file; presumably it
# enables SYCL compilation for the listed source - confirm.
add_sycl_to_target(TARGET ${py_module_name} SOURCES ${module_name}/${py_module_name}.cpp)
9+
# Headers from Dpctl_INCLUDE_DIRS are needed only to build this target.
target_include_directories(${py_module_name} PRIVATE ${Dpctl_INCLUDE_DIRS})
10+
11+
# Install under the same path, relative to the top-level source directory,
# that this directory has in the source tree.
file(RELATIVE_PATH py_module_dest ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
12+
install(TARGETS ${py_module_name}
13+
DESTINATION ${py_module_dest}/${module_name}
14+
)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
"""Sycl implementation for gaussian elimination."""
5+
6+
from .gaussian_sycl._gaussian_sycl import gaussian as gaussian_sycl
7+
8+
__all__ = ["gaussian_sycl"]
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
#include <CL/sycl.hpp>
5+
6+
using namespace sycl;
7+
8+
template <typename FpTy>
9+
void gaussian_kernel_1(FpTy *m_device,
10+
const FpTy *a_device,
11+
int size,
12+
int t,
13+
sycl::nd_item<3> item_ct1)
14+
{
15+
if (item_ct1.get_local_id(2) +
16+
item_ct1.get_group(2) * item_ct1.get_local_range().get(2) >=
17+
size - 1 - t)
18+
return;
19+
m_device[size * (item_ct1.get_local_range().get(2) * item_ct1.get_group(2) +
20+
item_ct1.get_local_id(2) + t + 1) +
21+
t] = a_device[size * (item_ct1.get_local_range().get(2) *
22+
item_ct1.get_group(2) +
23+
item_ct1.get_local_id(2) + t + 1) +
24+
t] /
25+
a_device[size * t + t];
26+
}
27+
28+
template <typename FpTy>
29+
void gaussian_kernel_2(FpTy *m_device,
30+
FpTy *a_device,
31+
FpTy *b_device,
32+
int size,
33+
int j1,
34+
int t,
35+
sycl::nd_item<3> item_ct1)
36+
{
37+
if (item_ct1.get_local_id(2) +
38+
item_ct1.get_group(2) * item_ct1.get_local_range().get(2) >=
39+
size - 1 - t)
40+
return;
41+
if (item_ct1.get_local_id(1) +
42+
item_ct1.get_group(1) * item_ct1.get_local_range().get(1) >=
43+
size - t)
44+
return;
45+
46+
int xidx = item_ct1.get_group(2) * item_ct1.get_local_range().get(2) +
47+
item_ct1.get_local_id(2);
48+
int yidx = item_ct1.get_group(1) * item_ct1.get_local_range().get(1) +
49+
item_ct1.get_local_id(1);
50+
51+
a_device[size * (xidx + 1 + t) + (yidx + t)] -=
52+
m_device[size * (xidx + 1 + t) + t] * a_device[size * t + (yidx + t)];
53+
if (yidx == 0) {
54+
b_device[xidx + 1 + t] -=
55+
m_device[size * (xidx + 1 + t) + (yidx + t)] * b_device[t];
56+
}
57+
}

0 commit comments

Comments
 (0)