Skip to content
This repository was archived by the owner on Aug 30, 2024. It is now read-only.

Commit 44ddaeb

Browse files
sunjiweiswift authored and DDEle committed
update load/store
1 parent c39cdfc commit 44ddaeb

File tree

1 file changed

+14
-14
lines changed

1 file changed

+14
-14
lines changed

include/subgroup/tile/impl/load_xe.hpp

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -274,16 +274,16 @@ tile_load(tile_t& tile, payload_t& payload) {
274274

275275
reg_blk.xetla_select<load_elems, 1>(remained_start)
276276
.xetla_format<native_type_t<load_dtype>>() = xetla_load_global<
277-
load_dtype,
277+
native_type_t<load_dtype>,
278278
block_size_x / scale_factor,
279-
block_size_y,
280-
num_block,
279+
remained_blk_size_y,
280+
arr_len,
281281
trans,
282282
mem_transform,
283283
L1,
284284
L2>(
285-
(load_dtype*)::gpu::xetla::detail::xetla_get_tensor_base_address(
286-
tdesc),
285+
(native_type_t<load_dtype>*)::gpu::xetla::detail::
286+
xetla_get_tensor_base_address(tdesc),
287287
::gpu::xetla::detail::xetla_get_tensor_width_x(tdesc),
288288
::gpu::xetla::detail::xetla_get_tensor_width_y(tdesc),
289289
::gpu::xetla::detail::xetla_get_tensor_pitch_x(tdesc),
@@ -335,16 +335,16 @@ tile_load(tile_t& tile, payload_t& payload) {
335335
remained_ld_blk_size_y * block_size_x * arr_len;
336336

337337
reg_tmp.xetla_format<native_type_t<load_dtype>>() = xetla_load_global<
338-
load_dtype,
338+
native_type_t<load_dtype>,
339339
block_size_x / scale_factor,
340340
block_size_y,
341-
num_block,
341+
arr_len,
342342
trans,
343343
mem_transform,
344344
L1,
345345
L2>(
346-
(load_dtype*)::gpu::xetla::detail::xetla_get_tensor_base_address(
347-
tdesc),
346+
(native_type_t<load_dtype>*)::gpu::xetla::detail::
347+
xetla_get_tensor_base_address(tdesc),
348348
::gpu::xetla::detail::xetla_get_tensor_width_x(tdesc),
349349
::gpu::xetla::detail::xetla_get_tensor_width_y(tdesc),
350350
::gpu::xetla::detail::xetla_get_tensor_pitch_x(tdesc),
@@ -402,16 +402,16 @@ tile_load(tile_t& tile, payload_t& payload) {
402402
tdesc.xetla_format<uint32_t>(), block_widthx_widthy_arrlen);
403403
reg_blk.xetla_select<final_load_elems, 1>(final_start)
404404
.xetla_format<native_type_t<load_dtype>>() = xetla_load_global<
405-
load_dtype,
405+
native_type_t<load_dtype>,
406406
block_size_x / scale_factor,
407-
block_size_y,
408-
num_block,
407+
final_ld_blk_size_y,
408+
arr_len,
409409
trans,
410410
mem_transform,
411411
L1,
412412
L2>(
413-
(load_dtype*)::gpu::xetla::detail::xetla_get_tensor_base_address(
414-
tdesc),
413+
(native_type_t<load_dtype>*)::gpu::xetla::detail::
414+
xetla_get_tensor_base_address(tdesc),
415415
::gpu::xetla::detail::xetla_get_tensor_width_x(tdesc),
416416
::gpu::xetla::detail::xetla_get_tensor_width_y(tdesc),
417417
::gpu::xetla::detail::xetla_get_tensor_pitch_x(tdesc),

0 commit comments

Comments (0)