@@ -274,16 +274,16 @@ tile_load(tile_t& tile, payload_t& payload) {
274
274
275
275
reg_blk.xetla_select <load_elems, 1 >(remained_start)
276
276
.xetla_format <native_type_t <load_dtype>>() = xetla_load_global<
277
- load_dtype,
277
+ native_type_t < load_dtype> ,
278
278
block_size_x / scale_factor,
279
- block_size_y ,
280
- num_block ,
279
+ remained_blk_size_y ,
280
+ arr_len ,
281
281
trans,
282
282
mem_transform,
283
283
L1,
284
284
L2>(
285
- (load_dtype*)::gpu::xetla::detail::xetla_get_tensor_base_address (
286
- tdesc),
285
+ (native_type_t < load_dtype> *)::gpu::xetla::detail::
286
+ xetla_get_tensor_base_address ( tdesc),
287
287
::gpu::xetla::detail::xetla_get_tensor_width_x (tdesc),
288
288
::gpu::xetla::detail::xetla_get_tensor_width_y(tdesc),
289
289
::gpu::xetla::detail::xetla_get_tensor_pitch_x(tdesc),
@@ -335,16 +335,16 @@ tile_load(tile_t& tile, payload_t& payload) {
335
335
remained_ld_blk_size_y * block_size_x * arr_len;
336
336
337
337
reg_tmp.xetla_format <native_type_t <load_dtype>>() = xetla_load_global<
338
- load_dtype,
338
+ native_type_t < load_dtype> ,
339
339
block_size_x / scale_factor,
340
340
block_size_y,
341
- num_block ,
341
+ arr_len ,
342
342
trans,
343
343
mem_transform,
344
344
L1,
345
345
L2>(
346
- (load_dtype*)::gpu::xetla::detail::xetla_get_tensor_base_address (
347
- tdesc),
346
+ (native_type_t < load_dtype> *)::gpu::xetla::detail::
347
+ xetla_get_tensor_base_address ( tdesc),
348
348
::gpu::xetla::detail::xetla_get_tensor_width_x (tdesc),
349
349
::gpu::xetla::detail::xetla_get_tensor_width_y(tdesc),
350
350
::gpu::xetla::detail::xetla_get_tensor_pitch_x(tdesc),
@@ -402,16 +402,16 @@ tile_load(tile_t& tile, payload_t& payload) {
402
402
tdesc.xetla_format <uint32_t >(), block_widthx_widthy_arrlen);
403
403
reg_blk.xetla_select <final_load_elems, 1 >(final_start)
404
404
.xetla_format <native_type_t <load_dtype>>() = xetla_load_global<
405
- load_dtype,
405
+ native_type_t < load_dtype> ,
406
406
block_size_x / scale_factor,
407
- block_size_y ,
408
- num_block ,
407
+ final_ld_blk_size_y ,
408
+ arr_len ,
409
409
trans,
410
410
mem_transform,
411
411
L1,
412
412
L2>(
413
- (load_dtype*)::gpu::xetla::detail::xetla_get_tensor_base_address (
414
- tdesc),
413
+ (native_type_t < load_dtype> *)::gpu::xetla::detail::
414
+ xetla_get_tensor_base_address ( tdesc),
415
415
::gpu::xetla::detail::xetla_get_tensor_width_x (tdesc),
416
416
::gpu::xetla::detail::xetla_get_tensor_width_y(tdesc),
417
417
::gpu::xetla::detail::xetla_get_tensor_pitch_x(tdesc),
0 commit comments