@@ -45,7 +45,7 @@ function matmul_impl(a, b, c, d,
45
45
46
46
@unroll for i = 1 : NUM_FRAGMENTS_M
47
47
@unroll for j = 1 : NUM_FRAGMENTS_N
48
- tile = translate_const (warp_tile, (M = (i- 1 )* COMPUTE_OP_SHAPE. M, N = (j- 1 )* COMPUTE_OP_SHAPE. N))
48
+ tile = translate (warp_tile, (M = (i- 1 )* COMPUTE_OP_SHAPE. M, N = (j- 1 )* COMPUTE_OP_SHAPE. N))
49
49
@inbounds c_frags[i, j] = transf_sh2rf_c (Operator. load_c (OPERATOR, SHARED_C_LAYOUT, shmem_c, tile), tile)
50
50
end
51
51
end
@@ -84,15 +84,15 @@ function matmul_impl(a, b, c, d,
84
84
a_frags = MArray {Tuple{NUM_FRAGMENTS_M}, Operator.fragtype_a(OPERATOR, SHARED_A_LAYOUT)} (undef)
85
85
86
86
@unroll for i = 1 : NUM_FRAGMENTS_M
87
- a_tile = translate_const (warp_tile. MK, (M = (i- 1 )* COMPUTE_OP_SHAPE. M, K = 0 ))
87
+ a_tile = translate (warp_tile. MK, (M = (i- 1 )* COMPUTE_OP_SHAPE. M, K = 0 ))
88
88
@inbounds a_frags[i] = transf_sh2rf_a (Operator. load_a (OPERATOR, SHARED_A_LAYOUT, shmem_a, a_tile), a_tile)
89
89
end
90
90
91
91
# (3.3.2) Load a COMPUTE_WARP.K x COMPUTE_WARP.N tile of B from shared memory into registers
92
92
b_frags = MArray {Tuple{NUM_FRAGMENTS_N}, Operator.fragtype_b(OPERATOR, SHARED_B_LAYOUT)} (undef)
93
93
94
94
@unroll for j = 1 : NUM_FRAGMENTS_N
95
- b_tile = translate_const (warp_tile. KN, (K = 0 , N = (j- 1 )* COMPUTE_OP_SHAPE. N))
95
+ b_tile = translate (warp_tile. KN, (K = 0 , N = (j- 1 )* COMPUTE_OP_SHAPE. N))
96
96
@inbounds b_frags[j] = transf_sh2rf_b (Operator. load_b (OPERATOR, SHARED_B_LAYOUT, shmem_b, b_tile), b_tile)
97
97
end
98
98
@@ -114,7 +114,7 @@ function matmul_impl(a, b, c, d,
114
114
115
115
@unroll for i = 1 : NUM_FRAGMENTS_M
116
116
@unroll for j = 1 : NUM_FRAGMENTS_N
117
- tile = translate_const (warp_tile, (M = (i- 1 )* COMPUTE_OP_SHAPE. M, N = (j- 1 )* COMPUTE_OP_SHAPE. N))
117
+ tile = translate (warp_tile, (M = (i- 1 )* COMPUTE_OP_SHAPE. M, N = (j- 1 )* COMPUTE_OP_SHAPE. N))
118
118
Operator. store_d (OPERATOR, SHARED_D_LAYOUT, shmem_d, transf_rf2sh_d (c_frags[i, j], tile), tile)
119
119
end
120
120
end
0 commit comments