@@ -92,7 +92,7 @@ void main() {
92
92
93
93
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
94
94
95
- VEC4_T q_8w_linear(const u16vec3 out_pos, const uint16_t K) {
95
+ VEC4_T q_8w_linear(const u16vec2 out_pos, const uint16_t K) {
96
96
const uint16_t qmat2_pos_y = out_pos.x * uint16_t(4 );
97
97
98
98
VEC4_T outtex = VEC4_T(0 );
@@ -101,7 +101,7 @@ VEC4_T q_8w_linear(const u16vec3 out_pos, const uint16_t K) {
101
101
const VEC4_T scales = load_texel(t_scales, scales_pos);
102
102
103
103
for (uint16_t i = uint16_t(0 ), x = uint16_t(0 ); i < K; i += uint16_t(4 ), x++ ) {
104
- const VEC4_T mat1_tex = load_texel(t_mat1, u16vec3(x, out_pos.yz ));
104
+ const VEC4_T mat1_tex = load_texel(t_mat1, u16vec3(x, out_pos.y, 0 ));
105
105
const VEC4_T sums = VEC4_T(
106
106
dot (mat1_tex, load_texel(t_qmat2, u16vec3(x, qmat2_pos_y, 0 ))),
107
107
dot (mat1_tex, load_texel(t_qmat2, u16vec3(x, qmat2_pos_y + uint16_t(1 ), 0 ))),
@@ -117,16 +117,15 @@ VEC4_T q_8w_linear(const u16vec3 out_pos, const uint16_t K) {
117
117
}
118
118
119
119
// Shader entry point, shown here in diff form: lines starting with '-' are the
// removed version and lines starting with '+' are the added version.
//
// Derives the output position from the 1-D invocation index
// gl_GlobalInvocationID.x:
//   out_pos.x = gl_GlobalInvocationID.x / out_limits.y
//   out_pos.y = gl_GlobalInvocationID.x % out_limits.y
// The removed version carried a constant 0 as a third component of out_pos;
// the added version keeps out_pos two-component and supplies the 0 only when
// building the u16vec3 passed to write_texel.
void main() {
120
- const u16vec3 out_pos = u16vec3 (
120
+ const u16vec2 out_pos = u16vec2 (
121
121
gl_GlobalInvocationID.x / out_limits.y,
122
- gl_GlobalInvocationID.x % out_limits.y,
123
- 0 );
122
+ gl_GlobalInvocationID.x % out_limits.y);
124
123
// Invocations whose x coordinate is not below out_limits.x exit without
// writing anything. Only x is tested here; y comes from the modulo above.
if (out_pos.x >= out_limits.x) {
125
124
return ;
126
125
}
127
126
128
127
// The K argument of q_8w_linear is mat1_sizes.x converted to uint16_t.
VEC4_T outtex = q_8w_linear(out_pos, uint16_t(mat1_sizes.x));
129
- write_texel(t_out, out_pos, outtex);
128
+ write_texel(t_out, u16vec3( out_pos, 0 ) , outtex);
130
129
}
131
130
132
131
#endif
0 commit comments