diff --git a/src/asm/aarch64/transform/forward.rs b/src/asm/aarch64/transform/forward.rs index 627c34adbc..2f339d5065 100644 --- a/src/asm/aarch64/transform/forward.rs +++ b/src/asm/aarch64/transform/forward.rs @@ -19,46 +19,6 @@ use debug_unreachable::debug_unreachable; use core::arch::aarch64::*; -#[inline] -unsafe fn vrshrq_n_s32_switch(a: int32x4_t, n: i32) -> int32x4_t { - match n { - 0 => a, - 1 => vrshrq_n_s32(a, 1), - 2 => vrshrq_n_s32(a, 2), - 3 => vrshrq_n_s32(a, 3), - 4 => vrshrq_n_s32(a, 4), - 5 => vrshrq_n_s32(a, 5), - 6 => vrshrq_n_s32(a, 6), - 7 => vrshrq_n_s32(a, 7), - 8 => vrshrq_n_s32(a, 8), - 9 => vrshrq_n_s32(a, 9), - 10 => vrshrq_n_s32(a, 10), - 11 => vrshrq_n_s32(a, 11), - 12 => vrshrq_n_s32(a, 12), - 13 => vrshrq_n_s32(a, 13), - 14 => vrshrq_n_s32(a, 14), - 15 => vrshrq_n_s32(a, 15), - 16 => vrshrq_n_s32(a, 16), - 17 => vrshrq_n_s32(a, 17), - 18 => vrshrq_n_s32(a, 18), - 19 => vrshrq_n_s32(a, 19), - 20 => vrshrq_n_s32(a, 20), - 21 => vrshrq_n_s32(a, 21), - 22 => vrshrq_n_s32(a, 22), - 23 => vrshrq_n_s32(a, 23), - 24 => vrshrq_n_s32(a, 24), - 25 => vrshrq_n_s32(a, 25), - 26 => vrshrq_n_s32(a, 26), - 27 => vrshrq_n_s32(a, 27), - 28 => vrshrq_n_s32(a, 28), - 29 => vrshrq_n_s32(a, 29), - 30 => vrshrq_n_s32(a, 30), - 31 => vrshrq_n_s32(a, 31), - 32 => vrshrq_n_s32(a, 32), - _ => unreachable!(), - } -} - #[derive(Clone, Copy)] #[repr(transparent)] struct I32X8(int32x4x2_t); @@ -92,10 +52,10 @@ impl TxOperations for I32X8 { } #[inline] - unsafe fn tx_mul(self, mul: (i32, i32)) -> Self { + unsafe fn tx_mul(self, mul: i32) -> Self { I32X8::new( - vrshrq_n_s32_switch(vmulq_n_s32(self.vec().0, mul.0), mul.1), - vrshrq_n_s32_switch(vmulq_n_s32(self.vec().1, mul.0), mul.1), + vrshrq_n_s32(vmulq_n_s32(self.vec().0, mul), SHIFT), + vrshrq_n_s32(vmulq_n_s32(self.vec().1, mul), SHIFT), ) } @@ -268,11 +228,8 @@ unsafe fn shift_left_neon(a: I32X8, shift: u8) -> I32X8 { } #[inline] -unsafe fn shift_right_neon(a: I32X8, shift: u8) -> I32X8 { - I32X8::new( - vrshrq_n_s32_switch(a.vec().0, shift.into()), - vrshrq_n_s32_switch(a.vec().1, shift.into()), - ) +unsafe fn shift_right_neon(a: I32X8) -> I32X8 { + I32X8::new(vrshrq_n_s32(a.vec().0, SHIFT), vrshrq_n_s32(a.vec().1, SHIFT)) } #[inline] @@ -285,11 +242,20 @@ unsafe fn round_shift_array_neon(arr: &mut [I32X8], bit: i8) { return; } if bit > 0 { - let shift = bit as u8; - for s in arr.chunks_exact_mut(4) { - for chunk in s { - *chunk = shift_right_neon(*chunk, shift); + if bit == 1 { + for s in arr.chunks_exact_mut(4) { + for chunk in s { + *chunk = shift_right_neon::<1>(*chunk) + } } + } else if bit == 2 { + for s in arr.chunks_exact_mut(4) { + for chunk in s { + *chunk = shift_right_neon::<2>(*chunk) + } + } + } else { + debug_unreachable!(); } } else { let shift = (-bit) as u8; diff --git a/src/asm/x86/transform/forward.rs b/src/asm/x86/transform/forward.rs index ab98ded604..5b901edca3 100644 --- a/src/asm/x86/transform/forward.rs +++ b/src/asm/x86/transform/forward.rs @@ -55,13 +55,13 @@ impl TxOperations for I32X8 { #[target_feature(enable = "avx2")] #[inline] - unsafe fn tx_mul(self, mul: (i32, i32)) -> Self { + unsafe fn tx_mul(self, mul: i32) -> Self { I32X8::new(_mm256_srav_epi32( _mm256_add_epi32( - _mm256_mullo_epi32(self.vec(), _mm256_set1_epi32(mul.0)), - _mm256_set1_epi32(1 << mul.1 >> 1), + _mm256_mullo_epi32(self.vec(), _mm256_set1_epi32(mul)), + _mm256_set1_epi32(1 << SHIFT >> 1), ), - _mm256_set1_epi32(mul.1), + _mm256_set1_epi32(SHIFT), )) } diff --git a/src/transform/forward.rs b/src/transform/forward.rs index 663e99580e..91bc97c3ee 100644 --- a/src/transform/forward.rs +++ b/src/transform/forward.rs @@ -39,8 +39,8 @@ pub mod rust { 0 } - fn tx_mul(self, mul: (i32, i32)) -> Self { - ((self * mul.0) + (1 << mul.1 >> 1)) >> mul.1 + fn tx_mul(self, mul: i32) -> Self { + ((self * mul) + (1 << SHIFT >> 1)) >> SHIFT } fn rshift1(self) -> Self { diff --git a/src/transform/forward_shared.rs b/src/transform/forward_shared.rs index 03b8c41db2..5a7314886d 100644 --- a/src/transform/forward_shared.rs +++ b/src/transform/forward_shared.rs @@ -185,7 +185,7 @@ macro_rules! impl_1d_tx { pub trait TxOperations: Copy { $($s)* fn zero() -> Self; - $($s)* fn tx_mul(self, _: (i32, i32)) -> Self; + $($s)* fn tx_mul(self, mul: i32) -> Self; $($s)* fn rshift1(self) -> Self; $($s)* fn add(self, b: Self) -> Self; $($s)* fn sub(self, b: Self) -> Self; @@ -222,9 +222,9 @@ macro_rules! impl_1d_tx { const SUB: $($s)* fn(T, T) -> T; #[$m] - $($s)* fn kernel(p0: T, p1: T, m: ((i32, i32), (i32, i32))) -> (T, T) { + $($s)* fn kernel(p0: T, p1: T, m: (i32, i32)) -> (T, T) { let t = Self::ADD(p1, p0); - let (a, out0) = (p0.tx_mul(m.0), t.tx_mul(m.1)); + let (a, out0) = (p0.tx_mul::(m.0), t.tx_mul::(m.1)); let out1 = Self::SUB(a, out0); (out0, out1) } @@ -261,11 +261,11 @@ trait RotateKernel { const SHIFT: $($s)* fn(T) -> T; #[$m] - $($s)* fn half_kernel( - p0: (T, T), p1: T, m: ((i32, i32), (i32, i32), (i32, i32)), + $($s)* fn half_kernel( + p0: (T, T), p1: T, m: (i32, i32, i32), ) -> (T, T) { let t = Self::ADD(p1, p0.0); - let (a, b, c) = (p0.1.tx_mul(m.0), p1.tx_mul(m.1), t.tx_mul(m.2)); + let (a, b, c) = (p0.1.tx_mul::(m.0), p1.tx_mul::(m.1), t.tx_mul::(m.2)); let out0 = b.add(c); let shifted = Self::SHIFT(c); let out1 = Self::SUB(a, shifted); @@ -273,8 +273,8 @@ trait RotateKernel { } #[$m] - $($s)* fn kernel(p0: T, p1: T, m: ((i32, i32), (i32, i32), (i32, i32))) -> (T, T) { - Self::half_kernel((p0, p0), p1, m) + $($s)* fn kernel(p0: T, p1: T, m: (i32, i32, i32)) -> (T, T) { + Self::half_kernel::((p0, p0), p1, m) } } @@ -282,9 +282,9 @@ trait RotateKernelNeg { const ADD: $($s)* fn(T, T) -> T; #[$m] - $($s)* fn kernel(p0: T, p1: T, m: ((i32, i32), (i32, i32), (i32, i32))) -> (T, T) { + $($s)* fn kernel(p0: T, p1: T, m: (i32, i32, i32)) -> (T, T) { let t = Self::ADD(p0, p1); - let (a, b, c) = (p0.tx_mul(m.0), p1.tx_mul(m.1), t.tx_mul(m.2)); + let (a, b, c) = (p0.tx_mul::(m.0), p1.tx_mul::(m.1), t.tx_mul::(m.2)); let out0 = b.sub(c); let out1 = c.sub(a); (out0, out1) @@ -405,7 +405,7 @@ $($s)* fn daala_fdst_iv_2_asym(p0: (T, T), p1h: T) -> (T, T) { // 473/512 = (Sin[3*Pi/8] + Cos[3*Pi/8])/Sqrt[2] = 0.9238795325112867 // 3135/4096 = (Sin[3*Pi/8] - Cos[3*Pi/8])*Sqrt[2] = 0.7653668647301795 // 4433/8192 = Cos[3*Pi/8]*Sqrt[2] = 0.5411961001461971 - RotateAdd::half_kernel(p0, p1h, ((473, 9), (3135, 12), (4433, 13))) + RotateAdd::half_kernel::<9, 12, 13>(p0, p1h, (473, 3135, 4433)) } #[$m] @@ -450,15 +450,15 @@ $($s)* fn daala_fdst_vii_4(coeffs: &mut [T]) { let t3 = q2; let t4 = q0.add(q3); // 7021/16384 ~= 2*Sin[2*Pi/9]/3 ~= 0.428525073124360 - let t0 = t0.tx_mul((7021, 14)); + let t0 = t0.tx_mul::<14>(7021); // 37837/32768 ~= 4*Sin[3*Pi/9]/3 ~= 1.154700538379252 - let t1 = t1.tx_mul((37837, 15)); + let t1 = t1.tx_mul::<15>(37837); // 21513/32768 ~= 2*Sin[4*Pi/9]/3 ~= 0.656538502008139 - let t2 = t2.tx_mul((21513, 15)); + let t2 = t2.tx_mul::<15>(21513); // 37837/32768 ~= 4*Sin[3*Pi/9]/3 ~= 1.154700538379252 - let t3 = t3.tx_mul((37837, 15)); + let t3 = t3.tx_mul::<15>(37837); // 467/2048 ~= 2*Sin[1*Pi/9]/3 ~= 0.228013428883779 - let t4 = t4.tx_mul((467, 11)); + let t4 = t4.tx_mul::<11>(467); let t3h = t3.rshift1(); let u4 = t4.add(t3h); coeffs[0] = t0.add(u4); @@ -471,7 +471,7 @@ $($s)* fn daala_fdst_vii_4(coeffs: &mut [T]) { $($s)* fn daala_fdct_ii_2(p0: T, p1: T) -> (T, T) { // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951 - let (p1, p0) = RotatePi4SubAvg::kernel(p1, p0, ((11585, 13), (11585, 13))); + let (p1, p0) = RotatePi4SubAvg::kernel::<13, 13>(p1, p0, (11585, 11585)); (p0, p1) } @@ -480,7 +480,7 @@ $($s)* fn daala_fdst_iv_2(p0: T, p1: T) -> (T, T) { // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461971 // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796 - RotateAddAvg::kernel(p0, p1, ((10703, 13), (8867, 14), (3135, 12))) + RotateAddAvg::kernel::<13, 14, 12>(p0, p1, (10703, 8867, 3135)) } #[$m] @@ -506,18 +506,18 @@ $($s)* fn daala_fdst_iv_4_asym( // 9633/16384 = (Sin[7*Pi/16] + Cos[7*Pi/16])/2 = 0.5879378012096793 // 12873/8192 = (Sin[7*Pi/16] - Cos[7*Pi/16])*2 = 1.5713899167742045 // 12785/32768 = Cos[7*Pi/16]*2 = 0.3901806440322565 - let (q0, q3) = RotateAddShift::half_kernel( + let (q0, q3) = RotateAddShift::half_kernel::<14, 13, 15>( q0, q3h, - ((9633, 14), (12873, 13), (12785, 15)), + (9633, 12873, 12785), ); // 11363/16384 = (Sin[5*Pi/16] + Cos[5*Pi/16])/2 = 0.6935199226610738 // 18081/32768 = (Sin[5*Pi/16] - Cos[5*Pi/16])*2 = 0.5517987585658861 // 4551/4096 = Cos[5*Pi/16]*2 = 1.1111404660392044 - let (q2, q1) = RotateSubShift::half_kernel( + let (q2, q1) = RotateSubShift::half_kernel::<14, 15, 12>( q2, q1h, - ((11363, 14), (18081, 15), (4551, 12)), + (11363, 18081, 4551), ); // Stage 1 @@ -527,7 +527,7 @@ $($s)* fn daala_fdst_iv_4_asym( // Stage 2 // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951 - let (q2, q1) = RotatePi4AddAvg::kernel(q2, q1, ((11585, 13), (11585, 13))); + let (q2, q1) = RotatePi4AddAvg::kernel::<13, 13>(q2, q1, (11585, 11585)); store_coeffs!(output, q0, q1, q2, q3); } @@ -583,22 +583,22 @@ $($s)* fn daala_fdst_iv_8( // 14699/16384 = Sin[15*Pi/32] - Cos[15*Pi/32] = 0.8971675863426363 // 803/8192 = Cos[15*Pi/32] = 0.0980171403295606 let (r0, r7) = - RotateAdd::kernel(r0, r7, ((17911, 14), (14699, 14), (803, 13))); + RotateAdd::kernel::<14, 14, 13>(r0, r7, (17911, 14699, 803)); // 20435/16384 = Sin[13*Pi/32] + Cos[13*Pi/32] = 1.24722501298667123 // 21845/32768 = Sin[13*Pi/32] - Cos[13*Pi/32] = 0.66665565847774650 // 1189/4096 = Cos[13*Pi/32] = 0.29028467725446233 let (r6, r1) = - RotateSub::kernel(r6, r1, ((20435, 14), (21845, 15), (1189, 12))); + RotateSub::kernel::<14, 15, 12>(r6, r1, (20435, 21845, 1189)); // 22173/16384 = Sin[11*Pi/32] + Cos[11*Pi/32] = 1.3533180011743526 // 3363/8192 = Sin[11*Pi/32] - Cos[11*Pi/32] = 0.4105245275223574 // 15447/32768 = Cos[11*Pi/32] = 0.47139673682599764 let (r2, r5) = - RotateAdd::kernel(r2, r5, ((22173, 14), (3363, 13), (15447, 15))); + RotateAdd::kernel::<14, 13, 15>(r2, r5, (22173, 3363, 15447)); // 23059/16384 = Sin[9*Pi/32] + Cos[9*Pi/32] = 1.4074037375263826 // 2271/16384 = Sin[9*Pi/32] - Cos[9*Pi/32] = 0.1386171691990915 // 5197/8192 = Cos[9*Pi/32] = 0.6343932841636455 let (r4, r3) = - RotateSub::kernel(r4, r3, ((23059, 14), (2271, 14), (5197, 13))); + RotateSub::kernel::<14, 14, 13>(r4, r3, (23059, 2271, 5197)); // Stage 1 let (r0, r3h) = butterfly_add(r0, r3); @@ -617,15 +617,15 @@ $($s)* fn daala_fdst_iv_8( // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796 let (r3, r4) = - RotateSubAvg::kernel(r3, r4, ((10703, 13), (8867, 14), (3135, 12))); + RotateSubAvg::kernel::<13, 14, 12>(r3, r4, (10703, 8867, 3135)); // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796 let (r2, r5) = - RotateNegAvg::kernel(r2, r5, ((10703, 13), (8867, 14), (3135, 12))); + RotateNegAvg::kernel::<13, 14, 12>(r2, r5, (10703, 8867, 3135)); // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951 - let (r1, r6) = RotatePi4SubAvg::kernel(r1, r6, ((11585, 13), (11585, 13))); + let (r1, r6) = RotatePi4SubAvg::kernel::<13, 13>(r1, r6, (11585, 11585)); store_coeffs!(output, r0, r1, r2, r3, r4, r5, r6, r7); } @@ -665,12 +665,12 @@ $($s)* fn daala_fdst_iv_4( // 4551/4096 = (Sin[7*Pi/16] - Cos[7*Pi/16])*Sqrt[2] = 1.111140466039204 // 9041/32768 = Cos[7*Pi/16]*Sqrt[2] = 0.275899379282943 let (q0, q3) = - RotateAddShift::kernel(q0, q3, ((13623, 14), (4551, 12), (565, 11))); + RotateAddShift::kernel::<14, 12, 11>(q0, q3, (13623, 4551, 565)); // 16069/16384 = (Sin[5*Pi/16] + Cos[5*Pi/16])/Sqrt[2] = 0.9807852804032304 // 12785/32768 = (Sin[5*Pi/16] - Cos[5*Pi/16])*Sqrt[2] = 0.3901806440322566 // 1609/2048 = Cos[5*Pi/16]*Sqrt[2] = 0.7856949583871021 let (q2, q1) = - RotateSubShift::kernel(q2, q1, ((16069, 14), (12785, 15), (1609, 11))); + RotateSubShift::kernel::<14, 15, 11>(q2, q1, (16069, 12785, 1609)); // Stage 1 let (q2, q3) = butterfly_sub_asym((q2.rshift1(), q2), q3); @@ -679,7 +679,7 @@ $($s)* fn daala_fdst_iv_4( // Stage 2 // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951 - let (q2, q1) = RotatePi4AddAvg::kernel(q2, q1, ((11585, 13), (11585, 13))); + let (q2, q1) = RotatePi4AddAvg::kernel::<13, 13>(q2, q1, (11585, 11585)); store_coeffs!(output, q0, q1, q2, q3); } @@ -712,22 +712,22 @@ $($s)* fn daala_fdst_iv_8_asym( // 5197/4096 = (Sin[15*Pi/32] - Cos[15*Pi/32])*Sqrt[2] = 1.26878656832729 // 2271/16384 = Cos[15*Pi/32]*Sqrt[2] = 0.13861716919909 let (r0, r7) = - RotateAdd::half_kernel(r0, r7h, ((12665, 14), (5197, 12), (2271, 14))); + RotateAdd::half_kernel::<14, 12, 14>(r0, r7h, (12665, 5197, 2271)); // 14449/16384 = Sin[13*Pi/32] + Cos[13*Pi/32])/Sqrt[2] = 0.881921264348355 // 30893/32768 = Sin[13*Pi/32] - Cos[13*Pi/32])*Sqrt[2] = 0.942793473651995 // 3363/8192 = Cos[13*Pi/32]*Sqrt[2] = 0.410524527522357 let (r6, r1) = - RotateSub::half_kernel(r6, r1h, ((14449, 14), (30893, 15), (3363, 13))); + RotateSub::half_kernel::<14, 15, 13>(r6, r1h, (14449, 30893, 3363)); // 15679/16384 = Sin[11*Pi/32] + Cos[11*Pi/32])/Sqrt[2] = 0.956940335732209 // 1189/2048 = Sin[11*Pi/32] - Cos[11*Pi/32])*Sqrt[2] = 0.580569354508925 // 5461/8192 = Cos[11*Pi/32]*Sqrt[2] = 0.666655658477747 let (r2, r5) = - RotateAdd::half_kernel(r2, r5h, ((15679, 14), (1189, 11), (5461, 13))); + RotateAdd::half_kernel::<14, 11, 13>(r2, r5h, (15679, 1189, 5461)); // 16305/16384 = (Sin[9*Pi/32] + Cos[9*Pi/32])/Sqrt[2] = 0.9951847266721969 // 803/4096 = (Sin[9*Pi/32] - Cos[9*Pi/32])*Sqrt[2] = 0.1960342806591213 // 14699/16384 = Cos[9*Pi/32]*Sqrt[2] = 0.8971675863426364 let (r4, r3) = - RotateSub::half_kernel(r4, r3h, ((16305, 14), (803, 12), (14699, 14))); + RotateSub::half_kernel::<14, 12, 14>(r4, r3h, (16305, 803, 14699)); // Stage 1 let (r0, r3h) = butterfly_add(r0, r3); @@ -746,15 +746,15 @@ $($s)* fn daala_fdst_iv_8_asym( // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796 let (r3, r4) = - RotateSubAvg::kernel(r3, r4, ((669, 9), (8867, 14), (3135, 12))); + RotateSubAvg::kernel::<9, 14, 12>(r3, r4, (669, 8867, 3135)); // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796 let (r2, r5) = - RotateNegAvg::kernel(r2, r5, ((669, 9), (8867, 14), (3135, 12))); + RotateNegAvg::kernel::<9, 14, 12>(r2, r5, (669, 8867, 3135)); // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951 - let (r1, r6) = RotatePi4SubAvg::kernel(r1, r6, ((5793, 12), (11585, 13))); + let (r1, r6) = RotatePi4SubAvg::kernel::<12, 13>(r1, r6, (5793, 11585)); store_coeffs!(output, r0, r1, r2, r3, r4, r5, r6, r7); } @@ -832,42 +832,42 @@ $($s)* fn daala_fdst_iv_16( // 11003/8192 = (Sin[31*Pi/64] - Cos[31*Pi/64])*Sqrt[2] = 1.34311790969404 // 1137/16384 = Cos[31*Pi/64]*Sqrt[2] = 0.06939217050794 let (s0, sf) = - RotateAddShift::kernel(s0, sf, ((24279, 15), (11003, 13), (1137, 14))); + RotateAddShift::kernel::<15, 13, 14>(s0, sf, (24279, 11003, 1137)); // 1645/2048 = (Sin[29*Pi/64] + Cos[29*Pi/64])/Sqrt[2] = 0.8032075314806449 // 305/256 = (Sin[29*Pi/64] - Cos[29*Pi/64])*Sqrt[2] = 1.1913986089848667 // 425/2048 = Cos[29*Pi/64]*Sqrt[2] = 0.2075082269882116 let (se, s1) = - RotateSubShift::kernel(se, s1, ((1645, 11), (305, 8), (425, 11))); + RotateSubShift::kernel::<11, 8, 11>(se, s1, (1645, 305, 425)); // 14053/32768 = (Sin[27*Pi/64] + Cos[27*Pi/64])/Sqrt[2] = 0.85772861000027 // 8423/8192 = (Sin[27*Pi/64] - Cos[27*Pi/64])*Sqrt[2] = 1.02820548838644 // 2815/8192 = Cos[27*Pi/64]*Sqrt[2] = 0.34362586580705 let (s2, sd) = - RotateAddShift::kernel(s2, sd, ((14053, 14), (8423, 13), (2815, 13))); + RotateAddShift::kernel::<14, 13, 13>(s2, sd, (14053, 8423, 2815)); // 14811/16384 = (Sin[25*Pi/64] + Cos[25*Pi/64])/Sqrt[2] = 0.90398929312344 // 7005/8192 = (Sin[25*Pi/64] - Cos[25*Pi/64])*Sqrt[2] = 0.85511018686056 // 3903/8192 = Cos[25*Pi/64]*Sqrt[2] = 0.47643419969316 let (sc, s3) = - RotateSubShift::kernel(sc, s3, ((14811, 14), (7005, 13), (3903, 13))); + RotateSubShift::kernel::<14, 13, 13>(sc, s3, (14811, 7005, 3903)); // 30853/32768 = (Sin[23*Pi/64] + Cos[23*Pi/64])/Sqrt[2] = 0.94154406518302 // 11039/16384 = (Sin[23*Pi/64] - Cos[23*Pi/64])*Sqrt[2] = 0.67377970678444 // 9907/16384 = Cos[23*Pi/64]*Sqrt[2] = 0.60465421179080 let (s4, sb) = - RotateAddShift::kernel(s4, sb, ((30853, 15), (11039, 14), (9907, 14))); + RotateAddShift::kernel::<15, 14, 14>(s4, sb, (30853, 11039, 9907)); // 15893/16384 = (Sin[21*Pi/64] + Cos[21*Pi/64])/Sqrt[2] = 0.97003125319454 // 3981/8192 = (Sin[21*Pi/64] - Cos[21*Pi/64])*Sqrt[2] = 0.89716758634264 // 1489/2048 = Cos[21*Pi/64]*Sqrt[2] = 0.72705107329128 let (sa, s5) = - RotateSubShift::kernel(sa, s5, ((15893, 14), (3981, 13), (1489, 11))); + RotateSubShift::kernel::<14, 13, 11>(sa, s5, (15893, 3981, 1489)); // 32413/32768 = (Sin[19*Pi/64] + Cos[19*Pi/64])/Sqrt[2] = 0.98917650996478 // 601/2048 = (Sin[19*Pi/64] - Cos[19*Pi/64])*Sqrt[2] = 0.29346094891072 // 13803/16384 = Cos[19*Pi/64]*Sqrt[2] = 0.84244603550942 let (s6, s9) = - RotateAddShift::kernel(s6, s9, ((32413, 15), (601, 11), (13803, 14))); + RotateAddShift::kernel::<15, 11, 14>(s6, s9, (32413, 601, 13803)); // 32729/32768 = (Sin[17*Pi/64] + Cos[17*Pi/64])/Sqrt[2] = 0.99879545620517 // 201/2048 = (Sin[17*Pi/64] - Cos[17*Pi/64])*Sqrt[2] = 0.09813534865484 // 1945/2048 = Cos[17*Pi/64]*Sqrt[2] = 0.94972778187775 let (s8, s7) = - RotateSubShift::kernel(s8, s7, ((32729, 15), (201, 11), (1945, 11))); + RotateSubShift::kernel::<15, 11, 11>(s8, s7, (32729, 201, 1945)); // Stage 1 let (s0, s7) = butterfly_sub_asym((s0.rshift1(), s0), s7); @@ -894,22 +894,22 @@ $($s)* fn daala_fdst_iv_16( // 1609/2048 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022 // 12785/32768 = 2*Cos[7*Pi/16] = 0.3901806440322565 let (s8, s7) = - RotateAddAvg::kernel(s8, s7, ((301, 8), (1609, 11), (12785, 15))); + RotateAddAvg::kernel::<8, 11, 15>(s8, s7, (301, 1609, 12785)); // 11363/8192 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475 // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431 // 4551/8192 = Cos[5*Pi/16] = 0.5555702330196022 let (s9, s6) = - RotateAdd::kernel(s9h, s6h, ((11363, 13), (9041, 15), (4551, 13))); + RotateAdd::kernel::<13, 15, 13>(s9h, s6h, (11363, 9041, 4551)); // 5681/4096 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475 // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431 // 4551/4096 = 2*Cos[5*Pi/16] = 1.1111404660392044 let (s5, sa) = - RotateNegAvg::kernel(s5, sa, ((5681, 12), (9041, 15), (4551, 12))); + RotateNegAvg::kernel::<12, 15, 12>(s5, sa, (5681, 9041, 4551)); // 9633/8192 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586 // 12873/16384 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022 // 6393/32768 = Cos[7*Pi/16] = 0.1950903220161283 let (s4, sb) = - RotateNeg::kernel(s4h, sbh, ((9633, 13), (12873, 14), (6393, 15))); + RotateNeg::kernel::<13, 14, 15>(s4h, sbh, (9633, 12873, 6393)); // Stage 4 let (s2, sc) = butterfly_add_asym(s2, sch); @@ -926,21 +926,21 @@ $($s)* fn daala_fdst_iv_16( // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 // 3135/4096 = 2*Cos[7*Pi/8] = 0.7653668647301796 let (sc, s3) = - RotateAddAvg::kernel(sc, s3, ((669, 9), (8867, 14), (3135, 12))); + RotateAddAvg::kernel::<9, 14, 12>(sc, s3, (669, 8867, 3135)); // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3870398453221475 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796 let (s2, sd) = - RotateNegAvg::kernel(s2, sd, ((669, 9), (8867, 14), (3135, 12))); + RotateNegAvg::kernel::<9, 14, 12>(s2, sd, (669, 8867, 3135)); // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951 - let (sa, s5) = RotatePi4AddAvg::kernel(sa, s5, ((5793, 12), (11585, 13))); + let (sa, s5) = RotatePi4AddAvg::kernel::<12, 13>(sa, s5, (5793, 11585)); // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951 - let (s6, s9) = RotatePi4AddAvg::kernel(s6, s9, ((5793, 12), (11585, 13))); + let (s6, s9) = RotatePi4AddAvg::kernel::<12, 13>(s6, s9, (5793, 11585)); // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951 - let (se, s1) = RotatePi4AddAvg::kernel(se, s1, ((5793, 12), (11585, 13))); + let (se, s1) = RotatePi4AddAvg::kernel::<12, 13>(se, s1, (5793, 11585)); store_coeffs!( output, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sa, sb, sc, sd, se, sf @@ -1022,50 +1022,50 @@ $($s)* fn daala_fdst_iv_16_asym( // 62241/32768 = (Sin[31*Pi/64] - Cos[31*Pi/64])*2 = 1.8994555637555088 // 201/16384 = Cos[31*Pi/64]*2 = 0.0981353486548360 let (s0, sf) = - RotateAddShift::half_kernel(s0, sfh, ((1073, 11), (62241, 15), (201, 11))); + RotateAddShift::half_kernel::<11, 15, 11>(s0, sfh, (1073, 62241, 201)); // 18611/32768 = (Sin[29*Pi/64] + Cos[29*Pi/64])/2 = 0.5679534922100714 // 55211/32768 = (Sin[29*Pi/64] - Cos[29*Pi/64])*2 = 1.6848920710188384 // 601/2048 = Cos[29*Pi/64]*2 = 0.2934609489107235 - let (se, s1) = RotateSubShift::half_kernel( + let (se, s1) = RotateSubShift::half_kernel::<15, 15, 11>( se, s1h, - ((18611, 15), (55211, 15), (601, 11)), + (18611, 55211, 601), ); // 9937/16384 = (Sin[27*Pi/64] + Cos[27*Pi/64])/2 = 0.6065057165489039 // 1489/1024 = (Sin[27*Pi/64] - Cos[27*Pi/64])*2 = 1.4541021465825602 // 3981/8192 = Cos[27*Pi/64]*2 = 0.4859603598065277 let (s2, sd) = - RotateAddShift::half_kernel(s2, sdh, ((9937, 14), (1489, 10), (3981, 13))); + RotateAddShift::half_kernel::<14, 10, 13>(s2, sdh, (9937, 1489, 3981)); // 10473/16384 = (Sin[25*Pi/64] + Cos[25*Pi/64])/2 = 0.6392169592876205 // 39627/32768 = (Sin[25*Pi/64] - Cos[25*Pi/64])*2 = 1.2093084235816014 // 11039/16384 = Cos[25*Pi/64]*2 = 0.6737797067844401 - let (sc, s3) = RotateSubShift::half_kernel( + let (sc, s3) = RotateSubShift::half_kernel::<14, 15, 14>( sc, s3h, - ((10473, 14), (39627, 15), (11039, 14)), + (10473, 39627, 11039), ); // 2727/4096 = (Sin[23*Pi/64] + Cos[23*Pi/64])/2 = 0.6657721932768628 // 3903/4096 = (Sin[23*Pi/64] - Cos[23*Pi/64])*2 = 0.9528683993863225 // 7005/8192 = Cos[23*Pi/64]*2 = 0.8551101868605642 let (s4, sb) = - RotateAddShift::half_kernel(s4, sbh, ((2727, 12), (3903, 12), (7005, 13))); + RotateAddShift::half_kernel::<12, 12, 13>(s4, sbh, (2727, 3903, 7005)); // 5619/8192 = (Sin[21*Pi/64] + Cos[21*Pi/64])/2 = 0.6859156770967569 // 2815/4096 = (Sin[21*Pi/64] - Cos[21*Pi/64])*2 = 0.6872517316141069 // 8423/8192 = Cos[21*Pi/64]*2 = 1.0282054883864433 let (sa, s5) = - RotateSubShift::half_kernel(sa, s5h, ((5619, 13), (2815, 12), (8423, 13))); + RotateSubShift::half_kernel::<13, 12, 13>(sa, s5h, (5619, 2815, 8423)); // 2865/4096 = (Sin[19*Pi/64] + Cos[19*Pi/64])/2 = 0.6994534179865391 // 13588/32768 = (Sin[19*Pi/64] - Cos[19*Pi/64])*2 = 0.4150164539764232 // 305/256 = Cos[19*Pi/64]*2 = 1.1913986089848667 let (s6, s9) = - RotateAddShift::half_kernel(s6, s9h, ((2865, 12), (13599, 15), (305, 8))); + RotateAddShift::half_kernel::<12, 15, 8>(s6, s9h, (2865, 13599, 305)); // 23143/32768 = (Sin[17*Pi/64] + Cos[17*Pi/64])/2 = 0.7062550401009887 // 1137/8192 = (Sin[17*Pi/64] - Cos[17*Pi/64])*2 = 0.1387843410158816 // 11003/8192 = Cos[17*Pi/64]*2 = 1.3431179096940367 - let (s8, s7) = RotateSubShift::half_kernel( + let (s8, s7) = RotateSubShift::half_kernel::<15, 13, 13>( s8, s7h, - ((23143, 15), (1137, 13), (11003, 13)), + (23143, 1137, 11003), ); // Stage 1 @@ -1093,22 +1093,22 @@ $($s)* fn daala_fdst_iv_16_asym( // 12873/16384 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022 // 6393/32768 = Cos[7*Pi/16] = 0.1950903220161283 let (s8, s7) = - RotateAdd::kernel(s8, s7, ((9633, 13), (12873, 14), (6393, 15))); + RotateAdd::kernel::<13, 14, 15>(s8, s7, (9633, 12873, 6393)); // 22725/16384 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475 // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431 // 4551/8192 = Cos[5*Pi/16] = 0.5555702330196022 let (s9, s6) = - RotateAdd::kernel(s9h, s6h, ((22725, 14), (9041, 15), (4551, 13))); + RotateAdd::kernel::<14, 15, 13>(s9h, s6h, (22725, 9041, 4551)); // 11363/8192 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475 // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431 // 4551/8192 = Cos[5*Pi/16] = 0.5555702330196022 let (s5, sa) = - RotateNeg::kernel(s5, sa, ((11363, 13), (9041, 15), (4551, 13))); + RotateNeg::kernel::<13, 15, 13>(s5, sa, (11363, 9041, 4551)); // 9633/32768 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586 // 12873/16384 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022 // 6393/32768 = Cos[7*Pi/16] = 0.1950903220161283 let (s4, sb) = - RotateNeg::kernel(s4h, sbh, ((9633, 13), (12873, 14), (6393, 15))); + RotateNeg::kernel::<13, 14, 15>(s4h, sbh, (9633, 12873, 6393)); // Stage 4 let (s2, sc) = butterfly_add_asym(s2, sch); @@ -1125,21 +1125,21 @@ $($s)* fn daala_fdst_iv_16_asym( // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898 let (sc, s3) = - RotateAdd::kernel(sc, s3, ((10703, 13), (8867, 14), (3135, 13))); + RotateAdd::kernel::<13, 14, 13>(sc, s3, (10703, 8867, 3135)); // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3870398453221475 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898 let (s2, sd) = - RotateNeg::kernel(s2, sd, ((10703, 13), (8867, 14), (3135, 13))); + RotateNeg::kernel::<13, 14, 13>(s2, sd, (10703, 8867, 3135)); // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 5793/8192 = Cos[Pi/4] = 0.7071067811865475 - let (sa, s5) = RotatePi4Add::kernel(sa, s5, ((11585, 13), (5793, 13))); + let (sa, s5) = RotatePi4Add::kernel::<13, 13>(sa, s5, (11585, 5793)); // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 5793/8192 = Cos[Pi/4] = 0.7071067811865475 - let (s6, s9) = RotatePi4Add::kernel(s6, s9, ((11585, 13), (5793, 13))); + let (s6, s9) = RotatePi4Add::kernel::<13, 13>(s6, s9, (11585, 5793)); // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 5793/8192 = Cos[Pi/4] = 0.7071067811865475 - let (se, s1) = RotatePi4Add::kernel(se, s1, ((11585, 13), (5793, 13))); + let (se, s1) = RotatePi4Add::kernel::<13, 13>(se, s1, (11585, 5793)); store_coeffs!( output, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sa, sb, sc, sd, se, sf @@ -1368,82 +1368,82 @@ $($s)* fn daala_fdst_iv_32_asym( // 22595/16384 = (Sin[63*Pi/128] - Cos[63*Pi/128])*Sqrt[2] = 1.37908108947413 // 1137/32768 = Cos[63*Pi/128]*Sqrt[2] = 0.03470653821440 let (t0, tv) = - RotateAdd::half_kernel(t0, tvh, ((5933, 13), (22595, 14), (1137, 15))); + RotateAdd::half_kernel::<13, 14, 15>(t0, tvh, (5933, 22595, 1137)); // 6203/8192 = (Sin[61*Pi/128] + Cos[61*Pi/128])/Sqrt[2] = 0.75720884650648 // 21403/16384 = (Sin[61*Pi/128] - Cos[61*Pi/128])*Sqrt[2] = 1.30634568590755 // 3409/32768 = Cos[61*Pi/128]*Sqrt[2] = 0.10403600355271 let (tu, t1) = - RotateSub::half_kernel(tu, t1h, ((6203, 13), (21403, 14), (3409, 15))); + RotateSub::half_kernel::<13, 14, 15>(tu, t1h, (6203, 21403, 3409)); // 25833/32768 = (Sin[59*Pi/128] + Cos[59*Pi/128])/Sqrt[2] = 0.78834642762661 // 315/256 = (Sin[59*Pi/128] - Cos[59*Pi/128])*Sqrt[2] = 1.23046318116125 // 5673/32768 = Cos[59*Pi/128]*Sqrt[2] = 0.17311483704598 let (t2, tt) = - RotateAdd::half_kernel(t2, tth, ((25833, 15), (315, 8), (5673, 15))); + RotateAdd::half_kernel::<15, 8, 15>(t2, tth, (25833, 315, 5673)); // 26791/32768 = (Sin[57*Pi/128] + Cos[57*Pi/128])/Sqrt[2] = 0.81758481315158 // 4717/4096 = (Sin[57*Pi/128] - Cos[57*Pi/128])*Sqrt[2] = 1.15161638283569 // 7923/32768 = Cos[57*Pi/128]*Sqrt[2] = 0.24177662173374 let (ts, t3) = - RotateSub::half_kernel(ts, t3h, ((26791, 15), (4717, 12), (7923, 15))); + RotateSub::half_kernel::<15, 12, 15>(ts, t3h, (26791, 4717, 7923)); // 6921/8192 = (Sin[55*Pi/128] + Cos[55*Pi/128])/Sqrt[2] = 0.84485356524971 // 17531/16384 = (Sin[55*Pi/128] - Cos[55*Pi/128])*Sqrt[2] = 1.06999523977419 // 10153/32768 = Cos[55*Pi/128]*Sqrt[2] = 0.30985594536261 let (t4, tr) = - RotateAdd::half_kernel(t4, trh, ((6921, 13), (17531, 14), (10153, 15))); + RotateAdd::half_kernel::<13, 14, 15>(t4, trh, (6921, 17531, 10153)); // 28511/32768 = (Sin[53*Pi/128] + Cos[53*Pi/128])/Sqrt[2] = 0.87008699110871 // 32303/32768 = (Sin[53*Pi/128] - Cos[53*Pi/128])*Sqrt[2] = 0.98579638445957 // 1545/4096 = Cos[53*Pi/128]*Sqrt[2] = 0.37718879887893 let (tq, t5) = - RotateSub::half_kernel(tq, t5h, ((28511, 15), (32303, 15), (1545, 12))); + RotateSub::half_kernel::<15, 15, 12>(tq, t5h, (28511, 32303, 1545)); // 29269/32768 = (Sin[51*Pi/128] + Cos[51*Pi/128])/Sqrt[2] = 0.89322430119552 // 14733/16384 = (Sin[51*Pi/128] - Cos[51*Pi/128])*Sqrt[2] = 0.89922265930921 // 1817/4096 = Cos[51*Pi/128]*Sqrt[2] = 0.44361297154091 let (t6, tp) = - RotateAdd::half_kernel(t6, tph, ((29269, 15), (14733, 14), (1817, 12))); + RotateAdd::half_kernel::<15, 14, 12>(t6, tph, (29269, 14733, 1817)); // 29957/32768 = (Sin[49*Pi/128] + Cos[49*Pi/128])/Sqrt[2] = 0.91420975570353 // 13279/16384 = (Sin[49*Pi/128] - Cos[49*Pi/128])*Sqrt[2] = 0.81048262800998 // 8339/16384 = Cos[49*Pi/128]*Sqrt[2] = 0.50896844169854 let (to, t7) = - RotateSub::half_kernel(to, t7h, ((29957, 15), (13279, 14), (8339, 14))); + RotateSub::half_kernel::<15, 14, 14>(to, t7h, (29957, 13279, 8339)); // 7643/8192 = (Sin[47*Pi/128] + Cos[47*Pi/128])/Sqrt[2] = 0.93299279883474 // 11793/16384 = (Sin[47*Pi/128] - Cos[47*Pi/128])*Sqrt[2] = 0.71979007306998 // 18779/32768 = Cos[47*Pi/128]*Sqrt[2] = 0.57309776229975 let (t8, tn) = - RotateAdd::half_kernel(t8, tnh, ((7643, 13), (11793, 14), (18779, 15))); + RotateAdd::half_kernel::<13, 14, 15>(t8, tnh, (7643, 11793, 18779)); // 15557/16384 = (Sin[45*Pi/128] + Cos[45*Pi/128])/Sqrt[2] = 0.94952818059304 // 20557/32768 = (Sin[45*Pi/128] - Cos[45*Pi/128])*Sqrt[2] = 0.62736348079778 // 20835/32768 = Cos[45*Pi/128]*Sqrt[2] = 0.63584644019415 let (tm, t9) = - RotateSub::half_kernel(tm, t9h, ((15557, 14), (20557, 15), (20835, 15))); + RotateSub::half_kernel::<14, 15, 15>(tm, t9h, (15557, 20557, 20835)); // 31581/32768 = (Sin[43*Pi/128] + Cos[43*Pi/128])/Sqrt[2] = 0.96377606579544 // 17479/32768 = (Sin[43*Pi/128] - Cos[43*Pi/128])*Sqrt[2] = 0.53342551494980 // 22841/32768 = Cos[43*Pi/128]*Sqrt[2] = 0.69706330832054 let (ta, tl) = - RotateAdd::half_kernel(ta, tlh, ((31581, 15), (17479, 15), (22841, 15))); + RotateAdd::half_kernel::<15, 15, 15>(ta, tlh, (31581, 17479, 22841)); // 7993/8192 = (Sin[41*Pi/128] + Cos[41*Pi/128])/Sqrt[2] = 0.97570213003853 // 14359/32768 = (Sin[41*Pi/128] - Cos[41*Pi/128])*Sqrt[2] = 0.43820248031374 // 3099/4096 = Cos[41*Pi/128]*Sqrt[2] = 0.75660088988166 let (tk, tb) = - RotateSub::half_kernel(tk, tbh, ((7993, 13), (14359, 15), (3099, 12))); + RotateSub::half_kernel::<13, 15, 12>(tk, tbh, (7993, 14359, 3099)); // 16143/16384 = (Sin[39*Pi/128] + Cos[39*Pi/128])/Sqrt[2] = 0.98527764238894 // 2801/8192 = (Sin[39*Pi/128] - Cos[39*Pi/128])*Sqrt[2] = 0.34192377752060 // 26683/32768 = Cos[39*Pi/128]*Sqrt[2] = 0.81431575362864 let (tc, tj) = - RotateAdd::half_kernel(tc, tjh, ((16143, 14), (2801, 13), (26683, 15))); + RotateAdd::half_kernel::<14, 13, 15>(tc, tjh, (16143, 2801, 26683)); // 16261/16384 = (Sin[37*Pi/128] + Cos[37*Pi/128])/Sqrt[2] = 0.99247953459871 // 4011/16384 = (Sin[37*Pi/128] - Cos[37*Pi/128])*Sqrt[2] = 0.24482135039843 // 14255/16384 = Cos[37*Pi/128]*Sqrt[2] = 0.87006885939949 let (ti, td) = - RotateSub::half_kernel(ti, tdh, ((16261, 14), (4011, 14), (14255, 14))); + RotateSub::half_kernel::<14, 14, 14>(ti, tdh, (16261, 4011, 14255)); // 32679/32768 = (Sin[35*Pi/128] + Cos[35*Pi/128])/Sqrt[2] = 0.99729045667869 // 4821/32768 = (Sin[35*Pi/128] - Cos[35*Pi/128])*Sqrt[2] = 0.14712912719933 // 30269/32768 = Cos[35*Pi/128]*Sqrt[2] = 0.92372589307902 let (te, th) = - RotateAdd::half_kernel(te, thh, ((32679, 15), (4821, 15), (30269, 15))); + RotateAdd::half_kernel::<15, 15, 15>(te, thh, (32679, 4821, 30269)); // 16379/16384 = (Sin[33*Pi/128] + Cos[33*Pi/128])/Sqrt[2] = 0.99969881869620 // 201/4096 = (Sin[33*Pi/128] - Cos[33*Pi/128])*Sqrt[2] = 0.04908245704582 // 15977/16384 = Cos[33*Pi/128]*Sqrt[2] = 0.97515759017329 let (tg, tf) = - RotateSub::half_kernel(tg, tfh, ((16379, 14), (201, 12), (15977, 14))); + RotateSub::half_kernel::<14, 12, 14>(tg, tfh, (16379, 201, 15977)); // Stage 1 let (t0, tfh) = butterfly_add(t0, tf); @@ -1486,42 +1486,42 @@ $($s)* fn daala_fdst_iv_32_asym( // 14699/16384 = Sin[15*Pi/32] - Cos[15*Pi/32] = 0.8971675863426363 // 803/8192 = Cos[15*Pi/32] = 0.0980171403295606 let (tf, tg) = - RotateSub::kernel(tf, tg, ((17911, 14), (14699, 14), (803, 13))); + RotateSub::kernel::<14, 14, 13>(tf, tg, (17911, 14699, 803)); // 10217/8192 = Sin[13*Pi/32] + Cos[13*Pi/32] = 1.2472250129866712 // 5461/8192 = Sin[13*Pi/32] - Cos[13*Pi/32] = 0.6666556584777465 // 1189/4096 = Cos[13*Pi/32] = 0.2902846772544623 let (th, te) = - RotateAdd::kernel(th, te, ((10217, 13), (5461, 13), (1189, 12))); + RotateAdd::kernel::<13, 13, 12>(th, te, (10217, 5461, 1189)); // 5543/4096 = Sin[11*Pi/32] + Cos[11*Pi/32] = 1.3533180011743526 // 3363/8192 = Sin[11*Pi/32] - Cos[11*Pi/32] = 0.4105245275223574 // 7723/16384 = Cos[11*Pi/32] = 0.4713967368259976 let (ti, td) = - RotateAdd::kernel(ti, td, ((5543, 12), (3363, 13), (7723, 14))); + RotateAdd::kernel::<12, 13, 14>(ti, td, (5543, 3363, 7723)); // 11529/8192 = Sin[9*Pi/32] + Cos[9*Pi/32] = 1.4074037375263826 // 2271/16384 = Sin[9*Pi/32] - Cos[9*Pi/32] = 0.1386171691990915 // 5197/8192 = Cos[9*Pi/32] = 0.6343932841636455 let (tc, tj) = - RotateSub::kernel(tc, tj, ((11529, 13), (2271, 14), (5197, 13))); + RotateSub::kernel::<13, 14, 13>(tc, tj, (11529, 2271, 5197)); // 11529/8192 = Sin[9*Pi/32] + Cos[9*Pi/32] = 1.4074037375263826 // 2271/16384 = Sin[9*Pi/32] - Cos[9*Pi/32] = 0.1386171691990915 // 5197/8192 = Cos[9*Pi/32] = 0.6343932841636455 let (tb, tk) = - RotateNeg::kernel(tb, tk, ((11529, 13), (2271, 14), (5197, 13))); + RotateNeg::kernel::<13, 14, 13>(tb, tk, (11529, 2271, 5197)); // 5543/4096 = Sin[11*Pi/32] + Cos[11*Pi/32] = 1.3533180011743526 // 3363/8192 = Sin[11*Pi/32] - Cos[11*Pi/32] = 0.4105245275223574 // 7723/16384 = Cos[11*Pi/32] = 0.4713967368259976 let (ta, tl) = - RotateNeg::kernel(ta, tl, ((5543, 12), (3363, 13), (7723, 14))); + RotateNeg::kernel::<12, 13, 14>(ta, tl, (5543, 3363, 7723)); // 10217/8192 = Sin[13*Pi/32] + Cos[13*Pi/32] = 1.2472250129866712 // 5461/8192 = Sin[13*Pi/32] - Cos[13*Pi/32] = 0.6666556584777465 // 1189/4096 = Cos[13*Pi/32] = 0.2902846772544623 let (t9, tm) = - RotateNeg::kernel(t9, tm, ((10217, 13), (5461, 13), (1189, 12))); + RotateNeg::kernel::<13, 13, 12>(t9, tm, (10217, 5461, 1189)); // 17911/16384 = Sin[15*Pi/32] + Cos[15*Pi/32] = 1.0932018670017576 // 14699/16384 = Sin[15*Pi/32] - Cos[15*Pi/32] = 0.8971675863426363 // 803/8192 = Cos[15*Pi/32] = 0.0980171403295606 let (t8, tn) = - RotateNeg::kernel(t8, tn, ((17911, 14), (14699, 14), (803, 13))); + RotateNeg::kernel::<14, 14, 13>(t8, tn, (17911, 14699, 803)); // Stage 4 let (t3, t0h) = butterfly_sub(t3, t0); @@ -1545,22 +1545,22 @@ $($s)* fn daala_fdst_iv_32_asym( // 301/256 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586 // 1609/2048 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022 // 6393/32768 = Cos[7*Pi/16] = 0.1950903220161283 - let (to, t7) = RotateAdd::kernel(to, t7, ((301, 8), (1609, 11), (6393, 15))); + let (to, t7) = RotateAdd::kernel::<8, 11, 15>(to, t7, (301, 1609, 6393)); // 11363/8192 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475 // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431 // 4551/8192 = Cos[5*Pi/16] = 0.5555702330196022 let (tph, t6h) = - RotateAdd::kernel(tph, t6h, ((11363, 13), (9041, 15), (4551, 13))); + RotateAdd::kernel::<13, 15, 13>(tph, t6h, (11363, 9041, 4551)); // 5681/4096 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475 // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431 // 4551/8192 = Cos[5*Pi/16] = 0.5555702330196022 let (t5, tq) = - RotateNeg::kernel(t5, tq, ((5681, 12), (9041, 15), (4551, 13))); + RotateNeg::kernel::<12, 15, 13>(t5, tq, (5681, 9041, 4551)); // 9633/8192 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586 // 12873/16384 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022 // 6393/32768 = Cos[7*Pi/16] = 0.1950903220161283 let (t4h, trh) = - RotateNeg::kernel(t4h, trh, ((9633, 13), (12873, 14), (6393, 15))); + RotateNeg::kernel::<13, 14, 15>(t4h, trh, (9633, 12873, 6393)); // Stage 6 let (t1, t0) = butterfly_add_asym(t1, t0h); @@ -1584,42 +1584,42 @@ $($s)* fn daala_fdst_iv_32_asym( // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898 - let (t2, tt) = RotateNeg::kernel(t2, tt, ((669, 9), (8867, 14), (3135, 13))); + let (t2, tt) = RotateNeg::kernel::<9, 14, 13>(t2, tt, (669, 8867, 3135)); // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898 - let (ts, t3) = RotateAdd::kernel(ts, t3, ((669, 9), (8867, 14), (3135, 13))); + let (ts, t3) = RotateAdd::kernel::<9, 14, 13>(ts, t3, (669, 8867, 3135)); // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898 - let (ta, tl) = RotateNeg::kernel(ta, tl, ((669, 9), (8867, 14), (3135, 13))); + let (ta, tl) = RotateNeg::kernel::<9, 14, 13>(ta, tl, (669, 8867, 3135)); // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898 - let (tk, tb) = RotateAdd::kernel(tk, tb, ((669, 9), (8867, 14), (3135, 13))); + let (tk, tb) = RotateAdd::kernel::<9, 14, 13>(tk, tb, (669, 8867, 3135)); // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898 - let (tc, tj) = RotateAdd::kernel(tc, tj, ((669, 9), (8867, 14), (3135, 13))); + let (tc, tj) = RotateAdd::kernel::<9, 14, 13>(tc, tj, (669, 8867, 3135)); // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898 - let (ti, td) = RotateNeg::kernel(ti, td, ((669, 9), (8867, 14), (3135, 13))); + let (ti, td) = RotateNeg::kernel::<9, 14, 13>(ti, td, (669, 8867, 3135)); // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 5793/8192 = Cos[Pi/4] = 0.7071067811865475 - let (tu, t1) = RotatePi4Add::kernel(tu, t1, ((5793, 12), (5793, 13))); + let (tu, t1) = RotatePi4Add::kernel::<12, 13>(tu, t1, (5793, 5793)); // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 5793/8192 = Cos[Pi/4] = 0.7071067811865475 - let (tq, t5) = RotatePi4Add::kernel(tq, t5, ((5793, 12), (5793, 13))); + let (tq, t5) = RotatePi4Add::kernel::<12, 13>(tq, t5, (5793, 5793)); // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 5793/8192 = Cos[Pi/4] = 0.7071067811865475 - let (tp, t6) = RotatePi4Sub::kernel(tp, t6, ((5793, 12), (5793, 13))); + let (tp, t6) = RotatePi4Sub::kernel::<12, 13>(tp, t6, (5793, 5793)); // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 5793/8192 = Cos[Pi/4] = 0.7071067811865475 - let (tm, t9) = RotatePi4Add::kernel(tm, t9, ((5793, 12), (5793, 13))); + let (tm, t9) = RotatePi4Add::kernel::<12, 13>(tm, t9, (5793, 5793)); // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 // 5793/8192 = Cos[Pi/4] = 0.7071067811865475 - let (te, th) = RotatePi4Add::kernel(te, th, ((5793, 12), (5793, 13))); + let (te, th) = RotatePi4Add::kernel::<12, 13>(te, th, (5793, 5793)); store_coeffs!( output, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, ta, tb, tc, td, te, tf,