Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate lossless forward transform intrinsics #3270

Merged
merged 5 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 14 additions & 18 deletions src/asm/aarch64/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,30 +16,26 @@ use crate::{Pixel, PixelType};
use crate::asm::shared::transform::inverse::*;
use crate::asm::shared::transform::*;

#[inline]
pub fn inverse_transform_add_lossless<T: Pixel>(
pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize,
bd: usize, cpu: CpuFeatureLevel,
tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
) {
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_WHT_FN[cpu.as_index()] {
return call_inverse_func(func, input, output, eob, 4, 4, bd);
if tx_type == TxType::WHT_WHT {
debug_assert!(tx_size == TxSize::TX_4X4);
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_WHT_FN[cpu.as_index()] {
return call_inverse_func(func, input, output, eob, 4, 4, bd);
}
}
}
PixelType::U16 => {
if let Some(func) = INV_TXFM_WHT_HBD_FN[cpu.as_index()] {
return call_inverse_hbd_func(func, input, output, eob, 4, 4, bd);
PixelType::U16 if bd == 10 => {
if let Some(func) = INV_TXFM_WHT_HBD_FN[cpu.as_index()] {
return call_inverse_hbd_func(func, input, output, eob, 4, 4, bd);
}
}
PixelType::U16 => {}
}
}
rust::inverse_transform_add_lossless(input, output, eob, bd, cpu);
}

pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize,
tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
) {
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_FNS[cpu.as_index()]
Expand Down
20 changes: 15 additions & 5 deletions src/asm/shared/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ pub mod test {
let mut eob = 0;
let mut exit = 0;

let scan = av1_scan_orders[tx_size as usize][tx_type as usize].scan;
// Wrap WHT_WHT (16) to DCT_DCT (0) scan table
let scan = av1_scan_orders[tx_size as usize][(tx_type as usize) & 15].scan;

for (i, &pos) in scan.iter().enumerate() {
exit = i;
Expand Down Expand Up @@ -148,7 +149,10 @@ pub mod test {
for sub_h in 0..sub_h_iterations {
let mut src_storage = [T::zero(); 64 * 64];
let src = &mut src_storage[..tx_size.area()];
let mut dst = Plane::from_slice(&[T::zero(); 64 * 64], 64);
let mut dst = Plane::from_slice(
&[T::zero(); 64 * 64][..tx_size.area()],
tx_size.width(),
);
let mut res_storage: Aligned<[MaybeUninit<i16>; 64 * 64]> =
unsafe { Aligned::uninitialized() };
let res = &mut res_storage.data[..tx_size.area()];
Expand Down Expand Up @@ -229,13 +233,16 @@ pub mod test {
};

($TYPES64:tt, $DIMS64:tt, $TYPES32:tt, $DIMS32:tt, $TYPES16:tt, $DIMS16:tt,
$TYPES84:tt, $DIMS84:tt) => {
$TYPES84:tt, $DIMS84:tt, $TYPES4:tt, $DIMS4:tt) => {
test_itx_fns!([$TYPES64], $DIMS64);
test_itx_fns!([$TYPES64, $TYPES32], $DIMS32);
test_itx_fns!([$TYPES64, $TYPES32, $TYPES16], $DIMS16);
test_itx_fns!(
[$TYPES64, $TYPES32, $TYPES16, $TYPES84], $DIMS84
);
test_itx_fns!(
[$TYPES64, $TYPES32, $TYPES16, $TYPES84, $TYPES4], $DIMS4
);
};
}

Expand All @@ -260,13 +267,16 @@ pub mod test {
(TxType::FLIPADST_FLIPADST, flipadst, flipadst)
],
[(16, 16)],
// 8x, 4x and 16x (minus 16x16)
// 8x, 4x and 16x (minus 16x16 and 4x4)
[
(TxType::V_ADST, adst, identity),
(TxType::H_ADST, identity, adst),
(TxType::V_FLIPADST, flipadst, identity),
(TxType::H_FLIPADST, identity, flipadst)
],
[(16, 8), (8, 16), (16, 4), (4, 16), (8, 8), (8, 4), (4, 8), (4, 4)]
[(16, 8), (8, 16), (16, 4), (4, 16), (8, 8), (8, 4), (4, 8)],
// 4x4
[(TxType::WHT_WHT, wht, wht)],
[(4, 4)]
);
}
4 changes: 2 additions & 2 deletions src/asm/x86/transform/forward.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

pub use crate::transform::forward::rust::forward_transform_lossless;

type TxfmFuncI32X8 = unsafe fn(&mut [I32X8]);

#[inline]
Expand All @@ -41,6 +39,7 @@ fn get_func_i32x8(t: TxfmType) -> TxfmFuncI32X8 {
Identity8 => fidentity,
Identity16 => fidentity,
Identity32 => fidentity,
WHT4 => fwht4,
}
}

Expand Down Expand Up @@ -509,6 +508,7 @@ unsafe fn forward_transform_avx2<T: Coefficient>(
/// # Panics
///
/// - If called with an invalid combination of `tx_size` and `tx_type`
#[inline]
pub fn forward_transform<T: Coefficient>(
input: &[i16], output: &mut [MaybeUninit<T>], stride: usize,
tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
Expand Down
31 changes: 13 additions & 18 deletions src/asm/x86/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,30 +16,25 @@ use crate::{Pixel, PixelType};
use crate::asm::shared::transform::inverse::*;
use crate::asm::shared::transform::*;

#[inline]
pub fn inverse_transform_add_lossless<T: Pixel>(
pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize,
bd: usize, cpu: CpuFeatureLevel,
tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
) {
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_WHT_FN[cpu.as_index()] {
return call_inverse_func(func, input, output, eob, 4, 4, bd);
if tx_type == TxType::WHT_WHT {
debug_assert!(tx_size == TxSize::TX_4X4);
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_WHT_FN[cpu.as_index()] {
return call_inverse_func(func, input, output, eob, 4, 4, bd);
}
}
}
PixelType::U16 => {
if let Some(func) = INV_TXFM_WHT_HBD_FN[cpu.as_index()] {
return call_inverse_hbd_func(func, input, output, eob, 4, 4, bd);
PixelType::U16 => {
if let Some(func) = INV_TXFM_WHT_HBD_FN[cpu.as_index()] {
return call_inverse_hbd_func(func, input, output, eob, 4, 4, bd);
}
}
}
}
rust::inverse_transform_add_lossless(input, output, eob, bd, cpu);
}

pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize,
tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
) {
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_FNS[cpu.as_index()]
Expand Down
31 changes: 1 addition & 30 deletions src/transform/forward.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,36 +92,7 @@ pub mod rust {
Identity8 => fidentity,
Identity16 => fidentity,
Identity32 => fidentity,
}
}

pub fn forward_transform_lossless<T: Coefficient>(
input: &[i16], output: &mut [T], stride: usize, _cpu: CpuFeatureLevel,
) {
let mut tmp = [0i32; 4 * 4];
let buf = &mut tmp[..];
let mut col_coeffs_backing = [0i32; 4];
let col_coeffs = &mut col_coeffs_backing[..];

// Columns
for c in 0..4 {
for r in 0..4 {
col_coeffs[r] = (input[r * stride + c]).into();
}
fwht4(col_coeffs);
for r in 0..4 {
buf[r * 4 + c] = col_coeffs[r];
}
}

// Rows
for r in 0..4 {
let row_coeffs = &mut buf[r * 4..];
fwht4(row_coeffs);
av1_round_shift_array(row_coeffs, 4, -2);
for c in 0..4 {
output[c * 4 + r] = T::cast_from(row_coeffs[c]);
}
WHT4 => fwht4,
}
}

Expand Down
22 changes: 16 additions & 6 deletions src/transform/forward_shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ const FWD_SHIFT_32X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X64: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_64X16: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];

// Shift triple for the 4x4 WHT_WHT transform. `Txfm2DFlipCfg` selects this
// when `tx_type == TxType::WHT_WHT` instead of indexing `FWD_TXFM_SHIFT_LS`
// by transform size and bit depth, so one value is used for every bit depth.
const FWD_SHIFT_4X4_WHT: TxfmShift = [0, 0, 2];

pub const FWD_TXFM_SHIFT_LS: [TxfmShifts; TxSize::TX_SIZES_ALL] = [
FWD_SHIFT_4X4,
FWD_SHIFT_8X8,
Expand Down Expand Up @@ -75,31 +77,35 @@ pub enum TxfmType {
Identity8,
Identity16,
Identity32,
WHT4,
}

impl TxfmType {
const TX_TYPES_1D: usize = 4;
const TX_TYPES_1D: usize = 5;
const AV1_TXFM_TYPE_LS: [[Option<TxfmType>; Self::TX_TYPES_1D]; 5] = [
[
Some(TxfmType::DCT4),
Some(TxfmType::ADST4),
Some(TxfmType::ADST4),
Some(TxfmType::Identity4),
Some(TxfmType::WHT4),
],
[
Some(TxfmType::DCT8),
Some(TxfmType::ADST8),
Some(TxfmType::ADST8),
Some(TxfmType::Identity8),
None,
],
[
Some(TxfmType::DCT16),
Some(TxfmType::ADST16),
Some(TxfmType::ADST16),
Some(TxfmType::Identity16),
None,
],
[Some(TxfmType::DCT32), None, None, Some(TxfmType::Identity32)],
[Some(TxfmType::DCT64), None, None, None],
[Some(TxfmType::DCT32), None, None, Some(TxfmType::Identity32), None],
[Some(TxfmType::DCT64), None, None, None, None],
];
}

Expand Down Expand Up @@ -129,12 +135,17 @@ impl Txfm2DFlipCfg {
let txfm_type_row =
TxfmType::AV1_TXFM_TYPE_LS[txw_idx][tx_type_1d_row as usize].unwrap();
let (ud_flip, lr_flip) = Self::get_flip_cfg(tx_type);
let shift = if tx_type == TxType::WHT_WHT {
FWD_SHIFT_4X4_WHT
} else {
FWD_TXFM_SHIFT_LS[tx_size as usize][(bd - 8) / 2]
};

Txfm2DFlipCfg {
tx_size,
ud_flip,
lr_flip,
shift: FWD_TXFM_SHIFT_LS[tx_size as usize][(bd - 8) / 2],
shift,
txfm_type_col,
txfm_type_row,
}
Expand All @@ -145,7 +156,7 @@ impl Txfm2DFlipCfg {
use self::TxType::*;
match tx_type {
DCT_DCT | ADST_DCT | DCT_ADST | ADST_ADST | IDTX | V_DCT | H_DCT
| V_ADST | H_ADST => (false, false),
| V_ADST | H_ADST | WHT_WHT => (false, false),
FLIPADST_DCT | FLIPADST_ADST | V_FLIPADST => (true, false),
DCT_FLIPADST | ADST_FLIPADST | H_FLIPADST => (false, true),
FLIPADST_FLIPADST => (true, true),
Expand Down Expand Up @@ -1728,7 +1739,6 @@ $($s)* fn daala_fdct64<T: TxOperations>(coeffs: &mut [T]) {
// Identity 1-D "transform": the body is intentionally empty, so `_coeffs` is
// left untouched. The `Identity4`/`Identity8`/`Identity16`/`Identity32`
// transform types all dispatch to this one function.
#[$m]
$($s)* fn fidentity<T: TxOperations>(_coeffs: &mut [T]) {}

#[allow(unused)]
#[$m]
$($s)* fn fwht4<T: TxOperations>(coeffs: &mut [T]) {
assert!(coeffs.len() >= 4);
Expand Down
Loading
Loading