Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add pre-processing denoising #2931

Draft
wants to merge 13 commits into
base: master
Choose a base branch
from
8 changes: 6 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ binaries = [
"fern",
"console",
"av-metrics",
"nom",
]
default = ["binaries", "asm", "threading", "signal_support"]
asm = ["nasm-rs", "cc"]
Expand Down Expand Up @@ -103,11 +102,16 @@ simd_helpers = "0.1"
wasm-bindgen = { version = "0.2.63", optional = true }
rust_hawktracer = "0.7.0"
const_fn_assert = "0.1.2"
nom = { version = "7.0.0", optional = true }
# `unreachable!` macro which panics in debug mode
# and optimizes away in release mode
new_debug_unreachable = "1.0.4"
once_cell = "1.13.0"
av1-grain = { version = "0.2.0", features = ["serialize"] }
serde-big-array = { version = "0.4.1", optional = true }
# Used for parsing film grain table files
nom = "7.0.0"
wide = { git = "https://github.com/shssoichiro/wide", branch = "additional-functions" }
num-complex = "0.4.2"

[dependencies.image]
version = "0.24.3"
Expand Down
27 changes: 26 additions & 1 deletion benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

mod denoise;
mod dist;
mod mc;
mod plane;
Expand All @@ -23,12 +24,15 @@ use rav1e::bench::partition::*;
use rav1e::bench::predict::*;
use rav1e::bench::rdo::*;
use rav1e::bench::transform::*;
use rav1e::prelude::*;

use crate::plane::plane;
use crate::rdo::rdo;
use crate::transform::{forward_transforms, inverse_transforms};

use criterion::*;
use rand::Rng;
use rand_chacha::ChaChaRng;
use std::sync::Arc;
use std::time::Duration;

Expand Down Expand Up @@ -193,6 +197,26 @@ fn update_cdf_4(b: &mut Bencher) {
});
}

/// Overwrites the samples of `plane` with pseudo-random values drawn from `ra`.
///
/// Each sample is generated as a `u8` and then converted with
/// `T::cast_from`. The buffer returned by `data_origin_mut()` is walked in
/// `stride`-sized rows, front to back, drawing one value from `ra` per
/// sample.
fn fill_plane<T: Pixel>(ra: &mut ChaChaRng, plane: &mut Plane<T>) {
  let row_len = plane.cfg.stride;
  plane
    .data_origin_mut()
    .chunks_mut(row_len)
    .flatten()
    .for_each(|sample| *sample = T::cast_from(ra.gen::<u8>()));
}

/// Allocates a `width` x `height` plane and fills it with random samples
/// taken from `ra` (see `fill_plane`).
fn new_plane<T: Pixel>(
  ra: &mut ChaChaRng, width: usize, height: usize,
) -> Plane<T> {
  // NOTE(review): the two zeros and the two `128 + 8` values are presumably
  // the decimation and padding arguments of `Plane::new` -- confirm against
  // its signature.
  let pad = 128 + 8;
  let mut plane = Plane::new(width, height, 0, 0, pad, pad);
  fill_plane(ra, &mut plane);
  plane
}

criterion_group!(intra_prediction, predict::pred_bench,);

criterion_group!(cfl, cfl_rdo);
Expand All @@ -217,5 +241,6 @@ criterion_main!(
ec,
rdo,
plane,
mc::mc
mc::mc,
denoise::denoise
);
76 changes: 76 additions & 0 deletions benches/denoise.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use super::new_plane;
use criterion::*;
use rand::SeedableRng;
use rand_chacha::ChaChaRng;
use rav1e::bench::denoise::*;
use rav1e::prelude::*;
use std::collections::BTreeMap;
use std::sync::Arc;

/// Benchmarks `DftDenoiser::filter_frame` on 8-bit, 640x480 input.
///
/// The frame queue holds three frames (keys 0, 1 and 2) of seeded random
/// data, each with one full-size plane and two half-size planes, plus a
/// `None` entry at key 3. Every measured iteration receives a freshly
/// constructed denoiser from the setup closure and calls `filter_frame`
/// three times on the shared queue.
fn bench_dft_denoiser_8b(c: &mut Criterion) {
  let (width, height) = (640, 480);
  // Fixed seed so every run benchmarks the same pixel data.
  let mut rng = ChaChaRng::from_seed([0; 32]);

  let mut frame_queue: BTreeMap<_, _> = (0..3)
    .map(|frameno| {
      let frame = Frame {
        planes: [
          new_plane::<u8>(&mut rng, width, height),
          new_plane::<u8>(&mut rng, width / 2, height / 2),
          new_plane::<u8>(&mut rng, width / 2, height / 2),
        ],
      };
      (frameno, Some(Arc::new(frame)))
    })
    .collect();
  // NOTE(review): the trailing `None` presumably marks the end of the input
  // for the denoiser -- confirm against `DftDenoiser::filter_frame`.
  frame_queue.insert(3, None);

  c.bench_function("dft_denoiser_8b", |b| {
    b.iter_with_setup(
      || DftDenoiser::new(2.0, width, height, 8, ChromaSampling::Cs420),
      |mut denoiser| {
        for _ in 0..3 {
          // The result is routed through `black_box` so the call cannot be
          // optimized away; its value is otherwise unused.
          let _ = black_box(denoiser.filter_frame(&frame_queue));
        }
      },
    )
  });
}

/// Benchmarks `DftDenoiser::filter_frame` on 10-bit, 640x480 input.
///
/// The frame queue holds three frames (keys 0, 1 and 2) of seeded random
/// data stored as `u16`, each with one full-size plane and two half-size
/// planes, plus a `None` entry at key 3. Every measured iteration receives
/// a freshly constructed denoiser from the setup closure and calls
/// `filter_frame` three times on the shared queue.
fn bench_dft_denoiser_10b(c: &mut Criterion) {
  // Fixed seed so every run benchmarks the same pixel data.
  let mut ra = ChaChaRng::from_seed([0; 32]);
  let w = 640;
  let h = 480;
  let mut frame_queue = BTreeMap::new();
  for i in 0..3 {
    let mut frame = Frame {
      planes: [
        new_plane::<u16>(&mut ra, w, h),
        new_plane::<u16>(&mut ra, w / 2, h / 2),
        new_plane::<u16>(&mut ra, w / 2, h / 2),
      ],
    };
    for plane in frame.planes.iter_mut() {
      // `new_plane` only produces 8-bit sample values (0..=255), so scale
      // them up by two bits to span the 10-bit range. Shifting right by 6,
      // as if the data were 16-bit, would collapse every sample to 0..=3.
      plane.data.iter_mut().for_each(|pix| {
        *pix <<= 2;
      });
    }
    frame_queue.insert(i, Some(Arc::new(frame)));
  }
  // NOTE(review): the trailing `None` presumably marks the end of the input
  // for the denoiser -- confirm against `DftDenoiser::filter_frame`.
  frame_queue.insert(3, None);

  c.bench_function("dft_denoiser_10b", |b| {
    b.iter_with_setup(
      || DftDenoiser::new(2.0, w, h, 10, ChromaSampling::Cs420),
      |mut denoiser| {
        for _ in 0..3 {
          // The result is routed through `black_box` so the call cannot be
          // optimized away; its value is otherwise unused.
          let _ = black_box(denoiser.filter_frame(&frame_queue));
        }
      },
    )
  });
}

// Groups the 8-bit and 10-bit DFT denoiser benchmarks under the name
// `denoise`, which benches/bench.rs references as `denoise::denoise`.
criterion_group!(denoise, bench_dft_denoiser_8b, bench_dft_denoiser_10b);
21 changes: 1 addition & 20 deletions benches/dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#![allow(clippy::trivially_copy_pass_by_ref)]

use super::new_plane;
use criterion::*;
use rand::{Rng, SeedableRng};
use rand_chacha::ChaChaRng;
Expand Down Expand Up @@ -69,26 +70,6 @@ const DIST_BENCH_SET: &[(BlockSize, usize)] = &[
(BLOCK_64X16, 10),
];

/// Overwrites the samples of `plane` with pseudo-random values drawn from `ra`.
///
/// Each sample is generated as a `u8` and then converted with
/// `T::cast_from`. The buffer returned by `data_origin_mut()` is walked in
/// `stride`-sized rows, front to back, drawing one value from `ra` per
/// sample.
fn fill_plane<T: Pixel>(ra: &mut ChaChaRng, plane: &mut Plane<T>) {
  let row_len = plane.cfg.stride;
  plane
    .data_origin_mut()
    .chunks_mut(row_len)
    .flatten()
    .for_each(|sample| *sample = T::cast_from(ra.gen::<u8>()));
}

/// Allocates a `width` x `height` plane and fills it with random samples
/// taken from `ra` (see `fill_plane`).
fn new_plane<T: Pixel>(
  ra: &mut ChaChaRng, width: usize, height: usize,
) -> Plane<T> {
  // NOTE(review): the two zeros and the two `128 + 8` values are presumably
  // the decimation and padding arguments of `Plane::new` -- confirm against
  // its signature.
  let pad = 128 + 8;
  let mut plane = Plane::new(width, height, 0, 0, pad, pad);
  fill_plane(ra, &mut plane);
  plane
}

type DistFn<T> = fn(
plane_org: &PlaneRegion<'_, T>,
plane_ref: &PlaneRegion<'_, T>,
Expand Down
23 changes: 2 additions & 21 deletions benches/mc.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#![allow(clippy::unit_arg)]

use super::new_plane;
use criterion::*;
use rand::{Rng, SeedableRng};
use rand::SeedableRng;
use rand_chacha::ChaChaRng;
use rav1e::bench::cpu_features::*;
use rav1e::bench::frame::{AsRegion, PlaneOffset, PlaneSlice};
Expand Down Expand Up @@ -525,26 +526,6 @@ criterion_group!(
bench_prep_8tap_center_hbd
);

/// Overwrites the samples of `plane` with pseudo-random values drawn from `ra`.
///
/// Each sample is generated as a `u8` and then converted with
/// `T::cast_from`. The buffer returned by `data_origin_mut()` is walked in
/// `stride`-sized rows, front to back, drawing one value from `ra` per
/// sample.
fn fill_plane<T: Pixel>(ra: &mut ChaChaRng, plane: &mut Plane<T>) {
  let row_len = plane.cfg.stride;
  plane
    .data_origin_mut()
    .chunks_mut(row_len)
    .flatten()
    .for_each(|sample| *sample = T::cast_from(ra.gen::<u8>()));
}

/// Allocates a `width` x `height` plane and fills it with random samples
/// taken from `ra` (see `fill_plane`).
fn new_plane<T: Pixel>(
  ra: &mut ChaChaRng, width: usize, height: usize,
) -> Plane<T> {
  // NOTE(review): the two zeros and the two `128 + 8` values are presumably
  // the decimation and padding arguments of `Plane::new` -- confirm against
  // its signature.
  let pad = 128 + 8;
  let mut plane = Plane::new(width, height, 0, 0, pad, pad);
  fill_plane(ra, &mut plane);
  plane
}

fn get_params<T: Pixel>(
rec_plane: &Plane<T>, po: PlaneOffset, mv: MotionVector,
) -> (i32, i32, PlaneSlice<T>) {
Expand Down
3 changes: 2 additions & 1 deletion clippy.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
too-many-arguments-threshold = 16
cognitive-complexity-threshold = 40
trivial-copy-size-limit = 16 # 128-bits = 2 64-bit registers
trivial-copy-size-limit = 16 # 128-bits = 2 64-bit registers
doc-valid-idents = ["DFTTest", "DFTTest2"] # names clippy's doc_markdown lint should not require backticks for
msrv = "1.60"
3 changes: 3 additions & 0 deletions src/api/config/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ pub struct EncoderConfig {
pub tune: Tune,
/// Parameters for grain synthesis.
pub film_grain_params: Option<Vec<GrainTableSegment>>,
/// Strength of denoising, 0 = disabled
pub denoise_strength: u8,
/// Number of tiles horizontally. Must be a power of two.
///
/// Overridden by [`tiles`], if present.
Expand Down Expand Up @@ -159,6 +161,7 @@ impl EncoderConfig {
bitrate: 0,
tune: Tune::default(),
film_grain_params: None,
denoise_strength: 0,
tile_cols: 0,
tile_rows: 0,
tiles: 0,
Expand Down
33 changes: 32 additions & 1 deletion src/api/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use crate::api::{
};
use crate::color::ChromaSampling::Cs400;
use crate::cpu_features::CpuFeatureLevel;
use crate::denoise::{DftDenoiser, TEMPORAL_RADIUS};
use crate::dist::get_satd;
use crate::encoder::*;
use crate::frame::*;
Expand Down Expand Up @@ -220,7 +221,7 @@ impl<T: Pixel> FrameData<T> {
}
}

type FrameQueue<T> = BTreeMap<u64, Option<Arc<Frame<T>>>>;
pub(crate) type FrameQueue<T> = BTreeMap<u64, Option<Arc<Frame<T>>>>;
type FrameDataQueue<T> = BTreeMap<u64, Option<FrameData<T>>>;

// the fields pub(super) are accessed only by the tests
Expand Down Expand Up @@ -248,6 +249,7 @@ pub(crate) struct ContextInner<T: Pixel> {
/// Maps `output_frameno` to `gop_input_frameno_start`.
pub(crate) gop_input_frameno_start: BTreeMap<u64, u64>,
keyframe_detector: SceneChangeDetector<T>,
denoiser: Option<DftDenoiser<T>>,
pub(crate) config: Arc<EncoderConfig>,
seq: Arc<Sequence>,
pub(crate) rc_state: RCState,
Expand Down Expand Up @@ -295,6 +297,16 @@ impl<T: Pixel> ContextInner<T> {
lookahead_distance,
seq.clone(),
),
denoiser: if enc.denoise_strength > 0 {
Some(DftDenoiser::<T>::new(
enc.denoise_strength as f32 / 10.0,
enc.width,
enc.height,
enc.bit_depth,
))
} else {
None
},
config: Arc::new(enc.clone()),
seq,
rc_state: RCState::new(
Expand Down Expand Up @@ -359,6 +371,25 @@ impl<T: Pixel> ContextInner<T> {
self.t35_q.insert(input_frameno, params.t35_metadata);
}

// If denoising is enabled, run it now because we want the entire
// encoding process, including lookahead, to see the denoised frame.
if let Some(ref mut denoiser) = self.denoiser {
loop {
let denoiser_frame = denoiser.cur_frameno;
if (!is_flushing
&& input_frameno >= denoiser_frame + TEMPORAL_RADIUS as u64)
|| (is_flushing && Some(denoiser_frame) < self.limit)
{
self.frame_q.insert(
denoiser_frame,
Some(Arc::new(denoiser.filter_frame(&self.frame_q).unwrap())),
);
} else {
break;
}
}
}

if !self.needs_more_frame_q_lookahead(self.next_lookahead_frame) {
let lookahead_frames = self
.frame_q
Expand Down
2 changes: 2 additions & 0 deletions src/api/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2131,6 +2131,7 @@ fn log_q_exp_overflow() {
tile_cols: 0,
tile_rows: 0,
tiles: 0,
denoise_strength: 0,
speed_settings: SpeedSettings {
multiref: false,
fast_deblock: true,
Expand Down Expand Up @@ -2207,6 +2208,7 @@ fn guess_frame_subtypes_assert() {
tile_cols: 0,
tile_rows: 0,
tiles: 0,
denoise_strength: 0,
speed_settings: SpeedSettings {
multiref: false,
fast_deblock: true,
Expand Down
14 changes: 0 additions & 14 deletions src/asm/x86/transform/forward.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,20 +316,6 @@ impl SizeClass1D {
}
}

/// Borrows the first `N` elements of `x` as a fixed-size array reference.
///
/// # Panics
///
/// Panics if `x` holds fewer than `N` elements.
fn cast<const N: usize, T>(x: &[T]) -> &[T; N] {
  // Slicing with `[..N]` performs the bounds check. The resulting slice has
  // exactly `N` elements, so the checked slice-to-array conversion cannot
  // fail and no `unsafe` pointer cast is needed.
  <&[T; N] as std::convert::TryFrom<&[T]>>::try_from(&x[..N])
    .expect("a slice of length N always converts to [T; N]")
}

/// Mutably borrows the first `N` elements of `x` as a fixed-size array
/// reference.
///
/// # Panics
///
/// Panics if `x` holds fewer than `N` elements.
fn cast_mut<const N: usize, T>(x: &mut [T]) -> &mut [T; N] {
  // Slicing with `[..N]` performs the bounds check. The resulting slice has
  // exactly `N` elements, so the checked slice-to-array conversion cannot
  // fail and no `unsafe` pointer cast is needed.
  <&mut [T; N] as std::convert::TryFrom<&mut [T]>>::try_from(&mut x[..N])
    .expect("a slice of length N always converts to [T; N]")
}

#[allow(clippy::identity_op, clippy::erasing_op)]
#[target_feature(enable = "avx2")]
unsafe fn forward_transform_avx2<T: Coefficient>(
Expand Down
Loading