From dd023f740b954ca017d28f153a68a173d97a543b Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 8 Mar 2025 09:22:08 +1300 Subject: [PATCH 1/2] Improve memory usage of evaluator --- src/evaluate.rs | 26 ++++++++++++++------------ src/lib.rs | 22 ++++++++++++++++------ src/png/mod.rs | 12 ++++++------ 3 files changed, 36 insertions(+), 24 deletions(-) diff --git a/src/evaluate.rs b/src/evaluate.rs index 5fe06ea9..79078138 100644 --- a/src/evaluate.rs +++ b/src/evaluate.rs @@ -22,6 +22,7 @@ use crate::{atomicmin::AtomicMin, deflate, filters::RowFilter, png::PngImage, De pub(crate) struct Candidate { pub image: Arc, pub idat_data: Vec, + pub estimated_output_size: usize, pub filtered: Vec, pub filter: RowFilter, // For determining tie-breaker @@ -29,15 +30,9 @@ pub(crate) struct Candidate { } impl Candidate { - /// Return an estimate of the output size which can help with evaluation of very small data - #[must_use] - pub fn estimated_output_size(&self) -> usize { - self.idat_data.len() + self.image.key_chunks_size() - } - fn cmp_key(&self) -> impl Ord { ( - self.estimated_output_size(), + self.estimated_output_size, self.image.data.len(), self.filter, // Prefer the later image added (e.g. baseline, which is always added last) @@ -52,6 +47,7 @@ pub(crate) struct Evaluator { filters: IndexSet, deflater: Deflaters, optimize_alpha: bool, + final_round: bool, nth: AtomicUsize, executed: Arc, best_candidate_size: Arc, @@ -69,6 +65,7 @@ impl Evaluator { filters: IndexSet, deflater: Deflaters, optimize_alpha: bool, + final_round: bool, ) -> Self { #[cfg(feature = "parallel")] let eval_channel = unbounded(); @@ -77,6 +74,7 @@ impl Evaluator { filters, deflater, optimize_alpha, + final_round, nth: AtomicUsize::new(0), executed: Arc::new(AtomicUsize::new(0)), best_candidate_size: Arc::new(AtomicMin::new(None)), @@ -127,6 +125,7 @@ impl Evaluator { let filters = self.filters.clone(); let deflater = self.deflater; let optimize_alpha = self.optimize_alpha; + let final_round = self.final_round; let executed = self.executed.clone(); let best_candidate_size = self.best_candidate_size.clone(); let description = description.to_string(); @@ -149,21 +148,24 @@ impl Evaluator { let filtered = image.filter_image(filter, optimize_alpha); let idat_data = deflater.deflate(&filtered, best_candidate_size.get()); if let Ok(idat_data) = idat_data { + let estimated_output_size = image.estimated_output_size(&idat_data); + // In the final round, we need the IDAT data but not the filtered data + // Otherwise, we want to keep the filtered data for the next round let new = Candidate { image: image.clone(), - idat_data, - filtered, + idat_data: if final_round { idat_data } else { vec![] }, + estimated_output_size, + filtered: if final_round { vec![] } else { filtered }, filter, nth, }; - let size = new.estimated_output_size(); - best_candidate_size.set_min(size); + best_candidate_size.set_min(estimated_output_size); trace!( "Eval: {}-bit {:23} {:8} {} bytes", image.ihdr.bit_depth, description, filter, - size + estimated_output_size ); #[cfg(feature = "parallel")] diff --git a/src/lib.rs b/src/lib.rs index 10c85d09..cfe278d5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -355,7 +355,7 @@ fn optimize_png( let max_size = if opts.force { None } else { - Some(png.estimated_output_size()) + Some(png.raw.estimated_output_size(&png.idat_data)) }; if let Some(result) = optimize_raw(raw.clone(), &opts, deadline.clone(), max_size) { png.raw = result.image; @@ -433,7 +433,13 @@ fn optimize_raw( indexset! {RowFilter::None, RowFilter::Bigrams} }; // This will collect all versions of images and pick one that compresses best - let eval = Evaluator::new(deadline.clone(), eval_filters.clone(), eval_deflater, false); + let eval = Evaluator::new( + deadline.clone(), + eval_filters.clone(), + eval_deflater, + false, + opts.deflate == eval_deflater, + ); let mut new_image = perform_reductions(image.clone(), opts, &deadline, &eval); let eval_result = eval.get_best_candidate(); if let Some(ref result) = eval_result { @@ -464,7 +470,9 @@ fn optimize_raw( (eval_result?, eval_deflater) }; - if max_size.map_or(true, |max_size| result.estimated_output_size() < max_size) { + if !result.idat_data.is_empty() + && max_size.map_or(true, |max_size| result.estimated_output_size < max_size) + { debug!("Found better result:"); debug!(" {}, f = {}", deflater, result.filter); return Some(result); @@ -499,9 +507,10 @@ fn perform_trials( filters, eval_deflater, opts.optimize_alpha, + opts.deflate == eval_deflater, ); if let Some(result) = &eval_result { - eval.set_best_size(result.estimated_output_size()); + eval.set_best_size(result.estimated_output_size); } eval.try_image(image.clone()); if let Some(result) = eval.get_best_candidate() { @@ -520,8 +529,9 @@ fn perform_trials( debug!("Trying filter {} with {}", result.filter, opts.deflate); match opts.deflate.deflate(&result.filtered, max_size) { Ok(idat_data) => { + result.estimated_output_size = result.image.estimated_output_size(&idat_data); result.idat_data = idat_data; - trace!("{} bytes", result.estimated_output_size()); + trace!("{} bytes", result.estimated_output_size); } Err(PngError::DeflatedDataTooLong(bytes)) => { trace!(">{bytes} bytes"); @@ -545,7 +555,7 @@ fn perform_trials( } debug!("Trying {} filters with {}", filters.len(), opts.deflate); - let eval = Evaluator::new(deadline, filters, opts.deflate, opts.optimize_alpha); + let eval = Evaluator::new(deadline, filters, opts.deflate, opts.optimize_alpha, true); if let Some(max_size) = max_size { eval.set_best_size(max_size); } diff --git a/src/png/mod.rs b/src/png/mod.rs index ecb6b403..a872107a 100644 --- a/src/png/mod.rs +++ b/src/png/mod.rs @@ -185,12 +185,6 @@ impl PngData { }) } - /// Return an estimate of the output size which can help with evaluation of very small data - #[must_use] - pub fn estimated_output_size(&self) -> usize { - self.idat_data.len() + self.raw.key_chunks_size() - } - /// Format the `PngData` struct into a valid PNG bytestream #[must_use] pub fn output(&self) -> Vec { @@ -355,6 +349,12 @@ impl PngImage { } } + /// Return an estimate of the output size which can help with evaluation of very small data + #[must_use] + pub fn estimated_output_size(&self, idat_data: &[u8]) -> usize { + idat_data.len() + self.key_chunks_size() + } + /// Return an iterator over the scanlines of the image #[inline] #[must_use] From 5a2e4387ceccbc6a13d2d3a4ba573f85442c1056 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 8 Mar 2025 14:29:38 +1300 Subject: [PATCH 2/2] Add data_is_compressed property --- src/evaluate.rs | 11 +++++------ src/lib.rs | 39 +++++++++++++++++++-------------------- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/src/evaluate.rs b/src/evaluate.rs index 79078138..f15aaed9 100644 --- a/src/evaluate.rs +++ b/src/evaluate.rs @@ -21,9 +21,9 @@ use crate::{atomicmin::AtomicMin, deflate, filters::RowFilter, png::PngImage, De pub(crate) struct Candidate { pub image: Arc, - pub idat_data: Vec, + pub data: Vec, + pub data_is_compressed: bool, pub estimated_output_size: usize, - pub filtered: Vec, pub filter: RowFilter, // For determining tie-breaker nth: usize, @@ -149,13 +149,12 @@ impl Evaluator { let idat_data = deflater.deflate(&filtered, best_candidate_size.get()); if let Ok(idat_data) = idat_data { let estimated_output_size = image.estimated_output_size(&idat_data); - // In the final round, we need the IDAT data but not the filtered data - // Otherwise, we want to keep the filtered data for the next round + // For the final round we need the IDAT data, otherwise the filtered data let new = Candidate { image: image.clone(), - idat_data: if final_round { idat_data } else { vec![] }, + data: if final_round { idat_data } else { filtered }, + data_is_compressed: final_round, estimated_output_size, - filtered: if final_round { vec![] } else { filtered }, filter, nth, }; diff --git a/src/lib.rs b/src/lib.rs index cfe278d5..cd5d39d7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -170,7 +170,7 @@ impl RawImage { let mut png = PngData { raw: result.image, - idat_data: result.idat_data, + idat_data: result.data, aux_chunks, frames: Vec::new(), }; @@ -359,7 +359,7 @@ fn optimize_png( }; if let Some(result) = optimize_raw(raw.clone(), &opts, deadline.clone(), max_size) { png.raw = result.image; - png.idat_data = result.idat_data; + png.idat_data = result.data; recompress_frames(png, &opts, deadline, result.filter)?; postprocess_chunks(&mut png.aux_chunks, &png.raw.ihdr, &raw.ihdr); } @@ -470,7 +470,7 @@ fn optimize_raw( (eval_result?, eval_deflater) }; - if !result.idat_data.is_empty() + if result.data_is_compressed && max_size.map_or(true, |max_size| result.estimated_output_size < max_size) { debug!("Found better result:"); @@ -517,27 +517,26 @@ fn perform_trials( eval_result = Some(result); } } - if opts.deflate == eval_deflater { - // No further compression required - return eval_result; - } // We should have a result here - fail if not (e.g. deadline passed) let mut result = eval_result?; - // Recompress with the main deflater - debug!("Trying filter {} with {}", result.filter, opts.deflate); - match opts.deflate.deflate(&result.filtered, max_size) { - Ok(idat_data) => { - result.estimated_output_size = result.image.estimated_output_size(&idat_data); - result.idat_data = idat_data; - trace!("{} bytes", result.estimated_output_size); - } - Err(PngError::DeflatedDataTooLong(bytes)) => { - trace!(">{bytes} bytes"); - } - Err(_) => (), - }; + if !result.data_is_compressed { + // Compress with the main deflater + debug!("Trying filter {} with {}", result.filter, opts.deflate); + match opts.deflate.deflate(&result.data, max_size) { + Ok(idat_data) => { + result.estimated_output_size = result.image.estimated_output_size(&idat_data); + result.data = idat_data; + result.data_is_compressed = true; + trace!("{} bytes", result.estimated_output_size); + } + Err(PngError::DeflatedDataTooLong(bytes)) => { + trace!(">{bytes} bytes"); + } + Err(_) => (), + }; + } return Some(result); }