From f983d809f96de1d57f26e4d80bb517b1c1298bb5 Mon Sep 17 00:00:00 2001 From: aarnav Date: Mon, 5 May 2025 15:17:20 +0200 Subject: [PATCH 1/9] Make input loading fallible in SyncFromDiskStage --- libafl/src/stages/sync.rs | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/libafl/src/stages/sync.rs b/libafl/src/stages/sync.rs index 74e87d1e390..ff398861f75 100644 --- a/libafl/src/stages/sync.rs +++ b/libafl/src/stages/sync.rs @@ -8,16 +8,16 @@ use core::{marker::PhantomData, time::Duration}; use std::path::{Path, PathBuf}; use libafl_bolts::{ - Named, current_time, + current_time, fs::find_new_files_rec, shmem::{ShMem, ShMemProvider}, + Named, }; use serde::{Deserialize, Serialize}; use crate::{ - Error, HasMetadata, HasNamedMetadata, corpus::{Corpus, CorpusId, HasCurrentCorpusId}, - events::{Event, EventConfig, EventFirer, EventWithStats, llmp::LlmpEventConverter}, + events::{llmp::LlmpEventConverter, Event, EventConfig, EventFirer, EventWithStats}, executors::{Executor, ExitKind, HasObservers}, fuzzer::{Evaluator, EvaluatorObservers, ExecutionProcessor, HasObjective}, inputs::{Input, InputConverter}, @@ -26,6 +26,7 @@ use crate::{ HasCorpus, HasCurrentTestcase, HasExecutions, HasRand, HasSolutions, MaybeHasClientPerfMonitor, Stoppable, }, + Error, HasMetadata, HasNamedMetadata, }; /// Default name for `SyncFromDiskStage`; derived from AFL++ @@ -75,7 +76,7 @@ impl Named for SyncFromDiskStage { impl Stage for SyncFromDiskStage where - CB: FnMut(&mut Z, &mut S, &Path) -> Result, + CB: FnMut(&mut Z, &mut S, &Path) -> Result, Error>, Z: Evaluator, S: HasCorpus + HasRand @@ -134,6 +135,9 @@ where .unwrap() .left_to_sync .retain(|p| p != &path); + let Some(input) = input else { + continue; + }; log::debug!("Syncing and evaluating {path:?}"); fuzzer.evaluate_input(state, executor, manager, &input)?; } @@ -174,7 +178,7 @@ impl SyncFromDiskStage { } /// Function type when the callback in `SyncFromDiskStage` is not a lambda -pub type SyncFromDiskFunction = fn(&mut Z, &mut S, &Path) -> Result; +pub type SyncFromDiskFunction = fn(&mut Z, &mut S, &Path) -> Result, Error>; impl SyncFromDiskStage, E, EM, I, S, Z> where @@ -185,12 +189,15 @@ where /// Creates a new [`SyncFromDiskStage`] invoking `Input::from_file` to load inputs #[must_use] pub fn with_from_file(sync_dirs: Vec, interval: Duration) -> Self { - fn load_callback(_: &mut Z, _: &mut S, p: &Path) -> Result + fn load_callback(_: &mut Z, _: &mut S, p: &Path) -> Result, Error> where I: Input, S: HasCorpus, { - Input::from_file(p) + match Input::from_file(p) { + Err(err) => Err(err), + Ok(input) => Ok(Some(input)), + } } Self { interval, From 64ddded1dde8fe5b6b4a540f28d2487cc134acf4 Mon Sep 17 00:00:00 2001 From: aarnav Date: Mon, 5 May 2025 15:21:19 +0200 Subject: [PATCH 2/9] fmt --- libafl/src/stages/sync.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libafl/src/stages/sync.rs b/libafl/src/stages/sync.rs index ff398861f75..4c752e5f94d 100644 --- a/libafl/src/stages/sync.rs +++ b/libafl/src/stages/sync.rs @@ -8,16 +8,16 @@ use core::{marker::PhantomData, time::Duration}; use std::path::{Path, PathBuf}; use libafl_bolts::{ - current_time, + Named, current_time, fs::find_new_files_rec, shmem::{ShMem, ShMemProvider}, - Named, }; use serde::{Deserialize, Serialize}; use crate::{ + Error, HasMetadata, HasNamedMetadata, corpus::{Corpus, CorpusId, HasCurrentCorpusId}, - events::{llmp::LlmpEventConverter, Event, EventConfig, EventFirer, EventWithStats}, + events::{Event, EventConfig, EventFirer, EventWithStats, llmp::LlmpEventConverter}, executors::{Executor, ExitKind, HasObservers}, fuzzer::{Evaluator, EvaluatorObservers, ExecutionProcessor, HasObjective}, inputs::{Input, InputConverter}, @@ -26,7 +26,6 @@ use crate::{ HasCorpus, HasCurrentTestcase, HasExecutions, HasRand, HasSolutions, MaybeHasClientPerfMonitor, Stoppable, }, - Error, HasMetadata, HasNamedMetadata, }; /// Default name for `SyncFromDiskStage`; derived from AFL++ From fc5d82a6c4eb28dd6deed47255f63cb9b9ad9ba6 Mon Sep 17 00:00:00 2001 From: aarnav Date: Thu, 8 May 2025 17:08:12 +0200 Subject: [PATCH 3/9] Add InvalidInput in Error enum and skip the Input in SyncFromDiskStage if it is encountered --- libafl/src/stages/sync.rs | 36 ++++++++++++++++++++---------------- libafl_bolts/src/lib.rs | 12 ++++++++++++ 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/libafl/src/stages/sync.rs b/libafl/src/stages/sync.rs index 4c752e5f94d..3d6ee3cf42b 100644 --- a/libafl/src/stages/sync.rs +++ b/libafl/src/stages/sync.rs @@ -58,6 +58,7 @@ impl SyncFromDiskMetadata { } /// A stage that loads testcases from disk to sync with other fuzzers such as AFL++ +/// When syncing, the stage will ignore `Error::InvalidInput` and will skip the file. #[derive(Debug)] pub struct SyncFromDiskStage { name: Cow<'static, str>, @@ -75,7 +76,7 @@ impl Named for SyncFromDiskStage { impl Stage for SyncFromDiskStage where - CB: FnMut(&mut Z, &mut S, &Path) -> Result, Error>, + CB: FnMut(&mut Z, &mut S, &Path) -> Result, Z: Evaluator, S: HasCorpus + HasRand @@ -125,18 +126,23 @@ where let to_sync = sync_from_disk_metadata.left_to_sync.clone(); log::debug!("Number of files to sync: {:?}", to_sync.len()); for path in to_sync { - let input = (self.load_callback)(fuzzer, state, &path)?; + let input = (self.load_callback)(fuzzer, state, &path); + let input_is_invalid = matches!(input, Err(Error::InvalidInput(_))); // Removing each path from the `left_to_sync` Vec before evaluating // prevents duplicate processing and ensures that each file is evaluated only once. This approach helps - // avoid potential infinite loops that may occur if a file is an objective. - state - .metadata_mut::() - .unwrap() - .left_to_sync - .retain(|p| p != &path); - let Some(input) = input else { + // avoid potential infinite loops that may occur if a file is an objective or an invalid input. + if input.is_ok() || input_is_invalid { + state + .metadata_mut::() + .unwrap() + .left_to_sync + .retain(|p| p != &path); + } + if input_is_invalid { + log::debug!("Invalid input found in {path:?} when syncing; skipping;"); continue; - }; + } + let input = input?; log::debug!("Syncing and evaluating {path:?}"); fuzzer.evaluate_input(state, executor, manager, &input)?; } @@ -164,6 +170,7 @@ where impl SyncFromDiskStage { /// Creates a new [`SyncFromDiskStage`] + /// To skip a file, you can return `Error::invalid_input` in `load_callback` #[must_use] pub fn new(sync_dirs: Vec, load_callback: CB, interval: Duration, name: &str) -> Self { Self { @@ -177,7 +184,7 @@ impl SyncFromDiskStage { } /// Function type when the callback in `SyncFromDiskStage` is not a lambda -pub type SyncFromDiskFunction = fn(&mut Z, &mut S, &Path) -> Result, Error>; +pub type SyncFromDiskFunction = fn(&mut Z, &mut S, &Path) -> Result; impl SyncFromDiskStage, E, EM, I, S, Z> where @@ -188,15 +195,12 @@ where /// Creates a new [`SyncFromDiskStage`] invoking `Input::from_file` to load inputs #[must_use] pub fn with_from_file(sync_dirs: Vec, interval: Duration) -> Self { - fn load_callback(_: &mut Z, _: &mut S, p: &Path) -> Result, Error> + fn load_callback(_: &mut Z, _: &mut S, p: &Path) -> Result where I: Input, S: HasCorpus, { - match Input::from_file(p) { - Err(err) => Err(err), - Ok(input) => Ok(Some(input)), - } + Input::from_file(p) } Self { interval, diff --git a/libafl_bolts/src/lib.rs b/libafl_bolts/src/lib.rs index 9b9ca7e4fd2..4bee15189c1 100644 --- a/libafl_bolts/src/lib.rs +++ b/libafl_bolts/src/lib.rs @@ -341,6 +341,8 @@ pub enum Error { InvalidCorpus(String, ErrorBacktrace), /// Error specific to a runtime like QEMU or Frida Runtime(String, ErrorBacktrace), + /// The `Input` was invalid. + InvalidInput(ErrorBacktrace), } impl Error { @@ -369,6 +371,12 @@ impl Error { Error::EmptyOptional(arg.into(), ErrorBacktrace::new()) } + /// The `Input` was invalid + #[must_use] + pub fn invalid_input() -> Self { + Error::InvalidInput(ErrorBacktrace::new()) + } + /// Key not in Map #[must_use] pub fn key_not_found(arg: S) -> Self @@ -580,6 +588,10 @@ impl Display for Error { write!(f, "Runtime error: {0}", &s)?; display_error_backtrace(f, b) } + Self::InvalidInput(b) => { + write!(f, "Encountered an invalid input")?; + display_error_backtrace(f, b) + } } } } From ce61f00e76bf55f3361e07358a105307f1966341 Mon Sep 17 00:00:00 2001 From: aarnav Date: Sun, 11 May 2025 12:59:50 +0200 Subject: [PATCH 4/9] sync: remove file if error on loading in SyncFromDiskStage --- libafl/src/stages/sync.rs | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/libafl/src/stages/sync.rs b/libafl/src/stages/sync.rs index 3d6ee3cf42b..4f0e5f7eed8 100644 --- a/libafl/src/stages/sync.rs +++ b/libafl/src/stages/sync.rs @@ -8,16 +8,16 @@ use core::{marker::PhantomData, time::Duration}; use std::path::{Path, PathBuf}; use libafl_bolts::{ - Named, current_time, + current_time, fs::find_new_files_rec, shmem::{ShMem, ShMemProvider}, + Named, }; use serde::{Deserialize, Serialize}; use crate::{ - Error, HasMetadata, HasNamedMetadata, corpus::{Corpus, CorpusId, HasCurrentCorpusId}, - events::{Event, EventConfig, EventFirer, EventWithStats, llmp::LlmpEventConverter}, + events::{llmp::LlmpEventConverter, Event, EventConfig, EventFirer, EventWithStats}, executors::{Executor, ExitKind, HasObservers}, fuzzer::{Evaluator, EvaluatorObservers, ExecutionProcessor, HasObjective}, inputs::{Input, InputConverter}, @@ -26,6 +26,7 @@ use crate::{ HasCorpus, HasCurrentTestcase, HasExecutions, HasRand, HasSolutions, MaybeHasClientPerfMonitor, Stoppable, }, + Error, HasMetadata, HasNamedMetadata, }; /// Default name for `SyncFromDiskStage`; derived from AFL++ @@ -126,23 +127,22 @@ where let to_sync = sync_from_disk_metadata.left_to_sync.clone(); log::debug!("Number of files to sync: {:?}", to_sync.len()); for path in to_sync { - let input = (self.load_callback)(fuzzer, state, &path); - let input_is_invalid = matches!(input, Err(Error::InvalidInput(_))); // Removing each path from the `left_to_sync` Vec before evaluating // prevents duplicate processing and ensures that each file is evaluated only once. This approach helps // avoid potential infinite loops that may occur if a file is an objective or an invalid input. - if input.is_ok() || input_is_invalid { - state - .metadata_mut::() - .unwrap() - .left_to_sync - .retain(|p| p != &path); - } - if input_is_invalid { - log::debug!("Invalid input found in {path:?} when syncing; skipping;"); - continue; - } - let input = input?; + state + .metadata_mut::() + .unwrap() + .left_to_sync + .retain(|p| p != &path); + let input = match (self.load_callback)(fuzzer, state, &path) { + Ok(input) => input, + Err(Error::InvalidInput(_)) => { + log::debug!("Invalid input found in {path:?} when syncing; skipping;"); + continue; + } + Err(e) => return Err(e), + }; log::debug!("Syncing and evaluating {path:?}"); fuzzer.evaluate_input(state, executor, manager, &input)?; } From 446502ea1e6349bec9e85cde948d15e66221f5e0 Mon Sep 17 00:00:00 2001 From: aarnav Date: Sun, 11 May 2025 13:04:47 +0200 Subject: [PATCH 5/9] add reason to Error::InvalidInput --- libafl/src/stages/sync.rs | 6 ++++-- libafl_bolts/src/lib.rs | 23 ++++++++++++++--------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/libafl/src/stages/sync.rs b/libafl/src/stages/sync.rs index 4f0e5f7eed8..05c70ba516e 100644 --- a/libafl/src/stages/sync.rs +++ b/libafl/src/stages/sync.rs @@ -137,8 +137,10 @@ where .retain(|p| p != &path); let input = match (self.load_callback)(fuzzer, state, &path) { Ok(input) => input, - Err(Error::InvalidInput(_)) => { - log::debug!("Invalid input found in {path:?} when syncing; skipping;"); + Err(Error::InvalidInput(reason, _)) => { + log::debug!( + "Invalid input found in {path:?} when syncing; reason {reason}; skipping;" + ); continue; } Err(e) => return Err(e), diff --git a/libafl_bolts/src/lib.rs b/libafl_bolts/src/lib.rs index 4bee15189c1..a1475c57ddc 100644 --- a/libafl_bolts/src/lib.rs +++ b/libafl_bolts/src/lib.rs @@ -62,7 +62,9 @@ type String = &'static str; /// Good enough for simple errors, for anything else, use the `alloc` feature. #[cfg(not(feature = "alloc"))] macro_rules! format { - ($fmt:literal) => {{ $fmt }}; + ($fmt:literal) => {{ + $fmt + }}; } #[cfg(feature = "std")] @@ -164,7 +166,7 @@ use std::time::{SystemTime, UNIX_EPOCH}; #[cfg(all(unix, feature = "std"))] use std::{ fs::File, - io::{Write, stderr, stdout}, + io::{stderr, stdout, Write}, os::fd::{AsRawFd, FromRawFd, RawFd}, panic, }; @@ -342,7 +344,7 @@ pub enum Error { /// Error specific to a runtime like QEMU or Frida Runtime(String, ErrorBacktrace), /// The `Input` was invalid. - InvalidInput(ErrorBacktrace), + InvalidInput(String, ErrorBacktrace), } impl Error { @@ -373,8 +375,11 @@ impl Error { /// The `Input` was invalid #[must_use] - pub fn invalid_input() -> Self { - Error::InvalidInput(ErrorBacktrace::new()) + pub fn invalid_input(reason: S) -> Self + where + S: Into, + { + Error::InvalidInput(reason.into(), ErrorBacktrace::new()) } /// Key not in Map @@ -588,8 +593,8 @@ impl Display for Error { write!(f, "Runtime error: {0}", &s)?; display_error_backtrace(f, b) } - Self::InvalidInput(b) => { - write!(f, "Encountered an invalid input")?; + Self::InvalidInput(s, b) => { + write!(f, "Encountered an invalid input: {0}", &s)?; display_error_backtrace(f, b) } } @@ -1076,7 +1081,7 @@ pub fn get_thread_id() -> u64 { #[allow(clippy::cast_sign_loss)] /// Return thread ID without using TLS pub fn get_thread_id() -> u64 { - use libc::{SYS_gettid, syscall}; + use libc::{syscall, SYS_gettid}; unsafe { syscall(SYS_gettid) as u64 } } @@ -1429,7 +1434,7 @@ macro_rules! nonnull_raw_mut { #[allow(missing_docs)] // expect somehow breaks here pub mod pybind { - use pyo3::{Bound, PyResult, pymodule, types::PyModule}; + use pyo3::{pymodule, types::PyModule, Bound, PyResult}; #[macro_export] macro_rules! unwrap_me_body { From fce5ec44476dbfd66e0c59130b7e02c0d0e8dba8 Mon Sep 17 00:00:00 2001 From: aarnav Date: Wed, 14 May 2025 07:59:15 +0200 Subject: [PATCH 6/9] sync make failure log a warning. clippy, fmt --- libafl/src/stages/sync.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/libafl/src/stages/sync.rs b/libafl/src/stages/sync.rs index 8029b831310..b31b97622ef 100644 --- a/libafl/src/stages/sync.rs +++ b/libafl/src/stages/sync.rs @@ -8,16 +8,16 @@ use core::{marker::PhantomData, time::Duration}; use std::path::{Path, PathBuf}; use libafl_bolts::{ - current_time, + Named, current_time, fs::find_new_files_rec, shmem::{ShMem, ShMemProvider}, - Named, }; use serde::{Deserialize, Serialize}; use crate::{ + Error, HasMetadata, HasNamedMetadata, corpus::{Corpus, CorpusId, HasCurrentCorpusId}, - events::{llmp::LlmpEventConverter, Event, EventConfig, EventFirer, EventWithStats}, + events::{Event, EventConfig, EventFirer, EventWithStats, llmp::LlmpEventConverter}, executors::{Executor, ExitKind, HasObservers}, fuzzer::{Evaluator, EvaluatorObservers, ExecutionProcessor, HasObjective}, inputs::{Input, InputConverter}, @@ -26,7 +26,6 @@ use crate::{ HasCorpus, HasCurrentTestcase, HasExecutions, HasRand, HasSolutions, MaybeHasClientPerfMonitor, Stoppable, }, - Error, HasMetadata, HasNamedMetadata, }; /// Default name for `SyncFromDiskStage`; derived from AFL++ @@ -138,8 +137,8 @@ where let input = match (self.load_callback)(fuzzer, state, &path) { Ok(input) => input, Err(Error::InvalidInput(reason, _)) => { - log::debug!( - "Invalid input found in {path:?} when syncing; reason {reason}; skipping;" + log::warning!( + "Invalid input found in {} when syncing; reason {reason}; skipping;", path.display() ); continue; } From f2f4becbe56bce343aab97c386f88f8d1243a015 Mon Sep 17 00:00:00 2001 From: aarnav Date: Wed, 14 May 2025 08:02:26 +0200 Subject: [PATCH 7/9] typo --- libafl/src/stages/sync.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libafl/src/stages/sync.rs b/libafl/src/stages/sync.rs index b31b97622ef..b0e7a95e725 100644 --- a/libafl/src/stages/sync.rs +++ b/libafl/src/stages/sync.rs @@ -137,7 +137,7 @@ where let input = match (self.load_callback)(fuzzer, state, &path) { Ok(input) => input, Err(Error::InvalidInput(reason, _)) => { - log::warning!( + log::warn!( "Invalid input found in {} when syncing; reason {reason}; skipping;", path.display() ); continue; From a67b0d9dd9a11dc37e4a8e2b5c9e521b570eaacf Mon Sep 17 00:00:00 2001 From: aarnav Date: Wed, 14 May 2025 08:02:54 +0200 Subject: [PATCH 8/9] fmt --- libafl/src/stages/sync.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libafl/src/stages/sync.rs b/libafl/src/stages/sync.rs index b0e7a95e725..8c8641358f6 100644 --- a/libafl/src/stages/sync.rs +++ b/libafl/src/stages/sync.rs @@ -138,7 +138,8 @@ where Ok(input) => input, Err(Error::InvalidInput(reason, _)) => { log::warn!( - "Invalid input found in {} when syncing; reason {reason}; skipping;", path.display() + "Invalid input found in {} when syncing; reason {reason}; skipping;", + path.display() ); continue; } From 63c83cdb474aa2aa13f5fb34835694a975ed50e3 Mon Sep 17 00:00:00 2001 From: aarnav Date: Wed, 14 May 2025 08:15:38 +0200 Subject: [PATCH 9/9] fmt --- libafl_bolts/src/lib.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/libafl_bolts/src/lib.rs b/libafl_bolts/src/lib.rs index 414d960a226..83805bf087b 100644 --- a/libafl_bolts/src/lib.rs +++ b/libafl_bolts/src/lib.rs @@ -62,9 +62,7 @@ type String = &'static str; /// Good enough for simple errors, for anything else, use the `alloc` feature. #[cfg(not(feature = "alloc"))] macro_rules! format { - ($fmt:literal) => {{ - $fmt - }}; + ($fmt:literal) => {{ $fmt }}; } #[cfg(feature = "std")] @@ -166,7 +164,7 @@ use std::time::{SystemTime, UNIX_EPOCH}; #[cfg(all(unix, feature = "std"))] use std::{ fs::File, - io::{stderr, stdout, Write}, + io::{Write, stderr, stdout}, os::fd::{AsRawFd, FromRawFd, RawFd}, panic, }; @@ -1105,7 +1103,7 @@ pub fn get_thread_id() -> u64 { #[allow(clippy::cast_sign_loss)] /// Return thread ID without using TLS pub fn get_thread_id() -> u64 { - use libc::{syscall, SYS_gettid}; + use libc::{SYS_gettid, syscall}; unsafe { syscall(SYS_gettid) as u64 } } @@ -1458,7 +1456,7 @@ macro_rules! nonnull_raw_mut { #[allow(missing_docs)] // expect somehow breaks here pub mod pybind { - use pyo3::{pymodule, types::PyModule, Bound, PyResult}; + use pyo3::{Bound, PyResult, pymodule, types::PyModule}; #[macro_export] macro_rules! unwrap_me_body {