From 97240acc1a5940cb724da9f6f101a307436f0caf Mon Sep 17 00:00:00 2001 From: Romain Malmain Date: Mon, 14 Apr 2025 10:06:26 +0200 Subject: [PATCH] draft for corpus refactoring --- libafl/Cargo.toml | 5 +- libafl/src/corpus/cached.rs | 282 ------------ libafl/src/corpus/combined.rs | 392 +++++++++++++++++ libafl/src/corpus/inmemory.rs | 479 --------------------- libafl/src/corpus/inmemory_ondisk.rs | 519 ----------------------- libafl/src/corpus/mod.rs | 149 ++++--- libafl/src/corpus/nop.rs | 23 +- libafl/src/corpus/ondisk.rs | 279 ------------ libafl/src/corpus/single.rs | 114 +++++ libafl/src/corpus/store/inmemory.rs | 104 +++++ libafl/src/corpus/store/maps.rs | 276 ++++++++++++ libafl/src/corpus/store/mod.rs | 72 ++++ libafl/src/corpus/store/ondisk.rs | 321 ++++++++++++++ libafl/src/corpus/testcase.rs | 351 +++++---------- libafl/src/executors/inprocess/mod.rs | 11 +- libafl/src/feedbacks/mod.rs | 3 - libafl/src/fuzzer/mod.rs | 39 +- libafl/src/mutators/encoded_mutations.rs | 8 +- libafl/src/mutators/gramatron.rs | 7 +- libafl/src/mutators/mutations.rs | 23 +- libafl/src/mutators/numeric.rs | 4 +- libafl/src/state/mod.rs | 18 +- 22 files changed, 1544 insertions(+), 1935 deletions(-) delete mode 100644 libafl/src/corpus/cached.rs create mode 100644 libafl/src/corpus/combined.rs delete mode 100644 libafl/src/corpus/inmemory.rs delete mode 100644 libafl/src/corpus/inmemory_ondisk.rs delete mode 100644 libafl/src/corpus/ondisk.rs create mode 100644 libafl/src/corpus/single.rs create mode 100644 libafl/src/corpus/store/inmemory.rs create mode 100644 libafl/src/corpus/store/maps.rs create mode 100644 libafl/src/corpus/store/mod.rs create mode 100644 libafl/src/corpus/store/ondisk.rs diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml index 2b1f227d000..a44acdf0297 100644 --- a/libafl/Cargo.toml +++ b/libafl/Cargo.toml @@ -227,7 +227,7 @@ hashbrown = { workspace = true, features = [ "ahash", ], default-features = false } # A faster hashmap, nostd compatible num-traits = 
{ workspace = true, default-features = false } -serde = { workspace = true, features = ["alloc"] } # serialization lib +serde = { workspace = true, features = ["alloc", "rc"] } # serialization lib postcard = { workspace = true } # no_std compatible serde serialization format bincode = { version = "1.3.3", optional = true } bitbybit = { workspace = true } @@ -236,9 +236,10 @@ ahash = { workspace = true } # The hash function already used in hashbrown meminterval = { workspace = true, features = ["serde"] } backtrace = { workspace = true, optional = true } # Used to get the stacktrace in StacktraceObserver typed-builder = { workspace = true, optional = true } # Implement the builder pattern at compiletime +fs4 = "0.13.1" serde_json = { workspace = true, optional = true, default-features = false, features = [ - "alloc", + "alloc" ] } nix = { workspace = true, optional = true, features = [ "signal", diff --git a/libafl/src/corpus/cached.rs b/libafl/src/corpus/cached.rs deleted file mode 100644 index c9befc19e28..00000000000 --- a/libafl/src/corpus/cached.rs +++ /dev/null @@ -1,282 +0,0 @@ -//! The [`CachedOnDiskCorpus`] stores [`Testcase`]s to disk, keeping a subset of them in memory/cache, evicting in a FIFO manner. - -use alloc::{collections::vec_deque::VecDeque, string::String}; -use core::cell::{Ref, RefCell, RefMut}; -use std::path::Path; - -use serde::{Deserialize, Serialize}; - -use crate::{ - Error, - corpus::{ - Corpus, CorpusId, HasTestcase, Testcase, inmemory_ondisk::InMemoryOnDiskCorpus, - ondisk::OnDiskMetadataFormat, - }, - inputs::Input, -}; - -/// A corpus that keeps a maximum number of [`Testcase`]s in memory -/// and load them from disk, when they are being used. -/// The eviction policy is FIFO. 
-#[derive(Default, Serialize, Deserialize, Clone, Debug)] -pub struct CachedOnDiskCorpus { - inner: InMemoryOnDiskCorpus, - cached_indexes: RefCell>, - cache_max_len: usize, -} - -impl CachedOnDiskCorpus -where - I: Input, -{ - fn cache_testcase<'a>( - &'a self, - testcase: &'a RefCell>, - id: CorpusId, - ) -> Result<(), Error> { - if testcase.borrow().input().is_none() { - self.load_input_into(&mut testcase.borrow_mut())?; - let mut borrowed_num = 0; - while self.cached_indexes.borrow().len() >= self.cache_max_len { - let removed = self.cached_indexes.borrow_mut().pop_front().unwrap(); - - if let Ok(mut borrowed) = self.inner.get_from_all(removed)?.try_borrow_mut() { - *borrowed.input_mut() = None; - } else { - self.cached_indexes.borrow_mut().push_back(removed); - borrowed_num += 1; - if self.cache_max_len == borrowed_num { - break; - } - } - } - self.cached_indexes.borrow_mut().push_back(id); - } - Ok(()) - } -} - -impl Corpus for CachedOnDiskCorpus -where - I: Input, -{ - /// Returns the number of all enabled entries - #[inline] - fn count(&self) -> usize { - self.inner.count() - } - - /// Returns the number of all disabled entries - fn count_disabled(&self) -> usize { - self.inner.count_disabled() - } - - /// Returns the number of elements including disabled entries - #[inline] - fn count_all(&self) -> usize { - self.inner.count_all() - } - - /// Add an enabled testcase to the corpus and return its index - #[inline] - fn add(&mut self, testcase: Testcase) -> Result { - self.inner.add(testcase) - } - - /// Add a disabled testcase to the corpus and return its index - #[inline] - fn add_disabled(&mut self, testcase: Testcase) -> Result { - self.inner.add_disabled(testcase) - } - - /// Replaces the testcase at the given idx - #[inline] - fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Result, Error> { - // TODO finish - self.inner.replace(id, testcase) - } - - /// Removes an entry from the corpus, returning it if it was present; considers both enabled 
and disabled testcases. - fn remove(&mut self, id: CorpusId) -> Result, Error> { - let testcase = self.inner.remove(id)?; - self.cached_indexes.borrow_mut().retain(|e| *e != id); - Ok(testcase) - } - - /// Get by id; considers only enabled testcases - #[inline] - fn get(&self, id: CorpusId) -> Result<&RefCell>, Error> { - let testcase = { self.inner.get(id)? }; - self.cache_testcase(testcase, id)?; - Ok(testcase) - } - /// Get by id; considers both enabled and disabled testcases - #[inline] - fn get_from_all(&self, id: CorpusId) -> Result<&RefCell>, Error> { - let testcase = { self.inner.get_from_all(id)? }; - self.cache_testcase(testcase, id)?; - Ok(testcase) - } - - /// Current testcase scheduled - #[inline] - fn current(&self) -> &Option { - self.inner.current() - } - - /// Current testcase scheduled (mutable) - #[inline] - fn current_mut(&mut self) -> &mut Option { - self.inner.current_mut() - } - - #[inline] - fn next(&self, id: CorpusId) -> Option { - self.inner.next(id) - } - - /// Peek the next free corpus id - #[inline] - fn peek_free_id(&self) -> CorpusId { - self.inner.peek_free_id() - } - - #[inline] - fn prev(&self, id: CorpusId) -> Option { - self.inner.prev(id) - } - - #[inline] - fn first(&self) -> Option { - self.inner.first() - } - - #[inline] - fn last(&self) -> Option { - self.inner.last() - } - - /// Get the nth corpus id; considers only enabled testcases - #[inline] - fn nth(&self, nth: usize) -> CorpusId { - self.inner.nth(nth) - } - /// Get the nth corpus id; considers both enabled and disabled testcases - #[inline] - fn nth_from_all(&self, nth: usize) -> CorpusId { - self.inner.nth_from_all(nth) - } - - #[inline] - fn load_input_into(&self, testcase: &mut Testcase) -> Result<(), Error> { - self.inner.load_input_into(testcase) - } - - #[inline] - fn store_input_from(&self, testcase: &Testcase) -> Result<(), Error> { - self.inner.store_input_from(testcase) - } -} - -impl HasTestcase for CachedOnDiskCorpus -where - I: Input, -{ - fn 
testcase(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow()) - } - - fn testcase_mut(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow_mut()) - } -} - -impl CachedOnDiskCorpus { - /// Creates the [`CachedOnDiskCorpus`]. - /// - /// This corpus stores (and reads) all testcases to/from disk - /// - /// By default, it stores metadata for each [`Testcase`] as prettified json. - /// Metadata will be written to a file named `..metadata` - /// the metadata may include objective reason, specific information for a fuzz job, and more. - /// - /// If you don't want metadata, use [`CachedOnDiskCorpus::no_meta`]. - /// to pick a different metadata format, use [`CachedOnDiskCorpus::with_meta_format`]. - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn new

(dir_path: P, cache_max_len: usize) -> Result - where - P: AsRef, - { - Self::_new(InMemoryOnDiskCorpus::new(dir_path)?, cache_max_len) - } - - /// Creates an [`CachedOnDiskCorpus`] that does not store [`Testcase`] metadata to disk. - pub fn no_meta

(dir_path: P, cache_max_len: usize) -> Result - where - P: AsRef, - { - Self::_new(InMemoryOnDiskCorpus::no_meta(dir_path)?, cache_max_len) - } - - /// Creates the [`CachedOnDiskCorpus`] specifying the format in which `Metadata` will be saved to disk. - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn with_meta_format

( - dir_path: P, - cache_max_len: usize, - meta_format: Option, - ) -> Result - where - P: AsRef, - { - Self::_new( - InMemoryOnDiskCorpus::with_meta_format(dir_path, meta_format)?, - cache_max_len, - ) - } - - /// Creates the [`CachedOnDiskCorpus`] specifying the metadata format and the prefix to prepend - /// to each testcase. - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn with_meta_format_and_prefix

( - dir_path: P, - cache_max_len: usize, - meta_format: Option, - prefix: Option, - locking: bool, - ) -> Result - where - P: AsRef, - { - Self::_new( - InMemoryOnDiskCorpus::with_meta_format_and_prefix( - dir_path, - meta_format, - prefix, - locking, - )?, - cache_max_len, - ) - } - - /// Internal constructor `fn` - fn _new(on_disk_corpus: InMemoryOnDiskCorpus, cache_max_len: usize) -> Result { - if cache_max_len == 0 { - return Err(Error::illegal_argument( - "The max cache len in CachedOnDiskCorpus cannot be 0", - )); - } - Ok(Self { - inner: on_disk_corpus, - cached_indexes: RefCell::new(VecDeque::new()), - cache_max_len, - }) - } - - /// Fetch the inner corpus - pub fn inner(&self) -> &InMemoryOnDiskCorpus { - &self.inner - } -} diff --git a/libafl/src/corpus/combined.rs b/libafl/src/corpus/combined.rs new file mode 100644 index 00000000000..e02bbbf92e1 --- /dev/null +++ b/libafl/src/corpus/combined.rs @@ -0,0 +1,392 @@ +use core::{cell::RefCell, marker::PhantomData}; +use std::{collections::VecDeque, rc::Rc, vec::Vec}; + +use libafl_bolts::Error; +use serde::{Deserialize, Serialize}; + +use super::{Corpus, CorpusCounter, CorpusId, Testcase, store::Store}; + +/// A [`CombinedCorpus`] tries first to use the main store according to some policy. +/// If it fails, it falls back to the secondary store. 
+#[derive(Default, Serialize, Deserialize, Clone, Debug)] +pub struct CombinedCorpus { + /// The cache store + cache_store: RefCell, + /// The fallback store + fallback_store: FS, + /// The policy taking decisions + cache: RefCell, + /// The corpus ID counter + counter: CorpusCounter, + /// The keys in order (use `Vec::binary_search`) + keys: Vec, + /// The current ID + current: Option, + phantom: PhantomData, +} + +pub struct FifoCache { + cached_ids: VecDeque, + cache_max_len: usize, + phantom: PhantomData<(I, CS, FS)>, +} + +pub struct IdentityCache; + +pub trait Cache { + fn add( + &mut self, + id: CorpusId, + testcase: Testcase, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result<(), Error>; + + fn add_disabled( + &mut self, + id: CorpusId, + testcase: Testcase, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result<(), Error>; + + fn replace( + &mut self, + id: CorpusId, + testcase: Testcase, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result, Error>; + + fn remove( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result>>, Error>; + + fn get( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &FS, + ) -> Result>>, Error>; + + fn get_from_all( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &FS, + ) -> Result>>, Error>; +} + +impl FifoCache +where + CS: Store, + FS: Store, + I: Clone, +{ + fn get_inner( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &FS, + cache_add_fn: CAF, + cache_get_fn: CGF, + cache_rm_fn: CRF, + fallback_get_fn: FGF, + ) -> Result>>, Error> + where + CAF: FnOnce(&mut CS, CorpusId, Testcase) -> Result<(), Error>, + CGF: FnOnce(&CS, CorpusId) -> Result>>, Error>, + CRF: FnOnce(&mut CS, CorpusId) -> Result>>, Error>, + FGF: FnOnce(&FS, CorpusId) -> Result>>, Error>, + { + if self.cached_ids.contains(&id) { + cache_get_fn(cache_store, id) + } else { + // testcase is not cached, fetch it from fallback + 
let fb_tc = fallback_get_fn(&fallback_store, id)?; + cache_add_fn(cache_store, id, fb_tc.borrow().clone())?; + + if self.cached_ids.len() == self.cache_max_len { + let to_evict = self.cached_ids.pop_back().unwrap(); + cache_rm_fn(cache_store, to_evict)?; + } + + debug_assert!(self.cached_ids.len() < self.cache_max_len); + + self.cached_ids.push_front(id); + + Ok(fb_tc) + } + } +} + +impl Cache for IdentityCache +where + CS: Store, + FS: Store, + I: Clone, +{ + fn add( + &mut self, + id: CorpusId, + testcase: Testcase, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result<(), Error> { + cache_store.add(id, testcase.clone())?; + fallback_store.add(id, testcase.clone()) + } + + fn add_disabled( + &mut self, + id: CorpusId, + testcase: Testcase, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result<(), Error> { + cache_store.add_disabled(id, testcase.clone())?; + fallback_store.add_disabled(id, testcase.clone()) + } + + fn replace( + &mut self, + id: CorpusId, + testcase: Testcase, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result, Error> { + cache_store.replace(id, testcase.clone())?; + fallback_store.replace(id, testcase.clone()) + } + + fn remove( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result>>, Error> { + cache_store.remove(id)?; + fallback_store.remove(id) + } + + fn get( + &mut self, + id: CorpusId, + cache_store: &mut CS, + _fallback_store: &FS, + ) -> Result>>, Error> { + cache_store.get(id) + } + + fn get_from_all( + &mut self, + id: CorpusId, + cache_store: &mut CS, + _fallback_store: &FS, + ) -> Result>>, Error> { + cache_store.get_from_all(id) + } +} + +impl Cache for FifoCache +where + CS: Store, + FS: Store, + I: Clone, +{ + fn get( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &FS, + ) -> Result>>, Error> { + self.get_inner( + id, + cache_store, + fallback_store, + |cache_store, corpus_id, testcase| cache_store.add(corpus_id, testcase), + 
|cache_store, corpus_id| cache_store.get(corpus_id), + |cache_store, corpus_id| cache_store.remove(corpus_id), + |fallback_store, corpus_id| fallback_store.get(corpus_id), + ) + } + + fn get_from_all( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &FS, + ) -> Result>>, Error> { + self.get_inner( + id, + cache_store, + fallback_store, + |cache_store, corpus_id, testcase| cache_store.add(corpus_id, testcase), + |cache_store, corpus_id| cache_store.get(corpus_id), + |cache_store, corpus_id| cache_store.remove(corpus_id), + |fallback_store, corpus_id| fallback_store.get_from_all(corpus_id), + ) + } + + fn add( + &mut self, + id: CorpusId, + testcase: Testcase, + _cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result<(), Error> { + fallback_store.add(id, testcase) + } + + fn add_disabled( + &mut self, + id: CorpusId, + testcase: Testcase, + _cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result<(), Error> { + fallback_store.add_disabled(id, testcase) + } + + fn replace( + &mut self, + id: CorpusId, + testcase: Testcase, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result, Error> { + if self.cached_ids.contains(&id) { + cache_store.replace(id, testcase.clone())?; + } + + fallback_store.replace(id, testcase) + } + + fn remove( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result>>, Error> { + if self.cached_ids.contains(&id) { + self.cached_ids.retain(|elt| *elt != id); + cache_store.remove(id)?; + } + + fallback_store.remove(id) + } +} + +impl Corpus for CombinedCorpus +where + C: Cache, + CS: Store, + FS: Store, + I: Clone, +{ + fn count(&self) -> usize { + self.fallback_store.count() + } + + fn count_disabled(&self) -> usize { + self.fallback_store.count_disabled() + } + + fn count_all(&self) -> usize { + self.fallback_store.count_all() + } + + fn add(&mut self, testcase: Testcase) -> Result { + let new_id = self.counter.new_id(); + + self.cache.borrow_mut().add( + new_id, + 
testcase, + &mut *self.cache_store.borrow_mut(), + &mut self.fallback_store, + )?; + + Ok(new_id) + } + + fn add_disabled(&mut self, testcase: Testcase) -> Result { + let new_id = self.counter.new_id(); + + self.cache.borrow_mut().add_disabled( + new_id, + testcase, + &mut *self.cache_store.borrow_mut(), + &mut self.fallback_store, + )?; + + Ok(new_id) + } + + fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Result, Error> { + self.cache.borrow_mut().replace( + id, + testcase, + &mut *self.cache_store.borrow_mut(), + &mut self.fallback_store, + ) + } + + fn remove(&mut self, id: CorpusId) -> Result>>, Error> { + self.cache.borrow_mut().remove( + id, + &mut *self.cache_store.borrow_mut(), + &mut self.fallback_store, + ) + } + + fn get(&self, id: CorpusId) -> Result>>, Error> { + self.cache.borrow_mut().get( + id, + &mut *self.cache_store.borrow_mut(), + &self.fallback_store, + ) + } + + fn get_from_all(&self, id: CorpusId) -> Result>>, Error> { + self.cache.borrow_mut().get_from_all( + id, + &mut *self.cache_store.borrow_mut(), + &self.fallback_store, + ) + } + + fn current(&self) -> &Option { + &self.current + } + + fn current_mut(&mut self) -> &mut Option { + &mut self.current + } + + fn next(&self, id: CorpusId) -> Option { + self.fallback_store.next(id) + } + + fn prev(&self, id: CorpusId) -> Option { + self.fallback_store.prev(id) + } + + fn first(&self) -> Option { + self.fallback_store.first() + } + + fn last(&self) -> Option { + self.fallback_store.last() + } + + fn nth(&self, nth: usize) -> CorpusId { + self.fallback_store.nth(nth) + } + + fn nth_from_all(&self, nth: usize) -> CorpusId { + self.fallback_store.nth_from_all(nth) + } +} diff --git a/libafl/src/corpus/inmemory.rs b/libafl/src/corpus/inmemory.rs deleted file mode 100644 index d4760386bf6..00000000000 --- a/libafl/src/corpus/inmemory.rs +++ /dev/null @@ -1,479 +0,0 @@ -//! 
In-memory corpus, keeps all test cases in memory at all times - -use alloc::vec::Vec; -use core::cell::{Ref, RefCell, RefMut}; - -use serde::{Deserialize, Serialize}; - -use super::HasTestcase; -use crate::{ - Error, - corpus::{Corpus, CorpusId, Testcase}, -}; - -/// Keep track of the stored `Testcase` and the siblings ids (insertion order) -#[cfg(not(feature = "corpus_btreemap"))] -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TestcaseStorageItem { - /// The stored testcase - pub testcase: RefCell>, - /// Previously inserted id - pub prev: Option, - /// Following inserted id - pub next: Option, -} - -/// The map type in which testcases are stored (disable the feature `corpus_btreemap` to use a `HashMap` instead of `BTreeMap`) -#[derive(Default, Serialize, Deserialize, Clone, Debug)] -pub struct TestcaseStorageMap { - #[cfg(not(feature = "corpus_btreemap"))] - /// A map of `CorpusId` to `TestcaseStorageItem` - pub map: hashbrown::HashMap>, - #[cfg(feature = "corpus_btreemap")] - /// A map of `CorpusId` to `Testcase`. 
- pub map: alloc::collections::btree_map::BTreeMap>>, - /// The keys in order (use `Vec::binary_search`) - pub keys: Vec, - /// First inserted id - #[cfg(not(feature = "corpus_btreemap"))] - first_id: Option, - /// Last inserted id - #[cfg(not(feature = "corpus_btreemap"))] - last_id: Option, -} - -impl TestcaseStorageMap { - /// Insert a key in the keys set - fn insert_key(&mut self, id: CorpusId) { - if let Err(idx) = self.keys.binary_search(&id) { - self.keys.insert(idx, id); - } - } - - /// Remove a key from the keys set - fn remove_key(&mut self, id: CorpusId) { - if let Ok(idx) = self.keys.binary_search(&id) { - self.keys.remove(idx); - } - } - - /// Replace a testcase given a `CorpusId` - #[cfg(not(feature = "corpus_btreemap"))] - pub fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Option> { - match self.map.get_mut(&id) { - Some(entry) => Some(entry.testcase.replace(testcase)), - _ => None, - } - } - - /// Replace a testcase given a `CorpusId` - #[cfg(feature = "corpus_btreemap")] - pub fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Option> { - self.map.get_mut(&id).map(|entry| entry.replace(testcase)) - } - - /// Remove a testcase given a [`CorpusId`] - #[cfg(not(feature = "corpus_btreemap"))] - pub fn remove(&mut self, id: CorpusId) -> Option>> { - match self.map.remove(&id) { - Some(item) => { - self.remove_key(id); - match item.prev { - Some(prev) => { - self.map.get_mut(&prev).unwrap().next = item.next; - } - _ => { - // first elem - self.first_id = item.next; - } - } - match item.next { - Some(next) => { - self.map.get_mut(&next).unwrap().prev = item.prev; - } - _ => { - // last elem - self.last_id = item.prev; - } - } - Some(item.testcase) - } - _ => None, - } - } - - /// Remove a testcase given a [`CorpusId`] - #[cfg(feature = "corpus_btreemap")] - pub fn remove(&mut self, id: CorpusId) -> Option>> { - self.remove_key(id); - self.map.remove(&id) - } - - /// Get a testcase given a `CorpusId` - #[cfg(not(feature = 
"corpus_btreemap"))] - #[must_use] - pub fn get(&self, id: CorpusId) -> Option<&RefCell>> { - self.map.get(&id).as_ref().map(|x| &x.testcase) - } - - /// Get a testcase given a `CorpusId` - #[cfg(feature = "corpus_btreemap")] - #[must_use] - pub fn get(&self, id: CorpusId) -> Option<&RefCell>> { - self.map.get(&id) - } - - /// Get the next id given a `CorpusId` (creation order) - #[cfg(not(feature = "corpus_btreemap"))] - #[must_use] - pub fn next(&self, id: CorpusId) -> Option { - match self.map.get(&id) { - Some(item) => item.next, - _ => None, - } - } - - /// Get the next id given a `CorpusId` (creation order) - #[cfg(feature = "corpus_btreemap")] - #[must_use] - pub fn next(&self, id: CorpusId) -> Option { - // TODO see if using self.keys is faster - let mut range = self - .map - .range((core::ops::Bound::Included(id), core::ops::Bound::Unbounded)); - if let Some((this_id, _)) = range.next() { - if id != *this_id { - return None; - } - } - if let Some((next_id, _)) = range.next() { - Some(*next_id) - } else { - None - } - } - - /// Get the previous id given a `CorpusId` (creation order) - #[cfg(not(feature = "corpus_btreemap"))] - #[must_use] - pub fn prev(&self, id: CorpusId) -> Option { - match self.map.get(&id) { - Some(item) => item.prev, - _ => None, - } - } - - /// Get the previous id given a `CorpusId` (creation order) - #[cfg(feature = "corpus_btreemap")] - #[must_use] - pub fn prev(&self, id: CorpusId) -> Option { - // TODO see if using self.keys is faster - let mut range = self - .map - .range((core::ops::Bound::Unbounded, core::ops::Bound::Included(id))); - if let Some((this_id, _)) = range.next_back() { - if id != *this_id { - return None; - } - } - if let Some((prev_id, _)) = range.next_back() { - Some(*prev_id) - } else { - None - } - } - - /// Get the first created id - #[cfg(not(feature = "corpus_btreemap"))] - #[must_use] - pub fn first(&self) -> Option { - self.first_id - } - - /// Get the first created id - #[cfg(feature = "corpus_btreemap")] 
- #[must_use] - pub fn first(&self) -> Option { - self.map.iter().next().map(|x| *x.0) - } - - /// Get the last created id - #[cfg(not(feature = "corpus_btreemap"))] - #[must_use] - pub fn last(&self) -> Option { - self.last_id - } - - /// Get the last created id - #[cfg(feature = "corpus_btreemap")] - #[must_use] - pub fn last(&self) -> Option { - self.map.iter().next_back().map(|x| *x.0) - } - - fn new() -> Self { - Self { - #[cfg(not(feature = "corpus_btreemap"))] - map: hashbrown::HashMap::default(), - #[cfg(feature = "corpus_btreemap")] - map: alloc::collections::BTreeMap::default(), - keys: Vec::default(), - #[cfg(not(feature = "corpus_btreemap"))] - first_id: None, - #[cfg(not(feature = "corpus_btreemap"))] - last_id: None, - } - } -} -/// Storage map for the testcases (used in `Corpus` implementations) with an incremental index -#[derive(Default, Serialize, Deserialize, Clone, Debug)] -pub struct TestcaseStorage { - /// The map in which enabled testcases are stored - pub enabled: TestcaseStorageMap, - /// The map in which disabled testcases are stored - pub disabled: TestcaseStorageMap, - /// The progressive id for both maps - progressive_id: usize, -} - -impl TestcaseStorage { - /// Insert a testcase assigning a `CorpusId` to it - pub fn insert(&mut self, testcase: RefCell>) -> CorpusId { - self.insert_inner(testcase, false) - } - - #[must_use] - /// Peek the next free corpus id - pub fn peek_free_id(&self) -> CorpusId { - CorpusId::from(self.progressive_id) - } - - /// Insert a testcase assigning a `CorpusId` to it - pub fn insert_disabled(&mut self, testcase: RefCell>) -> CorpusId { - self.insert_inner(testcase, true) - } - - /// Insert a testcase assigning a `CorpusId` to it - #[cfg(not(feature = "corpus_btreemap"))] - fn insert_inner(&mut self, testcase: RefCell>, is_disabled: bool) -> CorpusId { - let id = CorpusId::from(self.progressive_id); - self.progressive_id += 1; - let corpus = if is_disabled { - &mut self.disabled - } else { - &mut 
self.enabled - }; - let prev = if let Some(last_id) = corpus.last_id { - corpus.map.get_mut(&last_id).unwrap().next = Some(id); - Some(last_id) - } else { - None - }; - if corpus.first_id.is_none() { - corpus.first_id = Some(id); - } - corpus.last_id = Some(id); - corpus.insert_key(id); - corpus.map.insert( - id, - TestcaseStorageItem { - testcase, - prev, - next: None, - }, - ); - id - } - - /// Insert a testcase assigning a `CorpusId` to it - #[cfg(feature = "corpus_btreemap")] - fn insert_inner(&mut self, testcase: RefCell>, is_disabled: bool) -> CorpusId { - let id = CorpusId::from(self.progressive_id); - self.progressive_id += 1; - let corpus = if is_disabled { - &mut self.disabled - } else { - &mut self.enabled - }; - corpus.insert_key(id); - corpus.map.insert(id, testcase); - id - } - - /// Create new `TestcaseStorage` - #[must_use] - pub fn new() -> Self { - Self { - enabled: TestcaseStorageMap::new(), - disabled: TestcaseStorageMap::new(), - progressive_id: 0, - } - } -} - -/// A corpus handling all in memory. 
-#[derive(Default, Serialize, Deserialize, Clone, Debug)] -pub struct InMemoryCorpus { - storage: TestcaseStorage, - current: Option, -} - -impl Corpus for InMemoryCorpus { - /// Returns the number of all enabled entries - #[inline] - fn count(&self) -> usize { - self.storage.enabled.map.len() - } - - /// Returns the number of all disabled entries - fn count_disabled(&self) -> usize { - self.storage.disabled.map.len() - } - - /// Returns the number of elements including disabled entries - #[inline] - fn count_all(&self) -> usize { - self.storage - .enabled - .map - .len() - .saturating_add(self.storage.disabled.map.len()) - } - - /// Add an enabled testcase to the corpus and return its index - #[inline] - fn add(&mut self, testcase: Testcase) -> Result { - Ok(self.storage.insert(RefCell::new(testcase))) - } - - /// Add a disabled testcase to the corpus and return its index - #[inline] - fn add_disabled(&mut self, testcase: Testcase) -> Result { - Ok(self.storage.insert_disabled(RefCell::new(testcase))) - } - - /// Replaces the testcase at the given id - #[inline] - fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Result, Error> { - self.storage.enabled.replace(id, testcase).ok_or_else(|| { - Error::key_not_found(format!("Index {id} not found, could not replace.")) - }) - } - - /// Removes an entry from the corpus, returning it if it was present; considers both enabled and disabled testcases - #[inline] - fn remove(&mut self, id: CorpusId) -> Result, Error> { - let mut testcase = self.storage.enabled.remove(id); - if testcase.is_none() { - testcase = self.storage.disabled.remove(id); - } - testcase - .map(|x| x.take()) - .ok_or_else(|| Error::key_not_found(format!("Index {id} not found"))) - } - - /// Get by id; considers only enabled testcases - #[inline] - fn get(&self, id: CorpusId) -> Result<&RefCell>, Error> { - self.storage - .enabled - .get(id) - .ok_or_else(|| Error::key_not_found(format!("Index {id} not found"))) - } - /// Get by id; considers 
both enabled and disabled testcases - #[inline] - fn get_from_all(&self, id: CorpusId) -> Result<&RefCell>, Error> { - let mut testcase = self.storage.enabled.get(id); - if testcase.is_none() { - testcase = self.storage.disabled.get(id); - } - testcase.ok_or_else(|| Error::key_not_found(format!("Index {id} not found"))) - } - - /// Current testcase scheduled - #[inline] - fn current(&self) -> &Option { - &self.current - } - - /// Current testcase scheduled (mutable) - #[inline] - fn current_mut(&mut self) -> &mut Option { - &mut self.current - } - - #[inline] - fn next(&self, id: CorpusId) -> Option { - self.storage.enabled.next(id) - } - - /// Peek the next free corpus id - #[inline] - fn peek_free_id(&self) -> CorpusId { - self.storage.peek_free_id() - } - - #[inline] - fn prev(&self, id: CorpusId) -> Option { - self.storage.enabled.prev(id) - } - - #[inline] - fn first(&self) -> Option { - self.storage.enabled.first() - } - - #[inline] - fn last(&self) -> Option { - self.storage.enabled.last() - } - - /// Get the nth corpus id; considers only enabled testcases - #[inline] - fn nth(&self, nth: usize) -> CorpusId { - self.storage.enabled.keys[nth] - } - - /// Get the nth corpus id; considers both enabled and disabled testcases - #[inline] - fn nth_from_all(&self, nth: usize) -> CorpusId { - let enabled_count = self.count(); - if nth >= enabled_count { - return self.storage.disabled.keys[nth.saturating_sub(enabled_count)]; - } - self.storage.enabled.keys[nth] - } - - #[inline] - fn load_input_into(&self, _: &mut Testcase) -> Result<(), Error> { - // Inputs never get evicted, nothing to load here. 
- Ok(()) - } - - #[inline] - fn store_input_from(&self, _: &Testcase) -> Result<(), Error> { - Ok(()) - } -} - -impl HasTestcase for InMemoryCorpus { - fn testcase(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow()) - } - - fn testcase_mut(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow_mut()) - } -} - -impl InMemoryCorpus { - /// Creates a new [`InMemoryCorpus`], keeping all [`Testcase`]`s` in memory. - /// This is the simplest and fastest option, however test progress will be lost on exit or on OOM. - #[must_use] - pub fn new() -> Self { - Self { - storage: TestcaseStorage::new(), - current: None, - } - } -} diff --git a/libafl/src/corpus/inmemory_ondisk.rs b/libafl/src/corpus/inmemory_ondisk.rs deleted file mode 100644 index 70b48bfc081..00000000000 --- a/libafl/src/corpus/inmemory_ondisk.rs +++ /dev/null @@ -1,519 +0,0 @@ -//! The [`InMemoryOnDiskCorpus`] stores [`Testcase`]s to disk. -//! -//! Additionally, _all_ of them are kept in memory. -//! For a lower memory footprint, consider using [`crate::corpus::CachedOnDiskCorpus`] -//! which only stores a certain number of [`Testcase`]s and removes additional ones in a FIFO manner. - -use alloc::string::{String, ToString}; -use core::cell::{Ref, RefCell, RefMut}; -use std::{ - fs, - fs::{File, OpenOptions}, - io, - io::{Read, Seek, SeekFrom, Write}, - path::{Path, PathBuf}, -}; - -use fs2::FileExt; -#[cfg(feature = "gzip")] -use libafl_bolts::compress::GzipCompressor; -use serde::{Deserialize, Serialize}; - -use super::{ - HasTestcase, - ondisk::{OnDiskMetadata, OnDiskMetadataFormat}, -}; -use crate::{ - Error, HasMetadata, - corpus::{Corpus, CorpusId, InMemoryCorpus, Testcase}, - inputs::Input, -}; - -/// Creates the given `path` and returns an error if it fails. -/// If the create succeeds, it will return the file. 
-/// If the create fails for _any_ reason, including, but not limited to, a preexisting existing file of that name, -/// it will instead return the respective [`io::Error`]. -fn create_new>(path: P) -> Result { - OpenOptions::new() - .write(true) - .read(true) - .create_new(true) - .open(path) -} - -/// Tries to create the given `path` and returns `None` _only_ if the file already existed. -/// If the create succeeds, it will return the file. -/// If the create fails for some other reason, it will instead return the respective [`io::Error`]. -fn try_create_new>(path: P) -> Result, io::Error> { - match create_new(path) { - Ok(ret) => Ok(Some(ret)), - Err(err) if err.kind() == io::ErrorKind::AlreadyExists => Ok(None), - Err(err) => Err(err), - } -} - -/// A corpus able to store [`Testcase`]s to disk, while also keeping all of them in memory. -/// -/// Metadata is written to a `..metadata` file in the same folder by default. -#[derive(Default, Serialize, Deserialize, Clone, Debug)] -pub struct InMemoryOnDiskCorpus { - inner: InMemoryCorpus, - dir_path: PathBuf, - meta_format: Option, - prefix: Option, - locking: bool, -} - -impl Corpus for InMemoryOnDiskCorpus -where - I: Input, -{ - /// Returns the number of all enabled entries - #[inline] - fn count(&self) -> usize { - self.inner.count() - } - - /// Returns the number of all disabled entries - fn count_disabled(&self) -> usize { - self.inner.count_disabled() - } - - /// Returns the number of elements including disabled entries - #[inline] - fn count_all(&self) -> usize { - self.inner.count_all() - } - - /// Add an enabled testcase to the corpus and return its index - #[inline] - fn add(&mut self, testcase: Testcase) -> Result { - let id = self.inner.add(testcase)?; - let testcase = &mut self.get(id).unwrap().borrow_mut(); - self.save_testcase(testcase, Some(id))?; - *testcase.input_mut() = None; - Ok(id) - } - - /// Add a disabled testcase to the corpus and return its index - #[inline] - fn add_disabled(&mut self, 
testcase: Testcase) -> Result { - let id = self.inner.add_disabled(testcase)?; - let testcase = &mut self.get_from_all(id).unwrap().borrow_mut(); - self.save_testcase(testcase, Some(id))?; - *testcase.input_mut() = None; - Ok(id) - } - - /// Replaces the testcase at the given idx - #[inline] - fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Result, Error> { - let entry = self.inner.replace(id, testcase)?; - self.remove_testcase(&entry)?; - let testcase = &mut self.get(id).unwrap().borrow_mut(); - self.save_testcase(testcase, Some(id))?; - *testcase.input_mut() = None; - Ok(entry) - } - - /// Removes an entry from the corpus, returning it if it was present; considers both enabled and disabled corpus - #[inline] - fn remove(&mut self, id: CorpusId) -> Result, Error> { - let entry = self.inner.remove(id)?; - self.remove_testcase(&entry)?; - Ok(entry) - } - - /// Get by id; considers only enabled testcases - #[inline] - fn get(&self, id: CorpusId) -> Result<&RefCell>, Error> { - self.inner.get(id) - } - - /// Get by id; considers both enabled and disabled testcases - #[inline] - fn get_from_all(&self, id: CorpusId) -> Result<&RefCell>, Error> { - self.inner.get_from_all(id) - } - - /// Current testcase scheduled - #[inline] - fn current(&self) -> &Option { - self.inner.current() - } - - /// Current testcase scheduled (mutable) - #[inline] - fn current_mut(&mut self) -> &mut Option { - self.inner.current_mut() - } - - #[inline] - fn next(&self, id: CorpusId) -> Option { - self.inner.next(id) - } - - /// Peek the next free corpus id - #[inline] - fn peek_free_id(&self) -> CorpusId { - self.inner.peek_free_id() - } - - #[inline] - fn prev(&self, id: CorpusId) -> Option { - self.inner.prev(id) - } - - #[inline] - fn first(&self) -> Option { - self.inner.first() - } - - #[inline] - fn last(&self) -> Option { - self.inner.last() - } - - /// Get the nth corpus id; considers only enabled testcases - #[inline] - fn nth(&self, nth: usize) -> CorpusId { - 
self.inner.nth(nth) - } - /// Get the nth corpus id; considers both enabled and disabled testcases - #[inline] - fn nth_from_all(&self, nth: usize) -> CorpusId { - self.inner.nth_from_all(nth) - } - - fn load_input_into(&self, testcase: &mut Testcase) -> Result<(), Error> { - if testcase.input_mut().is_none() { - let Some(file_path) = testcase.file_path().as_ref() else { - return Err(Error::illegal_argument( - "No file path set for testcase. Could not load inputs.", - )); - }; - let input = I::from_file(file_path)?; - testcase.set_input(input); - } - Ok(()) - } - - fn store_input_from(&self, testcase: &Testcase) -> Result<(), Error> { - // Store the input to disk - let Some(file_path) = testcase.file_path() else { - return Err(Error::illegal_argument( - "No file path set for testcase. Could not store input to disk.", - )); - }; - let Some(input) = testcase.input() else { - return Err(Error::illegal_argument( - "No input available for testcase. Could not store anything.", - )); - }; - input.to_file(file_path) - } -} - -impl HasTestcase for InMemoryOnDiskCorpus -where - I: Input, -{ - fn testcase(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow()) - } - - fn testcase_mut(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow_mut()) - } -} - -impl InMemoryOnDiskCorpus { - /// Creates an [`InMemoryOnDiskCorpus`]. - /// - /// This corpus stores all testcases to disk, and keeps all of them in memory, as well. - /// - /// By default, it stores metadata for each [`Testcase`] as prettified json. - /// Metadata will be written to a file named `..metadata` - /// The metadata may include objective reason, specific information for a fuzz job, and more. - /// - /// If you don't want metadata, use [`InMemoryOnDiskCorpus::no_meta`]. - /// To pick a different metadata format, use [`InMemoryOnDiskCorpus::with_meta_format`]. - /// - /// Will error, if [`fs::create_dir_all()`] failed for `dir_path`. - pub fn new

(dir_path: P) -> Result - where - P: AsRef, - { - Self::_new( - dir_path.as_ref(), - Some(OnDiskMetadataFormat::JsonPretty), - None, - true, - ) - } - - /// Creates the [`InMemoryOnDiskCorpus`] specifying the format in which `Metadata` will be saved to disk. - /// - /// Will error, if [`fs::create_dir_all()`] failed for `dir_path`. - pub fn with_meta_format

( - dir_path: P, - meta_format: Option, - ) -> Result - where - P: AsRef, - { - Self::_new(dir_path.as_ref(), meta_format, None, true) - } - - /// Creates the [`InMemoryOnDiskCorpus`] specifying the format in which `Metadata` will be saved to disk - /// and the prefix for the filenames. - /// - /// Will error, if [`fs::create_dir_all()`] failed for `dir_path`. - pub fn with_meta_format_and_prefix

( - dir_path: P, - meta_format: Option, - prefix: Option, - locking: bool, - ) -> Result - where - P: AsRef, - { - Self::_new(dir_path.as_ref(), meta_format, prefix, locking) - } - - /// Creates an [`InMemoryOnDiskCorpus`] that will not store .metadata files - /// - /// Will error, if [`fs::create_dir_all()`] failed for `dir_path`. - pub fn no_meta

(dir_path: P) -> Result - where - P: AsRef, - { - Self::_new(dir_path.as_ref(), None, None, true) - } - - /// Private fn to crate a new corpus at the given (non-generic) path with the given optional `meta_format` - fn _new( - dir_path: &Path, - meta_format: Option, - prefix: Option, - locking: bool, - ) -> Result { - match fs::create_dir_all(dir_path) { - Ok(()) => {} - Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {} - Err(e) => return Err(e.into()), - } - Ok(InMemoryOnDiskCorpus { - inner: InMemoryCorpus::new(), - dir_path: dir_path.into(), - meta_format, - prefix, - locking, - }) - } - - /// Sets the filename for a [`Testcase`]. - /// If an error gets returned from the corpus (i.e., file exists), we'll have to retry with a different filename. - /// Renaming testcases will most likely cause duplicate testcases to not be handled correctly - /// if testcases with the same input are not given the same filename. - /// Only rename when you know what you are doing. - #[inline] - pub fn rename_testcase( - &self, - testcase: &mut Testcase, - filename: String, - id: Option, - ) -> Result<(), Error> - where - I: Input, - { - if testcase.filename().is_some() { - // We are renaming! 
- - let old_filename = testcase.filename_mut().take().unwrap(); - let new_filename = filename; - - // Do operations below when new filename is specified - if old_filename == new_filename { - *testcase.filename_mut() = Some(old_filename); - return Ok(()); - } - - let new_file_path = self.dir_path.join(&new_filename); - self.remove_testcase(testcase)?; - *testcase.filename_mut() = Some(new_filename); - self.save_testcase(testcase, id)?; - *testcase.file_path_mut() = Some(new_file_path); - - Ok(()) - } else { - Err(Error::illegal_argument( - "Cannot rename testcase without name!", - )) - } - } - - fn save_testcase(&self, testcase: &mut Testcase, id: Option) -> Result<(), Error> - where - I: Input, - { - let file_name = testcase.filename_mut().take().unwrap_or_else(|| { - // TODO walk entry metadata to ask for pieces of filename (e.g. :havoc in AFL) - testcase.input().as_ref().unwrap().generate_name(id) - }); - - let mut ctr = 1; - if self.locking { - let lockfile_name = format!(".{file_name}"); - let lockfile_path = self.dir_path.join(lockfile_name); - - let mut lockfile = try_create_new(&lockfile_path)?.unwrap_or( - OpenOptions::new() - .write(true) - .read(true) - .open(&lockfile_path)?, - ); - lockfile.lock_exclusive()?; - - let mut old_ctr = String::new(); - lockfile.read_to_string(&mut old_ctr)?; - if !old_ctr.is_empty() { - ctr = old_ctr.trim().parse::()? 
+ 1; - } - - lockfile.seek(SeekFrom::Start(0))?; - lockfile.write_all(ctr.to_string().as_bytes())?; - } - - if testcase.file_path().is_none() { - *testcase.file_path_mut() = Some(self.dir_path.join(&file_name)); - } - *testcase.filename_mut() = Some(file_name); - - if self.meta_format.is_some() { - let metafile_name = if self.locking { - format!( - ".{}_{}.metadata", - testcase.filename().as_ref().unwrap(), - ctr - ) - } else { - format!(".{}.metadata", testcase.filename().as_ref().unwrap()) - }; - let metafile_path = self.dir_path.join(&metafile_name); - let mut tmpfile_path = metafile_path.clone(); - tmpfile_path.set_file_name(format!(".{metafile_name}.tmp",)); - - let ondisk_meta = OnDiskMetadata { - metadata: testcase.metadata_map(), - exec_time: testcase.exec_time(), - executions: testcase.executions(), - }; - - let mut tmpfile = File::create(&tmpfile_path)?; - - let json_error = - |err| Error::serialize(format!("Failed to json-ify metadata: {err:?}")); - - let serialized = match self.meta_format.as_ref().unwrap() { - OnDiskMetadataFormat::Postcard => postcard::to_allocvec(&ondisk_meta)?, - OnDiskMetadataFormat::Json => { - serde_json::to_vec(&ondisk_meta).map_err(json_error)? - } - OnDiskMetadataFormat::JsonPretty => { - serde_json::to_vec_pretty(&ondisk_meta).map_err(json_error)? - } - #[cfg(feature = "gzip")] - OnDiskMetadataFormat::JsonGzip => GzipCompressor::new() - .compress(&serde_json::to_vec_pretty(&ondisk_meta).map_err(json_error)?), - }; - tmpfile.write_all(&serialized)?; - fs::rename(&tmpfile_path, &metafile_path)?; - *testcase.metadata_path_mut() = Some(metafile_path); - } - - // Only try to write the data if the counter is 1. - // Otherwise we already have a file with this name, and - // we can assume the data has already been written. 
- if ctr == 1 { - if let Err(err) = self.store_input_from(testcase) { - if self.locking { - return Err(err); - } - log::error!( - "An error occurred when trying to write a testcase without locking: {err}" - ); - } - } - Ok(()) - } - - fn remove_testcase(&self, testcase: &Testcase) -> Result<(), Error> { - if let Some(filename) = testcase.filename() { - let mut ctr = String::new(); - if self.locking { - let lockfile_path = self.dir_path.join(format!(".{filename}")); - let mut lockfile = OpenOptions::new() - .write(true) - .read(true) - .open(&lockfile_path)?; - - lockfile.lock_exclusive()?; - lockfile.read_to_string(&mut ctr)?; - ctr = ctr.trim().to_string(); - - if ctr == "1" { - FileExt::unlock(&lockfile)?; - drop(fs::remove_file(lockfile_path)); - } else { - lockfile.seek(SeekFrom::Start(0))?; - lockfile.write_all(&(ctr.parse::()? - 1).to_le_bytes())?; - return Ok(()); - } - } - - fs::remove_file(self.dir_path.join(filename))?; - if self.meta_format.is_some() { - if self.locking { - fs::remove_file(self.dir_path.join(format!(".{filename}_{ctr}.metadata")))?; - } else { - fs::remove_file(self.dir_path.join(format!(".{filename}.metadata")))?; - } - } - } - Ok(()) - } - - /// Path to the corpus directory associated with this corpus - #[must_use] - pub fn dir_path(&self) -> &PathBuf { - &self.dir_path - } -} - -#[cfg(test)] -mod tests { - #[cfg(not(miri))] - use std::{env, fs, io::Write}; - - #[cfg(not(miri))] - use super::{create_new, try_create_new}; - - #[test] - #[cfg(not(miri))] - fn test() { - let tmp = env::temp_dir(); - let path = tmp.join("testfile.tmp"); - _ = fs::remove_file(&path); - let mut f = create_new(&path).unwrap(); - f.write_all(&[0; 1]).unwrap(); - - match try_create_new(&path) { - Ok(None) => (), - Ok(_) => panic!("File {path:?} did not exist even though it should have?"), - Err(e) => panic!("An unexpected error occurred: {e}"), - } - drop(f); - fs::remove_file(path).unwrap(); - } -} diff --git a/libafl/src/corpus/mod.rs 
b/libafl/src/corpus/mod.rs index d7670d1a101..59668d7cd7b 100644 --- a/libafl/src/corpus/mod.rs +++ b/libafl/src/corpus/mod.rs @@ -1,6 +1,7 @@ //! Corpuses contain the testcases, either in memory, on disk, or somewhere else. use core::{cell::RefCell, fmt, marker::PhantomData}; +use std::{rc::Rc, string::String}; use serde::{Deserialize, Serialize}; @@ -9,60 +10,67 @@ use crate::Error; pub mod testcase; pub use testcase::{HasTestcase, SchedulerTestcaseMetadata, Testcase}; -pub mod inmemory; -pub use inmemory::InMemoryCorpus; +pub mod single; +pub use single::SingleCorpus; -#[cfg(feature = "std")] -pub mod inmemory_ondisk; -#[cfg(feature = "std")] -pub use inmemory_ondisk::InMemoryOnDiskCorpus; - -#[cfg(feature = "std")] -pub mod ondisk; -#[cfg(feature = "std")] -pub use ondisk::OnDiskCorpus; - -#[cfg(feature = "std")] -pub mod cached; -#[cfg(feature = "std")] -pub use cached::CachedOnDiskCorpus; +pub mod combined; +pub use combined::{CombinedCorpus, FifoCache, IdentityCache}; #[cfg(all(feature = "cmin", unix))] pub mod minimizer; - -pub mod nop; #[cfg(all(feature = "cmin", unix))] pub use minimizer::*; + +pub mod nop; pub use nop::NopCorpus; +pub mod store; +pub use store::{InMemoryStore, OnDiskStore, maps}; + +#[cfg(not(feature = "corpus_btreemap"))] +pub type InMemoryCorpusMap = maps::HashCorpusMap>; + +#[cfg(feature = "corpus_btreemap")] +pub type InMemoryCorpusMap = maps::BtreeCorpusMap>; + +pub type InMemoryCorpus = SingleCorpus>>; + +#[cfg(feature = "std")] +pub type OnDiskCorpus = SingleCorpus>>; + +pub type InMemoryOnDiskCorpus = CombinedCorpus< + IdentityCache, + InMemoryStore>, + OnDiskStore>, + I, +>; + +pub type CachedOnDiskCorpus = CombinedCorpus< + FifoCache>, OnDiskStore>, I>, + InMemoryStore>, + OnDiskStore>, + I, +>; + /// An abstraction for the index that identify a testcase in the corpus #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)] #[repr(transparent)] pub struct CorpusId(pub usize); -impl 
fmt::Display for CorpusId { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.0) - } -} - -impl From for CorpusId { - fn from(id: usize) -> Self { - Self(id) - } -} - -impl From for CorpusId { - fn from(id: u64) -> Self { - Self(id as usize) - } +#[derive(Default, Serialize, Deserialize, Clone, Debug)] +pub struct CorpusCounter { + /// A fresh, progressive ID + /// It stores the next available ID. + current_id: usize, } -impl From for usize { - /// Not that the `CorpusId` is not necessarily stable in the corpus (if we remove [`Testcase`]s, for example). - fn from(id: CorpusId) -> Self { - id.0 - } +/// [`Iterator`] over the ids of a [`Corpus`] +#[derive(Debug)] +pub struct CorpusIdIterator<'a, C, I> { + corpus: &'a C, + cur: Option, + cur_back: Option, + phantom: PhantomData, } /// Utility macro to call `Corpus::random_id`; fetches only enabled [`Testcase`]`s` @@ -128,13 +136,13 @@ pub trait Corpus: Sized { fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Result, Error>; /// Removes an entry from the corpus, returning it if it was present; considers both enabled and disabled testcases - fn remove(&mut self, id: CorpusId) -> Result, Error>; + fn remove(&mut self, id: CorpusId) -> Result>>, Error>; /// Get by id; considers only enabled testcases fn get(&self, id: CorpusId) -> Result<&RefCell>, Error>; /// Get by id; considers both enabled and disabled testcases - fn get_from_all(&self, id: CorpusId) -> Result<&RefCell>, Error>; + fn get_from_all(&self, id: CorpusId) -> Result>>, Error>; /// Current testcase scheduled fn current(&self) -> &Option; @@ -145,8 +153,8 @@ pub trait Corpus: Sized { /// Get the next corpus id fn next(&self, id: CorpusId) -> Option; - /// Peek the next free corpus id - fn peek_free_id(&self) -> CorpusId; + // /// Peek the next free corpus id + // fn peek_free_id(&self) -> CorpusId; /// Get the prev corpus id fn prev(&self, id: CorpusId) -> Option; @@ -176,23 +184,6 @@ pub trait Corpus: Sized { /// Get 
the nth corpus id; considers both enabled and disabled testcases fn nth_from_all(&self, nth: usize) -> CorpusId; - - /// Method to load the input for this [`Testcase`] from persistent storage, - /// if necessary, and if was not already loaded (`== Some(input)`). - /// After this call, `testcase.input()` must always return `Some(input)`. - fn load_input_into(&self, testcase: &mut Testcase) -> Result<(), Error>; - - /// Method to store the input of this `Testcase` to persistent storage, if necessary. - fn store_input_from(&self, testcase: &Testcase) -> Result<(), Error>; - - /// Loads the `Input` for a given [`CorpusId`] from the [`Corpus`], and returns the clone. - fn cloned_input_for_id(&self, id: CorpusId) -> Result - where - I: Clone, - { - let mut testcase = self.get(id)?.borrow_mut(); - Ok(testcase.load_input(self)?.clone()) - } } /// Trait for types which track the current corpus index @@ -207,13 +198,29 @@ pub trait HasCurrentCorpusId { fn current_corpus_id(&self) -> Result, Error>; } -/// [`Iterator`] over the ids of a [`Corpus`] -#[derive(Debug)] -pub struct CorpusIdIterator<'a, C, I> { - corpus: &'a C, - cur: Option, - cur_back: Option, - phantom: PhantomData, +impl fmt::Display for CorpusId { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl From for CorpusId { + fn from(id: usize) -> Self { + Self(id) + } +} + +impl From for CorpusId { + fn from(id: u64) -> Self { + Self(id as usize) + } +} + +impl From for usize { + /// Not that the `CorpusId` is not necessarily stable in the corpus (if we remove [`Testcase`]s, for example). 
+ fn from(id: CorpusId) -> Self { + id.0 + } } impl Iterator for CorpusIdIterator<'_, C, I> @@ -245,3 +252,11 @@ where } } } + +impl CorpusCounter { + fn new_id(&mut self) -> CorpusId { + let old = self.current_id; + self.current_id.saturating_add(1); + CorpusId(old) + } +} diff --git a/libafl/src/corpus/nop.rs b/libafl/src/corpus/nop.rs index a6e0f902e4f..768094d804e 100644 --- a/libafl/src/corpus/nop.rs +++ b/libafl/src/corpus/nop.rs @@ -1,5 +1,6 @@ //! The null corpus does not store any [`Testcase`]s. use core::{cell::RefCell, marker::PhantomData}; +use std::rc::Rc; use serde::{Deserialize, Serialize}; @@ -53,19 +54,19 @@ impl Corpus for NopCorpus { /// Removes an entry from the corpus, returning it if it was present; considers both enabled and disabled testcases #[inline] - fn remove(&mut self, _id: CorpusId) -> Result, Error> { + fn remove(&mut self, _id: CorpusId) -> Result>>, Error> { Err(Error::unsupported("Unsupported by NopCorpus")) } /// Get by id; considers only enabled testcases #[inline] - fn get(&self, _id: CorpusId) -> Result<&RefCell>, Error> { + fn get(&self, _id: CorpusId) -> Result>>, Error> { Err(Error::unsupported("Unsupported by NopCorpus")) } /// Get by id; considers both enabled and disabled testcases #[inline] - fn get_from_all(&self, _id: CorpusId) -> Result<&RefCell>, Error> { + fn get_from_all(&self, _id: CorpusId) -> Result>>, Error> { Err(Error::unsupported("Unsupported by NopCorpus")) } @@ -86,12 +87,6 @@ impl Corpus for NopCorpus { None } - /// Peek the next free corpus id - #[inline] - fn peek_free_id(&self) -> CorpusId { - CorpusId::from(0_usize) - } - #[inline] fn prev(&self, _id: CorpusId) -> Option { None @@ -118,16 +113,6 @@ impl Corpus for NopCorpus { fn nth_from_all(&self, _nth: usize) -> CorpusId { CorpusId::from(0_usize) } - - #[inline] - fn load_input_into(&self, _testcase: &mut Testcase) -> Result<(), Error> { - Err(Error::unsupported("Unsupported by NopCorpus")) - } - - #[inline] - fn store_input_from(&self, _testcase: 
&Testcase) -> Result<(), Error> { - Err(Error::unsupported("Unsupported by NopCorpus")) - } } impl NopCorpus { diff --git a/libafl/src/corpus/ondisk.rs b/libafl/src/corpus/ondisk.rs deleted file mode 100644 index 72179a65313..00000000000 --- a/libafl/src/corpus/ondisk.rs +++ /dev/null @@ -1,279 +0,0 @@ -//! The [`OnDiskCorpus`] stores all [`Testcase`]s to disk. -//! -//! It _never_ keeps any of them in memory. -//! This is a good solution for solutions that are never reused, or for *very* memory-constraint environments. -//! For any other occasions, consider using [`CachedOnDiskCorpus`] -//! which stores a certain number of [`Testcase`]s in memory and removes additional ones in a FIFO manner. - -use alloc::string::String; -use core::{ - cell::{Ref, RefCell, RefMut}, - time::Duration, -}; -use std::path::{Path, PathBuf}; - -use libafl_bolts::serdeany::SerdeAnyMap; -use serde::{Deserialize, Serialize}; - -use crate::{ - Error, - corpus::{CachedOnDiskCorpus, Corpus, CorpusId, HasTestcase, Testcase}, - inputs::Input, -}; - -/// Options for the the format of the on-disk metadata -#[derive(Default, Debug, Clone, Serialize, Deserialize)] -pub enum OnDiskMetadataFormat { - /// A binary-encoded postcard - Postcard, - /// JSON - Json, - /// JSON formatted for readability - #[default] - JsonPretty, - /// The same as [`OnDiskMetadataFormat::JsonPretty`], but compressed - #[cfg(feature = "gzip")] - JsonGzip, -} - -/// The [`Testcase`] metadata that'll be stored to disk -#[derive(Debug, Serialize)] -pub struct OnDiskMetadata<'a> { - /// The dynamic metadata [`SerdeAnyMap`] stored to disk - pub metadata: &'a SerdeAnyMap, - /// The exec time for this [`Testcase`] - pub exec_time: &'a Option, - /// The executions of this [`Testcase`] - pub executions: &'a u64, -} - -/// A corpus able to store [`Testcase`]s to disk, and load them from disk, when they are being used. -/// -/// Metadata is written to a `..metadata` file in the same folder by default. 
-#[derive(Default, Serialize, Deserialize, Clone, Debug)] -pub struct OnDiskCorpus { - /// The root directory backing this corpus - dir_path: PathBuf, - /// We wrapp a cached corpus and set its size to 1. - inner: CachedOnDiskCorpus, -} - -impl Corpus for OnDiskCorpus -where - I: Input, -{ - /// Returns the number of all enabled entries - #[inline] - fn count(&self) -> usize { - self.inner.count() - } - - /// Returns the number of all disabled entries - fn count_disabled(&self) -> usize { - self.inner.count_disabled() - } - - /// Returns the number of all entries - #[inline] - fn count_all(&self) -> usize { - self.inner.count_all() - } - - /// Add an enabled testcase to the corpus and return its index - #[inline] - fn add(&mut self, testcase: Testcase) -> Result { - self.inner.add(testcase) - } - - /// Add a disabled testcase to the corpus and return its index - #[inline] - fn add_disabled(&mut self, testcase: Testcase) -> Result { - self.inner.add_disabled(testcase) - } - - /// Replaces the testcase at the given idx - #[inline] - fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Result, Error> { - self.inner.replace(id, testcase) - } - - /// Removes an entry from the corpus, returning it if it was present; considers both enabled and disabled testcases - #[inline] - fn remove(&mut self, id: CorpusId) -> Result, Error> { - self.inner.remove(id) - } - - /// Get by id; will check the disabled corpus if not available in the enabled - #[inline] - fn get(&self, id: CorpusId) -> Result<&RefCell>, Error> { - self.inner.get(id) - } - - /// Get by id; considers both enabled and disabled testcases - #[inline] - fn get_from_all(&self, id: CorpusId) -> Result<&RefCell>, Error> { - self.inner.get_from_all(id) - } - - /// Current testcase scheduled - #[inline] - fn current(&self) -> &Option { - self.inner.current() - } - - /// Current testcase scheduled (mutable) - #[inline] - fn current_mut(&mut self) -> &mut Option { - self.inner.current_mut() - } - - #[inline] - fn 
next(&self, id: CorpusId) -> Option { - self.inner.next(id) - } - - /// Peek the next free corpus id - #[inline] - fn peek_free_id(&self) -> CorpusId { - self.inner.peek_free_id() - } - - #[inline] - fn prev(&self, id: CorpusId) -> Option { - self.inner.prev(id) - } - - #[inline] - fn first(&self) -> Option { - self.inner.first() - } - - #[inline] - fn last(&self) -> Option { - self.inner.last() - } - - /// Get the nth corpus id; considers only enabled testcases - #[inline] - fn nth(&self, nth: usize) -> CorpusId { - self.inner.nth(nth) - } - /// Get the nth corpus id; considers both enabled and disabled testcases - #[inline] - fn nth_from_all(&self, nth: usize) -> CorpusId { - self.inner.nth_from_all(nth) - } - - #[inline] - fn load_input_into(&self, testcase: &mut Testcase) -> Result<(), Error> { - self.inner.load_input_into(testcase) - } - - #[inline] - fn store_input_from(&self, testcase: &Testcase) -> Result<(), Error> { - self.inner.store_input_from(testcase) - } -} - -impl HasTestcase for OnDiskCorpus -where - I: Input, -{ - fn testcase(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow()) - } - - fn testcase_mut(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow_mut()) - } -} - -impl OnDiskCorpus { - /// Creates an [`OnDiskCorpus`]. - /// - /// This corpus stores all testcases to disk. - /// - /// By default, it stores metadata for each [`Testcase`] as prettified json. - /// Metadata will be written to a file named `..metadata` - /// The metadata may include objective reason, specific information for a fuzz job, and more. - /// - /// To pick a different metadata format, use [`OnDiskCorpus::with_meta_format`]. - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn new

(dir_path: P) -> Result - where - P: AsRef, - { - Self::with_meta_format_and_prefix( - dir_path.as_ref(), - Some(OnDiskMetadataFormat::JsonPretty), - None, - true, - ) - } - - /// Creates the [`OnDiskCorpus`] with a filename prefix. - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn with_prefix

(dir_path: P, prefix: Option) -> Result - where - P: AsRef, - { - Self::with_meta_format_and_prefix( - dir_path.as_ref(), - Some(OnDiskMetadataFormat::JsonPretty), - prefix, - true, - ) - } - - /// Creates the [`OnDiskCorpus`] specifying the format in which `Metadata` will be saved to disk. - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn with_meta_format

( - dir_path: P, - meta_format: OnDiskMetadataFormat, - ) -> Result - where - P: AsRef, - { - Self::with_meta_format_and_prefix(dir_path.as_ref(), Some(meta_format), None, true) - } - - /// Creates an [`OnDiskCorpus`] that will not store .metadata files - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn no_meta

(dir_path: P) -> Result - where - P: AsRef, - { - Self::with_meta_format_and_prefix(dir_path.as_ref(), None, None, true) - } - - /// Creates a new corpus at the given (non-generic) path with the given optional `meta_format` - /// and `prefix`. - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn with_meta_format_and_prefix( - dir_path: &Path, - meta_format: Option, - prefix: Option, - locking: bool, - ) -> Result { - Ok(OnDiskCorpus { - dir_path: dir_path.into(), - inner: CachedOnDiskCorpus::with_meta_format_and_prefix( - dir_path, - 1, - meta_format, - prefix, - locking, - )?, - }) - } - - /// Path to the corpus directory associated with this corpus - pub fn dir_path(&self) -> &PathBuf { - &self.dir_path - } -} diff --git a/libafl/src/corpus/single.rs b/libafl/src/corpus/single.rs new file mode 100644 index 00000000000..1a4ee44917f --- /dev/null +++ b/libafl/src/corpus/single.rs @@ -0,0 +1,114 @@ +use core::{cell::RefCell, marker::PhantomData}; +use std::{rc::Rc, vec::Vec}; + +use libafl_bolts::Error; +use serde::{Deserialize, Serialize}; + +use super::{Corpus, CorpusCounter, CorpusId, Testcase, store::Store}; + +/// You average corpus. +/// It has one backing store, used to store / retrieve testcases. 
+#[derive(Default, Serialize, Deserialize, Clone, Debug)] +pub struct SingleCorpus { + /// The backing testcase store + store: S, + /// The corpus ID counter + counter: CorpusCounter, + /// The keys in order (use `Vec::binary_search`) + keys: Vec, + /// The current ID + current: Option, + phantom: PhantomData, +} + +impl SingleCorpus +where + S: Default, +{ + pub fn new() -> Self { + Self { + store: S::default(), + counter: CorpusCounter::default(), + keys: Vec::new(), + current: None, + phantom: PhantomData, + } + } +} + +impl Corpus for SingleCorpus +where + S: Store, +{ + fn count(&self) -> usize { + self.store.count() + } + + fn count_disabled(&self) -> usize { + self.store.count_disabled() + } + + fn count_all(&self) -> usize { + self.store.count_all() + } + + fn add(&mut self, testcase: Testcase) -> Result { + let new_id = self.counter.new_id(); + self.store.add(new_id, testcase)?; + Ok(new_id) + } + + fn add_disabled(&mut self, testcase: Testcase) -> Result { + let new_id = self.counter.new_id(); + self.store.add_disabled(new_id, testcase)?; + Ok(new_id) + } + + fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Result, Error> { + self.store.replace(id, testcase) + } + + fn remove(&mut self, id: CorpusId) -> Result>>, Error> { + self.store.remove(id) + } + + fn get(&self, id: CorpusId) -> Result>>, Error> { + self.store.get(id) + } + + fn get_from_all(&self, id: CorpusId) -> Result>>, Error> { + self.store.get_from_all(id) + } + + fn current(&self) -> &Option { + &self.current + } + + fn current_mut(&mut self) -> &mut Option { + &mut self.current + } + + fn next(&self, id: CorpusId) -> Option { + self.store.next(id) + } + + fn prev(&self, id: CorpusId) -> Option { + self.store.prev(id) + } + + fn first(&self) -> Option { + self.store.first() + } + + fn last(&self) -> Option { + self.store.last() + } + + fn nth(&self, nth: usize) -> CorpusId { + self.store.nth(nth) + } + + fn nth_from_all(&self, nth: usize) -> CorpusId { + self.store.nth_from_all(nth) 
+ } +} diff --git a/libafl/src/corpus/store/inmemory.rs b/libafl/src/corpus/store/inmemory.rs new file mode 100644 index 00000000000..cc6af6ac76f --- /dev/null +++ b/libafl/src/corpus/store/inmemory.rs @@ -0,0 +1,104 @@ +use core::{cell::RefCell, marker::PhantomData}; +use std::rc::Rc; + +use libafl_bolts::Error; +use serde::{Deserialize, Serialize}; + +use super::{InMemoryCorpusMap, Store}; +use crate::corpus::{CorpusId, Testcase}; + +/// The map type in which testcases are stored (disable the feature `corpus_btreemap` to use a `HashMap` instead of `BTreeMap`) +#[derive(Default, Serialize, Deserialize, Clone, Debug)] +pub struct InMemoryStore { + enabled_map: M, + disabled_map: M, + phantom: PhantomData, +} + +impl Store for InMemoryStore +where + M: InMemoryCorpusMap>, +{ + fn count(&self) -> usize { + self.enabled_map.count() + } + + fn count_disabled(&self) -> usize { + self.disabled_map.count() + } + + fn is_empty(&self) -> bool { + self.enabled_map.is_empty() + } + + fn add(&mut self, id: CorpusId, testcase: Testcase) -> Result<(), Error> { + Ok(self.enabled_map.add(id, testcase)) + } + + fn add_disabled(&mut self, id: CorpusId, testcase: Testcase) -> Result<(), Error> { + Ok(self.disabled_map.add(id, testcase)) + } + + fn replace(&mut self, id: CorpusId, new_testcase: Testcase) -> Result, Error> { + self.enabled_map.replace(id, new_testcase).ok_or_else(|| { + Error::key_not_found(format!("Index {id} not found, could not replace.")) + }) + } + + fn remove(&mut self, id: CorpusId) -> Result>>, Error> { + let mut testcase = self.enabled_map.remove(id); + + if testcase.is_none() { + testcase = self.disabled_map.remove(id); + } + + testcase + .map(|x| x.clone()) + .ok_or_else(|| Error::key_not_found(format!("Index {id} not found"))) + } + + fn get(&self, id: CorpusId) -> Result>>, Error> { + self.enabled_map + .get(id) + .ok_or_else(|| Error::key_not_found(format!("Index {id} not found"))) + } + + fn get_from_all(&self, id: CorpusId) -> Result>>, Error> { + let 
mut testcase = self.enabled_map.get(id); + + if testcase.is_none() { + testcase = self.disabled_map.get(id); + } + + testcase.ok_or_else(|| Error::key_not_found(format!("Index {id} not found"))) + } + + fn prev(&self, id: CorpusId) -> Option { + self.enabled_map.prev(id) + } + + fn next(&self, id: CorpusId) -> Option { + self.enabled_map.next(id) + } + + fn first(&self) -> Option { + self.enabled_map.first() + } + + fn last(&self) -> Option { + self.enabled_map.last() + } + + fn nth(&self, nth: usize) -> CorpusId { + self.enabled_map.nth(nth) + } + + fn nth_from_all(&self, nth: usize) -> CorpusId { + let nb_enabled = self.enabled_map.count(); + if nth >= nb_enabled { + self.disabled_map.nth(nth.saturating_sub(nb_enabled)) + } else { + self.enabled_map.nth(nth) + } + } +} diff --git a/libafl/src/corpus/store/maps.rs b/libafl/src/corpus/store/maps.rs new file mode 100644 index 00000000000..86b2213d9a7 --- /dev/null +++ b/libafl/src/corpus/store/maps.rs @@ -0,0 +1,276 @@ +//! Multiple map implementations for the in-memory store. + +use core::cell::RefCell; +use std::{collections::BTreeMap, rc::Rc, vec::Vec}; + +use num_traits::Zero; +use serde::{Deserialize, Serialize}; + +use crate::corpus::CorpusId; + +pub trait InMemoryCorpusMap { + /// Returns the number of testcases + fn count(&self) -> usize; + + /// Returns true, if no elements are in this corpus yet + fn is_empty(&self) -> bool { + self.count().is_zero() + } + + /// Store the testcase associated to corpus_id. + fn add(&mut self, id: CorpusId, testcase: T); + + /// Replaces the [`Testcase`] at the given idx, returning the existing. 
+ fn replace(&mut self, id: CorpusId, new_testcase: T) -> Option; + + /// Removes an entry from the corpus, returning it if it was present; considers both enabled and disabled testcases + fn remove(&mut self, id: CorpusId) -> Option>>; + + /// Get by id; considers only enabled testcases + fn get(&self, id: CorpusId) -> Option>>; + + /// Get the prev corpus id in chronological order + fn prev(&self, id: CorpusId) -> Option; + + /// Get the next corpus id in chronological order + fn next(&self, id: CorpusId) -> Option; + + /// Get the first inserted corpus id + fn first(&self) -> Option; + + /// Get the last inserted corpus id + fn last(&self) -> Option; + + /// Get the nth inserted item + fn nth(&self, nth: usize) -> CorpusId; +} + +#[derive(Default, Serialize, Deserialize, Clone, Debug)] +pub struct CorpusIdHistory { + keys: Vec, +} + +#[derive(Default, Serialize, Deserialize, Clone, Debug)] +pub struct BtreeCorpusMap { + /// A map of `CorpusId` to `Testcase`. + map: BTreeMap>>, + /// A list of available corpus ids + history: CorpusIdHistory, +} + +/// Keep track of the stored `Testcase` and the siblings ids (insertion order) +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TestcaseStorageItem { + /// The stored testcase + pub testcase: Rc>, + /// Previously inserted id + pub prev: Option, + /// Following inserted id + pub next: Option, +} + +#[derive(Default, Serialize, Deserialize, Clone, Debug)] +pub struct HashCorpusMap { + /// A map of `CorpusId` to `TestcaseStorageItem` + map: hashbrown::HashMap>, + /// First inserted id + first_id: Option, + /// Last inserted id + last_id: Option, + /// A list of available corpus ids + history: CorpusIdHistory, +} + +impl CorpusIdHistory { + /// Add a key to the history + pub fn add(&mut self, id: CorpusId) { + if let Err(idx) = self.keys.binary_search(&id) { + self.keys.insert(idx, id); + } + } + + /// Remove a key from the history + fn remove(&mut self, id: CorpusId) { + if let Ok(idx) = 
self.keys.binary_search(&id) { + self.keys.remove(idx); + } + } + + // Get the nth item from the map + fn nth(&self, idx: usize) -> CorpusId { + self.keys[idx] + } +} + +impl InMemoryCorpusMap for HashCorpusMap { + fn count(&self) -> usize { + self.map.len() + } + + fn is_empty(&self) -> bool { + self.map.is_empty() + } + + fn add(&mut self, id: CorpusId, testcase: T) { + let prev = if let Some(last_id) = self.last_id { + self.map.get_mut(&last_id).unwrap().next = Some(id); + Some(last_id) + } else { + None + }; + + if self.first_id.is_none() { + self.first_id = Some(id); + } + + self.last_id = Some(id); + + self.history.add(id); + + self.map.insert( + id, + TestcaseStorageItem { + testcase: Rc::new(RefCell::new(testcase)), + prev, + next: None, + }, + ); + } + + fn replace(&mut self, id: CorpusId, new_testcase: T) -> Option { + match self.map.get_mut(&id) { + Some(entry) => Some(entry.testcase.replace(new_testcase)), + _ => None, + } + } + + fn remove(&mut self, id: CorpusId) -> Option>> { + if let Some(item) = self.map.remove(&id) { + if let Some(prev) = item.prev { + self.history.remove(id); + self.map.get_mut(&prev).unwrap().next = item.next; + } else { + // first elem + self.first_id = item.next; + } + + if let Some(next) = item.next { + self.map.get_mut(&next).unwrap().prev = item.prev; + } else { + // last elem + self.last_id = item.prev; + } + + Some(item.testcase) + } else { + None + } + } + + fn get(&self, id: CorpusId) -> Option>> { + self.map.get(&id).map(|inner| inner.testcase.clone()) + } + + fn prev(&self, id: CorpusId) -> Option { + match self.map.get(&id) { + Some(item) => item.prev, + _ => None, + } + } + + fn next(&self, id: CorpusId) -> Option { + match self.map.get(&id) { + Some(item) => item.next, + _ => None, + } + } + + fn first(&self) -> Option { + self.first_id + } + + fn last(&self) -> Option { + self.last_id + } + + fn nth(&self, nth: usize) -> CorpusId { + self.history.nth(nth) + } +} + +impl InMemoryCorpusMap for BtreeCorpusMap { + fn 
count(&self) -> usize { + self.map.len() + } + + fn is_empty(&self) -> bool { + self.map.is_empty() + } + + fn add(&mut self, id: CorpusId, testcase: T) { + // corpus.insert_key(id); + self.map.insert(id, Rc::new(RefCell::new(testcase))); + self.history.add(id); + } + + fn replace(&mut self, id: CorpusId, new_testcase: T) -> Option { + self.map + .get_mut(&id) + .map(|entry| entry.replace(new_testcase)) + } + + fn remove(&mut self, id: CorpusId) -> Option>> { + self.history.remove(id); + self.map.remove(&id) + } + + fn get(&self, id: CorpusId) -> Option>> { + self.map.get(&id).cloned() + } + + fn prev(&self, id: CorpusId) -> Option { + // TODO see if using self.keys is faster + let mut range = self + .map + .range((core::ops::Bound::Unbounded, core::ops::Bound::Included(id))); + if let Some((this_id, _)) = range.next_back() { + if id != *this_id { + return None; + } + } + if let Some((prev_id, _)) = range.next_back() { + Some(*prev_id) + } else { + None + } + } + + fn next(&self, id: CorpusId) -> Option { + // TODO see if using self.keys is faster + let mut range = self + .map + .range((core::ops::Bound::Included(id), core::ops::Bound::Unbounded)); + if let Some((this_id, _)) = range.next() { + if id != *this_id { + return None; + } + } + if let Some((next_id, _)) = range.next() { + Some(*next_id) + } else { + None + } + } + + fn first(&self) -> Option { + self.map.iter().next().map(|x| *x.0) + } + + fn last(&self) -> Option { + self.map.iter().next_back().map(|x| *x.0) + } + + fn nth(&self, nth: usize) -> CorpusId { + self.history.nth(nth) + } +} diff --git a/libafl/src/corpus/store/mod.rs b/libafl/src/corpus/store/mod.rs new file mode 100644 index 00000000000..ffb6239f899 --- /dev/null +++ b/libafl/src/corpus/store/mod.rs @@ -0,0 +1,72 @@ +//! 
Stores are collections managing testcases + +use core::cell::RefCell; +use std::rc::Rc; + +use libafl_bolts::Error; + +use super::{CorpusId, Testcase}; + +pub mod maps; +pub use maps::{BtreeCorpusMap, HashCorpusMap, InMemoryCorpusMap}; + +pub mod inmemory; +pub use inmemory::InMemoryStore; + +pub mod ondisk; +pub use ondisk::OnDiskStore; + +/// A store is responsible for storing and retrieving [`Testcase`]s, ordered by add time. +pub trait Store { + /// Returns the number of all enabled entries + fn count(&self) -> usize; + + /// Returns the number of all disabled entries + fn count_disabled(&self) -> usize; + + /// Returns the number of elements including disabled entries + fn count_all(&self) -> usize { + self.count().saturating_add(self.count_disabled()) + } + + /// Returns true, if no elements are in this corpus yet + fn is_empty(&self) -> bool { + self.count() == 0 + } + + /// Store the testcase associated to corpus_id to the enabled set. + fn add(&mut self, id: CorpusId, testcase: Testcase) -> Result<(), Error>; + + /// Store the testcase associated to corpus_id to the disabled set. + fn add_disabled(&mut self, id: CorpusId, testcase: Testcase) -> Result<(), Error>; + + /// Replaces the [`Testcase`] at the given idx in the enabled set, returning the existing. 
+ fn replace(&mut self, id: CorpusId, new_testcase: Testcase) -> Result, Error>; + + /// Removes an entry from the corpus, returning it; considers both enabled and disabled testcases + fn remove(&mut self, id: CorpusId) -> Result>>, Error>; + + /// Get by id; considers only enabled testcases + fn get(&self, id: CorpusId) -> Result>>, Error>; + + /// Get by id; considers both enabled and disabled testcases + fn get_from_all(&self, id: CorpusId) -> Result>>, Error>; + + /// Get the prev corpus id in chronological order + fn prev(&self, id: CorpusId) -> Option; + + /// Get the next corpus id in chronological order + fn next(&self, id: CorpusId) -> Option; + + /// Get the first inserted corpus id + fn first(&self) -> Option; + + /// Get the last inserted corpus id + fn last(&self) -> Option; + + /// Get the nth corpus id; considers only enabled testcases + fn nth(&self, nth: usize) -> CorpusId; + + /// Get the nth corpus id; considers both enabled and disabled testcases + fn nth_from_all(&self, nth: usize) -> CorpusId; +} diff --git a/libafl/src/corpus/store/ondisk.rs b/libafl/src/corpus/store/ondisk.rs new file mode 100644 index 00000000000..ee54adde908 --- /dev/null +++ b/libafl/src/corpus/store/ondisk.rs @@ -0,0 +1,321 @@ +use core::{cell::RefCell, marker::PhantomData}; +use std::{ + fs::{self, File}, + io::{self, Read, Seek, SeekFrom, Write}, + path::{Path, PathBuf}, + rc::Rc, + string::String, + vec::Vec, +}; + +use fs4::fs_std::FileExt; +use libafl_bolts::{Error, compress::GzipCompressor}; +use serde::{Deserialize, Serialize}; + +use super::{InMemoryCorpusMap, Store}; +use crate::{ + corpus::{CorpusId, Testcase, testcase::TestcaseMetadata}, + inputs::Input, +}; + +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct OnDiskStore { + disk_mgr: DiskMgr, + enabled_map: M, + disabled_map: M, + first: Option, + last: Option, +} + +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +struct DiskMgr { + root_dir: PathBuf, + md_format: 
OnDiskMetadataFormat, + phantom: PhantomData, +} + +/// Options for the the format of the on-disk metadata +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub enum OnDiskMetadataFormat { + /// A binary-encoded postcard + Postcard, + /// JSON + Json, + /// JSON formatted for readability + #[default] + JsonPretty, + /// The same as [`OnDiskMetadataFormat::JsonPretty`], but compressed + #[cfg(feature = "gzip")] + JsonGzip, +} + +impl OnDiskMetadataFormat { + pub fn to_vec(&self, testcase_md: &TestcaseMetadata) -> Result, Error> { + let json_error = |err| Error::serialize(format!("Failed to json-ify metadata: {err:?}")); + + Ok(match self { + OnDiskMetadataFormat::Postcard => postcard::to_allocvec(testcase_md)?, + OnDiskMetadataFormat::Json => serde_json::to_vec(&testcase_md).map_err(json_error)?, + OnDiskMetadataFormat::JsonPretty => { + serde_json::to_vec_pretty(&testcase_md).map_err(json_error)? + } + #[cfg(feature = "gzip")] + OnDiskMetadataFormat::JsonGzip => GzipCompressor::new() + .compress(&serde_json::to_vec_pretty(&testcase_md).map_err(json_error)?), + }) + } + + pub fn from_file(&self, md_path: &Path) -> Result { + let json_error = |err| Error::serialize(format!("Failed to parse metadata: {err:?}")); + let md_serialized = fs::read(md_path)?; + + Ok(match self { + OnDiskMetadataFormat::Postcard => postcard::from_bytes(&md_serialized)?, + OnDiskMetadataFormat::Json => { + serde_json::from_slice(&md_serialized).map_err(json_error)? + } + OnDiskMetadataFormat::JsonPretty => { + serde_json::from_slice(&md_serialized).map_err(json_error)? + } + #[cfg(feature = "gzip")] + OnDiskMetadataFormat::JsonGzip => { + serde_json::from_slice(&GzipCompressor::new().decompress(&md_serialized)?) + .map_err(json_error)? 
+ } + }) + } +} + +#[derive(Debug)] +struct TestcaseLockfile { + lockfile: File, + nb_used: u32, +} + +impl TestcaseLockfile { + pub fn new(ondisk_mgr: &DiskMgr, testcase_id: &String) -> Result { + let lockfile_path = ondisk_mgr.root_dir.join(format!(".{}.lock", testcase_id)); + + let mut lockfile = match File::create_new(lockfile_path.as_path()) { + Ok(f) => f, + Err(e) => match e.kind() { + io::ErrorKind::AlreadyExists => File::open(lockfile_path.as_path()).unwrap(), + + _ => return Err(e.into()), + }, + }; + + lockfile.lock_exclusive()?; + + let mut nb_used_buf: [u8; 4] = [0; 4]; + let nb_used: u32 = match lockfile.read_exact(&mut nb_used_buf) { + Ok(()) => u32::from_le_bytes(nb_used_buf), + Err(e) => match e.kind() { + io::ErrorKind::UnexpectedEof => 0, + + _ => return Err(e.into()), + }, + }; + + Ok(Self { lockfile, nb_used }) + } + + /// returns true if it is the first use + pub fn inc_used(&mut self) -> bool { + self.nb_used += 1; + self.nb_used == 1 + } + + /// returns true if not in used anymore + /// can be safely deleted + pub fn dec_used(&mut self) -> bool { + if self.nb_used == 0 { + true + } else { + self.nb_used -= 1; + self.nb_used == 0 + } + } +} + +impl Drop for TestcaseLockfile { + fn drop(&mut self) { + let nb_used_buf = self.nb_used.to_le_bytes(); + + self.lockfile.seek(SeekFrom::Start(0)); + self.lockfile.write_all(&nb_used_buf).unwrap(); + + FileExt::unlock(&self.lockfile); + } +} + +impl DiskMgr +where + I: Input, +{ + fn testcase_path(&self, testcase_id: &String) -> PathBuf { + self.root_dir.join(&testcase_id) + } + + fn testcase_md_path(&self, testcase_id: &String) -> PathBuf { + self.root_dir.join(format!(".{}.metadata", testcase_id)) + } + + fn save_testcase(&self, testcase: &Testcase) -> Result { + let testcase_id = testcase.id(); + let testcase_path = self.testcase_path(testcase_id); + let mut lockfile = TestcaseLockfile::new(self, testcase_id)?; + + if lockfile.inc_used() { + // save md to file + let ser_fmt = self.md_format.clone(); 
+ let testcase_md_path = self.testcase_md_path(testcase_id); + + let mut testcase_md_f = File::create_new(testcase_md_path.as_path()).unwrap(); + let testcase_md_ser = ser_fmt.to_vec(testcase.metadata())?; + + testcase_md_f.write_all(&testcase_md_ser)?; + + // testcase_f.write_all(testcase.input().target_bytes().as_ref())?; + testcase.input().to_file(testcase_path.as_path())?; + } + + Ok(testcase_id.clone()) + } + + /// prerequisite: the testcase should not have been "removed" before. + /// also, it should only happen if it has been saved before. + fn load_testcase(&self, testcase_id: &String) -> Result, Error> { + let testcase_path = self.testcase_path(testcase_id); + let testcase_md_path = self.testcase_md_path(testcase_id); + let ser_fmt = self.md_format.clone(); + + let _lockfile = TestcaseLockfile::new(self, testcase_id)?; + + let input = I::from_file(testcase_path.as_path())?; + let md = ser_fmt.from_file(testcase_md_path.as_path())?; + + Ok(Testcase::new(input, md)) + } + + fn remove_testcase(&self, testcase_id: &String) -> Result<(), Error> { + let mut lockfile = TestcaseLockfile::new(self, testcase_id)?; + + if lockfile.dec_used() { + fs::remove_file(self.testcase_path(testcase_id))?; + fs::remove_file(self.testcase_md_path(testcase_id))?; + } + + Ok(()) + } +} + +impl Store for OnDiskStore +where + I: Input, + M: InMemoryCorpusMap, +{ + fn count_all(&self) -> usize { + self.count().saturating_add(self.count_disabled()) + } + + fn is_empty(&self) -> bool { + self.count() == 0 + } + + fn count(&self) -> usize { + self.enabled_map.count() + } + + fn count_disabled(&self) -> usize { + self.disabled_map.count() + } + + fn add(&mut self, id: CorpusId, testcase: Testcase) -> Result<(), Error> { + let testcase_id = self.disk_mgr.save_testcase(&testcase)?; + self.enabled_map.add(id, testcase_id); + Ok(()) + } + + fn add_disabled(&mut self, id: CorpusId, testcase: Testcase) -> Result<(), Error> { + let testcase_id = self.disk_mgr.save_testcase(&testcase)?; + 
self.disabled_map.add(id, testcase_id); + Ok(()) + } + + fn replace(&mut self, id: CorpusId, new_testcase: Testcase) -> Result, Error> { + let new_tc_id = self.disk_mgr.save_testcase(&new_testcase)?; + let old_tc_id = self.enabled_map.replace(id, new_tc_id).ok_or_else(|| { + Error::key_not_found(format!("Index {id} not found, could not replace.")) + })?; + + let old_tc = self.disk_mgr.load_testcase(&old_tc_id)?; + self.disk_mgr.remove_testcase(&old_tc_id)?; + Ok(old_tc) + } + + fn remove(&mut self, id: CorpusId) -> Result>>, Error> { + let old_tc_id = self + .enabled_map + .remove(id) + .or_else(|| self.disabled_map.remove(id)) + .ok_or(Error::key_not_found(format!("Index {id} not found")))?; + + let old_tc_id_borrowed = old_tc_id.borrow(); + let old_tc = self.disk_mgr.load_testcase(&old_tc_id_borrowed)?; + self.disk_mgr.remove_testcase(&old_tc_id_borrowed)?; + Ok(Rc::new(RefCell::new(old_tc))) + } + + fn get(&self, id: CorpusId) -> Result>>, Error> { + let tc_id = self + .enabled_map + .get(id) + .ok_or(Error::key_not_found(format!("Index not found: {id}")))?; + + let tc_id_borrowed = tc_id.borrow(); + let tc = self.disk_mgr.load_testcase(&*tc_id_borrowed)?; + Ok(Rc::new(RefCell::new(tc))) + } + + fn get_from_all(&self, id: CorpusId) -> Result>>, Error> { + let tc_id = self + .enabled_map + .get(id) + .or_else(|| self.disabled_map.get(id)) + .ok_or(Error::key_not_found(format!("Index {id} not found")))?; + + let tc_id_borrowed = tc_id.borrow(); + let tc = self.disk_mgr.load_testcase(&*&tc_id_borrowed)?; + Ok(Rc::new(RefCell::new(tc))) + } + + fn prev(&self, id: CorpusId) -> Option { + self.enabled_map.prev(id) + } + + fn next(&self, id: CorpusId) -> Option { + self.enabled_map.next(id) + } + + fn first(&self) -> Option { + self.enabled_map.first() + } + + fn last(&self) -> Option { + self.enabled_map.last() + } + + fn nth(&self, nth: usize) -> CorpusId { + self.enabled_map.nth(nth) + } + + fn nth_from_all(&self, nth: usize) -> CorpusId { + let nb_enabled = 
self.enabled_map.count(); + if nth >= nb_enabled { + self.disabled_map.nth(nth.saturating_sub(nb_enabled)) + } else { + self.enabled_map.nth(nth) + } + } +} diff --git a/libafl/src/corpus/testcase.rs b/libafl/src/corpus/testcase.rs index 3e2763e661d..54b53367713 100644 --- a/libafl/src/corpus/testcase.rs +++ b/libafl/src/corpus/testcase.rs @@ -6,16 +6,15 @@ use alloc::string::String; use alloc::{borrow::Cow, vec::Vec}; use core::{ cell::{Ref, RefMut}, + hash::Hasher, time::Duration, }; -#[cfg(feature = "std")] -use std::path::PathBuf; -use libafl_bolts::{HasLen, serdeany::SerdeAnyMap}; +use libafl_bolts::{hasher_std, serdeany::SerdeAnyMap}; use serde::{Deserialize, Serialize}; +use typed_builder::TypedBuilder; -use super::Corpus; -use crate::{Error, HasMetadata, corpus::CorpusId}; +use crate::{Error, HasMetadata, corpus::CorpusId, inputs::Input}; /// Shorthand to receive a [`Ref`] or [`RefMut`] to a stored [`Testcase`], by [`CorpusId`]. /// For a normal state, this should return a [`Testcase`] in the corpus, not the objectives. 
@@ -29,34 +28,27 @@ pub trait HasTestcase { fn testcase_mut(&self, id: CorpusId) -> Result>, Error>; } -/// An entry in the [`Testcase`] Corpus -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct Testcase { - /// The [`Input`] of this [`Testcase`], or `None`, if it is not currently in memory - input: Option, - /// The filename for this [`Testcase`] - filename: Option, - /// Complete path to the [`Input`] on disk, if this [`Testcase`] is backed by a file in the filesystem - #[cfg(feature = "std")] - file_path: Option, +#[derive(Serialize, Deserialize, Clone, Debug, TypedBuilder)] +pub struct TestcaseMetadata { /// Map of metadata associated with this [`Testcase`] + #[builder(default)] metadata: SerdeAnyMap, - /// Complete path to the metadata [`SerdeAnyMap`] on disk, if this [`Testcase`] is backed by a file in the filesystem - #[cfg(feature = "std")] - metadata_path: Option, /// Time needed to execute the input + #[builder(default)] exec_time: Option, - /// Cached len of the input, if any - cached_len: Option, /// Number of fuzzing iterations of this particular input updated in `perform_mutational` + #[builder(default = 0)] scheduled_count: usize, /// Number of executions done at discovery time executions: u64, /// Parent [`CorpusId`], if known + #[builder(default)] parent_id: Option, /// If the testcase is "disabled" + #[builder(default = false)] disabled: bool, /// has found crash (or timeout) or not + #[builder(default = 0)] objectives_found: usize, /// Vector of `Feedback` names that deemed this `Testcase` as corpus worthy #[cfg(feature = "track_hit_feedbacks")] @@ -66,37 +58,77 @@ pub struct Testcase { hit_objectives: Vec>, } +/// An entry in the [`Testcase`] Corpus +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct Testcase { + /// The [`Input`] of this [`Testcase`], or `None`, if it is not currently in memory + input: I, + /// The unique id for [`Testcase`]. + /// It should uniquely identify input at the very least. 
+ id: String, + /// The metadata linked to the [`Testcase`] + metadata: TestcaseMetadata, + // /// The filename for this [`Testcase`] + // filename: Option, +} + impl HasMetadata for Testcase { /// Get all the metadata into an [`hashbrown::HashMap`] #[inline] fn metadata_map(&self) -> &SerdeAnyMap { - &self.metadata + &self.metadata.metadata } /// Get all the metadata into an [`hashbrown::HashMap`] (mutable) #[inline] fn metadata_map_mut(&mut self) -> &mut SerdeAnyMap { - &mut self.metadata + &mut self.metadata.metadata + } +} + +impl Testcase +where + I: Input, +{ + fn gen_id(input: &I, _md: &TestcaseMetadata) -> String { + let mut hasher = hasher_std(); + input.hash(&mut hasher); + let hash = hasher.finish(); + format!("{hash:0>8x}") + } + + /// Create a new Testcase instance given an input + #[inline] + pub fn new(input: I, metadata: TestcaseMetadata) -> Self { + let id = Self::gen_id(&input, &metadata); + + Self { + input, + id, + metadata, + } } } /// Impl of a testcase impl Testcase { - /// Returns this [`Testcase`] with a loaded `Input`] - pub fn load_input>(&mut self, corpus: &C) -> Result<&I, Error> { - corpus.load_input_into(self)?; - Ok(self.input.as_ref().unwrap()) + pub fn id(&self) -> &String { + &self.id + } + + pub fn metadata(&self) -> &TestcaseMetadata { + &self.metadata } /// Get the input, if available any #[inline] - pub fn input(&self) -> &Option { + pub fn input(&self) -> &I { &self.input } /// Get the input, if any (mutable) #[inline] - pub fn input_mut(&mut self) -> &mut Option { + pub fn input_mut(&mut self) -> &mut I { // self.cached_len = None; &mut self.input } @@ -104,303 +136,146 @@ impl Testcase { /// Set the input #[inline] pub fn set_input(&mut self, input: I) { - self.input = Some(input); - } - - /// Get the filename, if any - #[inline] - pub fn filename(&self) -> &Option { - &self.filename - } - - /// Get the filename, if any (mutable) - #[inline] - pub fn filename_mut(&mut self) -> &mut Option { - &mut self.filename - } - - /// 
Get the filename path, if any - #[inline] - #[cfg(feature = "std")] - pub fn file_path(&self) -> &Option { - &self.file_path - } - - /// Get the filename path, if any (mutable) - #[inline] - #[cfg(feature = "std")] - pub fn file_path_mut(&mut self) -> &mut Option { - &mut self.file_path - } - - /// Get the metadata path, if any - #[inline] - #[cfg(feature = "std")] - pub fn metadata_path(&self) -> &Option { - &self.metadata_path - } - - /// Get the metadata path, if any (mutable) - #[inline] - #[cfg(feature = "std")] - pub fn metadata_path_mut(&mut self) -> &mut Option { - &mut self.metadata_path + self.input = input; } /// Get the executions #[inline] - pub fn executions(&self) -> &u64 { - &self.executions + pub fn executions(&self) -> u64 { + self.metadata.executions } /// Get the executions (mutable) #[inline] pub fn executions_mut(&mut self) -> &mut u64 { - &mut self.executions + &mut self.metadata.executions } /// Set the executions #[inline] pub fn set_executions(&mut self, executions: u64) { - self.executions = executions; + self.metadata.executions = executions; } /// Get the execution time of the testcase #[inline] pub fn exec_time(&self) -> &Option { - &self.exec_time + &self.metadata.exec_time } /// Get the execution time of the testcase (mutable) #[inline] pub fn exec_time_mut(&mut self) -> &mut Option { - &mut self.exec_time + &mut self.metadata.exec_time } /// Sets the execution time of the current testcase #[inline] pub fn set_exec_time(&mut self, time: Duration) { - self.exec_time = Some(time); + self.metadata.exec_time = Some(time); } /// Get the `scheduled_count` #[inline] pub fn scheduled_count(&self) -> usize { - self.scheduled_count + self.metadata.scheduled_count } /// Set the `scheduled_count` #[inline] pub fn set_scheduled_count(&mut self, scheduled_count: usize) { - self.scheduled_count = scheduled_count; + self.metadata.scheduled_count = scheduled_count; } /// Get `disabled` #[inline] pub fn disabled(&mut self) -> bool { - self.disabled + 
self.metadata.disabled } /// Set the testcase as disabled #[inline] pub fn set_disabled(&mut self, disabled: bool) { - self.disabled = disabled; + self.metadata.disabled = disabled; } /// Get the hit feedbacks #[inline] #[cfg(feature = "track_hit_feedbacks")] pub fn hit_feedbacks(&self) -> &Vec> { - &self.hit_feedbacks + &self.metadata.hit_feedbacks } /// Get the hit feedbacks (mutable) #[inline] #[cfg(feature = "track_hit_feedbacks")] pub fn hit_feedbacks_mut(&mut self) -> &mut Vec> { - &mut self.hit_feedbacks + &mut self.metadata.hit_feedbacks } /// Get the hit objectives #[inline] #[cfg(feature = "track_hit_feedbacks")] pub fn hit_objectives(&self) -> &Vec> { - &self.hit_objectives + &self.metadata.hit_objectives } /// Get the hit objectives (mutable) #[inline] #[cfg(feature = "track_hit_feedbacks")] pub fn hit_objectives_mut(&mut self) -> &mut Vec> { - &mut self.hit_objectives - } - - /// Create a new Testcase instance given an input - #[inline] - pub fn new(input: I) -> Self { - Self { - input: Some(input), - filename: None, - #[cfg(feature = "std")] - file_path: None, - metadata: SerdeAnyMap::default(), - #[cfg(feature = "std")] - metadata_path: None, - exec_time: None, - cached_len: None, - executions: 0, - scheduled_count: 0, - parent_id: None, - disabled: false, - objectives_found: 0, - #[cfg(feature = "track_hit_feedbacks")] - hit_feedbacks: Vec::new(), - #[cfg(feature = "track_hit_feedbacks")] - hit_objectives: Vec::new(), - } - } - - /// Creates a testcase, attaching the id of the parent - /// that this [`Testcase`] was derived from on creation - pub fn with_parent_id(input: I, parent_id: CorpusId) -> Self { - Testcase { - input: Some(input), - filename: None, - #[cfg(feature = "std")] - file_path: None, - metadata: SerdeAnyMap::default(), - #[cfg(feature = "std")] - metadata_path: None, - exec_time: None, - cached_len: None, - executions: 0, - scheduled_count: 0, - parent_id: Some(parent_id), - disabled: false, - objectives_found: 0, - #[cfg(feature = 
"track_hit_feedbacks")] - hit_feedbacks: Vec::new(), - #[cfg(feature = "track_hit_feedbacks")] - hit_objectives: Vec::new(), - } - } - - /// Create a new Testcase instance given an input and a `filename` - /// If locking is enabled, make sure that testcases with the same input have the same filename - /// to prevent ending up with duplicate testcases - #[inline] - pub fn with_filename(input: I, filename: String) -> Self { - Self { - input: Some(input), - filename: Some(filename), - #[cfg(feature = "std")] - file_path: None, - metadata: SerdeAnyMap::default(), - #[cfg(feature = "std")] - metadata_path: None, - exec_time: None, - cached_len: None, - executions: 0, - scheduled_count: 0, - parent_id: None, - disabled: false, - objectives_found: 0, - #[cfg(feature = "track_hit_feedbacks")] - hit_feedbacks: Vec::new(), - #[cfg(feature = "track_hit_feedbacks")] - hit_objectives: Vec::new(), - } - } + &mut self.metadata.hit_objectives + } + + // /// Creates a testcase, attaching the id of the parent + // /// that this [`Testcase`] was derived from on creation + // pub fn with_parent_id(input: I, parent_id: CorpusId) -> Self { + // Testcase { + // input: Some(input), + // filename: None, + // #[cfg(feature = "std")] + // file_path: None, + // metadata: SerdeAnyMap::default(), + // #[cfg(feature = "std")] + // metadata_path: None, + // exec_time: None, + // cached_len: None, + // executions: 0, + // scheduled_count: 0, + // parent_id: Some(parent_id), + // disabled: false, + // objectives_found: 0, + // #[cfg(feature = "track_hit_feedbacks")] + // hit_feedbacks: Vec::new(), + // #[cfg(feature = "track_hit_feedbacks")] + // hit_objectives: Vec::new(), + // } + // } /// Get the id of the parent, that this testcase was derived from #[must_use] pub fn parent_id(&self) -> Option { - self.parent_id + self.metadata.parent_id } /// Sets the id of the parent, that this testcase was derived from pub fn set_parent_id(&mut self, parent_id: CorpusId) { - self.parent_id = Some(parent_id); 
+ self.metadata.parent_id = Some(parent_id); } /// Sets the id of the parent, that this testcase was derived from pub fn set_parent_id_optional(&mut self, parent_id: Option) { - self.parent_id = parent_id; + self.metadata.parent_id = parent_id; } /// Gets how many objectives were found by mutating this testcase pub fn objectives_found(&self) -> usize { - self.objectives_found + self.metadata.objectives_found } /// Adds one objectives to the `objectives_found` counter. Mostly called from crash handler or executor. pub fn found_objective(&mut self) { - self.objectives_found = self.objectives_found.saturating_add(1); - } -} - -impl Default for Testcase { - /// Create a new default Testcase - #[inline] - fn default() -> Self { - Testcase { - input: None, - filename: None, - metadata: SerdeAnyMap::new(), - exec_time: None, - cached_len: None, - scheduled_count: 0, - parent_id: None, - #[cfg(feature = "std")] - file_path: None, - #[cfg(feature = "std")] - metadata_path: None, - disabled: false, - executions: 0, - objectives_found: 0, - #[cfg(feature = "track_hit_feedbacks")] - hit_feedbacks: Vec::new(), - #[cfg(feature = "track_hit_feedbacks")] - hit_objectives: Vec::new(), - } - } -} - -/// Impl of a testcase when the input has len -impl Testcase -where - I: HasLen, -{ - /// Get the cached `len`. Will `Error::EmptyOptional` if `len` is not yet cached. - #[inline] - pub fn cached_len(&mut self) -> Option { - self.cached_len - } - - /// Get the `len` or calculate it, if not yet calculated. 
- pub fn load_len>(&mut self, corpus: &C) -> Result { - match &self.input { - Some(i) => { - let l = i.len(); - self.cached_len = Some(l); - Ok(l) - } - None => { - if let Some(l) = self.cached_len { - Ok(l) - } else { - corpus.load_input_into(self)?; - self.load_len(corpus) - } - } - } - } -} - -/// Create a testcase from an input -impl From for Testcase { - fn from(input: I) -> Self { - Testcase::new(input) + self.metadata.objectives_found = self.metadata.objectives_found.saturating_add(1); } } @@ -515,15 +390,3 @@ impl SchedulerTestcaseMetadata { } libafl_bolts::impl_serdeany!(SchedulerTestcaseMetadata); - -#[cfg(feature = "std")] -impl Drop for Testcase { - fn drop(&mut self) { - if let Some(filename) = &self.filename { - let mut path = PathBuf::from(filename); - let lockname = format!(".{}.lafl_lock", path.file_name().unwrap().to_str().unwrap()); - path.set_file_name(lockname); - let _ = std::fs::remove_file(path); - } - } -} diff --git a/libafl/src/executors/inprocess/mod.rs b/libafl/src/executors/inprocess/mod.rs index d3942a9cb48..59d59bbbd8d 100644 --- a/libafl/src/executors/inprocess/mod.rs +++ b/libafl/src/executors/inprocess/mod.rs @@ -16,7 +16,7 @@ use libafl_bolts::tuples::{RefIndexable, tuple_list}; use crate::{ Error, HasMetadata, - corpus::{Corpus, Testcase}, + corpus::{Corpus, Testcase, testcase::TestcaseMetadata}, events::{Event, EventFirer, EventRestarter, EventWithStats}, executors::{ Executor, ExitKind, HasObservers, @@ -344,10 +344,13 @@ pub fn run_observers_and_save_state( .expect("In run_observers_and_save_state objective failure."); if is_solution { - let mut new_testcase = Testcase::from(input.clone()); - new_testcase.set_executions(*state.executions()); + let testcase_md = TestcaseMetadata::builder() + .executions(*state.executions()) + .parent_id(*state.corpus().current()) + .build(); + + let mut new_testcase = Testcase::new(input.clone(), testcase_md); new_testcase.add_metadata(exitkind); - 
new_testcase.set_parent_id_optional(*state.corpus().current()); if let Ok(mut tc) = state.current_testcase_mut() { tc.found_objective(); diff --git a/libafl/src/feedbacks/mod.rs b/libafl/src/feedbacks/mod.rs index 9460be35ee7..627c9729738 100644 --- a/libafl/src/feedbacks/mod.rs +++ b/libafl/src/feedbacks/mod.rs @@ -35,9 +35,6 @@ pub use bool::BoolValueFeedback; #[cfg(feature = "std")] pub mod concolic; -#[cfg(feature = "std")] -/// The module for `CustomFilenameToTestcaseFeedback` -pub mod custom_filename; pub mod differential; /// The module for list feedback pub mod list; diff --git a/libafl/src/fuzzer/mod.rs b/libafl/src/fuzzer/mod.rs index 203cb7e500c..689413e4f94 100644 --- a/libafl/src/fuzzer/mod.rs +++ b/libafl/src/fuzzer/mod.rs @@ -14,7 +14,9 @@ use serde::{Serialize, de::DeserializeOwned}; use crate::monitors::stats::PerfFeature; use crate::{ Error, HasMetadata, - corpus::{Corpus, CorpusId, HasCurrentCorpusId, HasTestcase, Testcase}, + corpus::{ + Corpus, CorpusId, HasCurrentCorpusId, HasTestcase, Testcase, testcase::TestcaseMetadata, + }, events::{ Event, EventConfig, EventFirer, EventReceiver, EventWithStats, ProgressReporter, SendExiting, @@ -420,13 +422,18 @@ where ) -> Result, Error> { let corpus = if exec_res.is_corpus() { // Add the input to the main corpus - let mut testcase = Testcase::from(input.clone()); - testcase.set_executions(*state.executions()); + let tc_md = TestcaseMetadata::builder() + .executions(*state.executions()) + .build(); + + let mut testcase = Testcase::new(input.clone(), tc_md); + #[cfg(feature = "track_hit_feedbacks")] self.feedback_mut() .append_hit_feedbacks(testcase.hit_feedbacks_mut())?; self.feedback_mut() .append_metadata(state, manager, observers, &mut testcase)?; + let id = state.corpus_mut().add(testcase)?; self.scheduler_mut().on_add(state, id)?; Ok(Some(id)) @@ -436,10 +443,14 @@ where if exec_res.is_solution() { // The input is a solution, add it to the respective corpus - let mut testcase =
Testcase::from(input.clone()); - testcase.set_executions(*state.executions()); + let tc_md = TestcaseMetadata::builder() + .executions(*state.executions()) + .parent_id(*state.corpus().current()) + .build(); + + let mut testcase = Testcase::new(input.clone(), tc_md); testcase.add_metadata(*exit_kind); - testcase.set_parent_id_optional(*state.corpus().current()); + if let Ok(mut tc) = state.current_testcase_mut() { tc.found_objective(); } @@ -677,8 +688,10 @@ where let exit_kind = self.execute_input(state, executor, manager, &input)?; let observers = executor.observers(); // Always consider this to be "interesting" - let mut testcase = Testcase::from(input.clone()); - testcase.set_executions(*state.executions()); + let tc_md = TestcaseMetadata::builder() + .executions(*state.executions()) + .build(); + let mut testcase = Testcase::new(input.clone(), tc_md); // Maybe a solution #[cfg(not(feature = "introspection"))] @@ -766,9 +779,13 @@ where } fn add_disabled_input(&mut self, state: &mut S, input: I) -> Result { - let mut testcase = Testcase::from(input.clone()); - testcase.set_executions(*state.executions()); - testcase.set_disabled(true); + let tc_md = TestcaseMetadata::builder() + .executions(*state.executions()) + .disabled(true) + .build(); + + let testcase = Testcase::new(input, tc_md); + // Add the disabled input to the main corpus let id = state.corpus_mut().add_disabled(testcase)?; Ok(id) diff --git a/libafl/src/mutators/encoded_mutations.rs b/libafl/src/mutators/encoded_mutations.rs index a3a02c9657b..2a12225f087 100644 --- a/libafl/src/mutators/encoded_mutations.rs +++ b/libafl/src/mutators/encoded_mutations.rs @@ -332,7 +332,7 @@ where let other_size = { // new scope to make the borrow checker happy let mut other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - other_testcase.load_input(state.corpus())?.codes().len() + other_testcase.input().codes().len() }; if other_size < 2 { @@ -362,7 +362,7 @@ where let other_testcase =
state.corpus().get_from_all(id)?.borrow_mut(); // no need to `load_input` again - we did that above already. - let other = other_testcase.input().as_ref().unwrap(); + let other = other_testcase.input(); input.codes_mut().resize(size + len, 0); unsafe { @@ -411,7 +411,7 @@ where let other_size = { // new scope to make the borrow checker happy let mut other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - other_testcase.load_input(state.corpus())?.codes().len() + other_testcase.input().codes().len() }; if other_size < 2 { @@ -437,7 +437,7 @@ where let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); // no need to load the input again, it'll already be present at this point. - let other = other_testcase.input().as_ref().unwrap(); + let other = other_testcase.input(); unsafe { buffer_copy(input.codes_mut(), other.codes(), from, to, len); diff --git a/libafl/src/mutators/gramatron.rs b/libafl/src/mutators/gramatron.rs index 58412723fa0..146a929c133 100644 --- a/libafl/src/mutators/gramatron.rs +++ b/libafl/src/mutators/gramatron.rs @@ -128,17 +128,18 @@ where let rand_num = state.rand_mut().next(); - let mut other_testcase = state.corpus().get(id)?.borrow_mut(); + let other_testcase_ref = state.corpus().get(id)?; + let mut other_testcase = other_testcase_ref.borrow_mut(); if !other_testcase.has_metadata::() { - let meta = GramatronIdxMapMetadata::new(other_testcase.load_input(state.corpus())?); + let meta = GramatronIdxMapMetadata::new(other_testcase.input()); other_testcase.add_metadata(meta); } let meta = other_testcase .metadata_map() .get::() .unwrap(); - let other = other_testcase.input().as_ref().unwrap(); + let other = other_testcase.input(); meta.map.get(&input.terminals()[insert_at].state).map_or( Ok(MutationResult::Skipped), diff --git a/libafl/src/mutators/mutations.rs b/libafl/src/mutators/mutations.rs index 5b3d1373de7..be1380b4af0 100644 --- a/libafl/src/mutators/mutations.rs +++ b/libafl/src/mutators/mutations.rs @@ -1171,10 
+1171,7 @@ where let other_size = { let mut other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - other_testcase - .load_input(state.corpus())? - .mutator_bytes() - .len() + other_testcase.input_mut().mutator_bytes().len() }; if other_size < 2 { @@ -1191,7 +1188,7 @@ where let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); // No need to load the input again, it'll still be cached. - let other = other_testcase.input().as_ref().unwrap(); + let other = other_testcase.input(); Ok(Self::crossover_insert( input, @@ -1266,7 +1263,7 @@ where let other_size = { let mut testcase = state.corpus().get_from_all(id)?.borrow_mut(); - testcase.load_input(state.corpus())?.mutator_bytes().len() + testcase.input_mut().mutator_bytes().len() }; if other_size < 2 { @@ -1287,7 +1284,7 @@ where let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); // No need to load the input again, it'll still be cached. - let other = other_testcase.input().as_ref().unwrap(); + let other = other_testcase.input(); Ok(Self::crossover_replace( input, @@ -1371,7 +1368,7 @@ where let other_size = { let mut other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - let other_input = other_testcase.load_input(state.corpus())?; + let other_input = other_testcase.input(); let input_mapped = (self.input_mapper)(other_input).map_to_option_bytes(); input_mapped.map_or(0, >::len) }; @@ -1394,7 +1391,7 @@ where let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); // No need to load the input again, it'll still be cached. 
- let other_input = &mut other_testcase.input().as_ref().unwrap(); + let other_input = &mut other_testcase.input(); let wrapped_mapped_other_input = (self.input_mapper)(other_input).map_to_option_bytes(); if wrapped_mapped_other_input.is_none() { return Ok(MutationResult::Skipped); @@ -1458,7 +1455,7 @@ where let other_size = { let mut other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - let other_input = other_testcase.load_input(state.corpus())?; + let other_input = other_testcase.input(); let input_mapped = (self.input_mapper)(other_input).map_to_option_bytes(); input_mapped.map_or(0, >::len) }; @@ -1481,7 +1478,7 @@ where let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); // No need to load the input again, it'll still be cached. - let other_input = &mut other_testcase.input().as_ref().unwrap(); + let other_input = &mut other_testcase.input(); let wrapped_mapped_other_input = (self.input_mapper)(other_input).map_to_option_bytes(); if wrapped_mapped_other_input.is_none() { return Ok(MutationResult::Skipped); @@ -1541,7 +1538,7 @@ where let (first_diff, last_diff) = { let mut other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - let other = other_testcase.load_input(state.corpus())?; + let other = other_testcase.input(); let (f, l) = locate_diffs(input.mutator_bytes(), other.mutator_bytes()); @@ -1556,7 +1553,7 @@ where let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); // Input will already be loaded. 
- let other = other_testcase.input().as_ref().unwrap(); + let other = other_testcase.input(); input.splice( split_at.., diff --git a/libafl/src/mutators/numeric.rs b/libafl/src/mutators/numeric.rs index 6a4c92e93b4..189f57c703a 100644 --- a/libafl/src/mutators/numeric.rs +++ b/libafl/src/mutators/numeric.rs @@ -379,7 +379,7 @@ where } let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - *input = *other_testcase.input().as_ref().unwrap(); + *input = *other_testcase.input(); Ok(MutationResult::Mutated) } } @@ -420,7 +420,7 @@ where } let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - let other_input = other_testcase.input().as_ref().unwrap(); + let other_input = other_testcase.input(); let mapped_input = (self.input_mapper)(other_input).clone(); *input = mapped_input; Ok(MutationResult::Mutated) diff --git a/libafl/src/state/mod.rs b/libafl/src/state/mod.rs index 6bde85520be..95f3f330cff 100644 --- a/libafl/src/state/mod.rs +++ b/libafl/src/state/mod.rs @@ -18,6 +18,7 @@ use std::{ #[cfg(feature = "std")] use libafl_bolts::core_affinity::{CoreId, Cores}; use libafl_bolts::{ + current_time, rands::{Rand, StdRand}, serdeany::{NamedSerdeAnyMap, SerdeAnyMap}, }; @@ -159,6 +160,13 @@ pub trait HasStartTime { /// The starting time (mutable) fn start_time_mut(&mut self) -> &mut Duration; + + fn time_since_start(&self) -> Duration { + let start_time = self.start_time(); + let current_time = current_time(); + + current_time - *start_time + } } /// Trait for the last report time, the last time this node reported progress @@ -296,12 +304,14 @@ where { /// To get the testcase fn testcase(&self, id: CorpusId) -> Result>, Error> { - Ok(self.corpus().get(id)?.borrow()) + let tc = self.corpus().get(id)?; + Ok(tc.borrow()) } /// To get mutable testcase fn testcase_mut(&self, id: CorpusId) -> Result>, Error> { - Ok(self.corpus().get(id)?.borrow_mut()) + let tc = self.corpus().get(id)?; + Ok(tc.borrow_mut()) } } @@ -1129,7 +1139,7 @@ where rand, 
executions: 0, imported: 0, - start_time: libafl_bolts::current_time(), + start_time: current_time(), metadata: SerdeAnyMap::default(), named_metadata: NamedSerdeAnyMap::default(), corpus, @@ -1143,7 +1153,7 @@ where #[cfg(feature = "std")] dont_reenter: None, last_report_time: None, - last_found_time: libafl_bolts::current_time(), + last_found_time: current_time(), corpus_id: None, stage_stack: StageStack::default(), phantom: PhantomData,