diff --git a/Cargo.lock b/Cargo.lock index dbbc08f88f0..fe28bb07a30 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -649,9 +649,9 @@ dependencies = [ [[package]] name = "bitbybit" -version = "1.3.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb157f9753a7cddfcf4a4f5fed928fbf4ce1b7b64b6bcc121d7a9f95d698997b" +checksum = "ec187a89ab07e209270175faf9e07ceb2755d984954e58a2296e325ddece2762" dependencies = [ "arbitrary-int", "proc-macro2", diff --git a/crates/libafl/Cargo.toml b/crates/libafl/Cargo.toml index c39cd0ca1d2..b531b00a9e9 100644 --- a/crates/libafl/Cargo.toml +++ b/crates/libafl/Cargo.toml @@ -230,7 +230,7 @@ hashbrown = { workspace = true, features = [ "ahash", ], default-features = false } # A faster hashmap, nostd compatible num-traits = { workspace = true, default-features = false } -serde = { workspace = true, features = ["alloc"] } # serialization lib +serde = { workspace = true, features = ["alloc", "rc"] } # serialization lib postcard = { workspace = true } # no_std compatible serde serialization format bincode = { version = "2.0.1", optional = true, features = ["serde"] } bitbybit = { workspace = true } diff --git a/crates/libafl/src/common/mod.rs b/crates/libafl/src/common/mod.rs index 9e508061290..5f067e9b693 100644 --- a/crates/libafl/src/common/mod.rs +++ b/crates/libafl/src/common/mod.rs @@ -3,17 +3,42 @@ use alloc::boxed::Box; use core::any::type_name; -#[cfg(feature = "nautilus")] -pub mod nautilus; - use libafl_bolts::{ Error, serdeany::{NamedSerdeAnyMap, SerdeAny, SerdeAnyMap}, }; + +#[cfg(feature = "nautilus")] +pub mod nautilus; + /// Trait for elements offering metadata pub trait HasMetadata { /// A map, storing all metadata fn metadata_map(&self) -> &SerdeAnyMap; + + /// Check for a metadata + /// + /// # Note + /// For performance reasons, you likely want to use [`Self::metadata_or_insert_with`] instead + #[inline] + fn has_metadata(&self) -> bool + where + M: SerdeAny, + { + self.metadata_map().get::().is_some() + } + + /// To get metadata + #[inline] + fn metadata(&self) -> Result<&M, Error> + where + M: SerdeAny, + { + self.metadata_map() + .get::() + .ok_or_else(|| Error::key_not_found(format!("{} not found", type_name::()))) + } + /// A map, storing all metadata (mutable) fn metadata_map_mut(&mut self) -> &mut SerdeAnyMap; @@ -53,45 +78,46 @@ pub trait HasMetadata { self.metadata_map_mut().remove::() } - /// Check for a metadata - /// - /// # Note - /// For performance reasons, you likely want to use [`Self::metadata_or_insert_with`] instead + /// To get mutable metadata #[inline] - fn has_metadata(&self) -> bool + fn metadata_mut(&mut self) -> Result<&mut M, Error> where M: SerdeAny, { - self.metadata_map().get::().is_some() + self.metadata_map_mut() + .get_mut::() + .ok_or_else(|| Error::key_not_found(format!("{} not found", type_name::()))) } +} - /// To get metadata +/// Trait for elements offering named metadata +pub trait HasNamedMetadata { + /// A map, storing all metadata + fn named_metadata_map(&self) -> &NamedSerdeAnyMap; + + /// Check for a metadata + /// + /// # Note + /// You likely want to use [`Self::named_metadata_or_insert_with`] for performance reasons. #[inline] - fn metadata(&self) -> Result<&M, Error> + fn has_named_metadata(&self, name: &str) -> bool where M: SerdeAny, { - self.metadata_map() - .get::() - .ok_or_else(|| Error::key_not_found(format!("{} not found", type_name::()))) + self.named_metadata_map().contains::(name) } - /// To get mutable metadata + /// To get named metadata #[inline] - fn metadata_mut(&mut self) -> Result<&mut M, Error> + fn named_metadata(&self, name: &str) -> Result<&M, Error> where M: SerdeAny, { - self.metadata_map_mut() - .get_mut::() + self.named_metadata_map() + .get::(name) .ok_or_else(|| Error::key_not_found(format!("{} not found", type_name::()))) } -} -/// Trait for elements offering named metadata -pub trait HasNamedMetadata { - /// A map, storing all metadata - fn named_metadata_map(&self) -> &NamedSerdeAnyMap; /// A map, storing all metadata (mutable) fn named_metadata_map_mut(&mut self) -> &mut NamedSerdeAnyMap; @@ -136,29 +162,6 @@ pub trait HasNamedMetadata { .get_or_insert_with::(name, default) } - /// Check for a metadata - /// - /// # Note - /// You likely want to use [`Self::named_metadata_or_insert_with`] for performance reasons. - #[inline] - fn has_named_metadata(&self, name: &str) -> bool - where - M: SerdeAny, - { - self.named_metadata_map().contains::(name) - } - - /// To get named metadata - #[inline] - fn named_metadata(&self, name: &str) -> Result<&M, Error> - where - M: SerdeAny, - { - self.named_metadata_map() - .get::(name) - .ok_or_else(|| Error::key_not_found(format!("{} not found", type_name::()))) - } - /// To get mutable named metadata #[inline] fn named_metadata_mut(&mut self, name: &str) -> Result<&mut M, Error> diff --git a/crates/libafl/src/common/nautilus/grammartec/chunkstore.rs b/crates/libafl/src/common/nautilus/grammartec/chunkstore.rs index f33a5834f07..07f6eb42383 100644 --- a/crates/libafl/src/common/nautilus/grammartec/chunkstore.rs +++ b/crates/libafl/src/common/nautilus/grammartec/chunkstore.rs @@ -49,7 +49,7 @@ impl ChunkStore { } } - pub fn add_tree(&mut self, tree: Tree, ctx: &Context) { + pub fn add_tree(&mut self, tree: &Tree, ctx: &Context) { let mut buffer = vec![]; let id = self.trees.len(); let mut contains_new_chunk = false; @@ -76,8 +76,9 @@ impl ChunkStore { contains_new_chunk = true; } } + if contains_new_chunk { - self.trees.push(tree); + self.trees.push(tree.clone()); } } @@ -129,7 +130,7 @@ mod tests { let tree = ctx.generate_tree_from_rule(&mut rand, r1, random_size); fs::create_dir_all("/tmp/outputs/chunks").expect("40234068"); let mut cks = ChunkStore::new("/tmp/".to_string()); - cks.add_tree(tree, &ctx); + cks.add_tree(&tree, &ctx); // assert!(cks.seen_outputs.contains("a b c".as_bytes())); // assert!(cks.seen_outputs.contains("b c".as_bytes())); // assert!(cks.seen_outputs.contains("c".as_bytes())); @@ -139,7 +140,7 @@ mod tests { let random_size = ctx.get_random_len_for_ruleid(&r2); let tree = ctx.generate_tree_from_rule(&mut rand, r2, random_size); - cks.add_tree(tree, &ctx); + cks.add_tree(&tree, &ctx); // assert_eq!(cks.seen_outputs.len(), 3); // assert_eq!(cks.nts_to_chunks[&ctx.nt_id("B")].len(), 1); let (tree_id, node_id) = cks.nts_to_chunks[&ctx.nt_id("B")][0]; diff --git a/crates/libafl/src/common/nautilus/grammartec/mutator.rs b/crates/libafl/src/common/nautilus/grammartec/mutator.rs index d59071f0bd0..4634242faaf 100644 --- a/crates/libafl/src/common/nautilus/grammartec/mutator.rs +++ b/crates/libafl/src/common/nautilus/grammartec/mutator.rs @@ -538,7 +538,7 @@ mod tests { let _ = ctx.add_rule("A", b"a"); ctx.initialize(101); let tree = ctx.generate_tree_from_rule(&mut rand, r3, 100); - cks.add_tree(tree, &ctx); + cks.add_tree(&tree, &ctx); for _ in 0..100 { let tree = ctx.generate_tree_from_rule(&mut rand, r1, 100); let mut mutator = Mutator::new(&ctx); diff --git a/crates/libafl/src/corpus/cache.rs b/crates/libafl/src/corpus/cache.rs new file mode 100644 index 00000000000..05052c614a2 --- /dev/null +++ b/crates/libafl/src/corpus/cache.rs @@ -0,0 +1,440 @@ +//! A collection of cache policy implementations. +//! They are meant to be used by [`crate::corpus::CombinedCorpus`]. +//! +//! Caches are acting on two [`Store`]s: +//! - a **cache store** holding on the testcases with quick access. +//! - a **backing store** with more expensive access, used when the testcase cannot be found in the cache store. + +use alloc::{collections::VecDeque, rc::Rc, vec::Vec}; +use core::{cell::RefCell, marker::PhantomData}; + +use libafl_bolts::Error; +use serde::{Deserialize, Serialize}; + +use crate::{ + corpus::{ + CorpusId, Testcase, + maps::InMemoryCorpusMap, + store::{RemovableStore, Store}, + testcase::{IsTestcaseMetadataCell, TestcaseMetadata}, + }, + inputs::Input, +}; + +/// Describes how a change to metadata should be propagated to the fallback store +pub trait HasCachePolicy { + /// Mark a corpus id as dirty + fn dirty(&self, corpus_id: CorpusId); +} + +/// Propagate the changes when the cell gets dropped. +/// Expect more writes to the fallback store. +#[derive(Debug, Serialize, Deserialize)] +pub struct WritebackOnDropPolicy; +impl HasCachePolicy for WritebackOnDropPolicy { + fn dirty(&self, _corpus_id: CorpusId) { + // do nothing + } +} + +/// Propagate the changes when the cache is flushed explicitly. +/// +/// Less writes to the fallback stores will be issued, but the used is responsible for +/// flushing the cache regularly. +/// If the cache is not flushed, no data will be written to the fallback store, resulting in +/// data loss. +#[derive(Debug, Serialize, Deserialize)] +pub struct WritebackOnFlushPolicy { + dirty_entries: RefCell>, +} + +impl HasCachePolicy for WritebackOnFlushPolicy { + fn dirty(&self, corpus_id: CorpusId) { + self.dirty_entries.borrow_mut().push(corpus_id); + } +} + +/// A cache, managing a cache store and a fallback store. +pub trait Cache { + /// A [`TestcaseMetadata`] cell. + type TestcaseMetadataCell: IsTestcaseMetadataCell; + + /// Add a testcase to the cache + fn add_shared( + &mut self, + id: CorpusId, + input: Rc, + md: TestcaseMetadata, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result<(), Error>; + + /// Get a testcase from the cache + fn get_from( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &FS, + ) -> Result, Error>; + + /// Disable an entry + fn disable( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result<(), Error>; + + /// Replace a testcase in the cache + fn replace_metadata( + &mut self, + id: CorpusId, + md: TestcaseMetadata, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result; + + /// Flush the cache, committing the cached testcase to the fallback store. + fn flush( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result<(), Error>; + + /// Mark a corpus entry as written explicitly, for subsequent flushes. + /// + /// Thus, a cache [`Self::flush`] should propagate to entries marked as [`Self::written`]. + fn written(&self, id: CorpusId); +} + +/// A composed testcase metadata cell, linking the cached cell with the fallback cell. +#[derive(Debug, Serialize, Deserialize)] +pub struct CacheTestcaseMetadataCell +where + CC: IsTestcaseMetadataCell, + CP: HasCachePolicy, + FC: IsTestcaseMetadataCell, +{ + write_access: RefCell, + cache_policy: Rc, + cache_cell: CC, + fallback_cell: Rc, +} + +impl Clone for CacheTestcaseMetadataCell +where + CC: IsTestcaseMetadataCell + Clone, + CP: HasCachePolicy, + FC: IsTestcaseMetadataCell, +{ + fn clone(&self) -> Self { + Self { + write_access: self.write_access.clone(), + cache_policy: self.cache_policy.clone(), + cache_cell: self.cache_cell.clone(), + fallback_cell: self.fallback_cell.clone(), + } + } +} + +/// The standard cell for testcase metadata in an identity cache. +pub type StdIdentityCacheTestcaseMetadataCell = Rc< + CacheTestcaseMetadataCell< + >::TestcaseMetadataCell, + WritebackOnFlushPolicy, + >::TestcaseMetadataCell, + >, +>; + +/// An identity cache, storing everything both in the cache and the backing store. +/// +/// It only supports [`WritebackOnFlushPolicy`] since all the testcases are stored in memory on load +/// forever. +#[derive(Debug, Serialize, Deserialize)] +pub struct IdentityCache { + cell_map: RefCell, + cache_policy: Rc, +} + +/// A `First In / First Out` cache policy. +#[derive(Debug, Serialize, Deserialize)] +pub struct FifoCache { + cached_ids: VecDeque, + cache_max_len: usize, + phantom: PhantomData<(I, CS, FS)>, +} + +impl CacheTestcaseMetadataCell +where + CC: IsTestcaseMetadataCell, + CP: HasCachePolicy, + FC: IsTestcaseMetadataCell, +{ + /// Create a new [`CacheTestcaseMetadataCell`] + pub fn new(cache_policy: Rc, cache_cell: CC, fallback_cell: FC) -> Self { + Self { + cache_policy, + write_access: RefCell::new(false), + cache_cell, + fallback_cell: Rc::new(fallback_cell), + } + } +} + +impl IsTestcaseMetadataCell for CacheTestcaseMetadataCell +where + CC: IsTestcaseMetadataCell, + CP: HasCachePolicy, + FC: IsTestcaseMetadataCell, +{ + type TestcaseMetadataRef<'a> + = CC::TestcaseMetadataRef<'a> + where + Self: 'a; + type TestcaseMetadataRefMut<'a> + = CC::TestcaseMetadataRefMut<'a> + where + Self: 'a; + + fn testcase_metadata(&self) -> Self::TestcaseMetadataRef<'_> { + self.cache_cell.testcase_metadata() + } + + fn testcase_metadata_mut(&self) -> Self::TestcaseMetadataRefMut<'_> { + *self.write_access.borrow_mut() = true; + self.cache_cell.testcase_metadata_mut() + } + + fn into_testcase_metadata(self) -> TestcaseMetadata { + self.cache_cell.testcase_metadata().clone() + } + + fn replace_testcase_metadata(&self, testcase_metadata: TestcaseMetadata) -> TestcaseMetadata { + self.fallback_cell + .replace_testcase_metadata(testcase_metadata.clone()); + self.cache_cell.replace_testcase_metadata(testcase_metadata) + } + + fn flush(&self) -> Result<(), Error> { + let write_access = self.write_access.borrow_mut(); + + if *write_access { + *self.fallback_cell.testcase_metadata_mut() = + self.cache_cell.testcase_metadata().clone(); + self.fallback_cell.flush()?; + *self.write_access.borrow_mut() = false; + } + + Ok(()) + } +} + +impl Drop for CacheTestcaseMetadataCell +where + CC: IsTestcaseMetadataCell, + CP: HasCachePolicy, + FC: IsTestcaseMetadataCell, +{ + fn drop(&mut self) { + self.flush().unwrap(); + } +} + +impl Cache for IdentityCache +where + CS: RemovableStore, + FS: Store, + I: Input, + M: InMemoryCorpusMap>>, + >::TestcaseMetadataCell: Clone, +{ + type TestcaseMetadataCell = StdIdentityCacheTestcaseMetadataCell; + + fn add_shared( + &mut self, + id: CorpusId, + input: Rc, + md: TestcaseMetadata, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result<(), Error> { + cache_store.add_shared::(id, input.clone(), md.clone())?; + fallback_store.add_shared::(id, input, md) + } + + fn get_from( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &FS, + ) -> Result, Error> { + if let Some(tc) = self.cell_map.borrow().get(id) { + Ok(tc.clone()) + } else { + let (input, cc) = cache_store.get_from::(id)?.into_inner(); + let (_, fc) = fallback_store.get_from::(id)?.into_inner(); + + let cache_cell = Rc::new(CacheTestcaseMetadataCell::new( + self.cache_policy.clone(), + cc, + fc, + )); + let testcase = Testcase::new(input, cache_cell.clone()); + + self.cell_map.borrow_mut().add(id, testcase.clone()); + + Ok(testcase) + } + } + + fn disable( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result<(), Error> { + cache_store.disable(id)?; + fallback_store.disable(id) + } + + fn replace_metadata( + &mut self, + _id: CorpusId, + _md: TestcaseMetadata, + _cache_store: &mut CS, + _fallback_store: &mut FS, + ) -> Result { + todo!() + } + + fn flush( + &mut self, + _id: CorpusId, + _cache_store: &mut CS, + _fallback_store: &mut FS, + ) -> Result<(), Error> { + todo!() + } + + fn written(&self, id: CorpusId) { + self.cache_policy.dirty(id); + } +} + +impl FifoCache +where + CS: RemovableStore, + FS: Store, + I: Clone, +{ + #[expect(clippy::too_many_arguments)] + fn get_inner( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &FS, + cache_add_fn: CAF, + cache_get_fn: CGF, + cache_rm_fn: CRF, + fallback_get_fn: FGF, + ) -> Result, Error> + where + CAF: FnOnce(&mut CS, CorpusId, Testcase>) -> Result<(), Error>, + CGF: FnOnce(&CS, CorpusId) -> Result, Error>, + CRF: FnOnce(&mut CS, CorpusId) -> Result, Error>, + FGF: FnOnce(&FS, CorpusId) -> Result, Error>, + { + if self.cached_ids.contains(&id) { + cache_get_fn(cache_store, id) + } else { + if self.cached_ids.len() == self.cache_max_len { + let to_evict = self.cached_ids.pop_back().unwrap(); + cache_rm_fn(cache_store, to_evict)?; + } + + debug_assert!(self.cached_ids.len() < self.cache_max_len); + + // tescase is not cached, fetch it from fallback + let fb_tc = fallback_get_fn(fallback_store, id)?.cloned(); + cache_add_fn(cache_store, id, fb_tc)?; + + self.cached_ids.push_front(id); + + cache_get_fn(cache_store, id) + } + } +} + +impl Cache for FifoCache +where + CS: RemovableStore, + FS: Store, + I: Clone, +{ + type TestcaseMetadataCell = CS::TestcaseMetadataCell; + + fn add_shared( + &mut self, + id: CorpusId, + input: Rc, + metadata: TestcaseMetadata, + _cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result<(), Error> { + fallback_store.add_shared::(id, input, metadata) + } + + fn get_from( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &FS, + ) -> Result, Error> { + self.get_inner( + id, + cache_store, + fallback_store, + |cache_store, corpus_id, testcase| { + let (input, md) = testcase.into_inner(); + cache_store.add_shared::(corpus_id, input, md.into_testcase_metadata()) + }, + Store::get, + RemovableStore::remove, + Store::get_from::, + ) + } + + fn disable( + &mut self, + id: CorpusId, + cache_store: &mut CS, + fallback_store: &mut FS, + ) -> Result<(), Error> { + cache_store.disable(id)?; + fallback_store.disable(id) + } + + fn replace_metadata( + &mut self, + _id: CorpusId, + _md: TestcaseMetadata, + _cache_store: &mut CS, + _fallback_store: &mut FS, + ) -> Result { + todo!() + } + + fn flush( + &mut self, + _id: CorpusId, + _cache_store: &mut CS, + _fallback_store: &mut FS, + ) -> Result<(), Error> { + todo!() + } + + fn written(&self, _id: CorpusId) { + todo!() + } +} diff --git a/crates/libafl/src/corpus/cached.rs b/crates/libafl/src/corpus/cached.rs deleted file mode 100644 index 3995181491d..00000000000 --- a/crates/libafl/src/corpus/cached.rs +++ /dev/null @@ -1,299 +0,0 @@ -//! The [`CachedOnDiskCorpus`] stores [`Testcase`]s to disk, keeping a subset of them in memory/cache, evicting in a FIFO manner. - -use alloc::{collections::vec_deque::VecDeque, string::String}; -use core::cell::{Ref, RefCell, RefMut}; -use std::path::Path; - -use serde::{Deserialize, Serialize}; - -use crate::{ - Error, - corpus::{ - Corpus, CorpusId, EnableDisableCorpus, HasTestcase, Testcase, - inmemory_ondisk::InMemoryOnDiskCorpus, ondisk::OnDiskMetadataFormat, - }, - inputs::Input, -}; - -/// A corpus that keeps a maximum number of [`Testcase`]s in memory -/// and load them from disk, when they are being used. -/// The eviction policy is FIFO. -#[derive(Default, Serialize, Deserialize, Clone, Debug)] -pub struct CachedOnDiskCorpus { - inner: InMemoryOnDiskCorpus, - cached_indexes: RefCell>, - cache_max_len: usize, -} - -impl CachedOnDiskCorpus -where - I: Input, -{ - fn cache_testcase<'a>( - &'a self, - testcase: &'a RefCell>, - id: CorpusId, - ) -> Result<(), Error> { - if testcase.borrow().input().is_none() { - self.load_input_into(&mut testcase.borrow_mut())?; - let mut borrowed_num = 0; - while self.cached_indexes.borrow().len() >= self.cache_max_len { - let removed = self.cached_indexes.borrow_mut().pop_front().unwrap(); - - if let Ok(mut borrowed) = self.inner.get_from_all(removed)?.try_borrow_mut() { - *borrowed.input_mut() = None; - } else { - self.cached_indexes.borrow_mut().push_back(removed); - borrowed_num += 1; - if self.cache_max_len == borrowed_num { - break; - } - } - } - self.cached_indexes.borrow_mut().push_back(id); - } - Ok(()) - } -} - -impl Corpus for CachedOnDiskCorpus -where - I: Input, -{ - /// Returns the number of all enabled entries - #[inline] - fn count(&self) -> usize { - self.inner.count() - } - - /// Returns the number of all disabled entries - fn count_disabled(&self) -> usize { - self.inner.count_disabled() - } - - /// Returns the number of elements including disabled entries - #[inline] - fn count_all(&self) -> usize { - self.inner.count_all() - } - - /// Add an enabled testcase to the corpus and return its index - #[inline] - fn add(&mut self, testcase: Testcase) -> Result { - self.inner.add(testcase) - } - - /// Add a disabled testcase to the corpus and return its index - #[inline] - fn add_disabled(&mut self, testcase: Testcase) -> Result { - self.inner.add_disabled(testcase) - } - - /// Replaces the testcase at the given idx - #[inline] - fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Result, Error> { - // TODO finish - self.inner.replace(id, testcase) - } - - /// Removes an entry from the corpus, returning it if it was present; considers both enabled and disabled testcases. - fn remove(&mut self, id: CorpusId) -> Result, Error> { - let testcase = self.inner.remove(id)?; - self.cached_indexes.borrow_mut().retain(|e| *e != id); - Ok(testcase) - } - - /// Get by id; considers only enabled testcases - #[inline] - fn get(&self, id: CorpusId) -> Result<&RefCell>, Error> { - let testcase = { self.inner.get(id)? }; - self.cache_testcase(testcase, id)?; - Ok(testcase) - } - /// Get by id; considers both enabled and disabled testcases - #[inline] - fn get_from_all(&self, id: CorpusId) -> Result<&RefCell>, Error> { - let testcase = { self.inner.get_from_all(id)? }; - self.cache_testcase(testcase, id)?; - Ok(testcase) - } - - /// Current testcase scheduled - #[inline] - fn current(&self) -> &Option { - self.inner.current() - } - - /// Current testcase scheduled (mutable) - #[inline] - fn current_mut(&mut self) -> &mut Option { - self.inner.current_mut() - } - - #[inline] - fn next(&self, id: CorpusId) -> Option { - self.inner.next(id) - } - - /// Peek the next free corpus id - #[inline] - fn peek_free_id(&self) -> CorpusId { - self.inner.peek_free_id() - } - - #[inline] - fn prev(&self, id: CorpusId) -> Option { - self.inner.prev(id) - } - - #[inline] - fn first(&self) -> Option { - self.inner.first() - } - - #[inline] - fn last(&self) -> Option { - self.inner.last() - } - - /// Get the nth corpus id; considers only enabled testcases - #[inline] - fn nth(&self, nth: usize) -> CorpusId { - self.inner.nth(nth) - } - /// Get the nth corpus id; considers both enabled and disabled testcases - #[inline] - fn nth_from_all(&self, nth: usize) -> CorpusId { - self.inner.nth_from_all(nth) - } - - #[inline] - fn load_input_into(&self, testcase: &mut Testcase) -> Result<(), Error> { - self.inner.load_input_into(testcase) - } - - #[inline] - fn store_input_from(&self, testcase: &Testcase) -> Result<(), Error> { - self.inner.store_input_from(testcase) - } -} - -impl HasTestcase for CachedOnDiskCorpus -where - I: Input, -{ - fn testcase(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow()) - } - - fn testcase_mut(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow_mut()) - } -} - -impl EnableDisableCorpus for CachedOnDiskCorpus -where - I: Input, -{ - #[inline] - fn disable(&mut self, id: CorpusId) -> Result<(), Error> { - self.cached_indexes.borrow_mut().retain(|e| *e != id); - self.inner.disable(id) - } - - #[inline] - fn enable(&mut self, id: CorpusId) -> Result<(), Error> { - self.cached_indexes.borrow_mut().retain(|e| *e != id); - self.inner.enable(id) - } -} - -impl CachedOnDiskCorpus { - /// Creates the [`CachedOnDiskCorpus`]. - /// - /// This corpus stores (and reads) all testcases to/from disk - /// - /// By default, it stores metadata for each [`Testcase`] as prettified json. - /// Metadata will be written to a file named `..metadata` - /// the metadata may include objective reason, specific information for a fuzz job, and more. - /// - /// If you don't want metadata, use [`CachedOnDiskCorpus::no_meta`]. - /// to pick a different metadata format, use [`CachedOnDiskCorpus::with_meta_format`]. - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn new

(dir_path: P, cache_max_len: usize) -> Result - where - P: AsRef, - { - Self::_new(InMemoryOnDiskCorpus::new(dir_path)?, cache_max_len) - } - - /// Creates an [`CachedOnDiskCorpus`] that does not store [`Testcase`] metadata to disk. - pub fn no_meta

(dir_path: P, cache_max_len: usize) -> Result - where - P: AsRef, - { - Self::_new(InMemoryOnDiskCorpus::no_meta(dir_path)?, cache_max_len) - } - - /// Creates the [`CachedOnDiskCorpus`] specifying the format in which `Metadata` will be saved to disk. - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn with_meta_format

( - dir_path: P, - cache_max_len: usize, - meta_format: Option, - ) -> Result - where - P: AsRef, - { - Self::_new( - InMemoryOnDiskCorpus::with_meta_format(dir_path, meta_format)?, - cache_max_len, - ) - } - - /// Creates the [`CachedOnDiskCorpus`] specifying the metadata format and the prefix to prepend - /// to each testcase. - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn with_meta_format_and_prefix

( - dir_path: P, - cache_max_len: usize, - meta_format: Option, - prefix: Option, - locking: bool, - ) -> Result - where - P: AsRef, - { - Self::_new( - InMemoryOnDiskCorpus::with_meta_format_and_prefix( - dir_path, - meta_format, - prefix, - locking, - )?, - cache_max_len, - ) - } - - /// Internal constructor `fn` - fn _new(on_disk_corpus: InMemoryOnDiskCorpus, cache_max_len: usize) -> Result { - if cache_max_len == 0 { - return Err(Error::illegal_argument( - "The max cache len in CachedOnDiskCorpus cannot be 0", - )); - } - Ok(Self { - inner: on_disk_corpus, - cached_indexes: RefCell::new(VecDeque::new()), - cache_max_len, - }) - } - - /// Fetch the inner corpus - pub fn inner(&self) -> &InMemoryOnDiskCorpus { - &self.inner - } -} diff --git a/crates/libafl/src/corpus/collection.rs b/crates/libafl/src/corpus/collection.rs new file mode 100644 index 00000000000..e3d3e041187 --- /dev/null +++ b/crates/libafl/src/corpus/collection.rs @@ -0,0 +1,620 @@ +//! A collection of various [`Corpus`]. + +use alloc::{rc::Rc, string::String}; +use core::cell::RefCell; +use std::path::PathBuf; + +use libafl_bolts::Error; +use serde::{Deserialize, Serialize}; + +use crate::{ + corpus::{ + CombinedCorpus, Corpus, CorpusId, FifoCache, IdentityCache, InMemoryStore, OnDiskStore, + SingleCorpus, Testcase, TestcaseMetadata, + cache::StdIdentityCacheTestcaseMetadataCell, + maps::{self, InMemoryCorpusMap, InMemoryTestcaseMap}, + store::{OnDiskMetadataFormat, Store}, + }, + inputs::Input, +}; + +#[cfg(not(feature = "corpus_btreemap"))] +type StdInMemoryMap = maps::HashCorpusMap; +#[cfg(feature = "corpus_btreemap")] +type StdInMemoryMap = maps::BtreeCorpusMap; + +type StdInMemoryTestcaseMetadataCell = Rc>; +type StdInMemoryTestcase = Testcase; +type InnerStdInMemoryCorpusMap = StdInMemoryMap>; +type InnerStdInMemoryStore = + InMemoryStore, StdInMemoryTestcaseMetadataCell>; +type InnerInMemoryCorpus = SingleCorpus>; + +//type StdOnDiskTestcaseMetadataCell = Rc>; +//type StdOnDiskTestcase = Testcase>; +//type InnerStdOnDiskCorpusMap = StdInMemoryMap>; +type InnerStdOnDiskStore = OnDiskStore>; +#[cfg(feature = "std")] +type InnerOnDiskCorpus = SingleCorpus>; + +type InnerInMemoryOnDiskCorpus = CombinedCorpus< + IdentityCache< + StdInMemoryMap< + Testcase< + I, + StdIdentityCacheTestcaseMetadataCell< + I, + InnerStdInMemoryStore, + InnerStdOnDiskStore, + >, + >, + >, + >, + InnerStdInMemoryStore, + InnerStdOnDiskStore, + I, +>; + +type InnerCachedOnDiskCorpus = CombinedCorpus< + FifoCache, InnerStdOnDiskStore, I>, + InnerStdInMemoryStore, + InnerStdOnDiskStore, + I, +>; + +/// The standard fully in-memory corpus map. +#[repr(transparent)] +#[derive(Debug, Serialize)] +pub struct StdInMemoryCorpusMap(InnerStdInMemoryCorpusMap); + +/// The standard fully in-memory store. +#[repr(transparent)] +#[derive(Debug, Serialize)] +pub struct StdInMemoryStore(InnerStdInMemoryStore); + +/// The standard fully on-disk store. +#[repr(transparent)] +#[derive(Debug, Serialize)] +pub struct StdOnDiskStore(InnerStdOnDiskStore); + +/// The standard in-memory corpus. +#[repr(transparent)] +#[derive(Debug, Serialize, Deserialize)] +pub struct InMemoryCorpus(InnerInMemoryCorpus); + +/// The standard fully on-disk corpus. +#[cfg(feature = "std")] +#[repr(transparent)] +#[derive(Debug, Serialize, Deserialize)] +pub struct OnDiskCorpus(InnerOnDiskCorpus); + +/// The standard corpus for storing on disk and in-memory with a cache. +/// Useful for very large corpuses. +#[repr(transparent)] +#[derive(Debug, Serialize)] +pub struct CachedOnDiskCorpus(InnerCachedOnDiskCorpus); + +/// The standard corpus for storing on disk and in-memory. +#[repr(transparent)] +#[derive(Debug, Serialize)] +pub struct InMemoryOnDiskCorpus(InnerInMemoryOnDiskCorpus); + +impl InMemoryCorpusMap> for StdInMemoryCorpusMap +where + I: Input, +{ + fn count(&self) -> usize { + self.0.count() + } + + fn add(&mut self, id: CorpusId, testcase: Testcase>>) { + self.0.add(id, testcase); + } + + fn get(&self, id: CorpusId) -> Option<&Testcase>>> { + self.0.get(id) + } + + fn get_mut(&mut self, id: CorpusId) -> Option<&mut Testcase>>> { + self.0.get_mut(id) + } + + fn remove(&mut self, id: CorpusId) -> Option>>> { + self.0.remove(id) + } + + fn prev(&self, id: CorpusId) -> Option { + self.0.prev(id) + } + + fn next(&self, id: CorpusId) -> Option { + self.0.next(id) + } + + fn first(&self) -> Option { + self.0.first() + } + + fn last(&self) -> Option { + self.0.last() + } + + fn nth(&self, nth: usize) -> CorpusId { + self.0.nth(nth) + } +} + +impl InMemoryTestcaseMap> for StdInMemoryCorpusMap +where + I: Input, +{ + fn replace_metadata( + &mut self, + id: CorpusId, + testcase_metadata: TestcaseMetadata, + ) -> Option { + self.0.replace_metadata(id, testcase_metadata) + } +} + +impl Store for StdInMemoryStore +where + I: Input, +{ + type TestcaseMetadataCell = as Store>::TestcaseMetadataCell; + + fn count(&self) -> usize { + self.0.count() + } + + fn count_disabled(&self) -> usize { + self.0.count_disabled() + } + + fn add_shared( + &mut self, + id: CorpusId, + input: Rc, + md: TestcaseMetadata, + ) -> Result<(), Error> { + self.0.add_shared::(id, input, md) + } + + fn get_from( + &self, + id: CorpusId, + ) -> Result, Error> { + self.0.get_from::(id) + } + + fn disable(&mut self, id: CorpusId) -> Result<(), Error> { + self.0.disable(id) + } + + fn replace_metadata( + &mut self, + id: CorpusId, + metadata: TestcaseMetadata, + ) -> Result { + self.0.replace_metadata(id, metadata) + } + + fn prev(&self, id: CorpusId) -> Option { + self.0.prev(id) + } + + fn next(&self, id: CorpusId) -> Option { + self.0.next(id) + } + + fn first(&self) -> Option { + self.0.first() + } + + fn last(&self) -> Option { + self.0.last() + } + + fn nth(&self, nth: usize) -> CorpusId { + self.0.nth(nth) + } + + fn nth_from_all(&self, nth: usize) -> CorpusId { + self.0.nth_from_all(nth) + } +} + +impl Store for StdOnDiskStore +where + I: Input, +{ + type TestcaseMetadataCell = as Store>::TestcaseMetadataCell; + + fn count(&self) -> usize { + self.0.count() + } + + fn count_disabled(&self) -> usize { + self.0.count_disabled() + } + + fn add_shared( + &mut self, + id: CorpusId, + input: Rc, + md: TestcaseMetadata, + ) -> Result<(), Error> { + self.0.add_shared::(id, input, md) + } + + fn get_from( + &self, + id: CorpusId, + ) -> Result, Error> { + self.0.get_from::(id) + } + + fn disable(&mut self, id: CorpusId) -> Result<(), Error> { + self.0.disable(id) + } + + fn replace_metadata( + &mut self, + id: CorpusId, + metadata: TestcaseMetadata, + ) -> Result { + self.0.replace_metadata(id, metadata) + } + + fn prev(&self, id: CorpusId) -> Option { + self.0.prev(id) + } + + fn next(&self, id: CorpusId) -> Option { + self.0.next(id) + } + + fn first(&self) -> Option { + self.0.first() + } + + fn last(&self) -> Option { + self.0.last() + } + + fn nth(&self, nth: usize) -> CorpusId { + self.0.nth(nth) + } + + fn nth_from_all(&self, nth: usize) -> CorpusId { + self.0.nth_from_all(nth) + } +} + +impl Default for InMemoryCorpus { + fn default() -> Self { + InMemoryCorpus(InnerInMemoryCorpus::default()) + } +} + +impl InMemoryCorpus { + /// Create a new [`InMemoryCorpus`]. + #[must_use] + pub fn new() -> Self { + Self::default() + } +} + +impl Corpus for InMemoryCorpus +where + I: Input, +{ + type TestcaseMetadataCell = as Corpus>::TestcaseMetadataCell; + + fn count(&self) -> usize { + self.0.count() + } + + fn count_disabled(&self) -> usize { + self.0.count_disabled() + } + + fn count_all(&self) -> usize { + self.0.count_all() + } + + fn add_shared( + &mut self, + input: Rc, + md: TestcaseMetadata, + ) -> Result { + self.0.add_shared::(input, md) + } + + fn get_from( + &self, + id: CorpusId, + ) -> Result, Error> { + self.0.get_from::(id) + } + + fn disable(&mut self, id: CorpusId) -> Result<(), Error> { + self.0.disable(id) + } + + fn replace_metadata( + &mut self, + id: CorpusId, + md: TestcaseMetadata, + ) -> Result { + self.0.replace_metadata(id, md) + } + + fn current(&self) -> &Option { + self.0.current() + } + + fn current_mut(&mut self) -> &mut Option { + self.0.current_mut() + } + + fn next(&self, id: CorpusId) -> Option { + self.0.next(id) + } + + fn prev(&self, id: CorpusId) -> Option { + self.0.prev(id) + } + + fn first(&self) -> Option { + self.0.first() + } + + fn last(&self) -> Option { + self.0.last() + } + + fn nth_from_all(&self, nth: usize) -> CorpusId { + self.0.nth_from_all(nth) + } +} + +#[cfg(feature = "std")] +impl OnDiskCorpus +where + I: Input, +{ + /// Create a new [`OnDiskCorpus`] + pub fn new(root: PathBuf) -> Result { + Ok(OnDiskCorpus(InnerOnDiskCorpus::new( + InnerStdOnDiskStore::new(root)?, + ))) + } + + /// Create a new [`OnDiskCorpus`] with a specific [`OnDiskMetadataFormat`] + pub fn new_with_format(root: PathBuf, md_format: OnDiskMetadataFormat) -> Result { + Ok(OnDiskCorpus(InnerOnDiskCorpus::new( + InnerStdOnDiskStore::new_with_format(root, md_format)?, + ))) + } +} + +#[cfg(feature = "std")] +impl Corpus for OnDiskCorpus +where + I: Input, +{ + type TestcaseMetadataCell = as Corpus>::TestcaseMetadataCell; + + fn count(&self) -> usize { + self.0.count() + } + + fn count_disabled(&self) -> usize { + self.0.count_disabled() + } + + fn count_all(&self) -> usize { + self.0.count_all() + } + + fn add_shared( + &mut self, + input: Rc, + md: TestcaseMetadata, + ) -> Result { + self.0.add_shared::(input, md) + } + + fn get_from( + &self, + id: CorpusId, + ) -> Result, Error> { + self.0.get_from::(id) + } + + fn disable(&mut self, id: CorpusId) -> Result<(), Error> { + self.0.disable(id) + } + + fn replace_metadata( + &mut self, + id: CorpusId, + md: TestcaseMetadata, + ) -> Result { + self.0.replace_metadata(id, md) + } + + fn current(&self) -> &Option { + self.0.current() + } + + fn current_mut(&mut self) -> &mut Option { + self.0.current_mut() + } + + fn next(&self, id: CorpusId) -> Option { + self.0.next(id) + } + + fn prev(&self, id: CorpusId) -> Option { + self.0.prev(id) + } + + fn first(&self) -> Option { + self.0.first() + } + + fn last(&self) -> Option { + self.0.last() + } + + fn nth_from_all(&self, nth: usize) -> CorpusId { + self.0.nth_from_all(nth) + } +} + +impl Corpus for InMemoryOnDiskCorpus +where + I: Input, +{ + type TestcaseMetadataCell = as Corpus>::TestcaseMetadataCell; + + fn count(&self) -> usize { + self.0.count() + } + + fn count_disabled(&self) -> usize { + self.0.count_disabled() + } + + fn count_all(&self) -> usize { + self.0.count_all() + } + + fn add_shared( + &mut self, + input: Rc, + md: TestcaseMetadata, + ) -> Result { + self.0.add_shared::(input, md) + } + + fn get_from( + &self, + id: CorpusId, + ) -> Result, Error> { + self.0.get_from::(id) + } + + fn disable(&mut self, id: CorpusId) -> Result<(), Error> { + self.0.disable(id) + } + + fn replace_metadata( + &mut self, + id: CorpusId, + md: TestcaseMetadata, + ) -> Result { + self.0.replace_metadata(id, md) + } + + fn current(&self) -> &Option { + self.0.current() + } + + fn current_mut(&mut self) -> &mut Option { + self.0.current_mut() + } + + fn next(&self, id: CorpusId) -> Option { + self.0.next(id) + } + + fn prev(&self, id: CorpusId) -> Option { + self.0.prev(id) + } + + fn first(&self) -> Option { + self.0.first() + } + + fn last(&self) -> Option { + self.0.last() + } + + fn nth_from_all(&self, nth: usize) -> CorpusId { + self.0.nth_from_all(nth) + } +} + +impl Corpus for CachedOnDiskCorpus { + type TestcaseMetadataCell = as Corpus>::TestcaseMetadataCell; + + fn count(&self) -> usize { + self.0.count() + } + + fn count_disabled(&self) -> usize { + self.0.count_disabled() + } + + fn count_all(&self) -> usize { + self.0.count_all() + } + + fn add_shared( + &mut self, + input: Rc, + md: TestcaseMetadata, + ) -> Result { + self.0.add_shared::(input, md) + } + + fn get_from( + &self, + id: CorpusId, + ) -> Result, Error> { + self.0.get_from::(id) + } + + fn disable(&mut self, id: CorpusId) -> Result<(), Error> { + self.0.disable(id) + } + + fn replace_metadata( + &mut self, + id: CorpusId, + md: TestcaseMetadata, + ) -> Result { + self.0.replace_metadata(id, md) + } + + fn current(&self) -> &Option { + self.0.current() + } + + fn current_mut(&mut self) -> &mut Option { + self.0.current_mut() + } + + fn next(&self, id: CorpusId) -> Option { + self.0.next(id) + } + + fn prev(&self, id: CorpusId) -> Option { + self.0.prev(id) + } + + fn first(&self) -> Option { + self.0.first() + } + + fn last(&self) -> Option { + self.0.last() + } + + fn nth_from_all(&self, nth: usize) -> CorpusId { + self.0.nth_from_all(nth) + } +} diff --git a/crates/libafl/src/corpus/combined.rs b/crates/libafl/src/corpus/combined.rs new file mode 100644 index 00000000000..39895ecb694 --- /dev/null +++ b/crates/libafl/src/corpus/combined.rs @@ -0,0 +1,132 @@ +//! A cached corpus, using a given [`Cache`] policy and two [`Store`]s. + +use alloc::{rc::Rc, vec::Vec}; +use core::{cell::RefCell, marker::PhantomData}; + +use libafl_bolts::Error; +use serde::{Deserialize, Serialize}; + +use super::{Cache, Corpus, CorpusCounter, CorpusId, Testcase, store::Store}; +use crate::corpus::testcase::TestcaseMetadata; + +/// A [`CombinedCorpus`] tries first to use the main store according to some policy. +/// If it fails, it falls back to the secondary store. +#[derive(Default, Serialize, Deserialize, Clone, Debug)] +pub struct CombinedCorpus { + /// The cache store + cache_store: RefCell, + /// The fallback store + fallback_store: FS, + /// The policy taking decisions + cache: Rc>, + /// The corpus ID counter + counter: CorpusCounter, + /// The keys in order (use `Vec::binary_search`) + keys: Vec, + /// The current ID + current: Option, + phantom: PhantomData, +} + +impl Corpus for CombinedCorpus +where + C: Cache, + CS: Store, + FS: Store, + I: Clone, +{ + type TestcaseMetadataCell = C::TestcaseMetadataCell; + + fn count(&self) -> usize { + self.fallback_store.count() + } + + fn count_disabled(&self) -> usize { + self.fallback_store.count_disabled() + } + + fn count_all(&self) -> usize { + self.fallback_store.count_all() + } + + fn add_shared( + &mut self, + input: Rc, + md: TestcaseMetadata, + ) -> Result { + let new_id = self.counter.new_id(); + + self.cache.borrow_mut().add_shared::( + new_id, + input, + md, + &mut *self.cache_store.borrow_mut(), + &mut self.fallback_store, + )?; + + Ok(new_id) + } + + fn get_from( + &self, + id: CorpusId, + ) -> Result, Error> { + let mut cache = self.cache.borrow_mut(); + let cache_store = &mut *self.cache_store.borrow_mut(); + + cache.get_from::(id, cache_store, &self.fallback_store) + } + + fn disable(&mut self, id: CorpusId) -> Result<(), Error> { + self.cache.borrow_mut().disable( + id, + &mut *self.cache_store.borrow_mut(), + &mut self.fallback_store, + ) + } + + fn replace_metadata( + &mut self, + id: CorpusId, + md: TestcaseMetadata, + ) -> Result { + self.cache.borrow_mut().replace_metadata( + id, + md, + &mut *self.cache_store.borrow_mut(), + &mut self.fallback_store, + ) + } + + fn current(&self) -> &Option { + &self.current + } + + fn current_mut(&mut self) -> &mut Option { + &mut self.current + } + + fn next(&self, id: CorpusId) -> Option { + self.fallback_store.next(id) + } + + fn prev(&self, id: CorpusId) -> Option { + self.fallback_store.prev(id) + } + + fn first(&self) -> Option { + self.fallback_store.first() + } + + fn last(&self) -> Option { + self.fallback_store.last() + } + + fn nth(&self, nth: usize) -> CorpusId { + self.fallback_store.nth(nth) + } + + fn nth_from_all(&self, nth: usize) -> CorpusId { + self.fallback_store.nth_from_all(nth) + } +} diff --git a/crates/libafl/src/corpus/dynamic.rs b/crates/libafl/src/corpus/dynamic.rs index 5b2fb3e950b..bf732608be6 100644 --- a/crates/libafl/src/corpus/dynamic.rs +++ b/crates/libafl/src/corpus/dynamic.rs @@ -1,11 +1,12 @@ //! Dynamic corpus that allows users to switch corpus types at runtime. -use core::{cell::RefCell, marker::PhantomData}; +use alloc::rc::Rc; +use core::marker::PhantomData; use libafl_bolts::Error; use serde::{Deserialize, Serialize}; -use crate::corpus::{Corpus, CorpusId, Testcase}; +use crate::corpus::{Corpus, CorpusId, Testcase, TestcaseMetadata}; /// An dynamic corpus type accepting two types of corpus at runtime. This helps rustc better /// reason about the bounds compared to dyn objects. @@ -38,34 +39,17 @@ where C1: Corpus, C2: Corpus, { - fn peek_free_id(&self) -> CorpusId { + fn add(&mut self, input: Rc, md: TestcaseMetadata) -> Result { match self { - Self::Corpus1(c1, _) => c1.peek_free_id(), - Self::Corpus2(c2, _) => c2.peek_free_id(), + Self::Corpus1(c1, _) => c1.add(input, md), + Self::Corpus2(c2, _) => c2.add(input, md), } } - fn add(&mut self, testcase: Testcase) -> Result { + fn add_disabled(&mut self, input: Rc, md: TestcaseMetadata) -> Result { match self { - Self::Corpus1(c1, _) => c1.add(testcase), - Self::Corpus2(c2, _) => c2.add(testcase), - } - } - - fn add_disabled(&mut self, testcase: Testcase) -> Result { - match self { - Self::Corpus1(c1, _) => c1.add_disabled(testcase), - Self::Corpus2(c2, _) => c2.add_disabled(testcase), - } - } - - fn cloned_input_for_id(&self, idx: CorpusId) -> Result - where - I: Clone, - { - match self { - Self::Corpus1(c1, _) => c1.cloned_input_for_id(idx), - Self::Corpus2(c2, _) => c2.cloned_input_for_id(idx), + Self::Corpus1(c1, _) => c1.add_disabled(input, md), + Self::Corpus2(c2, _) => c2.add_disabled(input, md), } } @@ -111,17 +95,14 @@ where } } - fn get(&self, id: CorpusId) -> Result<&RefCell>, Error> { - match self { - Self::Corpus1(c1, _) => c1.get(id), - Self::Corpus2(c2, _) => c2.get(id), - } - } - - fn get_from_all(&self, id: CorpusId) -> Result<&RefCell>, Error> { + /// Get testcase by id + fn get_from( + &self, + id: CorpusId, + ) -> Result, Error> { match self { - Self::Corpus1(c1, _) => c1.get_from_all(id), - Self::Corpus2(c2, _) => c2.get_from_all(id), + Self::Corpus1(c1, _) => c1.get_from(id), + Self::Corpus2(c2, _) => c2.get_from(id), } } @@ -139,13 +120,6 @@ where } } - fn load_input_into(&self, testcase: &mut Testcase) -> Result<(), Error> { - match self { - Self::Corpus1(c1, _) => c1.load_input_into(testcase), - Self::Corpus2(c2, _) => c2.load_input_into(testcase), - } - } - fn next(&self, id: CorpusId) -> Option { match self { Self::Corpus1(c1, _) => c1.next(id), @@ -174,24 +148,15 @@ where } } - fn remove(&mut self, id: CorpusId) -> Result, Error> { - match self { - Self::Corpus1(c1, _) => c1.remove(id), - Self::Corpus2(c2, _) => c2.remove(id), - } - } - - fn replace(&mut self, idx: CorpusId, testcase: Testcase) -> Result, Error> { - match self { - Self::Corpus1(c1, _) => c1.replace(idx, testcase), - Self::Corpus2(c2, _) => c2.replace(idx, testcase), - } - } - - fn store_input_from(&self, testcase: &Testcase) -> Result<(), Error> { + fn replace( + &mut self, + id: CorpusId, + input: Rc, + md: TestcaseMetadata, + ) -> Result, Error> { match self { - Self::Corpus1(c1, _) => c1.store_input_from(testcase), - Self::Corpus2(c2, _) => c2.store_input_from(testcase), + Self::Corpus1(c1, _) => c1.replace(id, input, md), + Self::Corpus2(c2, _) => c2.replace(id, input, md), } } } diff --git a/crates/libafl/src/corpus/inmemory.rs b/crates/libafl/src/corpus/inmemory.rs deleted file mode 100644 index d98fee9d02f..00000000000 --- a/crates/libafl/src/corpus/inmemory.rs +++ /dev/null @@ -1,718 +0,0 @@ -//! In-memory corpus, keeps all test cases in memory at all times - -use alloc::vec::Vec; -use core::cell::{Ref, RefCell, RefMut}; - -use serde::{Deserialize, Serialize}; - -use super::{EnableDisableCorpus, HasTestcase}; -use crate::{ - Error, - corpus::{Corpus, CorpusId, Testcase}, -}; - -/// Keep track of the stored `Testcase` and the siblings ids (insertion order) -#[cfg(not(feature = "corpus_btreemap"))] -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TestcaseStorageItem { - /// The stored testcase - pub testcase: RefCell>, - /// Previously inserted id - pub prev: Option, - /// Following inserted id - pub next: Option, -} - -/// The map type in which testcases are stored (disable the feature `corpus_btreemap` to use a `HashMap` instead of `BTreeMap`) -#[derive(Default, Serialize, Deserialize, Clone, Debug)] -pub struct TestcaseStorageMap { - #[cfg(not(feature = "corpus_btreemap"))] - /// A map of `CorpusId` to `TestcaseStorageItem` - pub map: hashbrown::HashMap>, - #[cfg(feature = "corpus_btreemap")] - /// A map of `CorpusId` to `Testcase`. - pub map: alloc::collections::btree_map::BTreeMap>>, - /// The keys in order (use `Vec::binary_search`) - pub keys: Vec, - /// First inserted id - #[cfg(not(feature = "corpus_btreemap"))] - first_id: Option, - /// Last inserted id - #[cfg(not(feature = "corpus_btreemap"))] - last_id: Option, -} - -impl TestcaseStorageMap { - /// Insert a key in the keys set - fn insert_key(&mut self, id: CorpusId) { - if let Err(idx) = self.keys.binary_search(&id) { - self.keys.insert(idx, id); - } - } - - /// Remove a key from the keys set - fn remove_key(&mut self, id: CorpusId) { - if let Ok(idx) = self.keys.binary_search(&id) { - self.keys.remove(idx); - } - } - - /// Replace a testcase given a `CorpusId` - #[cfg(not(feature = "corpus_btreemap"))] - pub fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Option> { - match self.map.get_mut(&id) { - Some(entry) => Some(entry.testcase.replace(testcase)), - _ => None, - } - } - - /// Replace a testcase given a `CorpusId` - #[cfg(feature = "corpus_btreemap")] - pub fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Option> { - self.map.get_mut(&id).map(|entry| entry.replace(testcase)) - } - - /// Remove a testcase given a [`CorpusId`] - #[cfg(not(feature = "corpus_btreemap"))] - pub fn remove(&mut self, id: CorpusId) -> Option>> { - match self.map.remove(&id) { - Some(item) => { - self.remove_key(id); - match item.prev { - Some(prev) => { - self.map.get_mut(&prev).unwrap().next = item.next; - } - _ => { - // first elem - self.first_id = item.next; - } - } - match item.next { - Some(next) => { - self.map.get_mut(&next).unwrap().prev = item.prev; - } - _ => { - // last elem - self.last_id = item.prev; - } - } - Some(item.testcase) - } - _ => None, - } - } - - /// Remove a testcase given a [`CorpusId`] - #[cfg(feature = "corpus_btreemap")] - pub fn remove(&mut self, id: CorpusId) -> Option>> { - self.remove_key(id); - self.map.remove(&id) - } - - /// Get a testcase given a `CorpusId` - #[cfg(not(feature = "corpus_btreemap"))] - #[must_use] - pub fn get(&self, id: CorpusId) -> Option<&RefCell>> { - self.map.get(&id).as_ref().map(|x| &x.testcase) - } - - /// Get a testcase given a `CorpusId` - #[cfg(feature = "corpus_btreemap")] - #[must_use] - pub fn get(&self, id: CorpusId) -> Option<&RefCell>> { - self.map.get(&id) - } - - /// Get the next id given a `CorpusId` (creation order) - #[cfg(not(feature = "corpus_btreemap"))] - #[must_use] - pub fn next(&self, id: CorpusId) -> Option { - match self.map.get(&id) { - Some(item) => item.next, - _ => None, - } - } - - /// Get the next id given a `CorpusId` (creation order) - #[cfg(feature = "corpus_btreemap")] - #[must_use] - pub fn next(&self, id: CorpusId) -> Option { - // TODO see if using self.keys is faster - let mut range = self - .map - .range((core::ops::Bound::Included(id), core::ops::Bound::Unbounded)); - if let Some((this_id, _)) = range.next() { - if id != *this_id { - return None; - } - } - if let Some((next_id, _)) = range.next() { - Some(*next_id) - } else { - None - } - } - - /// Get the previous id given a `CorpusId` (creation order) - #[cfg(not(feature = "corpus_btreemap"))] - #[must_use] - pub fn prev(&self, id: CorpusId) -> Option { - match self.map.get(&id) { - Some(item) => item.prev, - _ => None, - } - } - - /// Get the previous id given a `CorpusId` (creation order) - #[cfg(feature = "corpus_btreemap")] - #[must_use] - pub fn prev(&self, id: CorpusId) -> Option { - // TODO see if using self.keys is faster - let mut range = self - .map - .range((core::ops::Bound::Unbounded, core::ops::Bound::Included(id))); - if let Some((this_id, _)) = range.next_back() { - if id != *this_id { - return None; - } - } - if let Some((prev_id, _)) = range.next_back() { - Some(*prev_id) - } else { - None - } - } - - /// Get the first created id - #[cfg(not(feature = "corpus_btreemap"))] - #[must_use] - pub fn first(&self) -> Option { - self.first_id - } - - /// Get the first created id - #[cfg(feature = "corpus_btreemap")] - #[must_use] - pub fn first(&self) -> Option { - self.map.iter().next().map(|x| *x.0) - } - - /// Get the last created id - #[cfg(not(feature = "corpus_btreemap"))] - #[must_use] - pub fn last(&self) -> Option { - self.last_id - } - - /// Get the last created id - #[cfg(feature = "corpus_btreemap")] - #[must_use] - pub fn last(&self) -> Option { - self.map.iter().next_back().map(|x| *x.0) - } - - fn new() -> Self { - Self { - #[cfg(not(feature = "corpus_btreemap"))] - map: hashbrown::HashMap::default(), - #[cfg(feature = "corpus_btreemap")] - map: alloc::collections::BTreeMap::default(), - keys: Vec::default(), - #[cfg(not(feature = "corpus_btreemap"))] - first_id: None, - #[cfg(not(feature = "corpus_btreemap"))] - last_id: None, - } - } -} -/// Storage map for the testcases (used in `Corpus` implementations) with an incremental index -#[derive(Default, Serialize, Deserialize, Clone, Debug)] -pub struct TestcaseStorage { - /// The map in which enabled testcases are stored - pub enabled: TestcaseStorageMap, - /// The map in which disabled testcases are stored - pub disabled: TestcaseStorageMap, - /// The progressive id for both maps - progressive_id: usize, -} - -impl TestcaseStorage { - /// Insert a testcase assigning a `CorpusId` to it - pub fn insert(&mut self, testcase: RefCell>) -> CorpusId { - self.insert_inner(testcase, false) - } - - #[must_use] - /// Peek the next free corpus id - pub fn peek_free_id(&self) -> CorpusId { - CorpusId::from(self.progressive_id) - } - - /// Insert a testcase assigning a `CorpusId` to it - pub fn insert_disabled(&mut self, testcase: RefCell>) -> CorpusId { - self.insert_inner(testcase, true) - } - - /// Insert a testcase assigning a `CorpusId` to it - #[cfg(not(feature = "corpus_btreemap"))] - fn insert_inner(&mut self, testcase: RefCell>, is_disabled: bool) -> CorpusId { - let id = CorpusId::from(self.progressive_id); - self.progressive_id += 1; - let corpus = if is_disabled { - &mut self.disabled - } else { - &mut self.enabled - }; - let prev = if let Some(last_id) = corpus.last_id { - corpus.map.get_mut(&last_id).unwrap().next = Some(id); - Some(last_id) - } else { - None - }; - if corpus.first_id.is_none() { - corpus.first_id = Some(id); - } - corpus.last_id = Some(id); - corpus.insert_key(id); - corpus.map.insert( - id, - TestcaseStorageItem { - testcase, - prev, - next: None, - }, - ); - id - } - - #[cfg(not(feature = "corpus_btreemap"))] - fn insert_inner_with_id( - &mut self, - testcase: RefCell>, - is_disabled: bool, - id: CorpusId, - ) -> Result<(), Error> { - if self.progressive_id < id.into() { - return Err(Error::illegal_state( - "trying to insert a testcase with an id bigger than the internal Id counter", - )); - } - let corpus = if is_disabled { - &mut self.disabled - } else { - &mut self.enabled - }; - let prev = if let Some(last_id) = corpus.last_id { - corpus.map.get_mut(&last_id).unwrap().next = Some(id); - Some(last_id) - } else { - None - }; - if corpus.first_id.is_none() { - corpus.first_id = Some(id); - } - corpus.last_id = Some(id); - corpus.insert_key(id); - corpus.map.insert( - id, - TestcaseStorageItem { - testcase, - prev, - next: None, - }, - ); - Ok(()) - } - - #[cfg(feature = "corpus_btreemap")] - fn insert_inner_with_id( - &mut self, - testcase: RefCell>, - is_disabled: bool, - id: CorpusId, - ) -> Result<(), Error> { - if self.progressive_id < id.into() { - return Err(Error::illegal_state( - "trying to insert a testcase with an id bigger than the internal Id counter", - )); - } - let corpus = if is_disabled { - &mut self.disabled - } else { - &mut self.enabled - }; - corpus.insert_key(id); - corpus.map.insert(id, testcase); - Ok(()) - } - - /// Insert a testcase assigning a `CorpusId` to it - #[cfg(feature = "corpus_btreemap")] - fn insert_inner(&mut self, testcase: RefCell>, is_disabled: bool) -> CorpusId { - let id = CorpusId::from(self.progressive_id); - self.progressive_id += 1; - let corpus = if is_disabled { - &mut self.disabled - } else { - &mut self.enabled - }; - corpus.insert_key(id); - corpus.map.insert(id, testcase); - id - } - - /// Create new `TestcaseStorage` - #[must_use] - pub fn new() -> Self { - Self { - enabled: TestcaseStorageMap::new(), - disabled: TestcaseStorageMap::new(), - progressive_id: 0, - } - } -} - -/// A corpus handling all in memory. -#[derive(Default, Serialize, Deserialize, Clone, Debug)] -pub struct InMemoryCorpus { - storage: TestcaseStorage, - current: Option, -} - -impl Corpus for InMemoryCorpus { - /// Returns the number of all enabled entries - #[inline] - fn count(&self) -> usize { - self.storage.enabled.map.len() - } - - /// Returns the number of all disabled entries - fn count_disabled(&self) -> usize { - self.storage.disabled.map.len() - } - - /// Returns the number of elements including disabled entries - #[inline] - fn count_all(&self) -> usize { - self.storage - .enabled - .map - .len() - .saturating_add(self.storage.disabled.map.len()) - } - - /// Add an enabled testcase to the corpus and return its index - #[inline] - fn add(&mut self, testcase: Testcase) -> Result { - Ok(self.storage.insert(RefCell::new(testcase))) - } - - /// Add a disabled testcase to the corpus and return its index - #[inline] - fn add_disabled(&mut self, testcase: Testcase) -> Result { - Ok(self.storage.insert_disabled(RefCell::new(testcase))) - } - - /// Replaces the testcase at the given id - #[inline] - fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Result, Error> { - self.storage.enabled.replace(id, testcase).ok_or_else(|| { - Error::key_not_found(format!("Index {id} not found, could not replace.")) - }) - } - - /// Removes an entry from the corpus, returning it if it was present; considers both enabled and disabled testcases - #[inline] - fn remove(&mut self, id: CorpusId) -> Result, Error> { - let mut testcase = self.storage.enabled.remove(id); - if testcase.is_none() { - testcase = self.storage.disabled.remove(id); - } - testcase - .map(|x| x.take()) - .ok_or_else(|| Error::key_not_found(format!("Index {id} not found"))) - } - - /// Get by id; considers only enabled testcases - #[inline] - fn get(&self, id: CorpusId) -> Result<&RefCell>, Error> { - self.storage - .enabled - .get(id) - .ok_or_else(|| Error::key_not_found(format!("Index {id} not found"))) - } - /// Get by id; considers both enabled and disabled testcases - #[inline] - fn get_from_all(&self, id: CorpusId) -> Result<&RefCell>, Error> { - let mut testcase = self.storage.enabled.get(id); - if testcase.is_none() { - testcase = self.storage.disabled.get(id); - } - testcase.ok_or_else(|| Error::key_not_found(format!("Index {id} not found"))) - } - - /// Current testcase scheduled - #[inline] - fn current(&self) -> &Option { - &self.current - } - - /// Current testcase scheduled (mutable) - #[inline] - fn current_mut(&mut self) -> &mut Option { - &mut self.current - } - - #[inline] - fn next(&self, id: CorpusId) -> Option { - self.storage.enabled.next(id) - } - - /// Peek the next free corpus id - #[inline] - fn peek_free_id(&self) -> CorpusId { - self.storage.peek_free_id() - } - - #[inline] - fn prev(&self, id: CorpusId) -> Option { - self.storage.enabled.prev(id) - } - - #[inline] - fn first(&self) -> Option { - self.storage.enabled.first() - } - - #[inline] - fn last(&self) -> Option { - self.storage.enabled.last() - } - - /// Get the nth corpus id; considers only enabled testcases - #[inline] - fn nth(&self, nth: usize) -> CorpusId { - self.storage.enabled.keys[nth] - } - - /// Get the nth corpus id; considers both enabled and disabled testcases - #[inline] - fn nth_from_all(&self, nth: usize) -> CorpusId { - let enabled_count = self.count(); - if nth >= enabled_count { - return self.storage.disabled.keys[nth.saturating_sub(enabled_count)]; - } - self.storage.enabled.keys[nth] - } - - #[inline] - fn load_input_into(&self, _: &mut Testcase) -> Result<(), Error> { - // Inputs never get evicted, nothing to load here. - Ok(()) - } - - #[inline] - fn store_input_from(&self, _: &Testcase) -> Result<(), Error> { - Ok(()) - } -} - -impl EnableDisableCorpus for InMemoryCorpus { - #[inline] - fn disable(&mut self, id: CorpusId) -> Result<(), Error> { - if let Some(testcase) = self.storage.enabled.remove(id) { - self.storage.insert_inner_with_id(testcase, true, id) - } else { - Err(Error::key_not_found(format!( - "Index {id} not found in enabled testcases" - ))) - } - } - - #[inline] - fn enable(&mut self, id: CorpusId) -> Result<(), Error> { - if let Some(testcase) = self.storage.disabled.remove(id) { - self.storage.insert_inner_with_id(testcase, false, id) - } else { - Err(Error::key_not_found(format!( - "Index {id} not found in disabled testcases" - ))) - } - } -} - -impl HasTestcase for InMemoryCorpus { - fn testcase(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow()) - } - - fn testcase_mut(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow_mut()) - } -} - -impl InMemoryCorpus { - /// Creates a new [`InMemoryCorpus`], keeping all [`Testcase`]`s` in memory. - /// This is the simplest and fastest option, however test progress will be lost on exit or on OOM. - #[must_use] - pub fn new() -> Self { - Self { - storage: TestcaseStorage::new(), - current: None, - } - } -} - -#[cfg(test)] -#[cfg(not(feature = "corpus_btreemap"))] -mod tests { - use super::*; - use crate::{ - Error, - corpus::Testcase, - inputs::{HasMutatorBytes, bytes::BytesInput}, - }; - - /// Helper function to create a corpus with predefined test cases - #[cfg(not(feature = "corpus_btreemap"))] - fn setup_corpus() -> (InMemoryCorpus, Vec) { - let mut corpus = InMemoryCorpus::::new(); - let mut ids = Vec::new(); - - // Add initial test cases with distinct byte patterns ([1,2,3],[2,3,4],[3,4,5]) - for i in 0..3u8 { - let input = BytesInput::new(vec![i + 1, i + 2, i + 3]); - let tc_id = corpus.add(Testcase::new(input)).unwrap(); - ids.push(tc_id); - } - - (corpus, ids) - } - - /// Helper function to verify corpus counts - #[cfg(not(feature = "corpus_btreemap"))] - fn assert_corpus_counts(corpus: &InMemoryCorpus, enabled: usize, disabled: usize) { - let total = enabled + disabled; // if a testcase is not in the enabled map, then it's in the disabled one. - assert_eq!(corpus.count(), enabled, "Wrong number of enabled testcases"); - assert_eq!( - corpus.count_disabled(), - disabled, - "Wrong number of disabled testcases" - ); - assert_eq!(corpus.count_all(), total, "Wrong total number of testcases"); - } - - #[test] - #[cfg(not(feature = "corpus_btreemap"))] - fn test_corpus_basic_operations() { - let (corpus, ids) = setup_corpus(); - assert_corpus_counts(&corpus, 3, 0); - - for id in &ids { - assert!(corpus.get(*id).is_ok(), "Failed to get testcase {id:?}"); - assert!( - corpus.get_from_all(*id).is_ok(), - "Failed to get testcase from all {id:?}" - ); - } - - // Non-existent ID should fail - let invalid_id = CorpusId(999); - assert!(corpus.get(invalid_id).is_err()); - assert!(corpus.get_from_all(invalid_id).is_err()); - } - - #[test] - #[cfg(not(feature = "corpus_btreemap"))] - fn test_corpus_disable_enable() -> Result<(), Error> { - let (mut corpus, ids) = setup_corpus(); - let invalid_id = CorpusId(999); - - corpus.disable(ids[1])?; - assert_corpus_counts(&corpus, 2, 1); - - // Verify disabled testcase is not in enabled list but is in all list - assert!( - corpus.get(ids[1]).is_err(), - "Disabled testcase should not be accessible via get()" - ); - assert!( - corpus.get_from_all(ids[1]).is_ok(), - "Disabled testcase should be accessible via get_from_all()" - ); - - // Other testcases are still accessible - assert!(corpus.get(ids[0]).is_ok()); - assert!(corpus.get(ids[2]).is_ok()); - - corpus.enable(ids[1])?; - assert_corpus_counts(&corpus, 3, 0); - - // Verify all testcases are accessible from the enabled map again - for id in &ids { - assert!(corpus.get(*id).is_ok()); - } - - // Corner cases - assert!( - corpus.disable(ids[1]).is_ok(), - "Should be able to disable testcase" - ); - assert!( - corpus.disable(ids[1]).is_err(), - "Should not be able to disable already disabled testcase" - ); - assert!( - corpus.enable(ids[0]).is_err(), - "Should not be able to enable already enabled testcase" - ); - assert!( - corpus.disable(invalid_id).is_err(), - "Should not be able to disable non-existent testcase" - ); - assert!( - corpus.enable(invalid_id).is_err(), - "Should not be able to enable non-existent testcase" - ); - - Ok(()) - } - - #[test] - #[cfg(not(feature = "corpus_btreemap"))] - fn test_corpus_operations_after_disabled() -> Result<(), Error> { - let (mut corpus, ids) = setup_corpus(); - - corpus.disable(ids[0])?; - assert_corpus_counts(&corpus, 2, 1); - - let removed = corpus.remove(ids[0])?; - let removed_data = removed.input().as_ref().unwrap().mutator_bytes(); - assert_eq!( - removed_data, - &vec![1, 2, 3], - "Removed testcase has incorrect data" - ); - assert_corpus_counts(&corpus, 2, 0); - - let removed = corpus.remove(ids[1])?; - let removed_data = removed.input().as_ref().unwrap().mutator_bytes(); - assert_eq!( - removed_data, - &vec![2, 3, 4], - "Removed testcase has incorrect data" - ); - assert_corpus_counts(&corpus, 1, 0); - - // Not possible to get removed testcases - assert!(corpus.get(ids[0]).is_err()); - assert!(corpus.get_from_all(ids[0]).is_err()); - assert!(corpus.get(ids[1]).is_err()); - assert!(corpus.get_from_all(ids[1]).is_err()); - - // Only the third testcase should remain - assert!(corpus.get(ids[2]).is_ok()); - - Ok(()) - } -} diff --git a/crates/libafl/src/corpus/inmemory_ondisk.rs b/crates/libafl/src/corpus/inmemory_ondisk.rs deleted file mode 100644 index d5432a765c4..00000000000 --- a/crates/libafl/src/corpus/inmemory_ondisk.rs +++ /dev/null @@ -1,545 +0,0 @@ -//! The [`InMemoryOnDiskCorpus`] stores [`Testcase`]s to disk. -//! -//! Additionally, _all_ of them are kept in memory. -//! For a lower memory footprint, consider using [`crate::corpus::CachedOnDiskCorpus`] -//! which only stores a certain number of [`Testcase`]s and removes additional ones in a FIFO manner. - -use alloc::string::{String, ToString}; -use core::cell::{Ref, RefCell, RefMut}; -use std::{ - fs, - fs::{File, OpenOptions}, - io, - io::{Read, Seek, SeekFrom, Write}, - path::{Path, PathBuf}, -}; - -use fs2::FileExt; -#[cfg(feature = "gzip")] -use libafl_bolts::compress::GzipCompressor; -use serde::{Deserialize, Serialize}; - -use super::{ - EnableDisableCorpus, HasTestcase, - ondisk::{OnDiskMetadata, OnDiskMetadataFormat}, -}; -use crate::{ - Error, HasMetadata, - corpus::{Corpus, CorpusId, InMemoryCorpus, Testcase}, - inputs::Input, -}; - -/// Creates the given `path` and returns an error if it fails. -/// If the create succeeds, it will return the file. -/// If the create fails for _any_ reason, including, but not limited to, a preexisting existing file of that name, -/// it will instead return the respective [`io::Error`]. -fn create_new>(path: P) -> Result { - OpenOptions::new() - .write(true) - .read(true) - .create_new(true) - .open(path) -} - -/// Tries to create the given `path` and returns `None` _only_ if the file already existed. -/// If the create succeeds, it will return the file. -/// If the create fails for some other reason, it will instead return the respective [`io::Error`]. -fn try_create_new>(path: P) -> Result, io::Error> { - match create_new(path) { - Ok(ret) => Ok(Some(ret)), - Err(err) if err.kind() == io::ErrorKind::AlreadyExists => Ok(None), - Err(err) => Err(err), - } -} - -/// A corpus able to store [`Testcase`]s to disk, while also keeping all of them in memory. -/// -/// Metadata is written to a `..metadata` file in the same folder by default. -#[derive(Default, Serialize, Deserialize, Clone, Debug)] -pub struct InMemoryOnDiskCorpus { - inner: InMemoryCorpus, - dir_path: PathBuf, - meta_format: Option, - prefix: Option, - locking: bool, -} - -impl Corpus for InMemoryOnDiskCorpus -where - I: Input, -{ - /// Returns the number of all enabled entries - #[inline] - fn count(&self) -> usize { - self.inner.count() - } - - /// Returns the number of all disabled entries - fn count_disabled(&self) -> usize { - self.inner.count_disabled() - } - - /// Returns the number of elements including disabled entries - #[inline] - fn count_all(&self) -> usize { - self.inner.count_all() - } - - /// Add an enabled testcase to the corpus and return its index - #[inline] - fn add(&mut self, testcase: Testcase) -> Result { - let id = self.inner.add(testcase)?; - let testcase = &mut self.get(id).unwrap().borrow_mut(); - self.save_testcase(testcase, Some(id))?; - *testcase.input_mut() = None; - Ok(id) - } - - /// Add a disabled testcase to the corpus and return its index - #[inline] - fn add_disabled(&mut self, testcase: Testcase) -> Result { - let id = self.inner.add_disabled(testcase)?; - let testcase = &mut self.get_from_all(id).unwrap().borrow_mut(); - self.save_testcase(testcase, Some(id))?; - *testcase.input_mut() = None; - Ok(id) - } - - /// Replaces the testcase at the given idx - #[inline] - fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Result, Error> { - let entry = self.inner.replace(id, testcase)?; - self.remove_testcase(&entry)?; - let testcase = &mut self.get(id).unwrap().borrow_mut(); - self.save_testcase(testcase, Some(id))?; - *testcase.input_mut() = None; - Ok(entry) - } - - /// Removes an entry from the corpus, returning it if it was present; considers both enabled and disabled corpus - #[inline] - fn remove(&mut self, id: CorpusId) -> Result, Error> { - let entry = self.inner.remove(id)?; - self.remove_testcase(&entry)?; - Ok(entry) - } - - /// Get by id; considers only enabled testcases - #[inline] - fn get(&self, id: CorpusId) -> Result<&RefCell>, Error> { - self.inner.get(id) - } - - /// Get by id; considers both enabled and disabled testcases - #[inline] - fn get_from_all(&self, id: CorpusId) -> Result<&RefCell>, Error> { - self.inner.get_from_all(id) - } - - /// Current testcase scheduled - #[inline] - fn current(&self) -> &Option { - self.inner.current() - } - - /// Current testcase scheduled (mutable) - #[inline] - fn current_mut(&mut self) -> &mut Option { - self.inner.current_mut() - } - - #[inline] - fn next(&self, id: CorpusId) -> Option { - self.inner.next(id) - } - - /// Peek the next free corpus id - #[inline] - fn peek_free_id(&self) -> CorpusId { - self.inner.peek_free_id() - } - - #[inline] - fn prev(&self, id: CorpusId) -> Option { - self.inner.prev(id) - } - - #[inline] - fn first(&self) -> Option { - self.inner.first() - } - - #[inline] - fn last(&self) -> Option { - self.inner.last() - } - - /// Get the nth corpus id; considers only enabled testcases - #[inline] - fn nth(&self, nth: usize) -> CorpusId { - self.inner.nth(nth) - } - /// Get the nth corpus id; considers both enabled and disabled testcases - #[inline] - fn nth_from_all(&self, nth: usize) -> CorpusId { - self.inner.nth_from_all(nth) - } - - fn load_input_into(&self, testcase: &mut Testcase) -> Result<(), Error> { - if testcase.input_mut().is_none() { - let Some(file_path) = testcase.file_path().as_ref() else { - return Err(Error::illegal_argument( - "No file path set for testcase. Could not load inputs.", - )); - }; - let input = I::from_file(file_path)?; - testcase.set_input(input); - } - Ok(()) - } - - fn store_input_from(&self, testcase: &Testcase) -> Result<(), Error> { - // Store the input to disk - let Some(file_path) = testcase.file_path() else { - return Err(Error::illegal_argument( - "No file path set for testcase. Could not store input to disk.", - )); - }; - let Some(input) = testcase.input() else { - return Err(Error::illegal_argument( - "No input available for testcase. Could not store anything.", - )); - }; - input.to_file(file_path) - } -} - -impl EnableDisableCorpus for InMemoryOnDiskCorpus -where - I: Input, -{ - #[inline] - fn disable(&mut self, id: CorpusId) -> Result<(), Error> { - self.inner.disable(id)?; - // Ensure testcase is saved to disk correctly with its new status - let testcase_cell = &mut self.get_from_all(id).unwrap().borrow_mut(); - self.save_testcase(testcase_cell, Some(id))?; - Ok(()) - } - - #[inline] - fn enable(&mut self, id: CorpusId) -> Result<(), Error> { - self.inner.enable(id)?; - // Ensure testcase is saved to disk correctly with its new status - let testcase_cell = &mut self.get_from_all(id).unwrap().borrow_mut(); - self.save_testcase(testcase_cell, Some(id))?; - Ok(()) - } -} - -impl HasTestcase for InMemoryOnDiskCorpus -where - I: Input, -{ - fn testcase(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow()) - } - - fn testcase_mut(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow_mut()) - } -} - -impl InMemoryOnDiskCorpus { - /// Creates an [`InMemoryOnDiskCorpus`]. - /// - /// This corpus stores all testcases to disk, and keeps all of them in memory, as well. - /// - /// By default, it stores metadata for each [`Testcase`] as prettified json. - /// Metadata will be written to a file named `..metadata` - /// The metadata may include objective reason, specific information for a fuzz job, and more. - /// - /// If you don't want metadata, use [`InMemoryOnDiskCorpus::no_meta`]. - /// To pick a different metadata format, use [`InMemoryOnDiskCorpus::with_meta_format`]. - /// - /// Will error, if [`fs::create_dir_all()`] failed for `dir_path`. - pub fn new

(dir_path: P) -> Result - where - P: AsRef, - { - Self::_new( - dir_path.as_ref(), - Some(OnDiskMetadataFormat::JsonPretty), - None, - true, - ) - } - - /// Creates the [`InMemoryOnDiskCorpus`] specifying the format in which `Metadata` will be saved to disk. - /// - /// Will error, if [`fs::create_dir_all()`] failed for `dir_path`. - pub fn with_meta_format

( - dir_path: P, - meta_format: Option, - ) -> Result - where - P: AsRef, - { - Self::_new(dir_path.as_ref(), meta_format, None, true) - } - - /// Creates the [`InMemoryOnDiskCorpus`] specifying the format in which `Metadata` will be saved to disk - /// and the prefix for the filenames. - /// - /// Will error, if [`fs::create_dir_all()`] failed for `dir_path`. - pub fn with_meta_format_and_prefix

( - dir_path: P, - meta_format: Option, - prefix: Option, - locking: bool, - ) -> Result - where - P: AsRef, - { - Self::_new(dir_path.as_ref(), meta_format, prefix, locking) - } - - /// Creates an [`InMemoryOnDiskCorpus`] that will not store .metadata files - /// - /// Will error, if [`fs::create_dir_all()`] failed for `dir_path`. - pub fn no_meta

(dir_path: P) -> Result - where - P: AsRef, - { - Self::_new(dir_path.as_ref(), None, None, true) - } - - /// Private fn to crate a new corpus at the given (non-generic) path with the given optional `meta_format` - fn _new( - dir_path: &Path, - meta_format: Option, - prefix: Option, - locking: bool, - ) -> Result { - match fs::create_dir_all(dir_path) { - Ok(()) => {} - Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {} - Err(e) => return Err(e.into()), - } - Ok(InMemoryOnDiskCorpus { - inner: InMemoryCorpus::new(), - dir_path: dir_path.into(), - meta_format, - prefix, - locking, - }) - } - - /// Sets the filename for a [`Testcase`]. - /// If an error gets returned from the corpus (i.e., file exists), we'll have to retry with a different filename. - /// Renaming testcases will most likely cause duplicate testcases to not be handled correctly - /// if testcases with the same input are not given the same filename. - /// Only rename when you know what you are doing. - #[inline] - pub fn rename_testcase( - &self, - testcase: &mut Testcase, - filename: String, - id: Option, - ) -> Result<(), Error> - where - I: Input, - { - if testcase.filename().is_some() { - // We are renaming! - - let old_filename = testcase.filename_mut().take().unwrap(); - let new_filename = filename; - - // Do operations below when new filename is specified - if old_filename == new_filename { - *testcase.filename_mut() = Some(old_filename); - return Ok(()); - } - - let new_file_path = self.dir_path.join(&new_filename); - self.remove_testcase(testcase)?; - *testcase.filename_mut() = Some(new_filename); - self.save_testcase(testcase, id)?; - *testcase.file_path_mut() = Some(new_file_path); - - Ok(()) - } else { - Err(Error::illegal_argument( - "Cannot rename testcase without name!", - )) - } - } - - fn save_testcase(&self, testcase: &mut Testcase, id: Option) -> Result<(), Error> - where - I: Input, - { - let file_name = testcase.filename_mut().take().unwrap_or_else(|| { - // TODO walk entry metadata to ask for pieces of filename (e.g. :havoc in AFL) - testcase.input().as_ref().unwrap().generate_name(id) - }); - - let mut ctr = 1; - if self.locking { - let lockfile_name = format!(".{file_name}"); - let lockfile_path = self.dir_path.join(lockfile_name); - - let mut lockfile = try_create_new(&lockfile_path)?.unwrap_or( - OpenOptions::new() - .write(true) - .read(true) - .open(&lockfile_path)?, - ); - lockfile.lock_exclusive()?; - - let mut old_ctr = String::new(); - lockfile.read_to_string(&mut old_ctr)?; - if !old_ctr.is_empty() { - ctr = old_ctr.trim().parse::()? + 1; - } - - lockfile.seek(SeekFrom::Start(0))?; - lockfile.write_all(ctr.to_string().as_bytes())?; - } - - if testcase.file_path().is_none() { - *testcase.file_path_mut() = Some(self.dir_path.join(&file_name)); - } - *testcase.filename_mut() = Some(file_name); - - if self.meta_format.is_some() { - let metafile_name = if self.locking { - format!( - ".{}_{}.metadata", - testcase.filename().as_ref().unwrap(), - ctr - ) - } else { - format!(".{}.metadata", testcase.filename().as_ref().unwrap()) - }; - let metafile_path = self.dir_path.join(&metafile_name); - let mut tmpfile_path = metafile_path.clone(); - tmpfile_path.set_file_name(format!(".{metafile_name}.tmp",)); - - let ondisk_meta = OnDiskMetadata { - metadata: testcase.metadata_map(), - exec_time: testcase.exec_time(), - executions: testcase.executions(), - }; - - let mut tmpfile = File::create(&tmpfile_path)?; - - let json_error = - |err| Error::serialize(format!("Failed to json-ify metadata: {err:?}")); - - let serialized = match self.meta_format.as_ref().unwrap() { - OnDiskMetadataFormat::Postcard => postcard::to_allocvec(&ondisk_meta)?, - OnDiskMetadataFormat::Json => { - serde_json::to_vec(&ondisk_meta).map_err(json_error)? - } - OnDiskMetadataFormat::JsonPretty => { - serde_json::to_vec_pretty(&ondisk_meta).map_err(json_error)? - } - #[cfg(feature = "gzip")] - OnDiskMetadataFormat::JsonGzip => GzipCompressor::new() - .compress(&serde_json::to_vec_pretty(&ondisk_meta).map_err(json_error)?), - }; - tmpfile.write_all(&serialized)?; - fs::rename(&tmpfile_path, &metafile_path)?; - *testcase.metadata_path_mut() = Some(metafile_path); - } - - // Only try to write the data if the counter is 1. - // Otherwise we already have a file with this name, and - // we can assume the data has already been written. - if ctr == 1 { - if let Err(err) = self.store_input_from(testcase) { - if self.locking { - return Err(err); - } - log::error!( - "An error occurred when trying to write a testcase without locking: {err}" - ); - } - } - Ok(()) - } - - fn remove_testcase(&self, testcase: &Testcase) -> Result<(), Error> { - if let Some(filename) = testcase.filename() { - let mut ctr = String::new(); - if self.locking { - let lockfile_path = self.dir_path.join(format!(".{filename}")); - let mut lockfile = OpenOptions::new() - .write(true) - .read(true) - .open(&lockfile_path)?; - - lockfile.lock_exclusive()?; - lockfile.read_to_string(&mut ctr)?; - ctr = ctr.trim().to_string(); - - if ctr == "1" { - FileExt::unlock(&lockfile)?; - drop(fs::remove_file(lockfile_path)); - } else { - lockfile.seek(SeekFrom::Start(0))?; - lockfile.write_all((ctr.parse::()? - 1).to_string().as_bytes())?; - return Ok(()); - } - } - - fs::remove_file(self.dir_path.join(filename))?; - if self.meta_format.is_some() { - if self.locking { - fs::remove_file(self.dir_path.join(format!(".{filename}_{ctr}.metadata")))?; - } else { - fs::remove_file(self.dir_path.join(format!(".{filename}.metadata")))?; - } - } - } - Ok(()) - } - - /// Path to the corpus directory associated with this corpus - #[must_use] - pub fn dir_path(&self) -> &PathBuf { - &self.dir_path - } -} - -#[cfg(test)] -mod tests { - #[cfg(not(miri))] - use std::{env, fs, io::Write}; - - #[cfg(not(miri))] - use super::{create_new, try_create_new}; - - #[test] - #[cfg(not(miri))] - fn test() { - let tmp = env::temp_dir(); - let path = tmp.join("testfile.tmp"); - _ = fs::remove_file(&path); - let mut f = create_new(&path).unwrap(); - f.write_all(&[0; 1]).unwrap(); - - match try_create_new(&path) { - Ok(None) => (), - Ok(_) => panic!( - "File {} did not exist even though it should have?", - &path.display() - ), - Err(e) => panic!("An unexpected error occurred: {e}"), - } - drop(f); - fs::remove_file(path).unwrap(); - } -} diff --git a/crates/libafl/src/corpus/minimizer.rs b/crates/libafl/src/corpus/minimizer.rs index 9b5b82809b2..362ca0d1588 100644 --- a/crates/libafl/src/corpus/minimizer.rs +++ b/crates/libafl/src/corpus/minimizer.rs @@ -14,7 +14,7 @@ use z3::{Optimize, ast::Bool}; use crate::{ Error, HasMetadata, HasScheduler, - corpus::Corpus, + corpus::{Corpus, IsTestcaseMetadataCell}, events::{Event, EventFirer, EventWithStats, LogSeverity}, executors::{Executor, ExitKind, HasObservers}, inputs::Input, @@ -97,31 +97,30 @@ where let mut curr = 0; while let Some(id) = cur_id { let (weight, executions) = { - if state.corpus().get(id)?.borrow().scheduled_count() == 0 { - // Execute the input; we cannot rely on the metadata already being present. - - let input = state - .corpus() - .get(id)? - .borrow_mut() - .load_input(state.corpus())? - .clone(); - - let (exit_kind, mut total_time, _) = - run_target_with_timing(fuzzer, executor, state, mgr, &input, false)?; - if exit_kind != ExitKind::Ok { - total_time = Duration::from_secs(1); + { + let tc = state.corpus().get(id)?; + let mut md = tc.testcase_metadata_mut(); + if md.scheduled_count() == 0 { + // Execute the input; we cannot rely on the metadata already being present. + let input = tc.input(); + + let (exit_kind, mut total_time, _) = run_target_with_timing( + fuzzer, + executor, + state, + mgr, + input.as_ref(), + false, + )?; + if exit_kind != ExitKind::Ok { + total_time = Duration::from_secs(1); + } + md.set_exec_time(total_time); } - state - .corpus() - .get(id)? - .borrow_mut() - .set_exec_time(total_time); } - let mut testcase = state.corpus().get(id)?.borrow_mut(); ( - TS::compute(state, &mut *testcase)? + TS::compute(state, id)? .to_u64() .expect("Weight must be computable."), *state.executions(), @@ -214,12 +213,11 @@ where continue; } - let removed = state.corpus_mut().remove(id)?; + state.corpus_mut().disable(id)?; + // scheduler needs to know we've removed the input, or it will continue to try // to use now-missing inputs - fuzzer - .scheduler_mut() - .on_remove(state, id, &Some(removed))?; + fuzzer.scheduler_mut().on_remove(state, id)?; } *state.corpus_mut().current_mut() = None; //we may have removed the current ID from the corpus diff --git a/crates/libafl/src/corpus/mod.rs b/crates/libafl/src/corpus/mod.rs index b3bf26fb12a..4e6e21e70c7 100644 --- a/crates/libafl/src/corpus/mod.rs +++ b/crates/libafl/src/corpus/mod.rs @@ -1,71 +1,67 @@ //! Corpuses contain the testcases, either in memory, on disk, or somewhere else. -use core::{cell::RefCell, fmt, marker::PhantomData}; +use alloc::rc::Rc; +use core::{fmt, marker::PhantomData}; use serde::{Deserialize, Serialize}; use crate::Error; pub mod testcase; -pub use testcase::{HasTestcase, SchedulerTestcaseMetadata, Testcase}; +pub use testcase::{ + HasTestcase, IsTestcaseMetadataCell, SchedulerTestcaseMetadata, Testcase, TestcaseMetadata, +}; -pub mod inmemory; -pub use inmemory::InMemoryCorpus; +pub mod cache; +pub use cache::{Cache, FifoCache, IdentityCache}; -pub mod dynamic; -pub use dynamic::DynamicCorpus; +pub mod single; +pub use single::SingleCorpus; -#[cfg(feature = "std")] -pub mod inmemory_ondisk; -#[cfg(feature = "std")] -pub use inmemory_ondisk::InMemoryOnDiskCorpus; +// pub mod dynamic; +// pub use dynamic::DynamicCorpus; -#[cfg(feature = "std")] -pub mod ondisk; -#[cfg(feature = "std")] -pub use ondisk::OnDiskCorpus; - -#[cfg(feature = "std")] -pub mod cached; -#[cfg(feature = "std")] -pub use cached::CachedOnDiskCorpus; +pub mod combined; +pub use combined::CombinedCorpus; #[cfg(all(feature = "cmin", unix))] pub mod minimizer; - -pub mod nop; #[cfg(all(feature = "cmin", unix))] pub use minimizer::*; + +pub mod nop; pub use nop::NopCorpus; +pub mod store; +pub use store::{InMemoryStore, OnDiskStore, maps}; + +pub mod collection; +pub use collection::{ + CachedOnDiskCorpus, InMemoryCorpus, InMemoryOnDiskCorpus, OnDiskCorpus, StdInMemoryCorpusMap, + StdInMemoryStore, StdOnDiskStore, +}; + /// An abstraction for the index that identify a testcase in the corpus #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)] #[repr(transparent)] pub struct CorpusId(pub usize); -impl fmt::Display for CorpusId { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.0) - } -} - -impl From for CorpusId { - fn from(id: usize) -> Self { - Self(id) - } -} - -impl From for CorpusId { - fn from(id: u64) -> Self { - Self(id as usize) - } +/// A counter for [`Corpus`] implementors. +/// Useful to generate fresh [`CorpusId`]s. +#[derive(Default, Serialize, Deserialize, Clone, Debug)] +pub struct CorpusCounter { + /// A fresh, progressive ID + /// It stores the next available ID. + current_id: usize, } -impl From for usize { - /// Not that the `CorpusId` is not necessarily stable in the corpus (if we remove [`Testcase`]s, for example). - fn from(id: CorpusId) -> Self { - id.0 - } +/// [`Iterator`] over the ids of a [`Corpus`] +#[derive(Debug)] +pub struct CorpusIdIterator<'a, C, I> { + corpus: &'a C, + cur: Option, + cur_back: Option, + phantom: PhantomData, } /// Utility macro to call `Corpus::random_id`; fetches only enabled [`Testcase`]`s` @@ -107,6 +103,9 @@ macro_rules! random_corpus_id_with_disabled { /// Corpus with all current [`Testcase`]s, or solutions pub trait Corpus: Sized { + /// A [`TestcaseMetadata`] cell. + type TestcaseMetadataCell: IsTestcaseMetadataCell; + /// Returns the number of all enabled entries fn count(&self) -> usize; @@ -122,22 +121,68 @@ pub trait Corpus: Sized { } /// Add an enabled testcase to the corpus and return its index - fn add(&mut self, testcase: Testcase) -> Result; + /// + /// The default [`TestcaseMetadata`] will be instantiated. + fn add(&mut self, input: I) -> Result { + self.add_shared::(Rc::new(input), TestcaseMetadata::default()) + } + + /// Add an enabled testcase to the corpus and return its index + fn add_with_metadata(&mut self, input: I, md: TestcaseMetadata) -> Result { + self.add_shared::(Rc::new(input), md) + } + + /// Add a disabled testcase to the corpus and return its index + /// + /// The default [`TestcaseMetadata`] will be instantiated. + fn add_disabled(&mut self, input: I) -> Result { + self.add_shared::(Rc::new(input), TestcaseMetadata::default()) + } /// Add a disabled testcase to the corpus and return its index - fn add_disabled(&mut self, testcase: Testcase) -> Result; + fn add_disabled_with_metadata( + &mut self, + input: I, + md: TestcaseMetadata, + ) -> Result { + self.add_shared::(Rc::new(input), md) + } + + /// Add a testcase to the corpus, and returns its index. + /// The associated type tells whether the input should be added to the enabled or the disabled corpus. + /// + /// The input can be shared through [`Rc`]. + fn add_shared( + &mut self, + input: Rc, + md: TestcaseMetadata, + ) -> Result; + + /// Get testcase by id; considers only enabled testcases + fn get(&self, id: CorpusId) -> Result, Error> { + Self::get_from::(self, id) + } - /// Replaces the [`Testcase`] at the given idx, returning the existing. - fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Result, Error>; + /// Get testcase by id, looking at the enabled and disabled stores. + fn get_from_all(&self, id: CorpusId) -> Result, Error> { + Self::get_from::(self, id) + } - /// Removes an entry from the corpus, returning it if it was present; considers both enabled and disabled testcases - fn remove(&mut self, id: CorpusId) -> Result, Error>; + /// Get testcase by id + fn get_from( + &self, + id: CorpusId, + ) -> Result, Error>; - /// Get by id; considers only enabled testcases - fn get(&self, id: CorpusId) -> Result<&RefCell>, Error>; + /// Disable a corpus entry + fn disable(&mut self, id: CorpusId) -> Result<(), Error>; - /// Get by id; considers both enabled and disabled testcases - fn get_from_all(&self, id: CorpusId) -> Result<&RefCell>, Error>; + /// Replace a [`TestcaseMetadata`] by another one. + fn replace_metadata( + &mut self, + id: CorpusId, + md: TestcaseMetadata, + ) -> Result; /// Current testcase scheduled fn current(&self) -> &Option; @@ -148,9 +193,6 @@ pub trait Corpus: Sized { /// Get the next corpus id fn next(&self, id: CorpusId) -> Option; - /// Peek the next free corpus id - fn peek_free_id(&self) -> CorpusId; - /// Get the prev corpus id fn prev(&self, id: CorpusId) -> Option; @@ -179,23 +221,6 @@ pub trait Corpus: Sized { /// Get the nth corpus id; considers both enabled and disabled testcases fn nth_from_all(&self, nth: usize) -> CorpusId; - - /// Method to load the input for this [`Testcase`] from persistent storage, - /// if necessary, and if was not already loaded (`== Some(input)`). - /// After this call, `testcase.input()` must always return `Some(input)`. - fn load_input_into(&self, testcase: &mut Testcase) -> Result<(), Error>; - - /// Method to store the input of this `Testcase` to persistent storage, if necessary. - fn store_input_from(&self, testcase: &Testcase) -> Result<(), Error>; - - /// Loads the `Input` for a given [`CorpusId`] from the [`Corpus`], and returns the clone. - fn cloned_input_for_id(&self, id: CorpusId) -> Result - where - I: Clone, - { - let mut testcase = self.get(id)?.borrow_mut(); - Ok(testcase.load_input(self)?.clone()) - } } /// Marker trait for corpus implementations that actually support enable/disable functionality @@ -219,13 +244,29 @@ pub trait HasCurrentCorpusId { fn current_corpus_id(&self) -> Result, Error>; } -/// [`Iterator`] over the ids of a [`Corpus`] -#[derive(Debug)] -pub struct CorpusIdIterator<'a, C, I> { - corpus: &'a C, - cur: Option, - cur_back: Option, - phantom: PhantomData, +impl fmt::Display for CorpusId { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl From for CorpusId { + fn from(id: usize) -> Self { + Self(id) + } +} + +impl From for CorpusId { + fn from(id: u64) -> Self { + Self(id as usize) + } +} + +impl From for usize { + /// Not that the `CorpusId` is not necessarily stable in the corpus (if we remove [`Testcase`]s, for example). + fn from(id: CorpusId) -> Self { + id.0 + } } impl Iterator for CorpusIdIterator<'_, C, I> @@ -257,3 +298,11 @@ where } } } + +impl CorpusCounter { + fn new_id(&mut self) -> CorpusId { + let old = self.current_id; + self.current_id += 1; + CorpusId(old) + } +} diff --git a/crates/libafl/src/corpus/nop.rs b/crates/libafl/src/corpus/nop.rs index a6e0f902e4f..d5410ebd9ca 100644 --- a/crates/libafl/src/corpus/nop.rs +++ b/crates/libafl/src/corpus/nop.rs @@ -1,11 +1,16 @@ //! The null corpus does not store any [`Testcase`]s. -use core::{cell::RefCell, marker::PhantomData}; + +use alloc::rc::Rc; +use core::marker::PhantomData; use serde::{Deserialize, Serialize}; use crate::{ Error, - corpus::{Corpus, CorpusId, Testcase}, + corpus::{ + Corpus, CorpusId, Testcase, + testcase::{NopTestcaseMetadataCell, TestcaseMetadata}, + }, }; /// A corpus which does not store any [`Testcase`]s. @@ -16,6 +21,8 @@ pub struct NopCorpus { } impl Corpus for NopCorpus { + type TestcaseMetadataCell = NopTestcaseMetadataCell; + /// Returns the number of all enabled entries #[inline] fn count(&self) -> usize { @@ -35,37 +42,22 @@ impl Corpus for NopCorpus { /// Add an enabled testcase to the corpus and return its index #[inline] - fn add(&mut self, _testcase: Testcase) -> Result { + fn add_shared( + &mut self, + _input: Rc, + _md: TestcaseMetadata, + ) -> Result { Err(Error::unsupported("Unsupported by NopCorpus")) } - /// Add a disabled testcase to the corpus and return its index - #[inline] - fn add_disabled(&mut self, _testcase: Testcase) -> Result { + fn get_from( + &self, + _id: CorpusId, + ) -> Result, Error> { Err(Error::unsupported("Unsupported by NopCorpus")) } - /// Replaces the testcase with the given id - #[inline] - fn replace(&mut self, _id: CorpusId, _testcase: Testcase) -> Result, Error> { - Err(Error::unsupported("Unsupported by NopCorpus")) - } - - /// Removes an entry from the corpus, returning it if it was present; considers both enabled and disabled testcases - #[inline] - fn remove(&mut self, _id: CorpusId) -> Result, Error> { - Err(Error::unsupported("Unsupported by NopCorpus")) - } - - /// Get by id; considers only enabled testcases - #[inline] - fn get(&self, _id: CorpusId) -> Result<&RefCell>, Error> { - Err(Error::unsupported("Unsupported by NopCorpus")) - } - - /// Get by id; considers both enabled and disabled testcases - #[inline] - fn get_from_all(&self, _id: CorpusId) -> Result<&RefCell>, Error> { + fn disable(&mut self, _id: CorpusId) -> Result<(), Error> { Err(Error::unsupported("Unsupported by NopCorpus")) } @@ -86,12 +78,6 @@ impl Corpus for NopCorpus { None } - /// Peek the next free corpus id - #[inline] - fn peek_free_id(&self) -> CorpusId { - CorpusId::from(0_usize) - } - #[inline] fn prev(&self, _id: CorpusId) -> Option { None @@ -119,13 +105,11 @@ impl Corpus for NopCorpus { CorpusId::from(0_usize) } - #[inline] - fn load_input_into(&self, _testcase: &mut Testcase) -> Result<(), Error> { - Err(Error::unsupported("Unsupported by NopCorpus")) - } - - #[inline] - fn store_input_from(&self, _testcase: &Testcase) -> Result<(), Error> { + fn replace_metadata( + &mut self, + _id: CorpusId, + _md: TestcaseMetadata, + ) -> Result { Err(Error::unsupported("Unsupported by NopCorpus")) } } diff --git a/crates/libafl/src/corpus/ondisk.rs b/crates/libafl/src/corpus/ondisk.rs deleted file mode 100644 index ded71f417ab..00000000000 --- a/crates/libafl/src/corpus/ondisk.rs +++ /dev/null @@ -1,294 +0,0 @@ -//! The [`OnDiskCorpus`] stores all [`Testcase`]s to disk. -//! -//! It _never_ keeps any of them in memory. -//! This is a good solution for solutions that are never reused, or for *very* memory-constraint environments. -//! For any other occasions, consider using [`CachedOnDiskCorpus`] -//! which stores a certain number of [`Testcase`]s in memory and removes additional ones in a FIFO manner. - -use alloc::string::String; -use core::{ - cell::{Ref, RefCell, RefMut}, - time::Duration, -}; -use std::path::{Path, PathBuf}; - -use libafl_bolts::serdeany::SerdeAnyMap; -use serde::{Deserialize, Serialize}; - -use crate::{ - Error, - corpus::{CachedOnDiskCorpus, Corpus, CorpusId, EnableDisableCorpus, HasTestcase, Testcase}, - inputs::Input, -}; - -/// Options for the the format of the on-disk metadata -#[derive(Default, Debug, Clone, Serialize, Deserialize)] -pub enum OnDiskMetadataFormat { - /// A binary-encoded postcard - Postcard, - /// JSON - Json, - /// JSON formatted for readability - #[default] - JsonPretty, - /// The same as [`OnDiskMetadataFormat::JsonPretty`], but compressed - #[cfg(feature = "gzip")] - JsonGzip, -} - -/// The [`Testcase`] metadata that'll be stored to disk -#[derive(Debug, Serialize)] -pub struct OnDiskMetadata<'a> { - /// The dynamic metadata [`SerdeAnyMap`] stored to disk - pub metadata: &'a SerdeAnyMap, - /// The exec time for this [`Testcase`] - pub exec_time: &'a Option, - /// The executions of this [`Testcase`] - pub executions: &'a u64, -} - -/// A corpus able to store [`Testcase`]s to disk, and load them from disk, when they are being used. -/// -/// Metadata is written to a `..metadata` file in the same folder by default. -#[derive(Default, Serialize, Deserialize, Clone, Debug)] -pub struct OnDiskCorpus { - /// The root directory backing this corpus - dir_path: PathBuf, - /// We wrapp a cached corpus and set its size to 1. - inner: CachedOnDiskCorpus, -} - -impl Corpus for OnDiskCorpus -where - I: Input, -{ - /// Returns the number of all enabled entries - #[inline] - fn count(&self) -> usize { - self.inner.count() - } - - /// Returns the number of all disabled entries - fn count_disabled(&self) -> usize { - self.inner.count_disabled() - } - - /// Returns the number of all entries - #[inline] - fn count_all(&self) -> usize { - self.inner.count_all() - } - - /// Add an enabled testcase to the corpus and return its index - #[inline] - fn add(&mut self, testcase: Testcase) -> Result { - self.inner.add(testcase) - } - - /// Add a disabled testcase to the corpus and return its index - #[inline] - fn add_disabled(&mut self, testcase: Testcase) -> Result { - self.inner.add_disabled(testcase) - } - - /// Replaces the testcase at the given idx - #[inline] - fn replace(&mut self, id: CorpusId, testcase: Testcase) -> Result, Error> { - self.inner.replace(id, testcase) - } - - /// Removes an entry from the corpus, returning it if it was present; considers both enabled and disabled testcases - #[inline] - fn remove(&mut self, id: CorpusId) -> Result, Error> { - self.inner.remove(id) - } - - /// Get by id; will check the disabled corpus if not available in the enabled - #[inline] - fn get(&self, id: CorpusId) -> Result<&RefCell>, Error> { - self.inner.get(id) - } - - /// Get by id; considers both enabled and disabled testcases - #[inline] - fn get_from_all(&self, id: CorpusId) -> Result<&RefCell>, Error> { - self.inner.get_from_all(id) - } - - /// Current testcase scheduled - #[inline] - fn current(&self) -> &Option { - self.inner.current() - } - - /// Current testcase scheduled (mutable) - #[inline] - fn current_mut(&mut self) -> &mut Option { - self.inner.current_mut() - } - - #[inline] - fn next(&self, id: CorpusId) -> Option { - self.inner.next(id) - } - - /// Peek the next free corpus id - #[inline] - fn peek_free_id(&self) -> CorpusId { - self.inner.peek_free_id() - } - - #[inline] - fn prev(&self, id: CorpusId) -> Option { - self.inner.prev(id) - } - - #[inline] - fn first(&self) -> Option { - self.inner.first() - } - - #[inline] - fn last(&self) -> Option { - self.inner.last() - } - - /// Get the nth corpus id; considers only enabled testcases - #[inline] - fn nth(&self, nth: usize) -> CorpusId { - self.inner.nth(nth) - } - /// Get the nth corpus id; considers both enabled and disabled testcases - #[inline] - fn nth_from_all(&self, nth: usize) -> CorpusId { - self.inner.nth_from_all(nth) - } - - #[inline] - fn load_input_into(&self, testcase: &mut Testcase) -> Result<(), Error> { - self.inner.load_input_into(testcase) - } - - #[inline] - fn store_input_from(&self, testcase: &Testcase) -> Result<(), Error> { - self.inner.store_input_from(testcase) - } -} - -impl HasTestcase for OnDiskCorpus -where - I: Input, -{ - fn testcase(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow()) - } - - fn testcase_mut(&self, id: CorpusId) -> Result>, Error> { - Ok(self.get(id)?.borrow_mut()) - } -} - -impl EnableDisableCorpus for OnDiskCorpus -where - I: Input, -{ - #[inline] - fn disable(&mut self, id: CorpusId) -> Result<(), Error> { - self.inner.disable(id) - } - - #[inline] - fn enable(&mut self, id: CorpusId) -> Result<(), Error> { - self.inner.enable(id) - } -} - -impl OnDiskCorpus { - /// Creates an [`OnDiskCorpus`]. - /// - /// This corpus stores all testcases to disk. - /// - /// By default, it stores metadata for each [`Testcase`] as prettified json. - /// Metadata will be written to a file named `..metadata` - /// The metadata may include objective reason, specific information for a fuzz job, and more. - /// - /// To pick a different metadata format, use [`OnDiskCorpus::with_meta_format`]. - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn new

(dir_path: P) -> Result - where - P: AsRef, - { - Self::with_meta_format_and_prefix( - dir_path.as_ref(), - Some(OnDiskMetadataFormat::JsonPretty), - None, - true, - ) - } - - /// Creates the [`OnDiskCorpus`] with a filename prefix. - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn with_prefix

(dir_path: P, prefix: Option) -> Result - where - P: AsRef, - { - Self::with_meta_format_and_prefix( - dir_path.as_ref(), - Some(OnDiskMetadataFormat::JsonPretty), - prefix, - true, - ) - } - - /// Creates the [`OnDiskCorpus`] specifying the format in which `Metadata` will be saved to disk. - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn with_meta_format

( - dir_path: P, - meta_format: OnDiskMetadataFormat, - ) -> Result - where - P: AsRef, - { - Self::with_meta_format_and_prefix(dir_path.as_ref(), Some(meta_format), None, true) - } - - /// Creates an [`OnDiskCorpus`] that will not store .metadata files - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn no_meta

(dir_path: P) -> Result - where - P: AsRef, - { - Self::with_meta_format_and_prefix(dir_path.as_ref(), None, None, true) - } - - /// Creates a new corpus at the given (non-generic) path with the given optional `meta_format` - /// and `prefix`. - /// - /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. - pub fn with_meta_format_and_prefix( - dir_path: &Path, - meta_format: Option, - prefix: Option, - locking: bool, - ) -> Result { - Ok(OnDiskCorpus { - dir_path: dir_path.into(), - inner: CachedOnDiskCorpus::with_meta_format_and_prefix( - dir_path, - 1, - meta_format, - prefix, - locking, - )?, - }) - } - - /// Path to the corpus directory associated with this corpus - pub fn dir_path(&self) -> &PathBuf { - &self.dir_path - } -} diff --git a/crates/libafl/src/corpus/single.rs b/crates/libafl/src/corpus/single.rs new file mode 100644 index 00000000000..a86c5e21306 --- /dev/null +++ b/crates/libafl/src/corpus/single.rs @@ -0,0 +1,130 @@ +//! A simple corpus, with a backing store. +//! +//! A [`SingleCorpus`] owns a single store, in which every testcase is added. + +use alloc::{rc::Rc, vec::Vec}; +use core::marker::PhantomData; + +use libafl_bolts::Error; +use serde::{Deserialize, Serialize}; + +use super::{Corpus, CorpusCounter, CorpusId, Testcase, store::Store}; +use crate::corpus::testcase::TestcaseMetadata; + +/// You average corpus. +/// It has one backing store, used to store / retrieve testcases. +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct SingleCorpus { + /// The backing testcase store + store: S, + /// The corpus ID counter + counter: CorpusCounter, + /// The keys in order (use `Vec::binary_search`) + keys: Vec, + /// The current ID + current: Option, + phantom: PhantomData, +} + +impl Default for SingleCorpus +where + S: Default, +{ + fn default() -> Self { + Self::new(S::default()) + } +} + +impl SingleCorpus { + /// Create a new [`SingleCorpus`] + pub fn new(store: S) -> Self { + Self { + store, + counter: CorpusCounter::default(), + keys: Vec::default(), + current: None, + phantom: PhantomData, + } + } +} + +impl Corpus for SingleCorpus +where + S: Store, +{ + type TestcaseMetadataCell = S::TestcaseMetadataCell; + + fn count(&self) -> usize { + self.store.count() + } + + fn count_disabled(&self) -> usize { + self.store.count_disabled() + } + + fn count_all(&self) -> usize { + self.store.count_all() + } + + fn add_shared( + &mut self, + input: Rc, + md: TestcaseMetadata, + ) -> Result { + let new_id = self.counter.new_id(); + self.store.add_shared::(new_id, input, md)?; + Ok(new_id) + } + + /// Get testcase by id + fn get_from( + &self, + id: CorpusId, + ) -> Result, Error> { + self.store.get_from::(id) + } + + fn disable(&mut self, id: CorpusId) -> Result<(), Error> { + self.store.disable(id) + } + + fn replace_metadata( + &mut self, + id: CorpusId, + md: TestcaseMetadata, + ) -> Result { + self.store.replace_metadata(id, md) + } + + fn current(&self) -> &Option { + &self.current + } + + fn current_mut(&mut self) -> &mut Option { + &mut self.current + } + + fn next(&self, id: CorpusId) -> Option { + self.store.next(id) + } + + fn prev(&self, id: CorpusId) -> Option { + self.store.prev(id) + } + + fn first(&self) -> Option { + self.store.first() + } + + fn last(&self) -> Option { + self.store.last() + } + + fn nth(&self, nth: usize) -> CorpusId { + self.store.nth(nth) + } + + fn nth_from_all(&self, nth: usize) -> CorpusId { + self.store.nth_from_all(nth) + } +} diff --git a/crates/libafl/src/corpus/store/inmemory.rs b/crates/libafl/src/corpus/store/inmemory.rs new file mode 100644 index 00000000000..210db6cebca --- /dev/null +++ b/crates/libafl/src/corpus/store/inmemory.rs @@ -0,0 +1,162 @@ +//! An in-memory store + +use alloc::rc::Rc; +use core::marker::PhantomData; + +use libafl_bolts::Error; +use serde::{Deserialize, Serialize}; + +use super::{InMemoryCorpusMap, RemovableStore, Store}; +use crate::{ + corpus::{ + CorpusId, Testcase, + testcase::{HasInstantiableTestcaseMetadata, TestcaseMetadata}, + }, + inputs::Input, +}; + +/// The map type in which testcases are stored (disable the feature `corpus_btreemap` to use a `HashMap` instead of `BTreeMap`) +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct InMemoryStore { + enabled_map: M, + disabled_map: M, + phantom: PhantomData<(I, TMC)>, +} + +impl Default for InMemoryStore +where + M: Default, +{ + fn default() -> Self { + Self { + enabled_map: M::default(), + disabled_map: M::default(), + phantom: PhantomData, + } + } +} + +impl Store for InMemoryStore +where + M: InMemoryCorpusMap>, + TMC: HasInstantiableTestcaseMetadata + Clone, + I: Input, +{ + type TestcaseMetadataCell = TMC; + + fn count(&self) -> usize { + self.enabled_map.count() + } + + fn count_disabled(&self) -> usize { + self.disabled_map.count() + } + + fn is_empty(&self) -> bool { + self.enabled_map.is_empty() + } + + fn add_shared( + &mut self, + id: CorpusId, + input: Rc, + md: TestcaseMetadata, + ) -> Result<(), Error> { + if ENABLED { + self.enabled_map + .add(id, Testcase::new(input, TMC::instantiate(md))); + Ok(()) + } else { + self.disabled_map + .add(id, Testcase::new(input, TMC::instantiate(md))); + Ok(()) + } + } + + fn get_from( + &self, + id: CorpusId, + ) -> Result, Error> { + if ENABLED { + self.enabled_map + .get(id) + .cloned() + .ok_or_else(|| Error::key_not_found(format!("Index {id} not found"))) + } else { + let mut testcase = self.enabled_map.get(id); + + if testcase.is_none() { + testcase = self.disabled_map.get(id); + } + + testcase + .cloned() + .ok_or_else(|| Error::key_not_found(format!("Index {id} not found"))) + } + } + + fn disable(&mut self, id: CorpusId) -> Result<(), Error> { + let tc = self + .enabled_map + .remove(id) + .ok_or_else(|| Error::key_not_found(format!("Index {id} not found")))?; + self.disabled_map.add(id, tc); + Ok(()) + } + + fn replace_metadata( + &mut self, + _id: CorpusId, + _metadata: TestcaseMetadata, + ) -> Result { + todo!() + } + + fn prev(&self, id: CorpusId) -> Option { + self.enabled_map.prev(id) + } + + fn next(&self, id: CorpusId) -> Option { + self.enabled_map.next(id) + } + + fn first(&self) -> Option { + self.enabled_map.first() + } + + fn last(&self) -> Option { + self.enabled_map.last() + } + + fn nth(&self, nth: usize) -> CorpusId { + self.enabled_map.nth(nth) + } + + fn nth_from_all(&self, nth: usize) -> CorpusId { + let nb_enabled = self.enabled_map.count(); + if nth >= nb_enabled { + self.disabled_map.nth(nth.saturating_sub(nb_enabled)) + } else { + self.enabled_map.nth(nth) + } + } +} + +impl RemovableStore for InMemoryStore +where + M: InMemoryCorpusMap>, + TMC: HasInstantiableTestcaseMetadata + Clone, + I: Input, +{ + fn remove(&mut self, id: CorpusId) -> Result, Error> { + if let Some(tc) = self.enabled_map.remove(id) { + Ok(tc) + } else if let Some(tc) = self.disabled_map.remove(id) { + Ok(tc) + } else { + Err(Error::key_not_found(format!( + "Index {id} not found for remove" + ))) + } + } +} diff --git a/crates/libafl/src/corpus/store/maps.rs b/crates/libafl/src/corpus/store/maps.rs new file mode 100644 index 00000000000..65fe6e80f52 --- /dev/null +++ b/crates/libafl/src/corpus/store/maps.rs @@ -0,0 +1,325 @@ +//! Multiple map implementations for the in-memory store. + +use alloc::{collections::BTreeMap, vec::Vec}; + +use num_traits::Zero; +use serde::{Deserialize, Serialize}; + +use crate::corpus::{CorpusId, IsTestcaseMetadataCell, TestcaseMetadata}; + +/// A trait implemented by in-memory corpus maps +pub trait InMemoryCorpusMap { + /// Returns the number of testcases + fn count(&self) -> usize; + + /// Returns true, if no elements are in this corpus yet + fn is_empty(&self) -> bool { + self.count().is_zero() + } + + /// Store the testcase associated to `corpus_id`. + fn add(&mut self, id: CorpusId, testcase: T); + + /// Get by id; considers only enabled testcases + fn get(&self, id: CorpusId) -> Option<&T>; + + /// Get by id; considers only enabled testcases + fn get_mut(&mut self, id: CorpusId) -> Option<&mut T>; + + /// Remove a testcase from the map, returning the removed testcase if present. + fn remove(&mut self, id: CorpusId) -> Option; + + /// Get the prev corpus id in chronological order + fn prev(&self, id: CorpusId) -> Option; + + /// Get the next corpus id in chronological order + fn next(&self, id: CorpusId) -> Option; + + /// Get the first inserted corpus id + fn first(&self) -> Option; + + /// Get the last inserted corpus id + fn last(&self) -> Option; + + /// Get the nth inserted item + fn nth(&self, nth: usize) -> CorpusId; +} + +/// A corpus map for testcases. +pub trait InMemoryTestcaseMap: InMemoryCorpusMap { + /// Replace the metadata of a given testcase + fn replace_metadata( + &mut self, + id: CorpusId, + testcase_metadata: TestcaseMetadata, + ) -> Option; +} + +/// A history for [`CorpusId`] +#[derive(Default, Serialize, Deserialize, Clone, Debug)] +pub struct CorpusIdHistory { + keys: Vec, +} + +/// A [`BTreeMap`] based [`InMemoryCorpusMap`] +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct BtreeCorpusMap { + /// A map of `CorpusId` to `Testcase`. + map: BTreeMap, + /// A list of available corpus ids + history: CorpusIdHistory, +} + +/// Keep track of the stored `Testcase` and the siblings ids (insertion order) +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct TestcaseStorageItem { + /// The stored testcase + pub testcase: T, + /// Previously inserted id + pub prev: Option, + /// Following inserted id + pub next: Option, +} + +/// A [`hashbrown::HashMap`] based [`InMemoryCorpusMap`] +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct HashCorpusMap { + /// A map of `CorpusId` to `TestcaseStorageItem` + map: hashbrown::HashMap>, + /// First inserted id + first_id: Option, + /// Last inserted id + last_id: Option, + /// A list of available corpus ids + history: CorpusIdHistory, +} + +impl Default for BtreeCorpusMap { + fn default() -> Self { + Self { + map: BTreeMap::default(), + history: CorpusIdHistory::default(), + } + } +} + +impl Default for HashCorpusMap { + fn default() -> Self { + Self { + map: hashbrown::HashMap::default(), + first_id: None, + last_id: None, + history: CorpusIdHistory::default(), + } + } +} + +impl CorpusIdHistory { + /// Add a key to the history + pub fn add(&mut self, id: CorpusId) { + if let Err(idx) = self.keys.binary_search(&id) { + self.keys.insert(idx, id); + } + } + + /// Remove a key from the history + pub fn remove(&mut self, id: CorpusId) { + if let Ok(idx) = self.keys.binary_search(&id) { + self.keys.remove(idx); + } + } + + // Get the nth item from the map + fn nth(&self, idx: usize) -> CorpusId { + self.keys[idx] + } +} + +impl InMemoryCorpusMap for HashCorpusMap { + fn count(&self) -> usize { + self.map.len() + } + + fn is_empty(&self) -> bool { + self.map.is_empty() + } + + fn add(&mut self, id: CorpusId, testcase: T) { + let prev = if let Some(last_id) = self.last_id { + self.map.get_mut(&last_id).unwrap().next = Some(id); + Some(last_id) + } else { + None + }; + + if self.first_id.is_none() { + self.first_id = Some(id); + } + + self.last_id = Some(id); + + self.history.add(id); + + self.map.insert( + id, + TestcaseStorageItem { + testcase, + prev, + next: None, + }, + ); + } + + fn get(&self, id: CorpusId) -> Option<&T> { + self.map.get(&id).map(|inner| &inner.testcase) + } + + fn get_mut(&mut self, id: CorpusId) -> Option<&mut T> { + self.map.get_mut(&id).map(|storage| &mut storage.testcase) + } + + fn remove(&mut self, id: CorpusId) -> Option { + let entry = self.map.remove(&id)?; + self.history.remove(id); + + if let Some(prev) = &entry.prev { + self.map.get_mut(prev).unwrap().next = entry.next; + } + + if let Some(next) = &entry.next { + self.map.get_mut(next).unwrap().prev = entry.prev; + } + + Some(entry.testcase) + } + + fn prev(&self, id: CorpusId) -> Option { + match self.map.get(&id) { + Some(item) => item.prev, + _ => None, + } + } + + fn next(&self, id: CorpusId) -> Option { + match self.map.get(&id) { + Some(item) => item.next, + _ => None, + } + } + + fn first(&self) -> Option { + self.first_id + } + + fn last(&self) -> Option { + self.last_id + } + + fn nth(&self, nth: usize) -> CorpusId { + self.history.nth(nth) + } +} + +impl InMemoryTestcaseMap for HashCorpusMap +where + T: IsTestcaseMetadataCell, +{ + fn replace_metadata( + &mut self, + id: CorpusId, + testcase_metadata: TestcaseMetadata, + ) -> Option { + let old_tc = self.map.get_mut(&id)?; + Some(old_tc.testcase.replace_testcase_metadata(testcase_metadata)) + } +} + +impl InMemoryCorpusMap for BtreeCorpusMap { + fn count(&self) -> usize { + self.map.len() + } + + fn is_empty(&self) -> bool { + self.map.is_empty() + } + + fn add(&mut self, id: CorpusId, testcase: T) { + // corpus.insert_key(id); + self.map.insert(id, testcase); + self.history.add(id); + } + + fn get(&self, id: CorpusId) -> Option<&T> { + self.map.get(&id) + } + + fn get_mut(&mut self, id: CorpusId) -> Option<&mut T> { + self.map.get_mut(&id) + } + + fn remove(&mut self, id: CorpusId) -> Option { + let ret = self.map.remove(&id)?; + self.history.remove(id); + Some(ret) + } + + fn prev(&self, id: CorpusId) -> Option { + // TODO see if using self.keys is faster + let mut range = self + .map + .range((core::ops::Bound::Unbounded, core::ops::Bound::Included(id))); + if let Some((this_id, _)) = range.next_back() { + if id != *this_id { + return None; + } + } + if let Some((prev_id, _)) = range.next_back() { + Some(*prev_id) + } else { + None + } + } + + fn next(&self, id: CorpusId) -> Option { + // TODO see if using self.keys is faster + let mut range = self + .map + .range((core::ops::Bound::Included(id), core::ops::Bound::Unbounded)); + if let Some((this_id, _)) = range.next() { + if id != *this_id { + return None; + } + } + if let Some((next_id, _)) = range.next() { + Some(*next_id) + } else { + None + } + } + + fn first(&self) -> Option { + self.map.iter().next().map(|x| *x.0) + } + + fn last(&self) -> Option { + self.map.iter().next_back().map(|x| *x.0) + } + + fn nth(&self, nth: usize) -> CorpusId { + self.history.nth(nth) + } +} + +impl InMemoryTestcaseMap for BtreeCorpusMap +where + T: IsTestcaseMetadataCell, +{ + fn replace_metadata( + &mut self, + id: CorpusId, + testcase_metadata: TestcaseMetadata, + ) -> Option { + let tc = self.get_mut(id)?; + Some(tc.replace_testcase_metadata(testcase_metadata)) + } +} diff --git a/crates/libafl/src/corpus/store/mod.rs b/crates/libafl/src/corpus/store/mod.rs new file mode 100644 index 00000000000..ac2e43cafdc --- /dev/null +++ b/crates/libafl/src/corpus/store/mod.rs @@ -0,0 +1,97 @@ +//! Stores are collections managing testcases + +use alloc::rc::Rc; + +use libafl_bolts::Error; + +use super::{CorpusId, Testcase}; +use crate::corpus::testcase::{IsTestcaseMetadataCell, TestcaseMetadata}; + +pub mod maps; +pub use maps::{BtreeCorpusMap, HashCorpusMap, InMemoryCorpusMap}; + +pub mod inmemory; +pub use inmemory::InMemoryStore; + +pub mod ondisk; +pub use ondisk::{OnDiskMetadataFormat, OnDiskStore}; + +/// A store is responsible for storing and retrieving [`Testcase`]s, ordered by add time. +pub trait Store { + /// A [`TestcaseMetadata`] cell. + type TestcaseMetadataCell: IsTestcaseMetadataCell; + + /// Returns the number of all enabled entries + fn count(&self) -> usize; + + /// Returns the number of all disabled entries + fn count_disabled(&self) -> usize; + + /// Returns the number of elements including disabled entries + fn count_all(&self) -> usize { + self.count().saturating_add(self.count_disabled()) + } + + /// Returns true, if no elements are in this corpus yet + fn is_empty(&self) -> bool { + self.count() == 0 + } + + /// Store the testcase associated to `corpus_id` to the set. + fn add_shared( + &mut self, + id: CorpusId, + input: Rc, + md: TestcaseMetadata, + ) -> Result<(), Error>; + + /// Get testcase by id; considers only enabled testcases + fn get(&self, id: CorpusId) -> Result, Error> { + Self::get_from::(self, id) + } + + /// Get testcase by id; considers both enabled and disabled testcases + fn get_from_all(&self, id: CorpusId) -> Result, Error> { + Self::get_from::(self, id) + } + + /// Get testcase by id + fn get_from( + &self, + id: CorpusId, + ) -> Result, Error>; + + /// Disable a testcase by id + fn disable(&mut self, id: CorpusId) -> Result<(), Error>; + + /// Replaces the [`Testcase`] at the given idx in the enabled set, returning the existing. + fn replace_metadata( + &mut self, + id: CorpusId, + metadata: TestcaseMetadata, + ) -> Result; + + /// Get the prev corpus id in chronological order + fn prev(&self, id: CorpusId) -> Option; + + /// Get the next corpus id in chronological order + fn next(&self, id: CorpusId) -> Option; + + /// Get the first inserted corpus id + fn first(&self) -> Option; + + /// Get the last inserted corpus id + fn last(&self) -> Option; + + /// Get the nth corpus id; considers only enabled testcases + fn nth(&self, nth: usize) -> CorpusId; + + /// Get the nth corpus id; considers both enabled and disabled testcases + fn nth_from_all(&self, nth: usize) -> CorpusId; +} + +/// A Store with removable entries +pub trait RemovableStore: Store { + /// Removes an entry from the corpus, returning it; considers both enabled and disabled testcases + fn remove(&mut self, id: CorpusId) -> Result, Error>; +} diff --git a/crates/libafl/src/corpus/store/ondisk.rs b/crates/libafl/src/corpus/store/ondisk.rs new file mode 100644 index 00000000000..a212b88f576 --- /dev/null +++ b/crates/libafl/src/corpus/store/ondisk.rs @@ -0,0 +1,371 @@ +//! An on-disk store + +use alloc::{rc::Rc, string::String, vec::Vec}; +use core::{ + cell::{Ref, RefCell, RefMut}, + marker::PhantomData, +}; +use std::{ + fs::{self, OpenOptions}, + io::Write, + path::{Path, PathBuf}, +}; + +#[cfg(feature = "gzip")] +use libafl_bolts::compress::GzipCompressor; +use libafl_bolts::Error; +use serde::{Deserialize, Serialize}; + +use super::{InMemoryCorpusMap, Store}; +use crate::{ + corpus::{ + CorpusId, Testcase, + testcase::{IsTestcaseMetadataCell, TestcaseMetadata}, + }, + inputs::Input, +}; + +/// An on-disk store +/// +/// The maps only store the unique ID associated to the added [`Testcase`]s. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct OnDiskStore { + disk_mgr: Rc>, + enabled_map: M, + disabled_map: M, + first: Option, + last: Option, +} + +/// A Disk Manager, able to load and store [`Testcase`]s +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DiskMgr { + root_dir: PathBuf, + md_format: OnDiskMetadataFormat, + phantom: PhantomData, +} + +/// An on-disk [`Testcase`] cell. +#[derive(Debug)] +pub struct OnDiskTestcaseCell { + mgr: Rc>, + id: String, + testcase_md: RefCell, + modified: RefCell, +} + +/// Options for the the format of the on-disk metadata +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub enum OnDiskMetadataFormat { + /// A binary-encoded postcard + Postcard, + /// JSON + Json, + /// JSON formatted for readability + #[default] + JsonPretty, + /// The same as [`OnDiskMetadataFormat::JsonPretty`], but compressed + #[cfg(feature = "gzip")] + JsonGzip, +} + +impl OnDiskMetadataFormat { + /// Convert a [`TestcaseMetadata`] to the format [`OnDiskMetadataFormat`] and stores it in a [`Vec`]. + pub fn to_vec(&self, testcase_md: &TestcaseMetadata) -> Result, Error> { + let json_error = |err| Error::serialize(format!("Failed to json-ify metadata: {err:?}")); + + Ok(match self { + OnDiskMetadataFormat::Postcard => postcard::to_allocvec(testcase_md)?, + OnDiskMetadataFormat::Json => serde_json::to_vec(&testcase_md).map_err(json_error)?, + OnDiskMetadataFormat::JsonPretty => { + serde_json::to_vec_pretty(&testcase_md).map_err(json_error)? + } + #[cfg(feature = "gzip")] + OnDiskMetadataFormat::JsonGzip => GzipCompressor::new() + .compress(&serde_json::to_vec_pretty(&testcase_md).map_err(json_error)?), + }) + } + + /// Load a [`TestcaseMetadata`] from a file with a format [`OnDiskMetadataFormat`]. + pub fn from_file(&self, md_path: &Path) -> Result { + let json_error = |err| Error::serialize(format!("Failed to parse metadata: {err:?}")); + let md_serialized = fs::read(md_path)?; + + Ok(match self { + OnDiskMetadataFormat::Postcard => postcard::from_bytes(&md_serialized)?, + OnDiskMetadataFormat::Json => { + serde_json::from_slice(&md_serialized).map_err(json_error)? + } + OnDiskMetadataFormat::JsonPretty => { + serde_json::from_slice(&md_serialized).map_err(json_error)? + } + #[cfg(feature = "gzip")] + OnDiskMetadataFormat::JsonGzip => { + serde_json::from_slice(&GzipCompressor::new().decompress(&md_serialized)?) + .map_err(json_error)? + } + }) + } +} + +impl OnDiskTestcaseCell { + /// Get a new [`OnDiskTestcaseCell`]. + #[must_use] + pub fn new(mgr: Rc>, id: String, testcase_md: TestcaseMetadata) -> Self { + Self { + mgr, + id, + testcase_md: RefCell::new(testcase_md), + modified: RefCell::new(false), + } + } +} + +impl IsTestcaseMetadataCell for OnDiskTestcaseCell { + type TestcaseMetadataRef<'a> + = Ref<'a, TestcaseMetadata> + where + I: 'a; + type TestcaseMetadataRefMut<'a> + = RefMut<'a, TestcaseMetadata> + where + I: 'a; + + fn testcase_metadata(&self) -> Ref<'_, TestcaseMetadata> { + self.testcase_md.borrow() + } + + fn testcase_metadata_mut(&self) -> RefMut<'_, TestcaseMetadata> { + *self.modified.borrow_mut() = true; + self.testcase_md.borrow_mut() + } + + fn into_testcase_metadata(self) -> TestcaseMetadata { + self.testcase_md.clone().into_inner() + } + + fn replace_testcase_metadata(&self, _testcase_metadata: TestcaseMetadata) -> TestcaseMetadata { + todo!() + } + + fn flush(&self) -> Result<(), Error> { + self.mgr.save_metadata(&self.id, &self.testcase_md.borrow()) + } +} + +impl Drop for OnDiskTestcaseCell { + fn drop(&mut self) { + self.flush().unwrap(); + } +} + +impl DiskMgr { + /// Create a new [`DiskMgr`] + pub fn new(root_dir: PathBuf) -> Result { + Self::new_with_format(root_dir, OnDiskMetadataFormat::default()) + } + + /// Create a new [`DiskMgr`], with a given [`OnDiskMetadataFormat`] + pub fn new_with_format( + root_dir: PathBuf, + md_format: OnDiskMetadataFormat, + ) -> Result { + Ok(Self { + root_dir, + md_format, + phantom: PhantomData, + }) + } + + fn testcase_path(&self, testcase_id: &String) -> PathBuf { + self.root_dir.join(testcase_id) + } + + fn testcase_md_path(&self, testcase_id: &String) -> PathBuf { + self.root_dir.join(format!(".{testcase_id}.metadata")) + } + + /// The file is created if it does not exist, or reused if it's already there + pub fn save_metadata(&self, id: &String, md: &TestcaseMetadata) -> Result<(), Error> { + let testcase_md_path = self.testcase_md_path(id); + + let mut testcase_md_f = OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(&testcase_md_path)?; + + let testcase_md_ser = self.md_format.to_vec(md)?; + testcase_md_f.write_all(&testcase_md_ser)?; + + Ok(()) + } +} + +impl DiskMgr +where + I: Input, +{ + fn save_input(&self, input: &I) -> Result { + let testcase_id = Testcase::>::compute_id(input); + let testcase_path = self.testcase_path(&testcase_id); + input.to_file(testcase_path.as_path())?; + + Ok(testcase_id) + } + + fn save_testcase(&self, input: &I, md: &TestcaseMetadata) -> Result { + let id = self.save_input(input)?; + self.save_metadata(&id, md)?; + Ok(id) + } + + /// prerequisite: the testcase should not have been "removed" before. + /// also, it should only happen if it has been saved before. + fn load_testcase( + self: &Rc, + testcase_id: &String, + ) -> Result>, Error> { + let testcase_path = self.as_ref().testcase_path(testcase_id); + let testcase_md_path = self.as_ref().testcase_md_path(testcase_id); + let ser_fmt = self.md_format.clone(); + + // let _lockfile = TestcaseLockfile::new(self, testcase_id)?; + + let input = I::from_file(testcase_path.as_path())?; + let md = ser_fmt.from_file(testcase_md_path.as_path())?; + + Ok(Testcase::new( + Rc::new(input), + OnDiskTestcaseCell::new(self.clone(), testcase_id.clone(), md), + )) + } +} + +impl OnDiskStore +where + M: Default, +{ + /// Create a new [`OnDiskStore`] + pub fn new(root: PathBuf) -> Result { + Self::new_with_format(root, OnDiskMetadataFormat::default()) + } + + /// Create a new [`OnDiskStore`], with a specified [`OnDiskMetadataFormat`]. + pub fn new_with_format(root: PathBuf, md_format: OnDiskMetadataFormat) -> Result { + let disk_mgr = Rc::new(DiskMgr::new_with_format(root, md_format)?); + + Ok(Self { + disk_mgr, + enabled_map: M::default(), + disabled_map: M::default(), + first: None, + last: None, + }) + } +} + +impl Store for OnDiskStore +where + I: Input, + M: InMemoryCorpusMap, +{ + type TestcaseMetadataCell = OnDiskTestcaseCell; + + fn count_all(&self) -> usize { + self.count().saturating_add(self.count_disabled()) + } + + fn is_empty(&self) -> bool { + self.count() == 0 + } + + fn count(&self) -> usize { + self.enabled_map.count() + } + + fn count_disabled(&self) -> usize { + self.disabled_map.count() + } + + fn add_shared( + &mut self, + id: CorpusId, + input: Rc, + md: TestcaseMetadata, + ) -> Result<(), Error> { + let testcase_id = self.disk_mgr.save_testcase(input.as_ref(), &md)?; + + if ENABLED { + self.enabled_map.add(id, testcase_id); + } else { + self.disabled_map.add(id, testcase_id); + } + + Ok(()) + } + + fn get_from( + &self, + id: CorpusId, + ) -> Result, Error> { + let tc_id = if ENABLED { + self.enabled_map + .get(id) + .ok_or_else(|| Error::key_not_found(format!("Index not found: {id}")))? + } else { + self.enabled_map + .get(id) + .or_else(|| self.disabled_map.get(id)) + .ok_or_else(|| Error::key_not_found(format!("Index {id} not found")))? + }; + + self.disk_mgr.load_testcase(tc_id) + } + + fn disable(&mut self, id: CorpusId) -> Result<(), Error> { + let tc = self + .enabled_map + .remove(id) + .ok_or_else(|| Error::key_not_found(format!("Index {id} not found")))?; + self.disabled_map.add(id, tc); + Ok(()) + } + + fn replace_metadata( + &mut self, + _id: CorpusId, + _metadata: TestcaseMetadata, + ) -> Result { + todo!() + } + + fn prev(&self, id: CorpusId) -> Option { + self.enabled_map.prev(id) + } + + fn next(&self, id: CorpusId) -> Option { + self.enabled_map.next(id) + } + + fn first(&self) -> Option { + self.enabled_map.first() + } + + fn last(&self) -> Option { + self.enabled_map.last() + } + + fn nth(&self, nth: usize) -> CorpusId { + self.enabled_map.nth(nth) + } + + fn nth_from_all(&self, nth: usize) -> CorpusId { + let nb_enabled = self.enabled_map.count(); + if nth >= nb_enabled { + self.disabled_map.nth(nth.saturating_sub(nb_enabled)) + } else { + self.enabled_map.nth(nth) + } + } +} diff --git a/crates/libafl/src/corpus/testcase.rs b/crates/libafl/src/corpus/testcase.rs index dfcaeab4ebe..15d00e3ecef 100644 --- a/crates/libafl/src/corpus/testcase.rs +++ b/crates/libafl/src/corpus/testcase.rs @@ -1,321 +1,606 @@ //! The [`Testcase`] is a struct embedded in each [`Corpus`]. //! It will contain a respective input, and metadata. -use alloc::string::String; #[cfg(feature = "track_hit_feedbacks")] use alloc::{borrow::Cow, vec::Vec}; +use alloc::{rc::Rc, string::String}; use core::{ - cell::{Ref, RefMut}, + cell::{Ref, RefCell, RefMut}, + fmt::{Debug, Formatter}, + hash::Hasher, + marker::PhantomData, + ops::{Deref, DerefMut}, time::Duration, }; -#[cfg(feature = "std")] -use std::path::PathBuf; -use libafl_bolts::{HasLen, serdeany::SerdeAnyMap}; -use serde::{Deserialize, Serialize}; +use libafl_bolts::{ + HasLen, hasher_std, + serdeany::{SerdeAny, SerdeAnyMap}, +}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use typed_builder::TypedBuilder; + +use crate::{ + Error, HasMetadata, + corpus::{Corpus, CorpusId}, + inputs::Input, + state::HasCorpus, +}; + +/// A testcase metadata cell that can be instantiated only from a [`TestcaseMetadata`]. +pub trait HasInstantiableTestcaseMetadata: IsTestcaseMetadataCell { + /// Instantiate a testcase metadata cell from a [`TestcaseMetadata`]. + fn instantiate(metadata: TestcaseMetadata) -> Self; +} + +/// Trait implemented by possible [`TestcaseMetadata`] cells. +pub trait IsTestcaseMetadataCell { + /// A reference to a testcase metadata. + type TestcaseMetadataRef<'a>: Deref + where + Self: 'a; + + /// A mutable reference to a testcase metadata. + type TestcaseMetadataRefMut<'a>: DerefMut + where + Self: 'a; + + /// Get a reference to the testcase metadata. + fn testcase_metadata(&self) -> Self::TestcaseMetadataRef<'_>; + + /// Get a mutable reference to the testcase metadata. + fn testcase_metadata_mut(&self) -> Self::TestcaseMetadataRefMut<'_>; + + /// Consume the cell, and get the inner testcase metadata. + fn into_testcase_metadata(self) -> TestcaseMetadata; + + /// Replace the inner testcase metadata with new metadata, returning the old metadata + fn replace_testcase_metadata(&self, testcase_metadata: TestcaseMetadata) -> TestcaseMetadata { + let mut tc_ref = self.testcase_metadata_mut(); + let old_tc = tc_ref.clone(); + *tc_ref = testcase_metadata; + old_tc + } + + /// Propagate metadata cell changes + fn flush(&self) -> Result<(), Error> { + Ok(()) + } +} + +/// A dummy (empty) [`TestcaseMetadata`] reference. +#[derive(Default, Clone, Copy, Debug)] +pub struct NopTestcaseMetadataRef<'a>(PhantomData<&'a ()>); + +/// A dummy (empty) [`TestcaseMetadata`] cell. +#[derive(Default, Clone, Copy, Debug)] +pub struct NopTestcaseMetadataCell; + +impl Deref for NopTestcaseMetadataRef<'_> { + type Target = TestcaseMetadata; + + fn deref(&self) -> &Self::Target { + panic!("Invalid testcase metadata ref") + } +} -use super::Corpus; -use crate::{Error, HasMetadata, corpus::CorpusId}; +impl DerefMut for NopTestcaseMetadataRef<'_> { + fn deref_mut(&mut self) -> &mut Self::Target { + panic!("Invalid testcase metadata ref mut") + } +} + +impl IsTestcaseMetadataCell for NopTestcaseMetadataCell { + type TestcaseMetadataRef<'a> = NopTestcaseMetadataRef<'a>; + type TestcaseMetadataRefMut<'a> = NopTestcaseMetadataRef<'a>; + + fn testcase_metadata(&self) -> Self::TestcaseMetadataRef<'_> { + NopTestcaseMetadataRef::default() + } + + fn testcase_metadata_mut(&self) -> Self::TestcaseMetadataRefMut<'_> { + NopTestcaseMetadataRef::default() + } + + fn into_testcase_metadata(self) -> TestcaseMetadata { + panic!("Invalid testcase metadata") + } + + fn replace_testcase_metadata(&self, _testcase_metadata: TestcaseMetadata) -> TestcaseMetadata { + panic!("Invalid testcase metadata") + } +} + +impl IsTestcaseMetadataCell for RefCell { + type TestcaseMetadataRef<'a> = Ref<'a, TestcaseMetadata>; + type TestcaseMetadataRefMut<'a> = RefMut<'a, TestcaseMetadata>; + + fn testcase_metadata(&self) -> Self::TestcaseMetadataRef<'_> { + self.borrow() + } + + fn testcase_metadata_mut(&self) -> Self::TestcaseMetadataRefMut<'_> { + self.borrow_mut() + } + + fn into_testcase_metadata(self) -> TestcaseMetadata { + self.into_inner() + } + + fn replace_testcase_metadata(&self, testcase_metadata: TestcaseMetadata) -> TestcaseMetadata { + RefCell::replace(self, testcase_metadata) + } +} + +impl HasInstantiableTestcaseMetadata for RefCell { + fn instantiate(metadata: TestcaseMetadata) -> Self { + RefCell::new(metadata) + } +} + +impl IsTestcaseMetadataCell for Rc +where + T: IsTestcaseMetadataCell + Clone, +{ + type TestcaseMetadataRef<'a> + = T::TestcaseMetadataRef<'a> + where + Self: 'a; + + type TestcaseMetadataRefMut<'a> + = T::TestcaseMetadataRefMut<'a> + where + Self: 'a; + + // fn new(md: TestcaseMetadata) -> Self { + // Rc::new(T::new(md)) + // } + fn testcase_metadata(&self) -> Self::TestcaseMetadataRef<'_> { + self.deref().testcase_metadata() + } + + fn testcase_metadata_mut(&self) -> Self::TestcaseMetadataRefMut<'_> { + self.deref().testcase_metadata_mut() + } + + fn into_testcase_metadata(self) -> TestcaseMetadata { + self.deref().clone().into_testcase_metadata() + } + + fn replace_testcase_metadata(&self, testcase_metadata: TestcaseMetadata) -> TestcaseMetadata { + T::replace_testcase_metadata(self, testcase_metadata) + } +} + +impl HasInstantiableTestcaseMetadata for Rc +where + T: HasInstantiableTestcaseMetadata + Clone, +{ + fn instantiate(metadata: TestcaseMetadata) -> Self { + Rc::new(T::instantiate(metadata)) + } +} + +impl IsTestcaseMetadataCell for Testcase +where + M: IsTestcaseMetadataCell, +{ + type TestcaseMetadataRef<'a> + = M::TestcaseMetadataRef<'a> + where + Self: 'a; + type TestcaseMetadataRefMut<'a> + = M::TestcaseMetadataRefMut<'a> + where + Self: 'a; + + fn testcase_metadata(&self) -> Self::TestcaseMetadataRef<'_> { + self.metadata.testcase_metadata() + } + + fn testcase_metadata_mut(&self) -> Self::TestcaseMetadataRefMut<'_> { + self.metadata.testcase_metadata_mut() + } + + fn into_testcase_metadata(self) -> TestcaseMetadata { + self.metadata.into_testcase_metadata() + } + + fn replace_testcase_metadata(&self, testcase_metadata: TestcaseMetadata) -> TestcaseMetadata { + self.metadata.replace_testcase_metadata(testcase_metadata) + } +} /// Shorthand to receive a [`Ref`] or [`RefMut`] to a stored [`Testcase`], by [`CorpusId`]. /// For a normal state, this should return a [`Testcase`] in the corpus, not the objectives. -pub trait HasTestcase { +pub trait HasTestcase: HasCorpus { /// Shorthand to receive a [`Ref`] to a stored [`Testcase`], by [`CorpusId`]. /// For a normal state, this should return a [`Testcase`] in the corpus, not the objectives. - fn testcase(&self, id: CorpusId) -> Result>, Error>; - - /// Shorthand to receive a [`RefMut`] to a stored [`Testcase`], by [`CorpusId`]. - /// For a normal state, this should return a [`Testcase`] in the corpus, not the objectives. - fn testcase_mut(&self, id: CorpusId) -> Result>, Error>; + fn testcase( + &self, + id: CorpusId, + ) -> Result>::TestcaseMetadataCell>, Error>; } -/// An entry in the [`Testcase`] Corpus -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct Testcase { - /// The [`Input`] of this [`Testcase`], or `None`, if it is not currently in memory - input: Option, - /// The filename for this [`Testcase`] - filename: Option, - /// Complete path to the [`Input`] on disk, if this [`Testcase`] is backed by a file in the filesystem - #[cfg(feature = "std")] - file_path: Option, +/// The [`Testcase`] metadata. +#[derive(Serialize, Deserialize, Clone, Debug, Default, TypedBuilder)] +pub struct TestcaseMetadata { /// Map of metadata associated with this [`Testcase`] + #[builder(default)] metadata: SerdeAnyMap, - /// Complete path to the metadata [`SerdeAnyMap`] on disk, if this [`Testcase`] is backed by a file in the filesystem - #[cfg(feature = "std")] - metadata_path: Option, /// Time needed to execute the input + #[builder(default)] exec_time: Option, - /// Cached len of the input, if any - cached_len: Option, /// Number of fuzzing iterations of this particular input updated in `perform_mutational` + #[builder(default = 0)] scheduled_count: usize, /// Number of executions done at discovery time executions: u64, /// Parent [`CorpusId`], if known + #[builder(default)] parent_id: Option, /// If the testcase is "disabled" + #[builder(default = false)] disabled: bool, /// has found crash (or timeout) or not + #[builder(default = 0)] objectives_found: usize, /// Vector of `Feedback` names that deemed this `Testcase` as corpus worthy #[cfg(feature = "track_hit_feedbacks")] + #[builder(default)] hit_feedbacks: Vec>, /// Vector of `Feedback` names that deemed this `Testcase` as solution worthy #[cfg(feature = "track_hit_feedbacks")] + #[builder(default)] hit_objectives: Vec>, } -impl HasMetadata for Testcase { - /// Get all the metadata into an [`hashbrown::HashMap`] - #[inline] +/// An entry in the [`Testcase`] Corpus +pub struct Testcase { + /// The [`Input`] of this [`Testcase`], or `None`, if it is not currently in memory + input: Rc, + + /// The unique id for [`Testcase`]. + /// It should uniquely identify the input. + id: String, + + /// The metadata linked to the [`Testcase`] + pub(crate) metadata: M, +} + +impl Clone for Testcase +where + M: Clone, +{ + fn clone(&self) -> Self { + Self { + input: self.input.clone(), + id: self.id.clone(), + metadata: self.metadata.clone(), + } + } +} + +impl Debug for Testcase +where + I: Debug, + M: IsTestcaseMetadataCell, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + f.debug_struct("Testcase") + .field("input", self.input.as_ref()) + .field("id", &self.id) + .field("metadata", &*self.metadata.testcase_metadata()) + .finish() + } +} + +impl Serialize for Testcase +where + M: IsTestcaseMetadataCell, +{ + fn serialize(&self, _serializer: S) -> Result + where + S: Serializer, + { + todo!() + } +} + +impl<'de, I, M> Deserialize<'de> for Testcase +where + M: IsTestcaseMetadataCell, +{ + fn deserialize(_deserializer: D) -> Result + where + D: Deserializer<'de>, + { + todo!() + } +} + +impl HasMetadata for TestcaseMetadata { fn metadata_map(&self) -> &SerdeAnyMap { &self.metadata } - /// Get all the metadata into an [`hashbrown::HashMap`] (mutable) - #[inline] fn metadata_map_mut(&mut self) -> &mut SerdeAnyMap { &mut self.metadata } } +impl Testcase { + /// Get the input + #[inline] + pub fn input(&self) -> Rc { + self.input.clone() + } + + /// Get the associated unique ID. + pub fn id(&self) -> &String { + &self.id + } + + /// Decompose a [`Testcase`] into its inner input and metadata. + pub fn into_inner(self) -> (Rc, M) { + (self.input, self.metadata) + } +} + +impl Testcase +where + I: HasLen, +{ + /// Get the input length + pub fn input_len(&self) -> usize { + self.input.len() + } +} + +impl Testcase +where + I: Input, + M: IsTestcaseMetadataCell, +{ + /// Create a new Testcase instance given an input + pub fn new(input: Rc, metadata: M) -> Self { + let id = Self::compute_id(&input); + + Self { + input, + id, + metadata, + } + } + + /// Get the unique ID associated to an input. + pub fn compute_id(input: &I) -> String { + let mut hasher = hasher_std(); + input.hash(&mut hasher); + let hash = hasher.finish(); + format!("{hash:0>8x}") + } +} + +impl Testcase +where + M: IsTestcaseMetadataCell, + I: Clone, +{ + /// Clone the input embedded in the [`Testcase`]. + pub fn cloned_input(&self) -> I { + self.input.as_ref().clone() + } +} + /// Impl of a testcase -impl Testcase { - /// Returns this [`Testcase`] with a loaded `Input`] - pub fn load_input>(&mut self, corpus: &C) -> Result<&I, Error> { - corpus.load_input_into(self)?; - Ok(self.input.as_ref().unwrap()) +impl Testcase +where + M: IsTestcaseMetadataCell, +{ + /// Get the same testcase, with an owned [`TestcaseMetadata`]. + pub fn cloned(self) -> Testcase> { + Testcase { + input: self.input, + id: self.id, + metadata: RefCell::new(self.metadata.into_testcase_metadata()), + } } - /// Get the input, if available any + /// Test whether the metadata map contains a metadata #[inline] - pub fn input(&self) -> &Option { - &self.input + pub fn has_metadata(&self) -> bool + where + MT: SerdeAny, + { + self.metadata.testcase_metadata().has_metadata::() } - /// Get the input, if any (mutable) + /// Get the executions #[inline] - pub fn input_mut(&mut self) -> &mut Option { - // self.cached_len = None; - &mut self.input + pub fn executions(&self) -> u64 { + self.metadata.testcase_metadata().executions() } - /// Set the input + /// Get the `scheduled_count` #[inline] - pub fn set_input(&mut self, input: I) { - self.input = Some(input); + pub fn scheduled_count(&self) -> usize { + self.metadata.testcase_metadata().scheduled_count() } - /// Get the filename, if any + /// Get `disabled` #[inline] - pub fn filename(&self) -> &Option { - &self.filename + pub fn disabled(&mut self) -> bool { + self.metadata.testcase_metadata_mut().disabled() } - /// Get the filename, if any (mutable) - #[inline] - pub fn filename_mut(&mut self) -> &mut Option { - &mut self.filename + /// Get the id of the parent, that this testcase was derived from + #[must_use] + pub fn parent_id(&self) -> Option { + self.metadata.testcase_metadata().parent_id() } - /// Get the filename path, if any - #[inline] - #[cfg(feature = "std")] - pub fn file_path(&self) -> &Option { - &self.file_path + /// Gets how many objectives were found by mutating this testcase + pub fn objectives_found(&self) -> usize { + self.metadata.testcase_metadata().objectives_found() } - /// Get the filename path, if any (mutable) + /// Set the executions #[inline] - #[cfg(feature = "std")] - pub fn file_path_mut(&mut self) -> &mut Option { - &mut self.file_path + pub fn set_executions(&mut self, executions: u64) { + self.metadata + .testcase_metadata_mut() + .set_executions(executions); } - /// Get the metadata path, if any + /// Sets the execution time of the current testcase #[inline] - #[cfg(feature = "std")] - pub fn metadata_path(&self) -> &Option { - &self.metadata_path + pub fn set_exec_time(&mut self, time: Duration) { + self.metadata.testcase_metadata_mut().set_exec_time(time); } - /// Get the metadata path, if any (mutable) + /// Set the `scheduled_count` #[inline] - #[cfg(feature = "std")] - pub fn metadata_path_mut(&mut self) -> &mut Option { - &mut self.metadata_path + pub fn set_scheduled_count(&mut self, scheduled_count: usize) { + self.metadata + .testcase_metadata_mut() + .set_scheduled_count(scheduled_count); } - /// Get the executions + /// Set the testcase as disabled #[inline] - pub fn executions(&self) -> &u64 { - &self.executions + pub fn set_disabled(&mut self, disabled: bool) { + self.metadata.testcase_metadata_mut().set_disabled(disabled); } - /// Get the executions (mutable) - #[inline] - pub fn executions_mut(&mut self) -> &mut u64 { - &mut self.executions + /// Sets the id of the parent, that this testcase was derived from + pub fn set_parent_id(&mut self, parent_id: CorpusId) { + self.metadata + .testcase_metadata_mut() + .set_parent_id(parent_id); } - /// Set the executions - #[inline] - pub fn set_executions(&mut self, executions: u64) { - self.executions = executions; + /// Sets the id of the parent, that this testcase was derived from + pub fn set_parent_id_optional(&mut self, parent_id: Option) { + self.metadata + .testcase_metadata_mut() + .set_parent_id_optional(parent_id); } - /// Get the execution time of the testcase - #[inline] - pub fn exec_time(&self) -> &Option { - &self.exec_time + /// Adds one objective to the `objectives_found` counter. Mostly called from crash handler or executor. + pub fn found_objective(&mut self) { + self.metadata.testcase_metadata_mut().found_objective(); } +} - /// Get the execution time of the testcase (mutable) +impl TestcaseMetadata { + /// Get the executions #[inline] - pub fn exec_time_mut(&mut self) -> &mut Option { - &mut self.exec_time + #[must_use] + pub fn executions(&self) -> u64 { + self.executions } - /// Sets the execution time of the current testcase + /// Get the execution time of the testcase #[inline] - pub fn set_exec_time(&mut self, time: Duration) { - self.exec_time = Some(time); + #[must_use] + pub fn exec_time(&self) -> &Option { + &self.exec_time } /// Get the `scheduled_count` #[inline] + #[must_use] pub fn scheduled_count(&self) -> usize { self.scheduled_count } - /// Set the `scheduled_count` - #[inline] - pub fn set_scheduled_count(&mut self, scheduled_count: usize) { - self.scheduled_count = scheduled_count; - } - /// Get `disabled` #[inline] + #[must_use] pub fn disabled(&mut self) -> bool { self.disabled } - /// Set the testcase as disabled - #[inline] - pub fn set_disabled(&mut self, disabled: bool) { - self.disabled = disabled; - } - /// Get the hit feedbacks #[inline] + #[must_use] #[cfg(feature = "track_hit_feedbacks")] pub fn hit_feedbacks(&self) -> &Vec> { &self.hit_feedbacks } - /// Get the hit feedbacks (mutable) - #[inline] - #[cfg(feature = "track_hit_feedbacks")] - pub fn hit_feedbacks_mut(&mut self) -> &mut Vec> { - &mut self.hit_feedbacks - } - /// Get the hit objectives #[inline] + #[must_use] #[cfg(feature = "track_hit_feedbacks")] pub fn hit_objectives(&self) -> &Vec> { &self.hit_objectives } - /// Get the hit objectives (mutable) + /// Get the id of the parent, that this testcase was derived from + #[must_use] + pub fn parent_id(&self) -> Option { + self.parent_id + } + + /// Gets how many objectives were found by mutating this testcase + #[must_use] + pub fn objectives_found(&self) -> usize { + self.objectives_found + } + + /// Get the executions (mutable) #[inline] - #[cfg(feature = "track_hit_feedbacks")] - pub fn hit_objectives_mut(&mut self) -> &mut Vec> { - &mut self.hit_objectives + #[must_use] + pub fn executions_mut(&mut self) -> &mut u64 { + &mut self.executions } - /// Create a new Testcase instance given an input + /// Set the executions #[inline] - pub fn new(input: I) -> Self { - Self { - input: Some(input), - filename: None, - #[cfg(feature = "std")] - file_path: None, - metadata: SerdeAnyMap::default(), - #[cfg(feature = "std")] - metadata_path: None, - exec_time: None, - cached_len: None, - executions: 0, - scheduled_count: 0, - parent_id: None, - disabled: false, - objectives_found: 0, - #[cfg(feature = "track_hit_feedbacks")] - hit_feedbacks: Vec::new(), - #[cfg(feature = "track_hit_feedbacks")] - hit_objectives: Vec::new(), - } + pub fn set_executions(&mut self, executions: u64) { + self.executions = executions; } - /// Creates a testcase, attaching the id of the parent - /// that this [`Testcase`] was derived from on creation - pub fn with_parent_id(input: I, parent_id: CorpusId) -> Self { - Testcase { - input: Some(input), - filename: None, - #[cfg(feature = "std")] - file_path: None, - metadata: SerdeAnyMap::default(), - #[cfg(feature = "std")] - metadata_path: None, - exec_time: None, - cached_len: None, - executions: 0, - scheduled_count: 0, - parent_id: Some(parent_id), - disabled: false, - objectives_found: 0, - #[cfg(feature = "track_hit_feedbacks")] - hit_feedbacks: Vec::new(), - #[cfg(feature = "track_hit_feedbacks")] - hit_objectives: Vec::new(), - } + /// Get a mutable reference to the execution time + #[must_use] + pub fn exec_time_mut(&mut self) -> &mut Option { + &mut self.exec_time + } + + /// Sets the execution time of the current testcase + #[inline] + pub fn set_exec_time(&mut self, time: Duration) { + self.exec_time = Some(time); } - /// Create a new Testcase instance given an input and a `filename` - /// If locking is enabled, make sure that testcases with the same input have the same filename - /// to prevent ending up with duplicate testcases + /// Set the `scheduled_count` #[inline] - pub fn with_filename(input: I, filename: String) -> Self { - Self { - input: Some(input), - filename: Some(filename), - #[cfg(feature = "std")] - file_path: None, - metadata: SerdeAnyMap::default(), - #[cfg(feature = "std")] - metadata_path: None, - exec_time: None, - cached_len: None, - executions: 0, - scheduled_count: 0, - parent_id: None, - disabled: false, - objectives_found: 0, - #[cfg(feature = "track_hit_feedbacks")] - hit_feedbacks: Vec::new(), - #[cfg(feature = "track_hit_feedbacks")] - hit_objectives: Vec::new(), - } + pub fn set_scheduled_count(&mut self, scheduled_count: usize) { + self.scheduled_count = scheduled_count; } - /// Get the id of the parent, that this testcase was derived from + /// Set the testcase as disabled + #[inline] + pub fn set_disabled(&mut self, disabled: bool) { + self.disabled = disabled; + } + + /// Get the hit feedbacks (mutable) + #[cfg(feature = "track_hit_feedbacks")] + #[inline] #[must_use] - pub fn parent_id(&self) -> Option { - self.parent_id + pub fn hit_feedbacks_mut(&mut self) -> &mut Vec> { + &mut self.hit_feedbacks + } + + /// Get the hit objectives (mutable) + #[cfg(feature = "track_hit_feedbacks")] + #[inline] + #[must_use] + pub fn hit_objectives_mut(&mut self) -> &mut Vec> { + &mut self.hit_objectives } /// Sets the id of the parent, that this testcase was derived from @@ -328,79 +613,10 @@ impl Testcase { self.parent_id = parent_id; } - /// Gets how many objectives were found by mutating this testcase - pub fn objectives_found(&self) -> usize { - self.objectives_found - } - /// Adds one objectives to the `objectives_found` counter. Mostly called from crash handler or executor. pub fn found_objective(&mut self) { - self.objectives_found = self.objectives_found.saturating_add(1); - } -} - -impl Default for Testcase { - /// Create a new default Testcase - #[inline] - fn default() -> Self { - Testcase { - input: None, - filename: None, - metadata: SerdeAnyMap::new(), - exec_time: None, - cached_len: None, - scheduled_count: 0, - parent_id: None, - #[cfg(feature = "std")] - file_path: None, - #[cfg(feature = "std")] - metadata_path: None, - disabled: false, - executions: 0, - objectives_found: 0, - #[cfg(feature = "track_hit_feedbacks")] - hit_feedbacks: Vec::new(), - #[cfg(feature = "track_hit_feedbacks")] - hit_objectives: Vec::new(), - } - } -} - -/// Impl of a testcase when the input has len -impl Testcase -where - I: HasLen, -{ - /// Get the cached `len`. Will `Error::EmptyOptional` if `len` is not yet cached. - #[inline] - pub fn cached_len(&mut self) -> Option { - self.cached_len - } - - /// Get the `len` or calculate it, if not yet calculated. - pub fn load_len>(&mut self, corpus: &C) -> Result { - match &self.input { - Some(i) => { - let l = i.len(); - self.cached_len = Some(l); - Ok(l) - } - None => { - if let Some(l) = self.cached_len { - Ok(l) - } else { - corpus.load_input_into(self)?; - self.load_len(corpus) - } - } - } - } -} - -/// Create a testcase from an input -impl From for Testcase { - fn from(input: I) -> Self { - Testcase::new(input) + let count = self.objectives_found.saturating_add(1); + self.objectives_found = count; } } @@ -515,15 +731,3 @@ impl SchedulerTestcaseMetadata { } libafl_bolts::impl_serdeany!(SchedulerTestcaseMetadata); - -#[cfg(feature = "std")] -impl Drop for Testcase { - fn drop(&mut self) { - if let Some(filename) = &self.filename { - let mut path = PathBuf::from(filename); - let lockname = format!(".{}.lafl_lock", path.file_name().unwrap().to_str().unwrap()); - path.set_file_name(lockname); - let _ = std::fs::remove_file(path); - } - } -} diff --git a/crates/libafl/src/events/llmp/restarting.rs b/crates/libafl/src/events/llmp/restarting.rs index 154e779ca55..f52606d5b93 100644 --- a/crates/libafl/src/events/llmp/restarting.rs +++ b/crates/libafl/src/events/llmp/restarting.rs @@ -989,7 +989,7 @@ mod tests { use crate::{ StdFuzzer, - corpus::{Corpus, InMemoryCorpus, Testcase}, + corpus::{Corpus, InMemoryCorpus}, events::llmp::restarting::{_ENV_FUZZER_SENDER, LlmpEventManagerBuilder}, executors::{ExitKind, InProcessExecutor}, feedbacks::ConstFeedback, @@ -1018,8 +1018,7 @@ mod tests { let time = TimeObserver::new("time"); let mut corpus = InMemoryCorpus::::new(); - let testcase = Testcase::new(vec![0; 4].into()); - corpus.add(testcase).unwrap(); + corpus.add(vec![0; 4].into()).unwrap(); let solutions = InMemoryCorpus::::new(); diff --git a/crates/libafl/src/executors/inprocess/mod.rs b/crates/libafl/src/executors/inprocess/mod.rs index d3942a9cb48..75de7c8e104 100644 --- a/crates/libafl/src/executors/inprocess/mod.rs +++ b/crates/libafl/src/executors/inprocess/mod.rs @@ -15,8 +15,9 @@ use core::{ use libafl_bolts::tuples::{RefIndexable, tuple_list}; use crate::{ - Error, HasMetadata, - corpus::{Corpus, Testcase}, + Error, + common::HasMetadata, + corpus::{Corpus, IsTestcaseMetadataCell, testcase::TestcaseMetadata}, events::{Event, EventFirer, EventRestarter, EventWithStats}, executors::{ Executor, ExitKind, HasObservers, @@ -344,22 +345,25 @@ pub fn run_observers_and_save_state( .expect("In run_observers_and_save_state objective failure."); if is_solution { - let mut new_testcase = Testcase::from(input.clone()); - new_testcase.set_executions(*state.executions()); - new_testcase.add_metadata(exitkind); - new_testcase.set_parent_id_optional(*state.corpus().current()); + let mut testcase_md = TestcaseMetadata::builder() + .executions(*state.executions()) + .parent_id(*state.corpus().current()) + .build(); - if let Ok(mut tc) = state.current_testcase_mut() { - tc.found_objective(); + testcase_md.add_metadata(exitkind); + + if let Ok(tc) = state.current_testcase() { + tc.testcase_metadata_mut().found_objective(); } fuzzer .objective_mut() - .append_metadata(state, event_mgr, &*observers, &mut new_testcase) + .append_metadata(state, event_mgr, &*observers, input, &mut testcase_md) .expect("Failed adding metadata"); + state .solutions_mut() - .add(new_testcase) + .add_with_metadata(input.clone(), testcase_md) .expect("In run_observers_and_save_state solutions failure."); let event = Event::Objective { diff --git a/crates/libafl/src/feedbacks/bool.rs b/crates/libafl/src/feedbacks/bool.rs index 912f73e887b..df5935929a3 100644 --- a/crates/libafl/src/feedbacks/bool.rs +++ b/crates/libafl/src/feedbacks/bool.rs @@ -9,6 +9,7 @@ use libafl_bolts::{ use crate::{ HasNamedMetadata, + corpus::testcase::TestcaseMetadata, feedbacks::{Feedback, StateInitializer}, observers::{ObserversTuple, ValueObserver}, }; @@ -86,7 +87,8 @@ where _state: &mut S, _manager: &mut EM, _observers: &OT, - _testcase: &mut crate::corpus::Testcase, + _input: &I, + _md: &mut TestcaseMetadata, ) -> Result<(), Error> { Ok(()) } diff --git a/crates/libafl/src/feedbacks/capture_feedback.rs b/crates/libafl/src/feedbacks/capture_feedback.rs index 44102131e55..5e257f3cc10 100644 --- a/crates/libafl/src/feedbacks/capture_feedback.rs +++ b/crates/libafl/src/feedbacks/capture_feedback.rs @@ -7,7 +7,7 @@ use serde::{Serialize, de::DeserializeOwned}; use crate::{ HasMetadata, - corpus::Testcase, + corpus::testcase::TestcaseMetadata, executors::ExitKind, feedbacks::{Feedback, StateInitializer}, stages::verify_timeouts::TimeoutsToVerify, @@ -64,7 +64,8 @@ where _state: &mut S, _manager: &mut EM, _observers: &OT, - _testcase: &mut Testcase, + _input: &I, + _md: &mut TestcaseMetadata, ) -> Result<(), Error> { Ok(()) } diff --git a/crates/libafl/src/feedbacks/concolic.rs b/crates/libafl/src/feedbacks/concolic.rs index 56b29e064b0..ff885e8d82b 100644 --- a/crates/libafl/src/feedbacks/concolic.rs +++ b/crates/libafl/src/feedbacks/concolic.rs @@ -14,7 +14,7 @@ use libafl_bolts::{ use crate::{ Error, HasMetadata, - corpus::Testcase, + corpus::testcase::TestcaseMetadata, feedbacks::{Feedback, StateInitializer}, observers::concolic::ConcolicObserver, }; @@ -38,18 +38,15 @@ impl<'map> ConcolicFeedback<'map> { } } - fn add_concolic_feedback_to_metadata( - &mut self, - observers: &OT, - testcase: &mut Testcase, - ) where + fn add_concolic_feedback_to_metadata(&mut self, observers: &OT, md: &mut TestcaseMetadata) + where OT: MatchName, { if let Some(metadata) = observers .get(&self.observer_handle) .map(ConcolicObserver::create_metadata_from_current_map) { - testcase.metadata_map_mut().insert(metadata); + md.metadata_map_mut().insert(metadata); } } } @@ -76,9 +73,10 @@ where _state: &mut S, _manager: &mut EM, observers: &OT, - testcase: &mut Testcase, + _input: &I, + md: &mut TestcaseMetadata, ) -> Result<(), Error> { - self.add_concolic_feedback_to_metadata(observers, testcase); + self.add_concolic_feedback_to_metadata::(observers, md); Ok(()) } } diff --git a/crates/libafl/src/feedbacks/custom_filename.rs b/crates/libafl/src/feedbacks/custom_filename.rs index 96cd72e4e04..eda78485980 100644 --- a/crates/libafl/src/feedbacks/custom_filename.rs +++ b/crates/libafl/src/feedbacks/custom_filename.rs @@ -89,7 +89,8 @@ where state: &mut S, _manager: &mut EM, _observers: &OT, - testcase: &mut Testcase, + _input: &I, + md: &mut TestcaseMetadata, ) -> Result<(), Error> { *testcase.filename_mut() = Some(self.generator.set_name(state, testcase)?); Ok(()) diff --git a/crates/libafl/src/feedbacks/list.rs b/crates/libafl/src/feedbacks/list.rs index ede622474b0..2ddc13cdc44 100644 --- a/crates/libafl/src/feedbacks/list.rs +++ b/crates/libafl/src/feedbacks/list.rs @@ -15,6 +15,7 @@ use serde::{Deserialize, Serialize, de::DeserializeOwned}; use crate::{ HasNamedMetadata, + corpus::testcase::TestcaseMetadata, executors::ExitKind, feedbacks::{Feedback, StateInitializer}, observers::ListObserver, @@ -176,7 +177,8 @@ where state: &mut S, _manager: &mut EM, _observers: &OT, - _testcase: &mut crate::corpus::Testcase, + _input: &I, + _md: &mut TestcaseMetadata, ) -> Result<(), Error> { self.append_list_observer_metadata(state); Ok(()) diff --git a/crates/libafl/src/feedbacks/map.rs b/crates/libafl/src/feedbacks/map.rs index 13caef28fa9..cae239bda55 100644 --- a/crates/libafl/src/feedbacks/map.rs +++ b/crates/libafl/src/feedbacks/map.rs @@ -27,7 +27,7 @@ use super::simd::SimdMapFeedback; use crate::feedbacks::premature_last_result_err; use crate::{ Error, HasMetadata, HasNamedMetadata, - corpus::Testcase, + corpus::testcase::TestcaseMetadata, events::{Event, EventFirer, EventWithStats}, executors::ExitKind, feedbacks::{Feedback, HasObserverHandle, StateInitializer}, @@ -368,11 +368,12 @@ where state: &mut S, manager: &mut EM, observers: &OT, - testcase: &mut Testcase, + _input: &I, + md: &mut TestcaseMetadata, ) -> Result<(), Error> { if let Some(novelties) = self.novelties.as_mut().map(core::mem::take) { let meta = MapNoveltiesMetadata::new(novelties); - testcase.add_metadata(meta); + md.add_metadata(meta); } let observer = observers.get(&self.map_ref).expect("MapObserver not found. This is likely because you entered the crash handler with the wrong executor/observer").as_ref(); let initial = observer.initial(); @@ -403,7 +404,7 @@ where indices.push(i); } let meta = MapIndexesMetadata::new(indices); - if testcase.try_add_metadata(meta).is_err() { + if md.try_add_metadata(meta).is_err() { return Err(Error::key_exists( "MapIndexesMetadata is already attached to this testcase. You should not have more than one observer with tracking.", )); diff --git a/crates/libafl/src/feedbacks/mod.rs b/crates/libafl/src/feedbacks/mod.rs index 18bfcde983d..574efef1054 100644 --- a/crates/libafl/src/feedbacks/mod.rs +++ b/crates/libafl/src/feedbacks/mod.rs @@ -1,6 +1,5 @@ //! The feedbacks reduce observer state after each run to a single `is_interesting`-value. //! If a testcase is interesting, it may be added to a Corpus. - // TODO: make S of Feedback an associated type when specialisation + AT is stable use alloc::borrow::Cow; @@ -8,27 +7,23 @@ use alloc::borrow::Cow; use alloc::vec::Vec; use core::{fmt::Debug, marker::PhantomData}; -#[cfg(feature = "std")] -pub use concolic::ConcolicFeedback; -pub use differential::DiffFeedback; use libafl_bolts::{ Named, tuples::{Handle, Handled, MatchName, MatchNameRef}, }; -pub use list::*; -pub use map::*; -#[cfg(feature = "nautilus")] -pub use nautilus::*; -#[cfg(feature = "std")] -pub use new_hash_feedback::NewHashFeedback; -#[cfg(feature = "std")] -pub use new_hash_feedback::NewHashFeedbackMetadata; use serde::{Deserialize, Serialize}; -use crate::{Error, corpus::Testcase, executors::ExitKind, observers::TimeObserver}; +#[cfg(feature = "introspection")] +use crate::state::HasClientPerfMonitor; +use crate::{ + Error, corpus::testcase::TestcaseMetadata, executors::ExitKind, observers::TimeObserver, + state::HasCorpus, +}; #[cfg(feature = "std")] pub mod capture_feedback; +#[cfg(feature = "std")] +pub use capture_feedback::CaptureTimeoutFeedback; pub mod bool; pub use bool::BoolValueFeedback; @@ -36,28 +31,36 @@ pub use bool::BoolValueFeedback; #[cfg(feature = "std")] pub mod concolic; #[cfg(feature = "std")] -/// The module for `CustomFilenameToTestcaseFeedback` -pub mod custom_filename; +pub use concolic::ConcolicFeedback; + pub mod differential; +pub use differential::DiffFeedback; + /// The module for list feedback pub mod list; +pub use list::*; + pub mod map; +pub use map::*; + #[cfg(feature = "nautilus")] pub mod nautilus; +#[cfg(feature = "nautilus")] +pub use nautilus::*; + #[cfg(feature = "std")] pub mod new_hash_feedback; +#[cfg(feature = "std")] +pub use new_hash_feedback::NewHashFeedback; +#[cfg(feature = "std")] +pub use new_hash_feedback::NewHashFeedbackMetadata; + #[cfg(feature = "simd")] pub mod simd; #[cfg(feature = "std")] pub mod stdio; pub mod transferred; -#[cfg(feature = "std")] -pub use capture_feedback::CaptureTimeoutFeedback; - -#[cfg(feature = "introspection")] -use crate::state::HasClientPerfMonitor; - #[cfg(feature = "value_bloom_feedback")] pub mod value_bloom; #[cfg(feature = "value_bloom_feedback")] @@ -146,7 +149,8 @@ pub trait Feedback: StateInitializer + Named { _state: &mut S, _manager: &mut EM, _observers: &OT, - _testcase: &mut Testcase, + _input: &I, + _md: &mut TestcaseMetadata, ) -> Result<(), Error> { Ok(()) } @@ -295,12 +299,13 @@ where state: &mut S, manager: &mut EM, observers: &OT, - testcase: &mut Testcase, + input: &I, + md: &mut TestcaseMetadata, ) -> Result<(), Error> { self.first - .append_metadata(state, manager, observers, testcase)?; + .append_metadata(state, manager, observers, input, md)?; self.second - .append_metadata(state, manager, observers, testcase) + .append_metadata(state, manager, observers, input, md) } } @@ -655,10 +660,11 @@ where state: &mut S, manager: &mut EM, observers: &OT, - testcase: &mut Testcase, + input: &I, + md: &mut TestcaseMetadata, ) -> Result<(), Error> { self.inner - .append_metadata(state, manager, observers, testcase) + .append_metadata(state, manager, observers, input, md) } } @@ -701,7 +707,7 @@ macro_rules! feedback_and { $crate::feedbacks::EagerAndFeedback::new($head , feedback_and!($($tail),+)) }; } -/// + /// Variadic macro to create a chain of (fast) [`AndFeedback`](FastAndFeedback) #[macro_export] macro_rules! feedback_and_fast { @@ -752,7 +758,10 @@ macro_rules! feedback_not { impl StateInitializer for () {} /// Hack to use () as empty Feedback -impl Feedback for () { +impl Feedback for () +where + S: HasCorpus, +{ #[cfg(feature = "track_hit_feedbacks")] fn last_result(&self) -> Result { Ok(false) @@ -922,7 +931,8 @@ where _state: &mut S, _manager: &mut EM, observers: &OT, - testcase: &mut Testcase, + _input: &I, + md: &mut TestcaseMetadata, ) -> Result<(), Error> { let Some(observer) = observers.get(&self.observer_handle) else { return Err(Error::illegal_state( @@ -930,7 +940,7 @@ where )); }; - *testcase.exec_time_mut() = *observer.last_runtime(); + *md.exec_time_mut() = *observer.last_runtime(); Ok(()) } } @@ -964,7 +974,10 @@ pub enum ConstFeedback { impl StateInitializer for ConstFeedback {} -impl Feedback for ConstFeedback { +impl Feedback for ConstFeedback +where + S: HasCorpus, +{ #[inline] fn is_interesting( &mut self, diff --git a/crates/libafl/src/feedbacks/nautilus.rs b/crates/libafl/src/feedbacks/nautilus.rs index f58c018db03..0c70a039fec 100644 --- a/crates/libafl/src/feedbacks/nautilus.rs +++ b/crates/libafl/src/feedbacks/nautilus.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use crate::{ Error, HasMetadata, common::nautilus::grammartec::{chunkstore::ChunkStore, context::Context}, - corpus::{Corpus, Testcase}, + corpus::TestcaseMetadata, executors::ExitKind, feedbacks::{Feedback, StateInitializer}, generators::NautilusContext, @@ -64,20 +64,16 @@ impl<'a> NautilusFeedback<'a> { fn append_nautilus_metadata_to_state( &mut self, state: &mut S, - testcase: &mut Testcase, - ) -> Result<(), Error> - where + input: &NautilusInput, + _md: &mut TestcaseMetadata, + ) where S: HasCorpus + HasMetadata, { - state.corpus().load_input_into(testcase)?; - let input = testcase.input().as_ref().unwrap().clone(); let meta = state .metadata_map_mut() .get_mut::() .expect("NautilusChunksMetadata not in the state"); - meta.cks.add_tree(input.tree, self.ctx); - - Ok(()) + meta.cks.add_tree(&input.tree, self.ctx); } } @@ -110,9 +106,12 @@ where state: &mut S, _manager: &mut EM, _observers: &OT, - testcase: &mut Testcase, + input: &NautilusInput, + md: &mut TestcaseMetadata, ) -> Result<(), Error> { - self.append_nautilus_metadata_to_state(state, testcase) + self.append_nautilus_metadata_to_state(state, input, md); + + Ok(()) } #[cfg(feature = "track_hit_feedbacks")] diff --git a/crates/libafl/src/feedbacks/simd.rs b/crates/libafl/src/feedbacks/simd.rs index f892ccffb79..dc64c17a483 100644 --- a/crates/libafl/src/feedbacks/simd.rs +++ b/crates/libafl/src/feedbacks/simd.rs @@ -21,7 +21,7 @@ use super::{DifferentIsNovel, Feedback, HasObserverHandle, MapFeedback, StateIni use crate::state::HasClientPerfMonitor; use crate::{ HasNamedMetadata, - corpus::Testcase, + corpus::testcase::TestcaseMetadata, events::EventFirer, executors::ExitKind, feedbacks::MapFeedbackMetadata, @@ -253,9 +253,10 @@ where state: &mut S, manager: &mut EM, observers: &OT, - testcase: &mut Testcase, + input: &I, + md: &mut TestcaseMetadata, ) -> Result<(), Error> { self.map - .append_metadata(state, manager, observers, testcase) + .append_metadata(state, manager, observers, input, md) } } diff --git a/crates/libafl/src/feedbacks/stdio.rs b/crates/libafl/src/feedbacks/stdio.rs index 9474175965e..159f25706a4 100644 --- a/crates/libafl/src/feedbacks/stdio.rs +++ b/crates/libafl/src/feedbacks/stdio.rs @@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize}; use crate::{ Error, HasMetadata, - corpus::Testcase, + corpus::testcase::TestcaseMetadata, feedbacks::{Feedback, StateInitializer}, observers::{StdErrObserver, StdOutObserver}, }; @@ -33,10 +33,10 @@ pub struct StdOutToMetadataFeedback { impl StdOutToMetadataFeedback { /// Append to the testcase the generated metadata in case of a new corpus item. #[inline] - fn append_stdout_observation_to_testcase( + fn append_stdout_observation_to_testcase( &mut self, observers: &OT, - testcase: &mut Testcase, + md: &mut TestcaseMetadata, ) -> Result<(), Error> where OT: MatchName, @@ -50,9 +50,7 @@ impl StdOutToMetadataFeedback { .ok_or(Error::illegal_state("StdOutObserver has no stdout"))?; let stdout = String::from_utf8_lossy(buffer).into_owned(); - testcase - .metadata_map_mut() - .insert(StdOutMetadata { stdout }); + md.metadata_map_mut().insert(StdOutMetadata { stdout }); Ok(()) } @@ -76,9 +74,10 @@ where _state: &mut S, _manager: &mut EM, observers: &OT, - testcase: &mut Testcase, + _input: &I, + md: &mut TestcaseMetadata, ) -> Result<(), Error> { - self.append_stdout_observation_to_testcase(observers, testcase) + self.append_stdout_observation_to_testcase(observers, md) } } @@ -132,7 +131,8 @@ where _state: &mut S, _manager: &mut EM, observers: &OT, - testcase: &mut Testcase, + _input: &I, + md: &mut TestcaseMetadata, ) -> Result<(), Error> { let observer = observers .get(&self.o_ref) @@ -143,9 +143,7 @@ where .ok_or(Error::illegal_state("StdErrObserver has no stderr"))?; let stderr = String::from_utf8_lossy(buffer).into_owned(); - testcase - .metadata_map_mut() - .insert(StdErrMetadata { stderr }); + md.metadata_map_mut().insert(StdErrMetadata { stderr }); Ok(()) } diff --git a/crates/libafl/src/feedbacks/transferred.rs b/crates/libafl/src/feedbacks/transferred.rs index a1259faeb3d..76522ab0ba9 100644 --- a/crates/libafl/src/feedbacks/transferred.rs +++ b/crates/libafl/src/feedbacks/transferred.rs @@ -12,6 +12,7 @@ use crate::{ HasMetadata, executors::ExitKind, feedbacks::{Feedback, StateInitializer}, + state::HasCorpus, }; /// Constant name of the [`TransferringMetadata`]. @@ -65,7 +66,7 @@ where impl Feedback for TransferredFeedback where - S: HasMetadata, + S: HasCorpus + HasMetadata, { fn is_interesting( &mut self, diff --git a/crates/libafl/src/fuzzer/mod.rs b/crates/libafl/src/fuzzer/mod.rs index 35ca76d9bb4..e7e34140447 100644 --- a/crates/libafl/src/fuzzer/mod.rs +++ b/crates/libafl/src/fuzzer/mod.rs @@ -14,7 +14,9 @@ use serde::{Serialize, de::DeserializeOwned}; use crate::monitors::stats::PerfFeature; use crate::{ Error, HasMetadata, - corpus::{Corpus, CorpusId, HasCurrentCorpusId, HasTestcase, Testcase}, + corpus::{ + Corpus, CorpusId, HasCurrentCorpusId, HasTestcase, IsTestcaseMetadataCell, TestcaseMetadata, + }, events::{ Event, EventConfig, EventFirer, EventReceiver, EventWithStats, ProgressReporter, SendExiting, @@ -421,30 +423,34 @@ where ExecuteInputResult::Corpus => { // Not a solution // Add the input to the main corpus - let mut testcase = Testcase::from(input.clone()); + let mut md = TestcaseMetadata::default(); + #[cfg(feature = "track_hit_feedbacks")] self.feedback_mut() - .append_hit_feedbacks(testcase.hit_feedbacks_mut())?; + .append_hit_feedbacks(md.hit_feedbacks_mut())?; self.feedback_mut() - .append_metadata(state, manager, observers, &mut testcase)?; - let id = state.corpus_mut().add(testcase)?; + .append_metadata(state, manager, observers, input, &mut md)?; + + let id = state.corpus_mut().add_with_metadata(input.clone(), md)?; self.scheduler_mut().on_add(state, id)?; Ok(Some(id)) } ExecuteInputResult::Solution => { // The input is a solution, add it to the respective corpus - let mut testcase = Testcase::from(input.clone()); - testcase.set_parent_id_optional(*state.corpus().current()); - if let Ok(mut tc) = state.current_testcase_mut() { + let mut md = TestcaseMetadata::default(); + + md.set_parent_id_optional(*state.corpus().current()); + + if let Ok(mut tc) = state.current_testcase() { tc.found_objective(); } #[cfg(feature = "track_hit_feedbacks")] self.objective_mut() - .append_hit_feedbacks(testcase.hit_objectives_mut())?; + .append_hit_feedbacks(md.hit_objectives_mut())?; self.objective_mut() - .append_metadata(state, manager, observers, &mut testcase)?; - state.solutions_mut().add(testcase)?; + .append_metadata(state, manager, observers, input, &mut md)?; + state.solutions_mut().add_with_metadata(input.clone(), md)?; Ok(None) } @@ -697,8 +703,9 @@ where let exit_kind = self.execute_input(state, executor, manager, &input)?; let observers = executor.observers(); // Always consider this to be "interesting" - let mut testcase = Testcase::from(input.clone()); - testcase.set_executions(*state.executions()); + let mut tc_md = TestcaseMetadata::builder() + .executions(*state.executions()) + .build(); // Maybe a solution #[cfg(not(feature = "introspection"))] @@ -718,11 +725,18 @@ where if is_solution { #[cfg(feature = "track_hit_feedbacks")] self.objective_mut() - .append_hit_feedbacks(testcase.hit_objectives_mut())?; - self.objective_mut() - .append_metadata(state, manager, &*observers, &mut testcase)?; + .append_hit_feedbacks(tc_md.hit_objectives_mut())?; + self.objective_mut().append_metadata( + state, + manager, + &*observers, + &input, + &mut tc_md, + )?; // we don't care about solution id - let id = state.solutions_mut().add(testcase)?; + let id = state + .solutions_mut() + .add_with_metadata(input.clone(), tc_md.clone())?; manager.fire( state, @@ -756,11 +770,11 @@ where #[cfg(feature = "track_hit_feedbacks")] self.feedback_mut() - .append_hit_feedbacks(testcase.hit_feedbacks_mut())?; + .append_hit_feedbacks(tc_md.hit_feedbacks_mut())?; // Add the input to the main corpus self.feedback_mut() - .append_metadata(state, manager, &*observers, &mut testcase)?; - let id = state.corpus_mut().add(testcase)?; + .append_metadata(state, manager, &*observers, &input, &mut tc_md)?; + let id = state.corpus_mut().add_with_metadata(input.clone(), tc_md)?; self.scheduler_mut().on_add(state, id)?; let observers_buf = if manager.configuration() == EventConfig::AlwaysUnique { @@ -788,11 +802,15 @@ where } fn add_disabled_input(&mut self, state: &mut S, input: I) -> Result { - let mut testcase = Testcase::from(input.clone()); - testcase.set_executions(*state.executions()); - testcase.set_disabled(true); + let tc_md = TestcaseMetadata::builder() + .executions(*state.executions()) + .disabled(true) + .build(); + // Add the disabled input to the main corpus - let id = state.corpus_mut().add_disabled(testcase)?; + let id = state + .corpus_mut() + .add_disabled_with_metadata(input.clone(), tc_md)?; Ok(id) } } @@ -945,10 +963,12 @@ where state.introspection_stats_mut().mark_manager_time(); { - if let Ok(mut testcase) = state.testcase_mut(id) { - let scheduled_count = testcase.scheduled_count(); + if let Ok(testcase) = state.testcase(id) { + let mut md = testcase.testcase_metadata_mut(); + + let scheduled_count = md.scheduled_count(); // increase scheduled count, this was fuzz_level in afl - testcase.set_scheduled_count(scheduled_count + 1); + md.set_scheduled_count(scheduled_count + 1); } } diff --git a/crates/libafl/src/inputs/generalized.rs b/crates/libafl/src/inputs/generalized.rs index e3b2d8f4445..d578740e383 100644 --- a/crates/libafl/src/inputs/generalized.rs +++ b/crates/libafl/src/inputs/generalized.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use crate::{ Error, HasMetadata, - corpus::Testcase, + corpus::{Testcase, testcase::IsTestcaseMetadataCell}, inputs::BytesInput, stages::mutational::{MutatedTransform, MutatedTransformPost}, }; @@ -107,8 +107,12 @@ impl GeneralizedInputMetadata { impl MutatedTransform for GeneralizedInputMetadata { type Post = Self; - fn try_transform_from(base: &mut Testcase, _state: &S) -> Result { + fn try_transform_from( + base: &Testcase, + _state: &S, + ) -> Result { let meta = base + .testcase_metadata() .metadata_map() .get::() .ok_or_else(|| { diff --git a/crates/libafl/src/lib.rs b/crates/libafl/src/lib.rs index d3389e8cef3..25cf622d81a 100644 --- a/crates/libafl/src/lib.rs +++ b/crates/libafl/src/lib.rs @@ -113,7 +113,7 @@ mod tests { use crate::stages::ExecutionCountRestartHelperMetadata; use crate::{ StdFuzzer, - corpus::{Corpus, InMemoryCorpus, Testcase}, + corpus::{Corpus, InMemoryCorpus}, events::NopEventManager, executors::{ExitKind, InProcessExecutor}, feedbacks::ConstFeedback, @@ -138,8 +138,7 @@ mod tests { let rand = StdRand::with_seed(0); let mut corpus = InMemoryCorpus::::new(); - let testcase = Testcase::new(vec![0; 4].into()); - corpus.add(testcase).unwrap(); + corpus.add(vec![0; 4].into()).unwrap(); let mut feedback = ConstFeedback::new(false); let mut objective = ConstFeedback::new(false); diff --git a/crates/libafl/src/mutators/encoded_mutations.rs b/crates/libafl/src/mutators/encoded_mutations.rs index 48e88570dc8..1e0c94ce290 100644 --- a/crates/libafl/src/mutators/encoded_mutations.rs +++ b/crates/libafl/src/mutators/encoded_mutations.rs @@ -387,8 +387,8 @@ where let other_size = { // new scope to make the borrow checker happy - let mut other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - other_testcase.load_input(state.corpus())?.codes().len() + let other_testcase = state.corpus().get_from_all(id)?; + other_testcase.input().codes().len() }; if other_size < 2 { @@ -416,9 +416,9 @@ where } } - let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); + let other_testcase = state.corpus().get_from_all(id)?; // no need to `load_input` again - we did that above already. - let other = other_testcase.input().as_ref().unwrap(); + let other = other_testcase.input(); input.codes_mut().resize(size + len, 0); unsafe { @@ -474,8 +474,8 @@ where let other_size = { // new scope to make the borrow checker happy - let mut other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - other_testcase.load_input(state.corpus())?.codes().len() + let other_testcase = state.corpus().get_from_all(id)?; + other_testcase.input().codes().len() }; if other_size < 2 { @@ -499,9 +499,9 @@ where .rand_mut() .below(unsafe { NonZero::new(size - len).unwrap_unchecked() }); - let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); + let other_testcase = state.corpus().get_from_all(id)?; // no need to load the input again, it'll already be present at this point. - let other = other_testcase.input().as_ref().unwrap(); + let other = other_testcase.input(); unsafe { buffer_copy(input.codes_mut(), other.codes(), from, to, len); diff --git a/crates/libafl/src/mutators/gramatron.rs b/crates/libafl/src/mutators/gramatron.rs index b36f1ad61e8..76d1f43d75a 100644 --- a/crates/libafl/src/mutators/gramatron.rs +++ b/crates/libafl/src/mutators/gramatron.rs @@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize}; use crate::{ Error, HasMetadata, - corpus::Corpus, + corpus::{Corpus, IsTestcaseMetadataCell}, generators::GramatronGenerator, inputs::{GramatronInput, Terminal}, mutators::{MutationResult, Mutator}, @@ -136,17 +136,18 @@ where let rand_num = state.rand_mut().next(); - let mut other_testcase = state.corpus().get(id)?.borrow_mut(); + let other_testcase = state.corpus().get(id)?; + let mut other_md = other_testcase.testcase_metadata_mut(); - if !other_testcase.has_metadata::() { - let meta = GramatronIdxMapMetadata::new(other_testcase.load_input(state.corpus())?); - other_testcase.add_metadata(meta); + if !other_md.has_metadata::() { + let meta = GramatronIdxMapMetadata::new(other_testcase.input().as_ref()); + other_md.add_metadata(meta); } - let meta = other_testcase + let meta = other_md .metadata_map() .get::() .unwrap(); - let other = other_testcase.input().as_ref().unwrap(); + let other = other_testcase.input(); meta.map.get(&input.terminals()[insert_at].state).map_or( Ok(MutationResult::Skipped), diff --git a/crates/libafl/src/mutators/grimoire.rs b/crates/libafl/src/mutators/grimoire.rs index 91143d7a5d0..5de61e0131c 100644 --- a/crates/libafl/src/mutators/grimoire.rs +++ b/crates/libafl/src/mutators/grimoire.rs @@ -15,7 +15,7 @@ use libafl_bolts::{ use crate::{ Error, HasMetadata, - corpus::Corpus, + corpus::{Corpus, IsTestcaseMetadataCell}, inputs::{GeneralizedInputMetadata, GeneralizedItem}, mutators::{MutationResult, Mutator, token_mutations::Tokens}, random_corpus_id, @@ -41,8 +41,9 @@ where let rand1 = state.rand_mut().next(); let rand2 = state.rand_mut().next(); - let other_testcase = state.corpus().get(id)?.borrow(); + let other_testcase = state.corpus().get(id)?; if let Some(other) = other_testcase + .testcase_metadata() .metadata_map() .get::() { @@ -92,8 +93,9 @@ where } } - let other_testcase = state.corpus().get(id)?.borrow(); + let other_testcase = state.corpus().get(id)?; match other_testcase + .testcase_metadata() .metadata_map() .get::() { diff --git a/crates/libafl/src/mutators/list.rs b/crates/libafl/src/mutators/list.rs index 7278ce6f00d..195f0c8a8c4 100644 --- a/crates/libafl/src/mutators/list.rs +++ b/crates/libafl/src/mutators/list.rs @@ -179,8 +179,8 @@ where let other_idx_raw = state.rand_mut().next() as usize; let id = random_corpus_id!(state.corpus(), state.rand_mut()); - let mut testcase = state.corpus().get(id)?.borrow_mut(); - let other = testcase.load_input(state.corpus())?; + let testcase = state.corpus().get(id)?; + let other = testcase.input(); let other_len = other.len(); @@ -232,8 +232,8 @@ where let other_idx_raw = state.rand_mut().next() as usize; let id = random_corpus_id!(state.corpus(), state.rand_mut()); - let mut testcase = state.corpus().get(id)?.borrow_mut(); - let other = testcase.load_input(state.corpus())?; + let testcase = state.corpus().get(id)?; + let other = testcase.input(); let other_len = other.len(); diff --git a/crates/libafl/src/mutators/multi.rs b/crates/libafl/src/mutators/multi.rs index f5a31c882d6..22704dd3999 100644 --- a/crates/libafl/src/mutators/multi.rs +++ b/crates/libafl/src/mutators/multi.rs @@ -202,8 +202,8 @@ where } } - let mut other_testcase = state.corpus().get(id)?.borrow_mut(); - let other = other_testcase.load_input(state.corpus())?; + let other_testcase = state.corpus().get(id)?; + let other = other_testcase.input(); let other_len = other.len(); if other_len == 0 { return Ok(MutationResult::Skipped); @@ -237,9 +237,9 @@ where NonZero::new_unchecked(min(other_size, size - target)) }); - let other_testcase = state.corpus().get(id)?.borrow_mut(); + let other_testcase = state.corpus().get(id)?; // No need to load the input again, it'll still be cached. - let other = other_testcase.input().as_ref().unwrap(); + let other = other_testcase.input(); Ok(Self::crossover_insert( part, @@ -341,8 +341,8 @@ where } } - let mut other_testcase = state.corpus().get(id)?.borrow_mut(); - let other = other_testcase.load_input(state.corpus())?; + let other_testcase = state.corpus().get(id)?; + let other = other_testcase.input(); let other_len = other.len(); if other_len == 0 { @@ -376,9 +376,9 @@ where NonZero::new_unchecked(min(other_size, size - target)) }); - let other_testcase = state.corpus().get(id)?.borrow_mut(); + let other_testcase = state.corpus().get(id)?; // No need to load the input again, it'll still be cached. - let other = other_testcase.input().as_ref().unwrap(); + let other = other_testcase.input(); Ok(Self::crossover_replace( part, diff --git a/crates/libafl/src/mutators/mutations.rs b/crates/libafl/src/mutators/mutations.rs index bc3f332b1c5..2e628437c01 100644 --- a/crates/libafl/src/mutators/mutations.rs +++ b/crates/libafl/src/mutators/mutations.rs @@ -1302,11 +1302,8 @@ where } let other_size = { - let mut other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - other_testcase - .load_input(state.corpus())? - .mutator_bytes() - .len() + let other_testcase = state.corpus().get_from_all(id)?; + other_testcase.input().len() }; if other_size < 2 { @@ -1321,9 +1318,9 @@ where }); let target = state.rand_mut().below(nonzero_size); - let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); + let other_testcase = state.corpus().get_from_all(id)?; // No need to load the input again, it'll still be cached. - let other = other_testcase.input().as_ref().unwrap(); + let other = other_testcase.input(); Ok(Self::crossover_insert( input, @@ -1404,10 +1401,7 @@ where } } - let other_size = { - let mut testcase = state.corpus().get_from_all(id)?.borrow_mut(); - testcase.load_input(state.corpus())?.mutator_bytes().len() - }; + let other_size = state.corpus().get_from_all(id)?.input().len(); if other_size < 2 { return Ok(MutationResult::Skipped); @@ -1425,9 +1419,9 @@ where NonZero::new(min(other_size, size - target)).unwrap_unchecked() }); - let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); + let other_testcase = state.corpus().get_from_all(id)?; // No need to load the input again, it'll still be cached. - let other = other_testcase.input().as_ref().unwrap(); + let other = other_testcase.input(); Ok(Self::crossover_replace( input, @@ -1518,9 +1512,9 @@ where } let other_size = { - let mut other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - let other_input = other_testcase.load_input(state.corpus())?; - let input_mapped = (self.input_mapper)(other_input).map_to_option_bytes(); + let other_testcase = state.corpus().get_from_all(id)?; + let other_input = other_testcase.input(); + let input_mapped = (self.input_mapper)(other_input.as_ref()).map_to_option_bytes(); input_mapped.map_or(0, >::len) }; @@ -1540,9 +1534,9 @@ where .rand_mut() .below(unsafe { NonZero::new(size).unwrap_unchecked() }); - let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); + let other_testcase = state.corpus().get_from_all(id)?; // No need to load the input again, it'll still be cached. - let other_input = &mut other_testcase.input().as_ref().unwrap(); + let other_input = &mut other_testcase.input(); let wrapped_mapped_other_input = (self.input_mapper)(other_input).map_to_option_bytes(); if wrapped_mapped_other_input.is_none() { return Ok(MutationResult::Skipped); @@ -1613,9 +1607,9 @@ where } let other_size = { - let mut other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - let other_input = other_testcase.load_input(state.corpus())?; - let input_mapped = (self.input_mapper)(other_input).map_to_option_bytes(); + let other_testcase = state.corpus().get_from_all(id)?; + let other_input = other_testcase.input(); + let input_mapped = (self.input_mapper)(other_input.as_ref()).map_to_option_bytes(); input_mapped.map_or(0, >::len) }; @@ -1635,10 +1629,11 @@ where NonZero::new(min(other_size, size - target)).unwrap_unchecked() }); - let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); + let other_testcase = state.corpus().get_from_all(id)?; // No need to load the input again, it'll still be cached. - let other_input = &mut other_testcase.input().as_ref().unwrap(); - let wrapped_mapped_other_input = (self.input_mapper)(other_input).map_to_option_bytes(); + let other_input = &mut other_testcase.input(); + let wrapped_mapped_other_input = + (self.input_mapper)(other_input.as_ref()).map_to_option_bytes(); if wrapped_mapped_other_input.is_none() { return Ok(MutationResult::Skipped); } @@ -1704,8 +1699,8 @@ where } let (first_diff, last_diff) = { - let mut other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - let other = other_testcase.load_input(state.corpus())?; + let other_testcase = state.corpus().get_from_all(id)?; + let other = other_testcase.input(); let (f, l) = locate_diffs(input.mutator_bytes(), other.mutator_bytes()); @@ -1718,9 +1713,9 @@ where let split_at = state.rand_mut().between(first_diff, last_diff); - let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); + let other_testcase = state.corpus().get_from_all(id)?; // Input will already be loaded. - let other = other_testcase.input().as_ref().unwrap(); + let other = other_testcase.input(); input.splice( split_at.., @@ -1872,9 +1867,7 @@ mod tests { let mut feedback = ConstFeedback::new(false); let mut objective = ConstFeedback::new(false); - corpus - .add(BytesInput::new(vec![0x42; 0x1337]).into()) - .unwrap(); + corpus.add(BytesInput::new(vec![0x42; 0x1337])).unwrap(); StdState::new( rand, diff --git a/crates/libafl/src/mutators/numeric.rs b/crates/libafl/src/mutators/numeric.rs index f5ac9ba00f8..feabb057b23 100644 --- a/crates/libafl/src/mutators/numeric.rs +++ b/crates/libafl/src/mutators/numeric.rs @@ -426,8 +426,8 @@ where return Ok(MutationResult::Skipped); } - let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - *input = *other_testcase.input().as_ref().unwrap(); + let other_testcase = state.corpus().get_from_all(id)?; + *input = *other_testcase.input(); Ok(MutationResult::Mutated) } #[inline] @@ -475,9 +475,9 @@ where return Ok(MutationResult::Skipped); } - let other_testcase = state.corpus().get_from_all(id)?.borrow_mut(); - let other_input = other_testcase.input().as_ref().unwrap(); - let mapped_input = (self.input_mapper)(other_input).clone(); + let other_testcase = state.corpus().get_from_all(id)?; + let other_input = other_testcase.input(); + let mapped_input = (self.input_mapper)(other_input.as_ref()).clone(); *input = mapped_input; Ok(MutationResult::Mutated) } @@ -502,13 +502,13 @@ mod tests { use libafl_bolts::{ rands::{Rand, XkcdRand}, - tuples::IntoVec as _, + tuples::IntoVec, }; use serde::{Deserialize, Serialize}; use super::{Numeric, int_mutators}; use crate::{ - corpus::{Corpus as _, InMemoryCorpus, Testcase}, + corpus::{Corpus, InMemoryCorpus}, inputs::value::I16Input, mutators::MutationResult, state::StdState, @@ -544,7 +544,7 @@ mod tests { #[test] fn all_mutate_owned() { let mut corpus = InMemoryCorpus::new(); - corpus.add(Testcase::new(42_i16.into())).unwrap(); + corpus.add(42_i16.into()).unwrap(); let mut state = StdState::new( XkcdRand::new(), corpus, diff --git a/crates/libafl/src/mutators/scheduled.rs b/crates/libafl/src/mutators/scheduled.rs index 677124cdda1..db2332e6c6e 100644 --- a/crates/libafl/src/mutators/scheduled.rs +++ b/crates/libafl/src/mutators/scheduled.rs @@ -17,7 +17,7 @@ use serde::{Deserialize, Serialize}; use super::MutationId; use crate::{ Error, HasMetadata, - corpus::{Corpus, CorpusId}, + corpus::{Corpus, CorpusId, IsTestcaseMetadataCell}, mutators::{ MutationResult, Mutator, MutatorsTuple, token_mutations::{TokenInsert, TokenReplace}, @@ -306,15 +306,18 @@ where fn post_exec(&mut self, state: &mut S, corpus_id: Option) -> Result<(), Error> { if let Some(id) = corpus_id { - let mut testcase = (*state.corpus_mut().get(id)?).borrow_mut(); + let testcase = state.corpus().get(id)?; let mut log = Vec::>::new(); + while let Some(idx) = self.mutation_log.pop() { let name = self.scheduled.mutations().name(idx.0).unwrap().clone(); // TODO maybe return an Error on None log.push(name); } + let meta = LogMutationMetadata::new(log); - testcase.add_metadata(meta); + testcase.testcase_metadata_mut().add_metadata(meta); } + // Always reset the log for each run self.mutation_log.clear(); Ok(()) @@ -389,7 +392,7 @@ mod tests { use libafl_bolts::rands::{StdRand, XkcdRand}; use crate::{ - corpus::{Corpus, InMemoryCorpus, Testcase}, + corpus::{Corpus, InMemoryCorpus}, feedbacks::ConstFeedback, inputs::{BytesInput, HasMutatorBytes}, mutators::{ @@ -405,14 +408,10 @@ mod tests { fn test_mut_scheduled() { let rand = XkcdRand::with_seed(0); let mut corpus: InMemoryCorpus = InMemoryCorpus::new(); - corpus - .add(Testcase::new(vec![b'a', b'b', b'c'].into())) - .unwrap(); - corpus - .add(Testcase::new(vec![b'd', b'e', b'f'].into())) - .unwrap(); + corpus.add(vec![b'a', b'b', b'c'].into()).unwrap(); + corpus.add(vec![b'd', b'e', b'f'].into()).unwrap(); - let mut input = corpus.cloned_input_for_id(corpus.first().unwrap()).unwrap(); + let input = corpus.get(corpus.first().unwrap()).unwrap().input().clone(); let mut feedback = ConstFeedback::new(false); let mut objective = ConstFeedback::new(false); @@ -427,22 +426,28 @@ mod tests { .unwrap(); let mut splice = SpliceMutator::new(); - splice.mutate(&mut state, &mut input).unwrap(); + let mut spliced_input = input.as_ref().clone(); + splice.mutate(&mut state, &mut spliced_input).unwrap(); - log::trace!("{:?}", input.mutator_bytes()); + log::trace!("{:?}", spliced_input.mutator_bytes()); // The pre-seeded rand should have spliced at position 2. - assert_eq!(input.mutator_bytes(), b"abf"); + assert_eq!(spliced_input.mutator_bytes(), b"abf"); } #[test] fn test_havoc() { let rand = StdRand::with_seed(0x1337); let mut corpus: InMemoryCorpus = InMemoryCorpus::new(); - corpus.add(Testcase::new(b"abc".to_vec().into())).unwrap(); - corpus.add(Testcase::new(b"def".to_vec().into())).unwrap(); - - let mut input = corpus.cloned_input_for_id(corpus.first().unwrap()).unwrap(); + corpus.add(b"abc".to_vec().into()).unwrap(); + corpus.add(b"def".to_vec().into()).unwrap(); + + let mut input = corpus + .get(corpus.first().unwrap()) + .unwrap() + .input() + .as_ref() + .clone(); let input_prior = input.clone(); let mut feedback = ConstFeedback::new(false); @@ -480,10 +485,15 @@ mod tests { fn test_single_choice() { let rand = StdRand::with_seed(0x1337); let mut corpus: InMemoryCorpus = InMemoryCorpus::new(); - corpus.add(Testcase::new(b"abc".to_vec().into())).unwrap(); - corpus.add(Testcase::new(b"def".to_vec().into())).unwrap(); - - let mut input = corpus.cloned_input_for_id(corpus.first().unwrap()).unwrap(); + corpus.add(b"abc".to_vec().into()).unwrap(); + corpus.add(b"def".to_vec().into()).unwrap(); + + let mut input = corpus + .get(corpus.first().unwrap()) + .unwrap() + .input() + .as_ref() + .clone(); let input_prior = input.clone(); let mut feedback = ConstFeedback::new(false); diff --git a/crates/libafl/src/mutators/token_mutations.rs b/crates/libafl/src/mutators/token_mutations.rs index 36aabcb1723..528a0bcb113 100644 --- a/crates/libafl/src/mutators/token_mutations.rs +++ b/crates/libafl/src/mutators/token_mutations.rs @@ -1977,19 +1977,14 @@ impl AflppRedQueen { tokens.add_token(&v); } } -#[derive(Debug, Copy, Clone)] +#[derive(Default, Debug, Copy, Clone)] enum TextType { + #[default] None, Ascii(usize), UTF8(usize), } -impl Default for TextType { - fn default() -> Self { - Self::None - } -} - impl TextType { fn is_ascii_or_utf8(self) -> bool { match self { diff --git a/crates/libafl/src/mutators/unicode/mod.rs b/crates/libafl/src/mutators/unicode/mod.rs index 618acab4094..b3a86ced4e4 100644 --- a/crates/libafl/src/mutators/unicode/mod.rs +++ b/crates/libafl/src/mutators/unicode/mod.rs @@ -11,7 +11,7 @@ use libafl_bolts::{Error, HasLen, Named, rands::Rand}; use crate::{ HasMetadata, - corpus::{CorpusId, HasTestcase, Testcase}, + corpus::{CorpusId, HasTestcase, IsTestcaseMetadataCell, Testcase}, inputs::{BytesInput, HasMutatorBytes, ResizableMutator}, mutators::{MutationResult, Mutator, Tokens, rand_range}, nonzero, @@ -35,9 +35,15 @@ where { type Post = UnicodeIdentificationMetadata; - fn try_transform_from(base: &mut Testcase, state: &S) -> Result { - let input = base.load_input(state.corpus())?.clone(); - let metadata = base.metadata::().cloned()?; + fn try_transform_from( + base: &Testcase, + _state: &S, + ) -> Result { + let input = base.input().as_ref().clone(); + let metadata = base + .testcase_metadata() + .metadata::() + .cloned()?; Ok((input, metadata)) } @@ -52,8 +58,9 @@ where { fn post_exec(self, state: &mut S, corpus_id: Option) -> Result<(), Error> { if let Some(corpus_id) = corpus_id { - let mut tc = state.testcase_mut(corpus_id)?; - tc.add_metadata(self); + let tc = state.testcase(corpus_id)?; + let mut md = tc.testcase_metadata_mut(); + md.add_metadata(self); } Ok(()) } diff --git a/crates/libafl/src/schedulers/accounting.rs b/crates/libafl/src/schedulers/accounting.rs index 87ba1c0084d..29e0a48d5b2 100644 --- a/crates/libafl/src/schedulers/accounting.rs +++ b/crates/libafl/src/schedulers/accounting.rs @@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize}; use super::IndexesLenTimeMinimizerScheduler; use crate::{ Error, HasMetadata, - corpus::{Corpus, CorpusId}, + corpus::{Corpus, CorpusId, IsTestcaseMetadataCell}, observers::CanTrack, schedulers::{ Scheduler, @@ -140,13 +140,8 @@ where self.inner.cull(state)?; } let mut id = self.inner.base_mut().next(state)?; - while { - !state - .corpus() - .get(id)? - .borrow() - .has_metadata::() - } && state.rand_mut().coinflip(self.skip_non_favored_prob) + while { !state.corpus().get(id)?.has_metadata::() } + && state.rand_mut().coinflip(self.skip_non_favored_prob) { id = self.inner.base_mut().next(state)?; } @@ -199,9 +194,10 @@ where equal_score = true; } - let mut old = state.corpus().get_from_all(*old_id)?.borrow_mut(); + let old = state.corpus().get_from_all(*old_id)?; + let mut md = old.testcase_metadata_mut(); let must_remove = { - let old_meta = old.metadata_map_mut().get_mut::().ok_or_else(|| { + let old_meta = md.metadata_map_mut().get_mut::().ok_or_else(|| { Error::key_not_found(format!( "AccountingIndexesMetadata, needed by CoverageAccountingScheduler, not found in testcase #{old_id}" )) @@ -211,7 +207,7 @@ where }; if must_remove { - drop(old.metadata_map_mut().remove::()); + drop(md.metadata_map_mut().remove::()); } } } @@ -239,7 +235,7 @@ where state .corpus() .get(id)? - .borrow_mut() + .testcase_metadata_mut() .metadata_map_mut() .insert(AccountingIndexesMetadata::with_tcref( indexes, @@ -269,12 +265,14 @@ where }; for (_key, id) in &top_rated.map { - let mut entry = state.corpus().get(*id)?.borrow_mut(); + let entry = state.corpus().get(*id)?; if entry.scheduled_count() > 0 { continue; } - entry.add_metadata(IsFavoredMetadata {}); + entry + .testcase_metadata_mut() + .add_metadata(IsFavoredMetadata {}); } Ok(()) diff --git a/crates/libafl/src/schedulers/minimizer.rs b/crates/libafl/src/schedulers/minimizer.rs index 4d9f192053d..0a96124a042 100644 --- a/crates/libafl/src/schedulers/minimizer.rs +++ b/crates/libafl/src/schedulers/minimizer.rs @@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize}; use super::HasQueueCycles; use crate::{ Error, HasMetadata, - corpus::{Corpus, CorpusId, Testcase}, + corpus::{Corpus, CorpusId, IsTestcaseMetadataCell}, feedbacks::MapIndexesMetadata, observers::CanTrack, require_index_tracking, @@ -86,25 +86,9 @@ where M: for<'a> AsIter<'a, Item = usize> + SerdeAny + HasRefCnt, S: HasCorpus + HasMetadata + HasRand, { - /// Replaces the [`Testcase`] at the given [`CorpusId`] - fn on_replace( - &mut self, - state: &mut S, - id: CorpusId, - testcase: &Testcase, - ) -> Result<(), Error> { - self.base.on_replace(state, id, testcase)?; - self.update_score(state, id) - } - /// Removes an entry from the corpus - fn on_remove( - &mut self, - state: &mut S, - id: CorpusId, - testcase: &Option>, - ) -> Result<(), Error> { - self.base.on_remove(state, id, testcase)?; + fn on_remove(&mut self, state: &mut S, id: CorpusId) -> Result<(), Error> { + self.base.on_remove(state, id)?; let mut entries = if let Some(meta) = state.metadata_map_mut().get_mut::() { meta.map @@ -116,10 +100,12 @@ where }; entries.sort_unstable(); // this should already be sorted, but just in case let mut map = HashMap::new(); - for current_id in state.corpus().ids() { - let mut old = state.corpus().get(current_id)?.borrow_mut(); - let factor = F::compute(state, &mut *old)?; - if let Some(old_map) = old.metadata_map_mut().get_mut::() { + let ids: Vec = state.corpus().ids().collect(); + for current_id in ids { + let factor = F::compute(state, current_id)?; + let old = state.corpus().get(current_id)?; + let mut old_md = old.testcase_metadata_mut(); + if let Some(old_map) = old_md.metadata_map_mut().get_mut::() { let mut e_iter = entries.iter(); let mut map_iter = old_map.as_iter(); // ASSERTION: guaranteed to be in order? @@ -169,8 +155,9 @@ where meta.map.reserve(reserve); for (entry, (_, new_id)) in map_iter { - let mut new = state.corpus().get(*new_id)?.borrow_mut(); - let new_meta = new.metadata_map_mut().get_mut::().ok_or_else(|| { + let new = state.corpus().get(*new_id)?; + let mut new_md = new.testcase_metadata_mut(); + let new_meta = new_md.metadata_map_mut().get_mut::().ok_or_else(|| { Error::key_not_found(format!( "{} needed for MinimizerScheduler not found in testcase #{new_id}", type_name::() @@ -185,6 +172,17 @@ where } Ok(()) } + + /// Replaces the [`Testcase`] at the given [`CorpusId`] + fn on_replace( + &mut self, + state: &mut S, + id: CorpusId, + prev: &>::TestcaseMetadataCell, + ) -> Result<(), Error> { + self.base.on_replace(state, id, prev)?; + self.update_score(state, id) + } } impl Scheduler for MinimizerScheduler @@ -216,7 +214,7 @@ where !state .corpus() .get(id)? - .borrow() + .testcase_metadata() .has_metadata::() } && state.rand_mut().coinflip(self.skip_non_favored_prob) { @@ -254,9 +252,10 @@ where let mut new_favoreds = vec![]; { - let mut entry = state.corpus().get(id)?.borrow_mut(); - let factor = F::compute(state, &mut *entry)?; - let meta = entry.metadata_map_mut().get_mut::().ok_or_else(|| { + let factor = F::compute(state, id)?; + let entry = state.corpus().get(id)?; + let mut entry_md = entry.testcase_metadata_mut(); + let meta = entry_md.metadata_map_mut().get_mut::().ok_or_else(|| { Error::key_not_found(format!( "Metadata needed for MinimizerScheduler not found in testcase #{id}" )) @@ -268,13 +267,14 @@ where new_favoreds.push(*elem); // always retain current; we'll drop it later otherwise continue; } - let mut old = state.corpus().get(*old_id)?.borrow_mut(); - if factor > F::compute(state, &mut *old)? { + if factor > F::compute(state, *old_id)? { continue; } + let old = state.corpus().get(*old_id)?; + let mut old_md = old.testcase_metadata_mut(); let must_remove = { - let old_meta = old.metadata_map_mut().get_mut::().ok_or_else(|| { + let old_meta = old_md.metadata_map_mut().get_mut::().ok_or_else(|| { Error::key_not_found(format!( "{} needed for MinimizerScheduler not found in testcase #{old_id}", type_name::() @@ -285,7 +285,7 @@ where }; if must_remove && self.remove_metadata { - drop(old.metadata_map_mut().remove::()); + drop(old_md.metadata_map_mut().remove::()); } } @@ -300,7 +300,7 @@ where state .corpus() .get(id)? - .borrow_mut() + .testcase_metadata_mut() .metadata_map_mut() .remove::(), ); @@ -319,7 +319,7 @@ where } /// Cull the [`Corpus`] using the [`MinimizerScheduler`] - pub fn cull(&self, state: &S) -> Result<(), Error> + pub fn cull(&self, state: &mut S) -> Result<(), Error> where S: HasCorpus + HasMetadata, { @@ -331,8 +331,9 @@ where for (key, id) in &top_rated.map { if !acc.contains(key) { - let mut entry = state.corpus().get(*id)?.borrow_mut(); - let meta = entry.metadata_map().get::().ok_or_else(|| { + let entry = state.corpus().get(*id)?; + let mut md = entry.testcase_metadata_mut(); + let meta = md.metadata_map().get::().ok_or_else(|| { Error::key_not_found(format!( "{} needed for MinimizerScheduler not found in testcase #{id}", type_name::() @@ -342,7 +343,7 @@ where acc.insert(*elem); } - entry.add_metadata(IsFavoredMetadata {}); + md.add_metadata(IsFavoredMetadata {}); } } diff --git a/crates/libafl/src/schedulers/mod.rs b/crates/libafl/src/schedulers/mod.rs index 0b29ae2e094..6d9c5fc6f62 100644 --- a/crates/libafl/src/schedulers/mod.rs +++ b/crates/libafl/src/schedulers/mod.rs @@ -3,6 +3,19 @@ use alloc::{borrow::ToOwned, string::ToString}; use core::{hash::Hash, marker::PhantomData}; +use libafl_bolts::{ + generic_hash_std, + rands::Rand, + tuples::{Handle, MatchName, MatchNameRef}, +}; + +use crate::{ + Error, HasMetadata, + corpus::{Corpus, CorpusId, HasTestcase, IsTestcaseMetadataCell, SchedulerTestcaseMetadata}, + random_corpus_id, + state::{HasCorpus, HasRand}, +}; + pub mod testcase_score; pub use testcase_score::{LenTimeMulTestcasePenalty, TestcasePenalty, TestcaseScore}; @@ -27,30 +40,16 @@ pub mod weighted; pub use weighted::{StdWeightedScheduler, WeightedScheduler}; pub mod tuneable; -use libafl_bolts::{ - generic_hash_std, - rands::Rand, - tuples::{Handle, MatchName, MatchNameRef}, -}; pub use tuneable::*; -use crate::{ - Error, HasMetadata, - corpus::{Corpus, CorpusId, HasTestcase, SchedulerTestcaseMetadata, Testcase}, - random_corpus_id, - state::{HasCorpus, HasRand}, -}; - /// The scheduler also implements `on_remove` and `on_replace` if it implements this stage. -pub trait RemovableScheduler { +pub trait RemovableScheduler +where + S: HasCorpus, +{ /// Removed the given entry from the corpus at the given index - /// When you remove testcases, make sure that that testcase is not currently fuzzed one! - fn on_remove( - &mut self, - _state: &mut S, - _id: CorpusId, - _testcase: &Option>, - ) -> Result<(), Error> { + /// When you remove testcases, make sure that testcase is not currently fuzzed one! + fn on_remove(&mut self, _state: &mut S, _id: CorpusId) -> Result<(), Error> { Ok(()) } @@ -59,7 +58,7 @@ pub trait RemovableScheduler { &mut self, _state: &mut S, _id: CorpusId, - _prev: &Testcase, + _prev: &>::TestcaseMetadataCell, ) -> Result<(), Error> { Ok(()) } @@ -73,13 +72,14 @@ pub fn on_add_metadata_default( ) -> Result<(), Error> where CS: AflScheduler, - S: HasTestcase + HasCorpus, + S: HasCorpus + HasTestcase + HasMetadata, { let current_id = *state.corpus().current(); let mut depth = match current_id { Some(parent_idx) => state .testcase(parent_idx)? + .testcase_metadata() .metadata::()? .depth(), None => 0, @@ -90,12 +90,13 @@ where // Attach a `SchedulerTestcaseMetadata` to the queue entry. depth += 1; - let mut testcase = state.testcase_mut(id)?; - testcase.add_metadata(SchedulerTestcaseMetadata::with_n_fuzz_entry( + let testcase = state.testcase(id)?; + let mut md = testcase.testcase_metadata_mut(); + md.add_metadata(SchedulerTestcaseMetadata::with_n_fuzz_entry( depth, scheduler.last_hash(), )); - testcase.set_parent_id_optional(current_id); + md.set_parent_id_optional(current_id); Ok(()) } @@ -138,8 +139,10 @@ where let current_id = *state.corpus().current(); if let Some(id) = current_id { - let mut testcase = state.testcase_mut(id)?; - let tcmeta = testcase.metadata_mut::()?; + let testcase = state.testcase(id)?; + let mut md = testcase.testcase_metadata_mut(); + + let tcmeta = md.metadata_mut::()?; if tcmeta.handicap() >= 4 { tcmeta.set_handicap(tcmeta.handicap() - 4); @@ -220,11 +223,7 @@ where fn on_add(&mut self, state: &mut S, id: CorpusId) -> Result<(), Error> { // Set parent id let current_id = *state.corpus().current(); - state - .corpus() - .get(id)? - .borrow_mut() - .set_parent_id_optional(current_id); + state.corpus().get(id)?.set_parent_id_optional(current_id); Ok(()) } diff --git a/crates/libafl/src/schedulers/powersched.rs b/crates/libafl/src/schedulers/powersched.rs index 8e13bed83a7..ef43c30cd1a 100644 --- a/crates/libafl/src/schedulers/powersched.rs +++ b/crates/libafl/src/schedulers/powersched.rs @@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize}; use crate::{ Error, HasMetadata, - corpus::{Corpus, CorpusId, HasTestcase, Testcase}, + corpus::{Corpus, CorpusId, HasTestcase}, schedulers::{ AflScheduler, HasQueueCycles, RemovableScheduler, Scheduler, on_add_metadata_default, on_evaluation_metadata_default, on_next_metadata_default, @@ -280,27 +280,7 @@ pub struct PowerQueueScheduler { phantom: PhantomData, } -impl RemovableScheduler for PowerQueueScheduler { - /// This will *NOT* neutralize the effect of this removed testcase from the global data such as `SchedulerMetadata` - fn on_remove( - &mut self, - _state: &mut S, - _id: CorpusId, - _prev: &Option>, - ) -> Result<(), Error> { - Ok(()) - } - - /// This will *NOT* neutralize the effect of this removed testcase from the global data such as `SchedulerMetadata` - fn on_replace( - &mut self, - _state: &mut S, - _id: CorpusId, - _prev: &Testcase, - ) -> Result<(), Error> { - Ok(()) - } -} +impl RemovableScheduler for PowerQueueScheduler where S: HasCorpus {} impl AflScheduler for PowerQueueScheduler { type ObserverRef = C; @@ -326,7 +306,7 @@ impl HasQueueCycles for PowerQueueScheduler { impl Scheduler for PowerQueueScheduler where - S: HasCorpus + HasMetadata + HasTestcase, + for<'a> S: HasCorpus + HasMetadata + HasTestcase, O: Hash, C: AsRef, { @@ -389,7 +369,7 @@ where #[must_use] pub fn new(state: &mut S, observer: &C, strat: PowerSchedule) -> Self where - S: HasMetadata, + S: HasMetadata + HasMetadata, { if !state.has_metadata::() { state.add_metadata::(SchedulerMetadata::new(Some(strat))); diff --git a/crates/libafl/src/schedulers/probabilistic_sampling.rs b/crates/libafl/src/schedulers/probabilistic_sampling.rs index 9906af74369..45a748de038 100644 --- a/crates/libafl/src/schedulers/probabilistic_sampling.rs +++ b/crates/libafl/src/schedulers/probabilistic_sampling.rs @@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize}; use crate::{ Error, HasMetadata, - corpus::{Corpus, CorpusId, Testcase}, + corpus::{Corpus, CorpusId, IsTestcaseMetadataCell}, schedulers::{RemovableScheduler, Scheduler, TestcaseScore}, state::{HasCorpus, HasRand}, }; @@ -68,7 +68,7 @@ impl ProbabilitySamplingScheduler { F: TestcaseScore, S: HasCorpus + HasMetadata + HasRand, { - let prob = F::compute(state, &mut *state.corpus().get(id)?.borrow_mut())?; + let prob = F::compute(state, id)?; debug_assert!( prob >= 0.0 && prob.is_finite(), "scheduler probability is {prob}; to work correctly it must be >= 0.0 and finite" @@ -88,12 +88,7 @@ where F: TestcaseScore, S: HasCorpus + HasMetadata + HasRand, { - fn on_remove( - &mut self, - state: &mut S, - id: CorpusId, - _testcase: &Option>, - ) -> Result<(), Error> { + fn on_remove(&mut self, state: &mut S, id: CorpusId) -> Result<(), Error> { let meta = state .metadata_map_mut() .get_mut::() @@ -108,7 +103,7 @@ where &mut self, state: &mut S, id: CorpusId, - _prev: &Testcase, + _prev: &>::TestcaseMetadataCell, ) -> Result<(), Error> { let meta = state .metadata_map_mut() @@ -129,10 +124,11 @@ where { fn on_add(&mut self, state: &mut S, id: CorpusId) -> Result<(), Error> { let current_id = *state.corpus().current(); + state .corpus() .get(id)? - .borrow_mut() + .testcase_metadata_mut() .set_parent_id_optional(current_id); if state.metadata_map().get::().is_none() { @@ -190,7 +186,7 @@ mod tests { use crate::{ Error, - corpus::{Corpus, InMemoryCorpus, Testcase}, + corpus::{Corpus, CorpusId, InMemoryCorpus}, feedbacks::ConstFeedback, inputs::bytes::BytesInput, schedulers::{ProbabilitySamplingScheduler, Scheduler, TestcaseScore}, @@ -206,7 +202,7 @@ mod tests { where S: HasCorpus, { - fn compute(_state: &S, _: &mut Testcase) -> Result { + fn compute(_state: &S, _corpus_id: CorpusId) -> Result { Ok(FACTOR) } } @@ -233,11 +229,11 @@ mod tests { let mut objective = ConstFeedback::new(false); let mut corpus = InMemoryCorpus::new(); - let t1 = Testcase::with_filename(BytesInput::new(vec![0_u8; 4]), "1".into()); - let t2 = Testcase::with_filename(BytesInput::new(vec![1_u8; 4]), "2".into()); + // let t1 = Testcase::with_filename(, "1".into()); + // let t2 = Testcase::with_filename(, "2".into()); - let idx1 = corpus.add(t1).unwrap(); - let idx2 = corpus.add(t2).unwrap(); + let idx1 = corpus.add(BytesInput::new(vec![0_u8; 4])).unwrap(); + let idx2 = corpus.add(BytesInput::new(vec![1_u8; 4])).unwrap(); let mut state = StdState::new( rand, diff --git a/crates/libafl/src/schedulers/queue.rs b/crates/libafl/src/schedulers/queue.rs index c9c8551479e..2fff7fbb6bb 100644 --- a/crates/libafl/src/schedulers/queue.rs +++ b/crates/libafl/src/schedulers/queue.rs @@ -3,8 +3,8 @@ use alloc::borrow::ToOwned; use crate::{ - Error, - corpus::{Corpus, CorpusId}, + Error, HasMetadata, + corpus::{Corpus, CorpusId, IsTestcaseMetadataCell}, schedulers::{HasQueueCycles, RemovableScheduler, Scheduler}, state::HasCorpus, }; @@ -16,20 +16,19 @@ pub struct QueueScheduler { runs_in_current_cycle: u64, } -impl RemovableScheduler for QueueScheduler {} +impl RemovableScheduler for QueueScheduler where S: HasCorpus {} impl Scheduler for QueueScheduler where - S: HasCorpus, + S: HasCorpus + HasMetadata, { fn on_add(&mut self, state: &mut S, id: CorpusId) -> Result<(), Error> { // Set parent id let current_id = *state.corpus().current(); - state - .corpus() - .get(id)? - .borrow_mut() - .set_parent_id_optional(current_id); + let current_tc = state.corpus().get(id)?; + let mut current_md = current_tc.testcase_metadata_mut(); + + current_md.set_parent_id_optional(current_id); Ok(()) } @@ -102,11 +101,11 @@ mod tests { use libafl_bolts::rands::StdRand; use crate::{ - corpus::{Corpus, OnDiskCorpus, Testcase}, + corpus::{Corpus, OnDiskCorpus}, feedbacks::ConstFeedback, inputs::bytes::BytesInput, schedulers::{QueueScheduler, Scheduler}, - state::{HasCorpus, StdState}, + state::StdState, }; #[test] @@ -116,8 +115,8 @@ mod tests { let mut q = OnDiskCorpus::::new(PathBuf::from("target/.test/fancy/path")).unwrap(); - let t = Testcase::with_filename(BytesInput::new(vec![0_u8; 4]), "fancyfile".into()); - q.add(t).unwrap(); + // let t = Testcase::with_filename(), "fancyfile".into()); + let added_id = q.add(BytesInput::new(vec![0_u8; 4])).unwrap(); let objective_q = OnDiskCorpus::::new(PathBuf::from("target/.test/fancy/objective/path")) @@ -130,17 +129,18 @@ mod tests { let next_id = >::next(&mut scheduler, &mut state).unwrap(); - let filename = state - .corpus() - .get(next_id) - .unwrap() - .borrow() - .filename() - .as_ref() - .unwrap() - .clone(); - - assert_eq!(filename, "fancyfile"); + // let filename = state + // .corpus() + // .get(next_id) + // .unwrap() + // .borrow() + // .filename() + // .as_ref() + // .unwrap() + // .clone(); + + // assert_eq!(filename, "fancyfile"); + assert_eq!(added_id, next_id); fs::remove_dir_all("target/.test/fancy/path").unwrap(); } diff --git a/crates/libafl/src/schedulers/testcase_score.rs b/crates/libafl/src/schedulers/testcase_score.rs index d0e96379a3f..87befc29d71 100644 --- a/crates/libafl/src/schedulers/testcase_score.rs +++ b/crates/libafl/src/schedulers/testcase_score.rs @@ -6,7 +6,7 @@ use num_traits::Zero; use crate::{ Error, HasMetadata, - corpus::{Corpus, SchedulerTestcaseMetadata, Testcase}, + corpus::{Corpus, CorpusId, IsTestcaseMetadataCell, SchedulerTestcaseMetadata}, feedbacks::MapIndexesMetadata, schedulers::{ minimizer::{IsFavoredMetadata, TopRatedsMetadata}, @@ -16,15 +16,18 @@ use crate::{ }; /// Compute the favor factor of a [`Testcase`]. Higher is better. -pub trait TestcaseScore { +pub trait TestcaseScore +where + S: HasCorpus, +{ /// Computes the favor factor of a [`Testcase`]. Higher is better. - fn compute(state: &S, entry: &mut Testcase) -> Result; + fn compute(state: &S, corpus_id: CorpusId) -> Result; } /// Compute the favor factor of a [`Testcase`]. Lower is better. pub trait TestcasePenalty { /// Computes the favor factor of a [`Testcase`]. Higher is better. - fn compute(state: &S, entry: &mut Testcase) -> Result; + fn compute(state: &S, corpus_id: CorpusId) -> Result; } /// Multiply the testcase size with the execution time. @@ -38,10 +41,12 @@ where I: HasLen, { #[expect(clippy::cast_precision_loss)] - fn compute(state: &S, entry: &mut Testcase) -> Result { + fn compute(state: &S, corpus_id: CorpusId) -> Result { + let testcase = state.corpus().get(corpus_id)?; + let md = testcase.testcase_metadata(); + // TODO maybe enforce entry.exec_time().is_some() - Ok(entry.exec_time().map_or(1, |d| d.as_millis()) as f64 - * entry.load_len(state.corpus())? as f64) + Ok(md.exec_time().map_or(1, |d| d.as_millis()) as f64 * testcase.input_len() as f64) } } @@ -61,24 +66,24 @@ where { /// Compute the `power` we assign to each corpus entry #[expect(clippy::cast_precision_loss, clippy::too_many_lines)] - fn compute(state: &S, entry: &mut Testcase) -> Result { + fn compute(state: &S, corpus_id: CorpusId) -> Result { let psmeta = state.metadata::()?; + let corpus = state.corpus(); + let testcase = corpus.get(corpus_id)?; + let md = testcase.testcase_metadata(); let fuzz_mu = if let Some(strat) = psmeta.strat() { if *strat.base() == BaseSchedule::COE { - let corpus = state.corpus(); let mut n_paths = 0; let mut v = 0.0; - let cur_index = state.corpus().current().unwrap(); + let cur_index = corpus.current().unwrap(); for id in corpus.ids() { let n_fuzz_entry = if cur_index == id { - entry - .metadata::()? - .n_fuzz_entry() + md.metadata::()?.n_fuzz_entry() } else { corpus .get(id)? - .borrow() + .testcase_metadata() .metadata::()? .n_fuzz_entry() }; @@ -100,7 +105,7 @@ where }; let mut perf_score = 100.0; - let q_exec_us = entry + let q_exec_us = md .exec_time() .ok_or_else(|| Error::key_not_found("exec_time not set".to_string()))? .as_nanos() as f64; @@ -112,8 +117,8 @@ where psmeta.bitmap_size() / psmeta.bitmap_entries() }; - let favored = entry.has_metadata::(); - let tcmeta = entry.metadata::()?; + let favored = md.has_metadata::(); + let tcmeta = md.metadata::()?; if q_exec_us * 0.1 > avg_exec_us { perf_score = 10.0; @@ -185,7 +190,7 @@ where } } BaseSchedule::FAST => { - if entry.scheduled_count() != 0 { + if md.scheduled_count() != 0 { let lg = libm::log2(f64::from(psmeta.n_fuzz()[tcmeta.n_fuzz_entry()])); match lg { @@ -224,11 +229,11 @@ where } } BaseSchedule::LIN => { - factor = (entry.scheduled_count() as f64) + factor = (md.scheduled_count() as f64) / f64::from(psmeta.n_fuzz()[tcmeta.n_fuzz_entry()] + 1); } BaseSchedule::QUAD => { - factor = ((entry.scheduled_count() * entry.scheduled_count()) as f64) + factor = ((md.scheduled_count() * md.scheduled_count()) as f64) / f64::from(psmeta.n_fuzz()[tcmeta.n_fuzz_entry()] + 1); } } @@ -256,7 +261,7 @@ where perf_score = HAVOC_MAX_MULT * 100.0; } - if entry.objectives_found() > 0 && psmeta.strat().is_some_and(|s| s.avoid_crash()) { + if md.objectives_found() > 0 && psmeta.strat().is_some_and(|s| s.avoid_crash()) { perf_score *= 0.00; } @@ -275,23 +280,25 @@ where { /// Compute the `weight` used in weighted corpus entry selection algo #[expect(clippy::cast_precision_loss)] - fn compute(state: &S, entry: &mut Testcase) -> Result { + fn compute(state: &S, corpus_id: CorpusId) -> Result { let mut weight = 1.0; let psmeta = state.metadata::()?; + let testcase = state.corpus().get(corpus_id)?; + let md = testcase.testcase_metadata(); - let tcmeta = entry.metadata::()?; + let tcmeta = md.metadata::()?; // This means that this testcase has never gone through the calibration stage before1, // In this case we'll just return the default weight // This methoud is called in corpus's on_add() method. Fuzz_level is zero at that time. - if entry.scheduled_count() == 0 || psmeta.cycles() == 0 { + if md.scheduled_count() == 0 || psmeta.cycles() == 0 { return Ok(weight); } - let q_exec_us = entry + let q_exec_us = md .exec_time() .ok_or_else(|| Error::key_not_found("exec_time not set".to_string()))? .as_nanos() as f64; - let favored = entry.has_metadata::(); + let favored = md.has_metadata::(); let avg_exec_us = psmeta.exec_time().as_nanos() as f64 / psmeta.cycles() as f64; let avg_bitmap_size = psmeta.bitmap_size_log() / psmeta.bitmap_entries() as f64; @@ -318,9 +325,10 @@ where libm::log2(q_bitmap_size).max(1.0) / avg_bitmap_size }; - let tc_ref = match entry.metadata_map().get::() { - Some(meta) => meta.refcnt() as f64, - None => 0.0, + let tc_ref = if let Some(meta) = md.metadata_map().get::() { + meta.refcnt() as f64 + } else { + 0.0 }; let avg_top_size = match state.metadata::() { @@ -339,11 +347,11 @@ where } // was it fuzzed before? - if entry.scheduled_count() == 0 { + if md.scheduled_count() == 0 { weight *= 2.0; } - if entry.objectives_found() > 0 && psmeta.strat().is_some_and(|s| s.avoid_crash()) { + if md.objectives_found() > 0 && psmeta.strat().is_some_and(|s| s.avoid_crash()) { weight *= 0.00; } diff --git a/crates/libafl/src/schedulers/tuneable.rs b/crates/libafl/src/schedulers/tuneable.rs index 2728cef7146..0588e3e9d02 100644 --- a/crates/libafl/src/schedulers/tuneable.rs +++ b/crates/libafl/src/schedulers/tuneable.rs @@ -101,7 +101,7 @@ impl TuneableScheduler { } } -impl RemovableScheduler for TuneableScheduler {} +impl RemovableScheduler for TuneableScheduler where S: HasCorpus {} impl Scheduler for TuneableScheduler where @@ -110,11 +110,7 @@ where fn on_add(&mut self, state: &mut S, id: CorpusId) -> Result<(), Error> { // Set parent id let current_id = *state.corpus().current(); - state - .corpus() - .get(id)? - .borrow_mut() - .set_parent_id_optional(current_id); + state.corpus().get(id)?.set_parent_id_optional(current_id); Ok(()) } diff --git a/crates/libafl/src/schedulers/weighted.rs b/crates/libafl/src/schedulers/weighted.rs index 33af2cadeb0..50360127b23 100644 --- a/crates/libafl/src/schedulers/weighted.rs +++ b/crates/libafl/src/schedulers/weighted.rs @@ -15,7 +15,7 @@ use serde::{Deserialize, Serialize}; use crate::{ Error, HasMetadata, - corpus::{Corpus, CorpusId, HasTestcase, Testcase}, + corpus::{Corpus, CorpusId, HasTestcase}, random_corpus_id, schedulers::{ AflScheduler, HasQueueCycles, RemovableScheduler, Scheduler, on_add_metadata_default, @@ -173,8 +173,7 @@ where let mut sum: f64 = 0.0; for i in state.corpus().ids() { - let mut testcase = state.corpus().get(i)?.borrow_mut(); - let weight = F::compute(state, &mut *testcase)?; + let weight = F::compute(state, i)?; weights.insert(i, weight); sum += weight; } @@ -257,14 +256,12 @@ where } } -impl RemovableScheduler for WeightedScheduler { +impl RemovableScheduler for WeightedScheduler +where + S: HasCorpus, +{ /// This will *NOT* neutralize the effect of this removed testcase from the global data such as `SchedulerMetadata` - fn on_remove( - &mut self, - _state: &mut S, - _id: CorpusId, - _prev: &Option>, - ) -> Result<(), Error> { + fn on_remove(&mut self, _state: &mut S, _id: CorpusId) -> Result<(), Error> { self.table_invalidated = true; Ok(()) } @@ -274,7 +271,7 @@ impl RemovableScheduler for WeightedScheduler { &mut self, _state: &mut S, _id: CorpusId, - _prev: &Testcase, + _prev: &>::TestcaseMetadataCell, ) -> Result<(), Error> { self.table_invalidated = true; Ok(()) diff --git a/crates/libafl/src/stages/afl_stats.rs b/crates/libafl/src/stages/afl_stats.rs index c42436b7be1..b098a6b7049 100644 --- a/crates/libafl/src/stages/afl_stats.rs +++ b/crates/libafl/src/stages/afl_stats.rs @@ -26,7 +26,10 @@ use serde::{Deserialize, Serialize}; use crate::feedbacks::{CRASH_FEEDBACK_NAME, TIMEOUT_FEEDBACK_NAME}; use crate::{ Error, HasMetadata, HasNamedMetadata, HasScheduler, - corpus::{Corpus, HasCurrentCorpusId, SchedulerTestcaseMetadata, Testcase}, + corpus::{ + Corpus, HasCurrentCorpusId, SchedulerTestcaseMetadata, TestcaseMetadata, + testcase::IsTestcaseMetadataCell, + }, events::{Event, EventFirer, EventWithStats}, executors::HasObservers, feedbacks::{HasObserverHandle, MapFeedbackMetadata}, @@ -273,25 +276,26 @@ where "state is not currently processing a corpus index", )); }; - let testcase = state.corpus().get(corpus_idx)?.borrow(); + let testcase = state.corpus().get(corpus_idx)?; + let md = &*testcase.testcase_metadata(); // NOTE: scheduled_count represents the amount of fuzz runs a // testcase has had. Since this stage is kept at the very end of stage list, // the entry would have been fuzzed already (and should contain IsFavoredMetadata) but would have a scheduled count of zero // since the scheduled count is incremented after all stages have been run. - if testcase.scheduled_count() == 0 { + if md.scheduled_count() == 0 { // New testcase! self.cycles_wo_finds = 0; self.update_last_find(); #[cfg(feature = "track_hit_feedbacks")] { - self.maybe_update_last_crash(&testcase, state); - self.maybe_update_last_hang(&testcase, state); + self.maybe_update_last_crash(md, state); + self.maybe_update_last_hang(md, state); } self.update_has_fuzzed_size(); - self.maybe_update_is_favored_size(&testcase); + self.maybe_update_is_favored_size(md); } - self.maybe_update_slowest_exec(&testcase); - self.maybe_update_max_depth(&testcase); + self.maybe_update_slowest_exec(md); + self.maybe_update_max_depth(md); // See if we actually need to run the stage, if not, avoid dynamic value computation. if !self.check_interval() { @@ -413,8 +417,6 @@ where self.write_plot_data(&plot_data)?; } - drop(testcase); - // We construct this simple json by hand to squeeze out some extra speed. let json = format!( "{{\ @@ -489,14 +491,14 @@ where Ok(()) } - fn maybe_update_is_favored_size(&mut self, testcase: &Testcase) { - if testcase.has_metadata::() { + fn maybe_update_is_favored_size(&mut self, md: &TestcaseMetadata) { + if md.has_metadata::() { self.is_favored_size += 1; } } - fn maybe_update_slowest_exec(&mut self, testcase: &Testcase) { - if let Some(exec_time) = testcase.exec_time() { + fn maybe_update_slowest_exec(&mut self, md: &TestcaseMetadata) { + if let Some(exec_time) = md.exec_time() { if exec_time > &self.slowest_exec { self.slowest_exec = *exec_time; } @@ -507,8 +509,8 @@ where self.has_fuzzed_size += 1; } - fn maybe_update_max_depth(&mut self, testcase: &Testcase) { - if let Ok(metadata) = testcase.metadata::() { + fn maybe_update_max_depth(&mut self, md: &TestcaseMetadata) { + if let Ok(metadata) = md.metadata::() { if metadata.depth() > self.max_depth { self.max_depth = metadata.depth(); } @@ -520,12 +522,12 @@ where } #[cfg(feature = "track_hit_feedbacks")] - fn maybe_update_last_crash(&mut self, testcase: &Testcase, state: &S) + fn maybe_update_last_crash(&mut self, md: &TestcaseMetadata, state: &S) where S: HasExecutions, { #[cfg(feature = "track_hit_feedbacks")] - if testcase + if md .hit_objectives() .contains(&Cow::Borrowed(CRASH_FEEDBACK_NAME)) { @@ -535,11 +537,11 @@ where } #[cfg(feature = "track_hit_feedbacks")] - fn maybe_update_last_hang(&mut self, testcase: &Testcase, state: &S) + fn maybe_update_last_hang(&mut self, md: &TestcaseMetadata, state: &S) where S: HasExecutions, { - if testcase + if md .hit_objectives() .contains(&Cow::Borrowed(TIMEOUT_FEEDBACK_NAME)) { diff --git a/crates/libafl/src/stages/calibrate.rs b/crates/libafl/src/stages/calibrate.rs index d1c44a52b91..536f3bb2b5f 100644 --- a/crates/libafl/src/stages/calibrate.rs +++ b/crates/libafl/src/stages/calibrate.rs @@ -14,7 +14,7 @@ use serde::{Deserialize, Serialize}; use crate::{ Error, HasMetadata, HasNamedMetadata, HasScheduler, - corpus::{Corpus, HasCurrentCorpusId, SchedulerTestcaseMetadata}, + corpus::{Corpus, HasCurrentCorpusId, IsTestcaseMetadataCell, SchedulerTestcaseMetadata}, events::{Event, EventFirer, EventWithStats, LogSeverity}, executors::{Executor, ExitKind, HasObservers}, feedbacks::{HasObserverHandle, map::MapFeedbackMetadata}, @@ -283,19 +283,21 @@ where psmeta.set_bitmap_size_log(psmeta.bitmap_size_log() + libm::log2(bitmap_size as f64)); psmeta.set_bitmap_entries(psmeta.bitmap_entries() + 1); - let mut testcase = state.current_testcase_mut()?; + let testcase = state.current_testcase()?; + let mut tc_md = testcase.testcase_metadata_mut(); - testcase.set_exec_time(total_time / (iter as u32)); - // log::trace!("time: {:#?}", testcase.exec_time()); + tc_md.set_exec_time(total_time / (iter as u32)); // If the testcase doesn't have its own `SchedulerTestcaseMetadata`, create it. - let data = if let Ok(metadata) = testcase.metadata_mut::() { + let data = if let Ok(metadata) = tc_md.metadata_mut::() { metadata } else { - let depth = match testcase.parent_id() { + let depth = match tc_md.parent_id() { Some(parent_id) => { - match (*state.corpus().get(parent_id)?) - .borrow() + match state + .corpus() + .get(parent_id)? + .testcase_metadata() .metadata_map() .get::() { @@ -305,10 +307,8 @@ where } _ => 0, }; - testcase.add_metadata(SchedulerTestcaseMetadata::new(depth)); - testcase - .metadata_mut::() - .unwrap() + tc_md.add_metadata(SchedulerTestcaseMetadata::new(depth)); + tc_md.metadata_mut::().unwrap() }; data.set_cycle_and_time((total_time, iter)); diff --git a/crates/libafl/src/stages/concolic.rs b/crates/libafl/src/stages/concolic.rs index 08613662d15..41a17064c06 100644 --- a/crates/libafl/src/stages/concolic.rs +++ b/crates/libafl/src/stages/concolic.rs @@ -15,7 +15,7 @@ use libafl_bolts::{ use crate::monitors::stats::PerfFeature; use crate::{ Error, HasMetadata, HasNamedMetadata, - corpus::HasCurrentCorpusId, + corpus::{HasCurrentCorpusId, IsTestcaseMetadataCell}, executors::{Executor, HasObservers}, observers::{ObserversTuple, concolic::ConcolicObserver}, stages::{Restartable, RetryCountRestartHelper, Stage, TracingStage}, @@ -53,7 +53,7 @@ where TE::Observers: ObserversTuple, S: HasExecutions + HasCorpus - + HasNamedMetadata + + HasMetadata + HasCurrentTestcase + HasCurrentCorpusId + MaybeHasClientPerfMonitor, @@ -70,7 +70,8 @@ where if let Some(observer) = self.inner.executor().observers().get(&self.observer_handle) { let metadata = observer.create_metadata_from_current_map(); state - .current_testcase_mut()? + .current_testcase()? + .testcase_metadata_mut() .metadata_map_mut() .insert(metadata); } @@ -393,9 +394,10 @@ where start_timer!(state); mark_feature_time!(state, PerfFeature::GetInputFromCorpus); } - let testcase = state.current_testcase()?.clone(); + let testcase = state.current_testcase()?; + let md = testcase.testcase_metadata(); - let mutations = testcase.metadata::().ok().map(|meta| { + let mutations = md.metadata::().ok().map(|meta| { start_timer!(state); let mutations = { generate_mutations(meta.iter_messages()) }; mark_feature_time!(state, PerfFeature::Mutate); diff --git a/crates/libafl/src/stages/dump.rs b/crates/libafl/src/stages/dump.rs index ef10eee334e..d5b58ac9752 100644 --- a/crates/libafl/src/stages/dump.rs +++ b/crates/libafl/src/stages/dump.rs @@ -14,13 +14,7 @@ use std::{ use libafl_bolts::impl_serdeany; use serde::{Deserialize, Serialize}; -use crate::{ - Error, HasMetadata, - corpus::{Corpus, CorpusId, Testcase}, - inputs::Input, - stages::{Restartable, Stage}, - state::{HasCorpus, HasRand, HasSolutions}, -}; +use crate::{Error, corpus::{Corpus, CorpusId, IsTestcaseMetadataCell, Testcase, TestcaseMetadata}, inputs::Input, stages::{Restartable, Stage}, state::{HasCorpus, HasRand, HasSolutions}, HasMetadata}; /// Metadata used to store information about disk dump indexes for names #[cfg_attr( @@ -47,8 +41,8 @@ pub struct DumpToDiskStage { impl Stage for DumpToDiskStage where - CB1: FnMut(&Testcase, &S) -> Vec, - CB2: FnMut(&Testcase, &CorpusId) -> P, + CB1: FnMut(&I, &TestcaseMetadata, &S) -> Vec, + CB2: FnMut(&I, &TestcaseMetadata, &CorpusId) -> P, S: HasCorpus + HasSolutions + HasRand + HasMetadata, P: AsRef, { @@ -65,7 +59,16 @@ where } impl Restartable - for DumpToDiskStage, &CorpusId) -> String, EM, I, S, Z> + for DumpToDiskStage< + CB1, + fn(&Testcase>::TestcaseMetadataCell>, &CorpusId) -> String, + EM, + I, + S, + Z, + > +where + S: HasCorpus, { #[inline] fn should_restart(&mut self, _state: &mut S) -> Result { @@ -81,9 +84,17 @@ impl Restartable } /// Implementation for `DumpToDiskStage` with a default `generate_filename` function. -impl DumpToDiskStage, &CorpusId) -> String, EM, I, S, Z> +impl + DumpToDiskStage< + CB1, + fn(&Testcase>::TestcaseMetadataCell>, &CorpusId) -> String, + EM, + I, + S, + Z, + > where - S: HasSolutions + HasRand + HasMetadata, + S: HasCorpus + HasSolutions + HasRand + HasMetadata, I: Input, { /// Create a new [`DumpToDiskStage`] with a default `generate_filename` function. @@ -102,14 +113,18 @@ where /// Default `generate_filename` function. #[expect(clippy::trivially_copy_pass_by_ref)] - fn generate_filename(testcase: &Testcase, id: &CorpusId) -> String { + fn generate_filename( + testcase: &Testcase>::TestcaseMetadataCell>, + id: &CorpusId, + ) -> String { + // TODO: check that [ Some(id.0.to_string()), - testcase.filename().clone(), - testcase - .input() - .as_ref() - .map(|t| t.generate_name(Some(*id))), + Some(testcase.id().clone()), + // testcase + // .input() + // .as_ref() + // .map(|t| t.generate_name(Some(*id))), ] .iter() .flatten() @@ -164,9 +179,9 @@ where #[inline] fn dump_state_to_disk>(&mut self, state: &mut S) -> Result<(), Error> where + CB1: FnMut(&I, &TestcaseMetadata, &S) -> Vec, + CB2: FnMut(&I, &TestcaseMetadata, &CorpusId) -> P, S: HasCorpus, - CB1: FnMut(&Testcase, &S) -> Vec, - CB2: FnMut(&Testcase, &CorpusId) -> P, { let (mut corpus_id, mut solutions_id) = if let Some(meta) = state.metadata_map().get::() { @@ -179,13 +194,16 @@ where }; while let Some(i) = corpus_id { - let mut testcase = state.corpus().get(i)?.borrow_mut(); - state.corpus().load_input_into(&mut testcase)?; - let bytes = (self.to_bytes)(&testcase, state); + let testcase = state.corpus().get(i)?; + + let input = testcase.input(); + let md = testcase.testcase_metadata(); + + let bytes = (self.to_bytes)(input.as_ref(), &md, state); let fname = self .corpus_dir - .join((self.generate_filename)(&testcase, &i)); + .join((self.generate_filename)(input.as_ref(), &md, &i)); let mut f = File::create(fname)?; drop(f.write_all(&bytes)); @@ -193,13 +211,16 @@ where } while let Some(i) = solutions_id { - let mut testcase = state.solutions().get(i)?.borrow_mut(); - state.solutions().load_input_into(&mut testcase)?; - let bytes = (self.to_bytes)(&testcase, state); + let testcase = state.solutions().get(i)?; + + let input = testcase.input(); + let md = testcase.testcase_metadata(); + + let bytes = (self.to_bytes)(input.as_ref(), &md, state); let fname = self .solutions_dir - .join((self.generate_filename)(&testcase, &i)); + .join((self.generate_filename)(input.as_ref(), &md, &i)); let mut f = File::create(fname)?; drop(f.write_all(&bytes)); diff --git a/crates/libafl/src/stages/generalization.rs b/crates/libafl/src/stages/generalization.rs index 712eb1ca0cd..27a6bac3670 100644 --- a/crates/libafl/src/stages/generalization.rs +++ b/crates/libafl/src/stages/generalization.rs @@ -15,12 +15,10 @@ use libafl_bolts::{ use crate::monitors::stats::PerfFeature; use crate::{ Error, HasMetadata, HasNamedMetadata, - corpus::{Corpus, HasCurrentCorpusId}, + corpus::{Corpus, HasCurrentCorpusId, IsTestcaseMetadataCell}, executors::{Executor, HasObservers}, feedbacks::map::MapNoveltiesMetadata, - inputs::{ - BytesInput, GeneralizedInputMetadata, GeneralizedItem, HasMutatorBytes, ResizableMutator, - }, + inputs::{BytesInput, GeneralizedInputMetadata, GeneralizedItem, ResizableMutator}, mark_feature_time, observers::{CanTrack, MapObserver, ObserversTuple}, require_novelties_tracking, @@ -112,25 +110,29 @@ where start_timer!(state); { let corpus = state.corpus(); - let mut testcase = corpus.get(corpus_id)?.borrow_mut(); + let testcase = corpus.get(corpus_id)?; if testcase.scheduled_count() > 0 { return Ok(()); } - - corpus.load_input_into(&mut testcase)?; } mark_feature_time!(state, PerfFeature::GetInputFromCorpus); - let mut entry = state.corpus().get(corpus_id)?.borrow_mut(); - let input = entry.input_mut().as_mut().unwrap(); - - let payload: Vec<_> = input.mutator_bytes().iter().map(|&x| Some(x)).collect(); + let entry = state.corpus().get(corpus_id)?; + let md = entry.testcase_metadata(); + let input = entry.input(); + + let payload: Vec<_> = input + .as_ref() + .clone() + .into_inner() + .into_iter() + .map(Some) + .collect(); if payload.len() > MAX_GENERALIZED_LEN { return Ok(()); } - let original = input.clone(); - let meta = entry.metadata_map().get::().ok_or_else(|| { + let meta = md.metadata_map().get::().ok_or_else(|| { Error::key_not_found(format!( "MapNoveltiesMetadata needed for GeneralizationStage not found in testcase #{corpus_id} (check the arguments of MapFeedback::new(...))" )) @@ -138,7 +140,7 @@ where if meta.as_slice().is_empty() { return Ok(()); // don't generalise inputs which don't have novelties } - (payload, original, meta.as_slice().to_vec()) + (payload, input.clone(), meta.as_slice().to_vec()) }; // Do not generalized unstable inputs @@ -336,8 +338,11 @@ where assert!(meta.generalized().first() == Some(&GeneralizedItem::Gap)); assert!(meta.generalized().last() == Some(&GeneralizedItem::Gap)); - let mut entry = state.corpus().get(corpus_id)?.borrow_mut(); - entry.metadata_map_mut().insert(meta); + let entry = state.corpus().get(corpus_id)?; + entry + .testcase_metadata_mut() + .metadata_map_mut() + .insert(meta); } Ok(()) diff --git a/crates/libafl/src/stages/mod.rs b/crates/libafl/src/stages/mod.rs index ab23116c603..d26c10bb9c4 100644 --- a/crates/libafl/src/stages/mod.rs +++ b/crates/libafl/src/stages/mod.rs @@ -541,7 +541,7 @@ mod test { use libafl_bolts::{Error, Named}; use crate::{ - corpus::{Corpus, HasCurrentCorpusId, Testcase}, + corpus::{Corpus, HasCurrentCorpusId}, inputs::NopInput, stages::RetryCountRestartHelper, state::{HasCorpus, StdState}, @@ -569,7 +569,7 @@ mod test { let mut state = StdState::nop()?; let stage = StageWithOneTry; - let corpus_id = state.corpus_mut().add(Testcase::new(NopInput {}))?; + let corpus_id = state.corpus_mut().add(NopInput {})?; state.set_corpus_id(corpus_id)?; diff --git a/crates/libafl/src/stages/mutational.rs b/crates/libafl/src/stages/mutational.rs index 454aa7ba0ad..073d6a27bc7 100644 --- a/crates/libafl/src/stages/mutational.rs +++ b/crates/libafl/src/stages/mutational.rs @@ -13,7 +13,7 @@ use libafl_bolts::{Named, rands::Rand}; use crate::monitors::stats::PerfFeature; use crate::{ Error, HasMetadata, HasNamedMetadata, - corpus::{Corpus, CorpusId, HasCurrentCorpusId, Testcase}, + corpus::{CorpusId, HasCurrentCorpusId, Testcase, testcase::IsTestcaseMetadataCell}, fuzzer::Evaluator, inputs::Input, mark_feature_time, @@ -23,7 +23,6 @@ use crate::{ start_timer, state::{HasCorpus, HasCurrentTestcase, HasExecutions, HasRand, MaybeHasClientPerfMonitor}, }; - // TODO multi mutators stage /// Action performed after the un-transformed input is executed (e.g., updating metadata) @@ -47,7 +46,10 @@ pub trait MutatedTransform: Sized { type Post: MutatedTransformPost; /// Transform the provided testcase into this type - fn try_transform_from(base: &mut Testcase, state: &S) -> Result; + fn try_transform_from( + base: &Testcase, + state: &S, + ) -> Result; /// Transform this instance back into the original input type fn try_transform_into(self, state: &S) -> Result<(I, Self::Post), Error>; @@ -62,9 +64,11 @@ where type Post = (); #[inline] - fn try_transform_from(base: &mut Testcase, state: &S) -> Result { - state.corpus().load_input_into(base)?; - Ok(base.input().as_ref().unwrap().clone()) + fn try_transform_from( + base: &Testcase, + _state: &S, + ) -> Result { + Ok(base.input().as_ref().clone()) } #[inline] @@ -258,9 +262,9 @@ where .saturating_sub(self.execs_since_progress_start(state)?); */ let num = self.iterations(state)?; - let mut testcase = state.current_testcase_mut()?; + let testcase = state.current_testcase()?; - let Ok(input) = I1::try_transform_from(&mut testcase, state) else { + let Ok(input) = I1::try_transform_from(&testcase, state) else { return Ok(()); }; drop(testcase); @@ -324,8 +328,8 @@ where state: &mut S, manager: &mut EM, ) -> Result<(), Error> { - let mut testcase = state.current_testcase_mut()?; - let Ok(input) = I::try_transform_from(&mut testcase, state) else { + let testcase = state.current_testcase()?; + let Ok(input) = I::try_transform_from(&testcase, state) else { return Ok(()); }; drop(testcase); diff --git a/crates/libafl/src/stages/power.rs b/crates/libafl/src/stages/power.rs index 10a86c029bb..16a25c597ed 100644 --- a/crates/libafl/src/stages/power.rs +++ b/crates/libafl/src/stages/power.rs @@ -23,7 +23,7 @@ use crate::{ mutational::{MutatedTransform, MutatedTransformPost}, }, start_timer, - state::{HasCurrentTestcase, HasExecutions, HasRand, MaybeHasClientPerfMonitor}, + state::{HasCorpus, HasCurrentTestcase, HasExecutions, HasRand, MaybeHasClientPerfMonitor}, }; /// The unique id for this stage @@ -47,7 +47,7 @@ impl Named for PowerMutationalStage impl MutationalStage for PowerMutationalStage where - S: HasCurrentTestcase, + S: HasCorpus + HasCurrentCorpusId, F: TestcaseScore, { type Mutator = M; @@ -67,8 +67,8 @@ where #[expect(clippy::cast_sign_loss)] fn iterations(&self, state: &mut S) -> Result { // Update handicap - let mut testcase = state.current_testcase_mut()?; - let score = F::compute(state, &mut testcase)? as usize; + let current_corpus_id = state.current_corpus_id()?.unwrap(); + let score = F::compute(state, current_corpus_id)? as usize; Ok(score) } @@ -122,7 +122,11 @@ where E: Executor + HasObservers, F: TestcaseScore, M: Mutator, - S: HasMetadata + HasRand + HasCurrentTestcase + MaybeHasClientPerfMonitor, + S: HasMetadata + + HasRand + + HasCurrentTestcase + + MaybeHasClientPerfMonitor + + HasCurrentCorpusId, I: MutatedTransform + Clone, Z: Evaluator, { @@ -160,9 +164,9 @@ where .saturating_sub(self.execs_since_progress_start(state)?); */ let num = self.iterations(state)?; - let mut testcase = state.current_testcase_mut()?; + let testcase = state.current_testcase()?; - let Ok(input) = I::try_transform_from(&mut testcase, state) else { + let Ok(input) = I::try_transform_from(&testcase, state) else { return Ok(()); }; drop(testcase); diff --git a/crates/libafl/src/stages/push/mod.rs b/crates/libafl/src/stages/push/mod.rs index c5c926483ee..9e4ac7f727c 100644 --- a/crates/libafl/src/stages/push/mod.rs +++ b/crates/libafl/src/stages/push/mod.rs @@ -21,8 +21,8 @@ use libafl_bolts::Named; pub use mutational::StdMutationalPushStage; use crate::{ - Error, EvaluatorObservers, ExecutesInput, ExecutionProcessor, HasMetadata, HasScheduler, - common::HasNamedMetadata, + Error, EvaluatorObservers, ExecutesInput, ExecutionProcessor, HasMetadata, HasNamedMetadata, + HasScheduler, corpus::{CorpusId, HasCurrentCorpusId}, events::{EventFirer, EventRestarter, HasEventManagerId, ProgressReporter}, executors::{Executor, ExitKind, HasObservers}, diff --git a/crates/libafl/src/stages/push/mutational.rs b/crates/libafl/src/stages/push/mutational.rs index fa1e1ff249c..561d61b587d 100644 --- a/crates/libafl/src/stages/push/mutational.rs +++ b/crates/libafl/src/stages/push/mutational.rs @@ -130,8 +130,9 @@ where start_timer!(state); let input = state - .corpus_mut() - .cloned_input_for_id(self.current_corpus_id.unwrap()); + .corpus() + .get(self.current_corpus_id.unwrap()) + .map(|tc| tc.input().as_ref().clone()); let mut input = match input { Err(e) => return Some(Err(e)), Ok(input) => input, diff --git a/crates/libafl/src/stages/replay.rs b/crates/libafl/src/stages/replay.rs index 8addf866882..88060d0270b 100644 --- a/crates/libafl/src/stages/replay.rs +++ b/crates/libafl/src/stages/replay.rs @@ -132,9 +132,8 @@ where log::info!("Replaying corpus: {id}"); let input = { - let mut tc = state.corpus().get(id)?.borrow_mut(); - let input = tc.load_input(state.corpus())?; - input.clone() + let tc = state.corpus().get(id)?; + tc.input().as_ref().clone() }; fuzzer.evaluate_input(state, executor, manager, &input)?; @@ -151,9 +150,8 @@ where } log::info!("Replaying solution: {id}"); let input = { - let mut tc = state.solutions().get(id)?.borrow_mut(); - let input = tc.load_input(state.corpus())?; - input.clone() + let tc = state.solutions().get(id)?; + tc.input().as_ref().clone() }; fuzzer.evaluate_input(state, executor, manager, &input)?; diff --git a/crates/libafl/src/stages/sync.rs b/crates/libafl/src/stages/sync.rs index ea94295b62d..0ee0c28a892 100644 --- a/crates/libafl/src/stages/sync.rs +++ b/crates/libafl/src/stages/sync.rs @@ -280,7 +280,7 @@ where last_id.map_or_else(|| state.corpus().first(), |id| state.corpus().next(id)); while let Some(id) = cur_id { - let input = state.corpus().cloned_input_for_id(id)?; + let input = state.corpus().get(id)?.cloned_input(); self.client.fire( state, diff --git a/crates/libafl/src/stages/time_tracker.rs b/crates/libafl/src/stages/time_tracker.rs index ec79de33c09..b4945e59265 100644 --- a/crates/libafl/src/stages/time_tracker.rs +++ b/crates/libafl/src/stages/time_tracker.rs @@ -1,4 +1,5 @@ //! Stage that wraps another stage and tracks it's execution time in `State` + use core::{marker::PhantomData, time::Duration}; use libafl_bolts::{Error, current_time}; @@ -7,6 +8,7 @@ use crate::{ HasMetadata, stages::{Restartable, Stage}, }; + /// Track an inner Stage's execution time #[derive(Debug)] pub struct TimeTrackingStageWrapper { diff --git a/crates/libafl/src/stages/tmin.rs b/crates/libafl/src/stages/tmin.rs index cd66d1d51a3..72549f85c5d 100644 --- a/crates/libafl/src/stages/tmin.rs +++ b/crates/libafl/src/stages/tmin.rs @@ -4,7 +4,7 @@ use alloc::{ borrow::{Cow, ToOwned}, string::ToString, }; -use core::{borrow::BorrowMut, fmt::Debug, hash::Hash, marker::PhantomData}; +use core::{fmt::Debug, hash::Hash, marker::PhantomData}; use ahash::RandomState; use libafl_bolts::{ @@ -20,7 +20,7 @@ use crate::monitors::stats::PerfFeature; use crate::{ Error, ExecutesInput, ExecutionProcessor, HasFeedback, HasMetadata, HasNamedMetadata, HasScheduler, - corpus::{Corpus, HasCurrentCorpusId, Testcase}, + corpus::{Corpus, HasCurrentCorpusId, testcase::TestcaseMetadata}, events::EventFirer, executors::{ExitKind, HasObservers}, feedbacks::{Feedback, FeedbackFactory, HasObserverHandle, StateInitializer}, @@ -191,7 +191,7 @@ where } start_timer!(state); - let transformed = I::try_transform_from(state.current_testcase_mut()?.borrow_mut(), state)?; + let transformed = I::try_transform_from(&state.current_testcase()?, state)?; let mut base = state.current_input_cloned()?; // potential post operation if base is replaced by a shorter input let mut base_post = None; @@ -283,17 +283,26 @@ where fuzzer .feedback_mut() .is_interesting(state, manager, &base, &*observers, &exit_kind)?; - let mut testcase = Testcase::from(base); - testcase.set_executions(*state.executions()); - testcase.set_parent_id(base_corpus_id); - fuzzer - .feedback_mut() - .append_metadata(state, manager, &*observers, &mut testcase)?; - let prev = state.corpus_mut().replace(base_corpus_id, testcase)?; + let mut tc_md = TestcaseMetadata::builder() + .executions(*state.executions()) + .parent_id(Some(base_corpus_id)) + .build(); + + fuzzer.feedback_mut().append_metadata( + state, + manager, + &*observers, + &base, + &mut tc_md, + )?; + + let prev = state.corpus_mut().replace_metadata(base_corpus_id, tc_md)?; + fuzzer .scheduler_mut() .on_replace(state, base_corpus_id, &prev)?; + // perform the post operation for the new testcase, e.g. to update metadata. // base_post should be updated along with the base (and is no longer None) base_post @@ -366,6 +375,7 @@ where M: Hash, C: AsRef, OT: MatchName, + S: HasCorpus, { fn is_interesting( &mut self, diff --git a/crates/libafl/src/stages/tracing.rs b/crates/libafl/src/stages/tracing.rs index 6df82fdf0ca..496d2ba4eb6 100644 --- a/crates/libafl/src/stages/tracing.rs +++ b/crates/libafl/src/stages/tracing.rs @@ -35,11 +35,7 @@ impl TracingStage where TE: Executor + HasObservers, TE::Observers: ObserversTuple, - S: HasExecutions - + HasCorpus - + HasNamedMetadata - + HasCurrentTestcase - + MaybeHasClientPerfMonitor, + S: HasExecutions + HasCorpus + HasCurrentTestcase + MaybeHasClientPerfMonitor, { /// Perform tracing on the given `CorpusId`. Useful for if wrapping [`TracingStage`] with your /// own stage and you need to manage [`super::NestedStageRetryCountRestartHelper`] differently diff --git a/crates/libafl/src/stages/tuneable.rs b/crates/libafl/src/stages/tuneable.rs index ac47097b10a..3a912b4baa5 100644 --- a/crates/libafl/src/stages/tuneable.rs +++ b/crates/libafl/src/stages/tuneable.rs @@ -251,8 +251,8 @@ where let iters = self.fixed_iters(state)?; start_timer!(state); - let mut testcase = state.current_testcase_mut()?; - let Ok(input) = I::try_transform_from(&mut testcase, state) else { + let testcase = state.current_testcase()?; + let Ok(input) = I::try_transform_from(&testcase, state) else { return Ok(()); }; drop(testcase); @@ -307,12 +307,18 @@ where /// Creates a new default tuneable mutational stage #[must_use] - pub fn new(state: &mut S, mutator: M) -> Self { + pub fn new(state: &mut S, mutator: M) -> Self + where + S: HasNamedMetadata, + { Self::transforming(state, mutator, STD_TUNEABLE_MUTATIONAL_STAGE_NAME) } /// Crates a new tuneable mutational stage with the given name - pub fn with_name(state: &mut S, mutator: M, name: &str) -> Self { + pub fn with_name(state: &mut S, mutator: M, name: &str) -> Self + where + S: HasNamedMetadata, + { Self::transforming(state, mutator, name) } @@ -325,7 +331,10 @@ where } /// Set the number of iterations to be used by the std [`TuneableMutationalStage`] - pub fn set_iters_std(state: &mut S, iters: u64) -> Result<(), Error> { + pub fn set_iters_std(state: &mut S, iters: u64) -> Result<(), Error> + where + S: HasNamedMetadata, + { set_iters_by_name(state, iters, STD_TUNEABLE_MUTATIONAL_STAGE_NAME) } @@ -367,7 +376,10 @@ where } /// Set the time to mutate a single input in the std [`TuneableMutationalStage`] - pub fn set_seed_fuzz_time_std(state: &mut S, fuzz_time: Duration) -> Result<(), Error> { + pub fn set_seed_fuzz_time_std(state: &mut S, fuzz_time: Duration) -> Result<(), Error> + where + S: HasNamedMetadata, + { set_seed_fuzz_time_by_name(state, fuzz_time, STD_TUNEABLE_MUTATIONAL_STAGE_NAME) } @@ -413,7 +425,10 @@ where } /// Reset the std stage to a normal, randomized, stage - pub fn reset_std(state: &mut S) -> Result<(), Error> { + pub fn reset_std(state: &mut S) -> Result<(), Error> + where + S: HasNamedMetadata, + { reset_by_name(state, STD_TUNEABLE_MUTATIONAL_STAGE_NAME) } diff --git a/crates/libafl/src/stages/unicode.rs b/crates/libafl/src/stages/unicode.rs index a4aa9051107..475348632a3 100644 --- a/crates/libafl/src/stages/unicode.rs +++ b/crates/libafl/src/stages/unicode.rs @@ -9,6 +9,7 @@ use serde::{Deserialize, Serialize}; use crate::{ HasMetadata, + corpus::IsTestcaseMetadataCell, inputs::{BytesInput, HasTargetBytes}, stages::{Restartable, Stage}, state::{HasCorpus, HasCurrentTestcase}, @@ -93,16 +94,18 @@ impl UnicodeIdentificationStage { S: HasCurrentTestcase, I: HasTargetBytes, { - let mut tc = state.current_testcase_mut()?; + let tc = state.current_testcase()?; + let input = tc.input(); + if tc.has_metadata::() { return Ok(()); // skip recompute } - let input = tc.load_input(state.corpus())?; - let bytes = input.target_bytes(); let metadata = extract_metadata(&bytes); - tc.add_metadata(metadata); + + let mut md = tc.testcase_metadata_mut(); + md.add_metadata(metadata); Ok(()) } diff --git a/crates/libafl/src/state/mod.rs b/crates/libafl/src/state/mod.rs index 28dcf97ebe4..6c2d2e7b459 100644 --- a/crates/libafl/src/state/mod.rs +++ b/crates/libafl/src/state/mod.rs @@ -2,13 +2,7 @@ #[cfg(feature = "std")] use alloc::vec::Vec; -use core::{ - borrow::BorrowMut, - cell::{Ref, RefMut}, - fmt::Debug, - marker::PhantomData, - time::Duration, -}; +use core::{fmt::Debug, marker::PhantomData, time::Duration}; #[cfg(feature = "std")] use std::{ fs, @@ -18,14 +12,12 @@ use std::{ #[cfg(feature = "std")] use libafl_bolts::core_affinity::{CoreId, Cores}; use libafl_bolts::{ + current_time, rands::{Rand, StdRand}, serdeany::{NamedSerdeAnyMap, SerdeAnyMap}, }; use serde::{Deserialize, Serialize, de::DeserializeOwned}; -mod stack; -pub use stack::StageStack; - #[cfg(feature = "introspection")] use crate::monitors::stats::ClientPerfStats; use crate::{ @@ -38,6 +30,10 @@ use crate::{ inputs::{Input, NopInput}, stages::StageId, }; + +mod stack; +pub use stack::StageStack; + /// The maximum size of a testcase pub const DEFAULT_MAX_SIZE: usize = 1_048_576; @@ -48,6 +44,7 @@ pub trait HasCorpus { /// The testcase corpus fn corpus(&self) -> &Self::Corpus; + /// The testcase corpus (mutable) fn corpus_mut(&mut self) -> &mut Self::Corpus; } @@ -156,6 +153,14 @@ pub trait HasStartTime { /// The starting time (mutable) fn start_time_mut(&mut self) -> &mut Duration; + + /// Get the time elapsed since start. + fn time_since_start(&self) -> Duration { + let start_time = self.start_time(); + let current_time = current_time(); + + current_time - *start_time + } } /// Trait for the last report time, the last time this node reported progress @@ -292,13 +297,12 @@ where C: Corpus, { /// To get the testcase - fn testcase(&self, id: CorpusId) -> Result>, Error> { - Ok(self.corpus().get(id)?.borrow()) - } - - /// To get mutable testcase - fn testcase_mut(&self, id: CorpusId) -> Result>, Error> { - Ok(self.corpus().get(id)?.borrow_mut()) + fn testcase( + &self, + id: CorpusId, + ) -> Result>::TestcaseMetadataCell>, Error> { + let tc = self.corpus().get(id)?; + Ok(tc) } } @@ -452,14 +456,9 @@ pub trait HasCurrentTestcase: HasCorpus { /// Gets the current [`Testcase`] we are fuzzing /// /// Will return [`Error::key_not_found`] if no `corpus_id` is currently set. - fn current_testcase(&self) -> Result>, Error>; - //fn current_testcase(&self) -> Result<&Testcase, Error>; - - /// Gets the current [`Testcase`] we are fuzzing (mut) - /// - /// Will return [`Error::key_not_found`] if no `corpus_id` is currently set. - fn current_testcase_mut(&self) -> Result>, Error>; - //fn current_testcase_mut(&self) -> Result<&mut Testcase, Error>; + fn current_testcase( + &self, + ) -> Result>::TestcaseMetadataCell>, Error>; /// Gets a cloned representation of the current [`Testcase`]. /// @@ -476,29 +475,21 @@ where T: HasCorpus + HasCurrentCorpusId, I: Clone, { - fn current_testcase(&self) -> Result>, Error> { + fn current_testcase( + &self, + ) -> Result>::TestcaseMetadataCell>, Error> { let Some(corpus_id) = self.current_corpus_id()? else { return Err(Error::key_not_found( "We are not currently processing a testcase", )); }; - Ok(self.corpus().get(corpus_id)?.borrow()) - } - - fn current_testcase_mut(&self) -> Result>, Error> { - let Some(corpus_id) = self.current_corpus_id()? else { - return Err(Error::illegal_state( - "We are not currently processing a testcase", - )); - }; - - Ok(self.corpus().get(corpus_id)?.borrow_mut()) + self.corpus().get(corpus_id) } fn current_input_cloned(&self) -> Result { - let mut testcase = self.current_testcase_mut()?; - Ok(testcase.borrow_mut().load_input(self.corpus())?.clone()) + let testcase = self.current_testcase()?; + Ok(testcase.cloned_input()) } } @@ -1140,7 +1131,7 @@ where rand, executions: 0, imported: 0, - start_time: libafl_bolts::current_time(), + start_time: current_time(), metadata: SerdeAnyMap::default(), named_metadata: NamedSerdeAnyMap::default(), corpus, @@ -1154,7 +1145,7 @@ where #[cfg(feature = "std")] dont_reenter: None, last_report_time: None, - last_found_time: libafl_bolts::current_time(), + last_found_time: current_time(), corpus_id: None, stage_stack: StageStack::default(), phantom: PhantomData, @@ -1231,7 +1222,10 @@ impl HasMaxSize for NopState { } } -impl HasCorpus for NopState { +impl HasCorpus for NopState +where + I: Input, +{ type Corpus = InMemoryCorpus; fn corpus(&self) -> &Self::Corpus { diff --git a/crates/libafl_bolts/src/tuples.rs b/crates/libafl_bolts/src/tuples.rs index 02b9170b18d..0a7cffa7f22 100644 --- a/crates/libafl_bolts/src/tuples.rs +++ b/crates/libafl_bolts/src/tuples.rs @@ -1299,11 +1299,7 @@ mod test { let (tuple, _handles) = get_tuple(); #[expect(clippy::let_unit_value)] let recovered = tuple.get_all(tuple_list!()); - #[expect(clippy::unit_cmp)] - // needs its own scope to make the clippy expect work - { - assert_eq!(recovered, ()); - } + assert_eq!(recovered, ()); } #[test] @@ -1346,11 +1342,7 @@ mod test { let mut tuple = get_tuple().0; #[expect(clippy::let_unit_value)] let recovered = tuple.get_all_mut(tuple_list!()); - #[expect(clippy::unit_cmp)] - // needs its own scope to make the clippy expect work - { - assert_eq!(recovered, tuple_list!()); - } + assert_eq!(recovered, tuple_list!()); } #[test] diff --git a/crates/libafl_frida/src/asan/errors.rs b/crates/libafl_frida/src/asan/errors.rs index fc34941ed4e..3c86c572e20 100644 --- a/crates/libafl_frida/src/asan/errors.rs +++ b/crates/libafl_frida/src/asan/errors.rs @@ -13,7 +13,7 @@ use frida_gum::interceptor::Interceptor; use frida_gum::{Gum, Process}; use libafl::{ Error, HasMetadata, - corpus::Testcase, + corpus::TestcaseMetadata, executors::ExitKind, feedbacks::{Feedback, StateInitializer}, observers::Observer, @@ -710,10 +710,11 @@ where _state: &mut S, _manager: &mut EM, _observers: &OT, - testcase: &mut Testcase, + _input: &I, + md: &mut TestcaseMetadata, ) -> Result<(), Error> { if let Some(errors) = &self.errors { - testcase.add_metadata(errors.clone()); + md.add_metadata(errors.clone()); } Ok(()) diff --git a/crates/libafl_frida/src/lib.rs b/crates/libafl_frida/src/lib.rs index 629183c58ef..6a550863a24 100644 --- a/crates/libafl_frida/src/lib.rs +++ b/crates/libafl_frida/src/lib.rs @@ -350,7 +350,7 @@ mod tests { use frida_gum::Gum; use libafl::{ Fuzzer, StdFuzzer, - corpus::{Corpus, InMemoryCorpus, Testcase}, + corpus::{Corpus, InMemoryCorpus}, events::NopEventManager, executors::{ExitKind, InProcessExecutor}, feedback_and_fast, feedback_or_fast, @@ -463,8 +463,7 @@ mod tests { let mut corpus = InMemoryCorpus::::new(); //TODO - make sure we use the right one - let testcase = Testcase::new(vec![0; 4].into()); - corpus.add(testcase).unwrap(); + corpus.add(vec![0; 4].into()).unwrap(); let rand = StdRand::with_seed(0); diff --git a/crates/libafl_libfuzzer/runtime/Cargo.toml.template b/crates/libafl_libfuzzer/runtime/Cargo.toml.template index 09f8ff2d52d..f201cdd3119 100644 --- a/crates/libafl_libfuzzer/runtime/Cargo.toml.template +++ b/crates/libafl_libfuzzer/runtime/Cargo.toml.template @@ -14,7 +14,7 @@ track_hit_feedbacks = [ ] tui_monitor = ["libafl/tui_monitor"] -[target.'cfg(not(windows))'.features] +[target.'cfg(not(target_os = "windows"))'.features] ## Enable the `fork` feature on non-windows platforms default = ["fork", "tui_monitor"] diff --git a/crates/libafl_libfuzzer/runtime/src/feedbacks.rs b/crates/libafl_libfuzzer/runtime/src/feedbacks.rs index 130a29bfcb4..5612ca3adaa 100644 --- a/crates/libafl_libfuzzer/runtime/src/feedbacks.rs +++ b/crates/libafl_libfuzzer/runtime/src/feedbacks.rs @@ -140,7 +140,8 @@ where _state: &mut S, _manager: &mut EM, _observers: &OT, - testcase: &mut Testcase, + _input: &I, + md: &mut TestcaseMetadata, ) -> Result<(), Error> { match self.exit_kind { ExitKind::Crash | ExitKind::Oom if OomFeedback::oomed() => { diff --git a/crates/libafl_libfuzzer/runtime/src/merge.rs b/crates/libafl_libfuzzer/runtime/src/merge.rs index a042d4d6333..4808bc5b03e 100644 --- a/crates/libafl_libfuzzer/runtime/src/merge.rs +++ b/crates/libafl_libfuzzer/runtime/src/merge.rs @@ -204,14 +204,12 @@ pub fn merge( } for id in fuzzer.scheduler().removable() { - let testcase = state.corpus_mut().remove(id)?; - fuzzer - .scheduler_mut() - .on_remove(&mut state, id, &Some(testcase))?; + state.corpus_mut().disable(id)?; + fuzzer.scheduler_mut().on_remove(&mut state, id)?; } for id in fuzzer.scheduler().current().clone() { - let mut testcase = state.corpus_mut().get(id)?.borrow_mut(); + let mut testcase = state.corpus().get(id)?; let file_path = testcase .file_path_mut() .as_mut() @@ -228,10 +226,8 @@ pub fn merge( new_file_path.push(base); if new_file_path.exists() { drop(testcase); - let testcase = state.corpus_mut().remove(id)?; - fuzzer - .scheduler_mut() - .on_remove(&mut state, id, &Some(testcase))?; + state.corpus_mut().remove(id)?; + fuzzer.scheduler_mut().on_remove(&mut state, id)?; } else { // False-positive: file_path is used just below rename(&file_path, &new_file_path)?; diff --git a/crates/libafl_libfuzzer/runtime/src/schedulers.rs b/crates/libafl_libfuzzer/runtime/src/schedulers.rs index ad8543eff17..2a09928a9bf 100644 --- a/crates/libafl_libfuzzer/runtime/src/schedulers.rs +++ b/crates/libafl_libfuzzer/runtime/src/schedulers.rs @@ -22,12 +22,7 @@ where I: Input, S: HasCorpus, { - fn on_remove( - &mut self, - _state: &mut S, - id: CorpusId, - _testcase: &Option>, - ) -> Result<(), Error> { + fn on_remove(&mut self, _state: &mut S, id: CorpusId) -> Result<(), Error> { self.all.remove(&id); Ok(()) } diff --git a/fuzzers/baby/tutorial/src/metadata.rs b/fuzzers/baby/tutorial/src/metadata.rs index effa8d96960..ef45a07f50d 100644 --- a/fuzzers/baby/tutorial/src/metadata.rs +++ b/fuzzers/baby/tutorial/src/metadata.rs @@ -21,9 +21,12 @@ pub struct PacketLenTestcasePenalty {} impl TestcasePenalty for PacketLenTestcasePenalty where - S: HasMetadata, + S: HasCorpus + HasMetadata, { - fn compute(_state: &S, entry: &mut Testcase) -> Result { + fn compute( + _state: &S, + entry: &mut Testcase>::TestcaseMetadataRefMut<'_>>, + ) -> Result { Ok(entry .metadata_map() .get::() @@ -61,11 +64,10 @@ impl Feedback for PacketLenFeedback { _state: &mut S, _manager: &mut EM, _observers: &OT, - testcase: &mut Testcase, + _input: &PacketData, + md: &mut TestcaseMetadata, ) -> Result<(), Error> { - testcase - .metadata_map_mut() - .insert(PacketLenMetadata { length: self.len }); + md.insert(PacketLenMetadata { length: self.len }); Ok(()) } } diff --git a/fuzzers/forkserver/libafl-fuzz/src/feedback/filepath.rs b/fuzzers/forkserver/libafl-fuzz/src/feedback/filepath.rs index 6f6b8c499a0..bc21fce4991 100644 --- a/fuzzers/forkserver/libafl-fuzz/src/feedback/filepath.rs +++ b/fuzzers/forkserver/libafl-fuzz/src/feedback/filepath.rs @@ -90,7 +90,8 @@ where state: &mut S, _manager: &mut EM, _observers: &OT, - testcase: &mut Testcase, + _input: &I, + md: &mut TestcaseMetadata, ) -> Result<(), Error> { (self.func)(state, testcase, &self.out_dir)?; Ok(()) diff --git a/fuzzers/forkserver/libafl-fuzz/src/feedback/persistent_record.rs b/fuzzers/forkserver/libafl-fuzz/src/feedback/persistent_record.rs index 13d6ff34128..a6b366c6e34 100644 --- a/fuzzers/forkserver/libafl-fuzz/src/feedback/persistent_record.rs +++ b/fuzzers/forkserver/libafl-fuzz/src/feedback/persistent_record.rs @@ -78,7 +78,8 @@ where state: &mut S, _manager: &mut EM, _observers: &OT, - testcase: &mut Testcase, + _input: &I, + _md: &mut TestcaseMetadata, ) -> Result<(), Error> { if self.should_run() { let file_path = testcase diff --git a/fuzzers/forkserver/libafl-fuzz/src/feedback/seed.rs b/fuzzers/forkserver/libafl-fuzz/src/feedback/seed.rs index 89aec0b462e..8a145986796 100644 --- a/fuzzers/forkserver/libafl-fuzz/src/feedback/seed.rs +++ b/fuzzers/forkserver/libafl-fuzz/src/feedback/seed.rs @@ -93,7 +93,8 @@ where state: &mut S, manager: &mut EM, observers: &OT, - testcase: &mut Testcase, + _input: &I, + md: &mut TestcaseMetadata, ) -> Result<(), Error> { self.inner .append_metadata(state, manager, observers, testcase)?; diff --git a/fuzzers/forkserver/libafl-fuzz/src/scheduler.rs b/fuzzers/forkserver/libafl-fuzz/src/scheduler.rs index bb84de69372..5933973f89e 100644 --- a/fuzzers/forkserver/libafl-fuzz/src/scheduler.rs +++ b/fuzzers/forkserver/libafl-fuzz/src/scheduler.rs @@ -20,15 +20,10 @@ where W: Scheduler + RemovableScheduler, S: HasTestcase, { - fn on_remove( - &mut self, - state: &mut S, - id: CorpusId, - testcase: &Option>, - ) -> Result<(), Error> { + fn on_remove(&mut self, state: &mut S, id: CorpusId) -> Result<(), Error> { match self { - Self::Queue(queue, _) => queue.on_remove(state, id, testcase), - Self::Weighted(weighted, _) => weighted.on_remove(state, id, testcase), + Self::Queue(queue, _) => queue.on_remove(state, id), + Self::Weighted(weighted, _) => weighted.on_remove(state, id), } }