From b7f5fc0366d3556ead964abd074d623d2e9bd926 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 9 Apr 2025 01:27:01 +0200 Subject: [PATCH] perf: switch md5 to xxhash --- Cargo.toml | 1 - crates/artifacts/solc/Cargo.toml | 5 +---- crates/artifacts/solc/src/sources.rs | 2 +- crates/compilers/Cargo.toml | 3 +-- crates/compilers/src/buildinfo.rs | 21 +++++------------ crates/compilers/src/compile/output/mod.rs | 3 +-- crates/core/Cargo.toml | 6 +++++ crates/core/src/utils/mod.rs | 26 ++++++++++++++++++++++ 8 files changed, 41 insertions(+), 26 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 808165fd..28571230 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,7 +46,6 @@ alloy-json-abi = { version = "0.8", features = ["serde_json"] } alloy-primitives = { version = "0.8", features = ["serde", "rand"] } cfg-if = "1.0" dunce = "1.0" -md-5 = "0.10" memmap2 = "0.9" path-slash = "0.2" rayon = "1.8" diff --git a/crates/artifacts/solc/Cargo.toml b/crates/artifacts/solc/Cargo.toml index 78533412..d4e1b954 100644 --- a/crates/artifacts/solc/Cargo.toml +++ b/crates/artifacts/solc/Cargo.toml @@ -31,9 +31,6 @@ yansi.workspace = true tokio = { workspace = true, optional = true, features = ["fs"] } futures-util = { workspace = true, optional = true } -# checksum -md-5 = { workspace = true, optional = true } - # walkdir walkdir = { workspace = true, optional = true } @@ -50,6 +47,6 @@ foundry-compilers-core = { workspace = true, features = ["test-utils"] } [features] async = ["dep:tokio", "dep:futures-util"] -checksum = ["dep:md-5"] +checksum = ["foundry-compilers-core/hasher"] walkdir = ["dep:walkdir", "foundry-compilers-core/walkdir"] rayon = ["dep:rayon"] diff --git a/crates/artifacts/solc/src/sources.rs b/crates/artifacts/solc/src/sources.rs index c9ef1fe6..b7e103d2 100644 --- a/crates/artifacts/solc/src/sources.rs +++ b/crates/artifacts/solc/src/sources.rs @@ -204,7 +204,7 @@ impl Source { /// Generate a non-cryptographically 
secure checksum of the given source. #[cfg(feature = "checksum")] pub fn content_hash_of(src: &str) -> String { - alloy_primitives::hex::encode(<md5::Md5 as md5::Digest>::digest(src)) + foundry_compilers_core::utils::unique_hash(src) } } diff --git a/crates/compilers/Cargo.toml b/crates/compilers/Cargo.toml index 7034d649..73808a04 100644 --- a/crates/compilers/Cargo.toml +++ b/crates/compilers/Cargo.toml @@ -20,7 +20,7 @@ foundry-compilers-artifacts = { workspace = true, features = [ "walkdir", "rayon", ] } -foundry-compilers-core = { workspace = true, features = ["regex"] } +foundry-compilers-core = { workspace = true, features = ["hasher", "regex"] } serde.workspace = true semver.workspace = true alloy-primitives.workspace = true @@ -28,7 +28,6 @@ serde_json.workspace = true tracing.workspace = true alloy-json-abi.workspace = true rayon.workspace = true -md-5.workspace = true thiserror.workspace = true path-slash.workspace = true yansi.workspace = true diff --git a/crates/compilers/src/buildinfo.rs b/crates/compilers/src/buildinfo.rs index 0d6a82f5..cce15919 100644 --- a/crates/compilers/src/buildinfo.rs +++ b/crates/compilers/src/buildinfo.rs @@ -3,9 +3,7 @@ use crate::compilers::{ CompilationError, CompilerContract, CompilerInput, CompilerOutput, Language, }; -use alloy_primitives::hex; use foundry_compilers_core::{error::Result, utils}; -use md5::Digest; use semver::Version; use serde::{de::DeserializeOwned, Deserialize, Serialize}; use std::{ @@ -97,22 +95,13 @@ impl RawBuildInfo { let version = input.version().clone(); let build_context = BuildContext::new(input, output)?; - let mut hasher = md5::Md5::new(); - - hasher.update(ETHERS_FORMAT_VERSION); - let solc_short = format!("{}.{}.{}", version.major, version.minor, version.patch); - hasher.update(&solc_short); - hasher.update(version.to_string()); - let input = serde_json::to_value(input)?; - hasher.update(&serde_json::to_string(&input)?); - - // create the hash for `{_format,solcVersion,solcLongVersion,input}` - // N.B. 
this is not exactly the same as hashing the json representation of these values but - // the must efficient one - let result = hasher.finalize(); - let id = hex::encode(result); + let id = utils::unique_hash_many([ + ETHERS_FORMAT_VERSION, + &version.to_string(), + &serde_json::to_string(&input)?, + ]); let mut build_info = BTreeMap::new(); diff --git a/crates/compilers/src/compile/output/mod.rs b/crates/compilers/src/compile/output/mod.rs index 6b4db352..23f27453 100644 --- a/crates/compilers/src/compile/output/mod.rs +++ b/crates/compilers/src/compile/output/mod.rs @@ -607,8 +607,7 @@ impl AggregatedCompilerOutput { /// /// There can be multiple `BuildInfo`, since we support multiple versions. /// - /// The created files have the md5 hash `{_format,solcVersion,solcLongVersion,input}` as their - /// file name + /// The created files have a unique identifier as their name. pub fn write_build_infos(&self, build_info_dir: &Path) -> Result<(), SolcError> { if self.build_infos.is_empty() { return Ok(()); diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 8cdc379b..dd377ea7 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -23,6 +23,11 @@ serde_json.workspace = true serde.workspace = true thiserror.workspace = true +# hasher +xxhash-rust = { version = "0.8", optional = true, default-features = false, features = [ + "xxh3", +] } + # regex regex = { workspace = true, optional = true } @@ -46,6 +51,7 @@ tempfile.workspace = true [features] async = ["dep:tokio"] +hasher = ["dep:xxhash-rust"] project-util = ["dep:tempfile", "dep:fs_extra"] regex = ["dep:regex"] svm-solc = ["dep:svm", "dep:tokio"] diff --git a/crates/core/src/utils/mod.rs b/crates/core/src/utils/mod.rs index 9f0f4216..09a0ad87 100644 --- a/crates/core/src/utils/mod.rs +++ b/crates/core/src/utils/mod.rs @@ -78,6 +78,32 @@ pub static SUPPORTS_BASE_PATH: Lazy<VersionReq> = pub static SUPPORTS_INCLUDE_PATH: Lazy<VersionReq> = Lazy::new(|| VersionReq::parse(">=0.8.8").unwrap()); +/// A non-cryptographic 
hash function for creating unique identifiers. +/// +/// The exact algorithm being used shouldn't matter. +// See Hardhat: https://github.com/NomicFoundation/hardhat/blob/e9ab5332a5505a6d1fe9bfbc687f5f46bdff6dd7/packages/hardhat-core/src/internal/util/hash.ts#L1-L16 +#[cfg(feature = "hasher")] +pub fn unique_hash(input: impl AsRef<[u8]>) -> String { + encode_hash(xxhash_rust::xxh3::xxh3_64(input.as_ref())) +} + +/// A non-cryptographic hash function for creating unique identifiers. +/// +/// See [`unique_hash`] for more details. +#[cfg(feature = "hasher")] +pub fn unique_hash_many(inputs: impl IntoIterator<Item = impl AsRef<[u8]>>) -> String { + let mut hasher = xxhash_rust::xxh3::Xxh3Default::new(); + for input in inputs { + hasher.update(input.as_ref()); + } + encode_hash(hasher.digest()) +} + +#[cfg(feature = "hasher")] +fn encode_hash(x: u64) -> String { + hex::encode(x.to_be_bytes()) +} + /// Move a range by a specified offset pub fn range_by_offset(range: &Range, offset: isize) -> Range { Range {