diff --git a/rust-src/concordium_base/CHANGELOG.md b/rust-src/concordium_base/CHANGELOG.md index b6524bd10..28486033f 100644 --- a/rust-src/concordium_base/CHANGELOG.md +++ b/rust-src/concordium_base/CHANGELOG.md @@ -1,3 +1,7 @@ +## Unreleased + +- Memory pre-allocation is capped to `MAX_PRE_ALLOCATED_SIZE=4096` bytes when decoding CBOR vectors and maps. + ## 10.0.0 (2026-01-09) - Introduce protocol version 10 variant `ProtocolVersion::P10`. diff --git a/rust-src/concordium_base/src/common/cbor.rs b/rust-src/concordium_base/src/common/cbor.rs index 777c92701..779937154 100644 --- a/rust-src/concordium_base/src/common/cbor.rs +++ b/rust-src/concordium_base/src/common/cbor.rs @@ -215,6 +215,7 @@ pub(crate) mod serde; pub mod value; pub use composites::*; +use core::mem::size_of; pub use decoder::*; pub use encoder::*; pub use primitives::*; @@ -229,6 +230,19 @@ use std::{ hash::Hash, }; +/// Maximum number of bytes to pre-allocate when decoding or deserializing CBOR vectors and maps. +/// This cap helps prevent excessive memory usage for large or untrusted inputs. +const MAX_PRE_ALLOCATED_SIZE: usize = 4096; + +/// Cap the allocated length to a fixed byte size (4 KiB currently) +fn cap_capacity(length: usize) -> usize { + length.min( + MAX_PRE_ALLOCATED_SIZE + .checked_div(size_of::()) + .unwrap_or(MAX_PRE_ALLOCATED_SIZE), + ) +} + /// Reexports and types for derive macros #[doc(hidden)] pub mod __private { @@ -823,7 +837,8 @@ impl CborDeserialize for Vec { Self: Sized, { let mut array_decoder = decoder.decode_array()?; - let mut vec = Vec::with_capacity(array_decoder.size().unwrap_or_default()); + let mut vec = + Vec::with_capacity(cap_capacity::(array_decoder.size().unwrap_or_default())); while let Some(element) = array_decoder.deserialize_element()? 
{ vec.push(element); } @@ -849,7 +864,9 @@ impl CborDeserialize for Has Self: Sized, { let mut map_decoder = decoder.decode_map()?; - let mut map = HashMap::with_capacity(map_decoder.size().unwrap_or_default()); + let mut map = HashMap::with_capacity(cap_capacity::<(K, V)>( + map_decoder.size().unwrap_or_default(), + )); while let Some((key, value)) = map_decoder.deserialize_entry()? { map.insert(key, value); } @@ -1539,6 +1556,19 @@ mod test { assert_eq!(bytes_decoded, vec); } + /// Test huge array size. + /// Test that we don't try to allocate memory of the size. + #[test] + fn test_vec_huge_length() { + let cbor = hex::decode("9b00ffffffffffffff0102").unwrap(); + let err = cbor_decode::>(&cbor).unwrap_err(); + assert!( + err.to_string().contains("failed to fill whole buffer"), + "message: {}", + err.to_string() + ); + } + #[test] fn test_map() { let map: HashMap = [(1, 2), (3, 4)].into_iter().collect(); @@ -1549,6 +1579,19 @@ mod test { assert_eq!(bytes_decoded, map); } + /// Test huge map size. + /// Test that we don't try to allocate memory of the size. 
+ #[test] + fn test_map_huge_length() { + let cbor = hex::decode("bb00ffffffffffffff01020304").unwrap(); + let err = cbor_decode::>(&cbor).unwrap_err(); + assert!( + err.to_string().contains("failed to fill whole buffer"), + "message: {}", + err.to_string() + ); + } + #[test] fn test_map_empty() { let map: HashMap = [].into_iter().collect(); @@ -1583,6 +1626,16 @@ mod test { assert_eq!(value_decoded, value); } + /// Test `cap_capacity` + #[test] + fn test_cap_capacity() { + assert_eq!(cap_capacity::(0), 0); + assert_eq!(cap_capacity::(100), 100); + // `u32` has size 4 as such `cap_capacity` will return: 4096 / 4 = 1024 + assert_eq!(cap_capacity::(5000), 1024); + assert_eq!(cap_capacity::<()>(10), 10); + } + /// Test that `cbor_decode` fails if there is remaining data #[test] fn test_remaining_data() { diff --git a/rust-src/concordium_base/src/common/cbor/decoder.rs b/rust-src/concordium_base/src/common/cbor/decoder.rs index 3347c3c47..998bf9c67 100644 --- a/rust-src/concordium_base/src/common/cbor/decoder.rs +++ b/rust-src/concordium_base/src/common/cbor/decoder.rs @@ -1,6 +1,7 @@ use crate::common::cbor::{ - CborArrayDecoder, CborDecoder, CborDeserialize, CborMapDecoder, CborSerializationError, + self, CborArrayDecoder, CborDecoder, CborDeserialize, CborMapDecoder, CborSerializationError, CborSerializationResult, DataItemHeader, DataItemType, SerializationOptions, + MAX_PRE_ALLOCATED_SIZE, }; use anyhow::anyhow; use ciborium_ll::Header; @@ -111,7 +112,7 @@ where } }; - let bytes = Vec::with_capacity(size.unwrap_or_default()); + let bytes = Vec::with_capacity(cbor::cap_capacity::(size.unwrap_or_default())); let mut cursor = Cursor::new(bytes); self.decode_bytes_impl(&mut cursor, size)?; Ok(cursor.into_inner()) @@ -128,7 +129,7 @@ where } }; - let mut bytes = Vec::with_capacity(size.unwrap_or_default()); + let mut bytes = Vec::with_capacity(cbor::cap_capacity::(size.unwrap_or_default())); self.decode_text_impl(&mut bytes, size)?; Ok(bytes) } @@ -221,12 +222,14 @@ 
where } trait CursorExt { - /// Advance the position of the cursor by `len`, or as many positions - /// as possible, and return the slice covering the advanced positions. + /// Request to advance the position of the cursor by `len`, + /// and return the slice covering the advanced positions. /// Cursors backed by dynamically sized collections like `Vec` - /// will append to the collection as needed and will always advance - /// the requested `len`. Cursors that cannot append will advance as far - /// as possible only. + /// will always be able to advance but may advance less than the requested `len` + /// (to avoid large allocations in one go). + /// Cursors backed by a data structure that cannot be extended will advance as far + /// as possible only. If the cursor cannot advance + /// any further, an empty slice is returned. fn advance(&mut self, len: usize) -> &mut [u8]; } @@ -246,6 +249,8 @@ fn advance_vec> + AsMut>>( cursor: &mut Cursor, len: usize, ) -> &mut [u8] { + // cap pre-allocation + let len = len.min(MAX_PRE_ALLOCATED_SIZE); let old_position = cursor.position() as usize; let new_position = old_position + len; let old_len = cursor.get_ref().as_ref().len(); @@ -286,16 +291,17 @@ impl Decoder { { let mut segments = self.inner.bytes(size); while let Some(mut segment) = segments.pull()? 
{ - let left = segment.left(); - if left == 0 { - continue; - } - let advanced = dest.advance(left); - if advanced.len() != left { - return Err(anyhow!("fixed length byte string destination too short").into()); + loop { + let left = segment.left(); + if left == 0 { + break; + } + let advanced = dest.advance(left); + if advanced.is_empty() { + return Err(anyhow!("fixed size deserialization type too short").into()); + } + segment.pull(advanced)?; } - let read = segment.pull(advanced)?; - debug_assert_eq!(read.map(|bytes| bytes.len()), Some(left)); } Ok(()) @@ -313,15 +319,16 @@ impl Decoder { let mut dest = Cursor::new(dest); let mut segments = self.inner.text(size); while let Some(mut segment) = segments.pull()? { - let left = segment.left(); - if left == 0 { - continue; - } - let advanced = dest.advance(left); - debug_assert_eq!(advanced.len(), left); - segment.pull(advanced)?; - if segment.left() != 0 { - return Err(anyhow!("invalid UTF-8 in byte string").into()); + loop { + let left = segment.left(); + if left == 0 { + break; + } + let advanced = dest.advance(left); + if advanced.is_empty() { + return Err(anyhow!("fixed size deserialization type too short").into()); + } + segment.pull(advanced)?; } } @@ -623,7 +630,7 @@ mod test { assert!( error .to_string() - .contains("byte string destination too short"), + .contains("fixed size deserialization type too short"), "message: {}", error.to_string() ); @@ -701,6 +708,30 @@ mod test { ); } + /// Test byte string is longer than CBOR content and of huge size. Test + /// that we don't try to allocate memory of the length. 
+ #[test] + fn test_bytes_length_invalid_huge() { + let cbor = hex::decode("5b00ffffffffffffff0102030405").unwrap(); + let mut decoder = Decoder::new(cbor.as_slice(), SerializationOptions::default()); + let err = decoder.decode_bytes().unwrap_err(); + assert!( + err.to_string().contains("failed to fill whole buffer"), + "message: {}", + err.to_string() + ); + } + + /// Test byte string longer than allocation capacity cap. + #[test] + fn test_bytes_length_above_allocation_capacity() { + let mut cbor = hex::decode("5a0000ffff").unwrap(); + cbor.extend(iter::repeat_n(0, 0xffff)); + let mut decoder = Decoder::new(cbor.as_slice(), SerializationOptions::default()); + let bytes = decoder.decode_bytes().unwrap(); + assert_eq!(bytes.len(), 0xffff); + } + /// Test text string is longer than CBOR content #[test] fn test_text_length_invalid() { @@ -714,6 +745,30 @@ mod test { ); } + /// Test text string is longer than CBOR content. Test + /// that we don't try to allocate memory of the length. + #[test] + fn test_text_length_invalid_huge() { + let cbor = hex::decode("7b00ffffffffffffff61626364").unwrap(); + let mut decoder = Decoder::new(cbor.as_slice(), SerializationOptions::default()); + let err = decoder.decode_text().unwrap_err(); + assert!( + err.to_string().contains("failed to fill whole buffer"), + "message: {}", + err.to_string() + ); + } + + /// Test text string longer than allocation capacity cap. 
+ #[test] + fn test_text_length_above_allocation_capacity() { + let mut cbor = hex::decode("7a0000ffff").unwrap(); + cbor.extend(iter::repeat_n(0, 0xffff)); + let mut decoder = Decoder::new(cbor.as_slice(), SerializationOptions::default()); + let bytes = decoder.decode_text().unwrap(); + assert_eq!(bytes.len(), 0xffff); + } + /// Test decode UTF-8 two byte code point c2bd #[test] fn test_text_two_byte_code_point() { @@ -730,7 +785,7 @@ mod test { let mut decoder = Decoder::new(cbor.as_slice(), SerializationOptions::default()); let error = decoder.decode_text().unwrap_err(); assert!( - error.to_string().contains("invalid UTF-8"), + error.to_string().contains("CBOR syntax error"), "message: {}", error.to_string() ); @@ -743,7 +798,7 @@ mod test { let mut decoder = Decoder::new(cbor.as_slice(), SerializationOptions::default()); let error = decoder.decode_text().unwrap_err(); assert!( - error.to_string().contains("invalid UTF-8"), + error.to_string().contains("CBOR syntax error"), "message: {}", error.to_string() ); diff --git a/rust-src/concordium_base/src/common/cbor/primitives.rs b/rust-src/concordium_base/src/common/cbor/primitives.rs index 6eb4b15ef..e8a6645b6 100644 --- a/rust-src/concordium_base/src/common/cbor/primitives.rs +++ b/rust-src/concordium_base/src/common/cbor/primitives.rs @@ -759,7 +759,7 @@ mod test { let err = cbor_decode::<[u8; 4]>(&cbor).unwrap_err().to_string(); assert!( - err.contains("fixed length byte string destination too short"), + err.contains("fixed size deserialization type too short"), "err: {}", err ); diff --git a/rust-src/concordium_base/src/common/cbor/value.rs b/rust-src/concordium_base/src/common/cbor/value.rs index ed476e33d..c5955caea 100644 --- a/rust-src/concordium_base/src/common/cbor/value.rs +++ b/rust-src/concordium_base/src/common/cbor/value.rs @@ -1,5 +1,5 @@ use crate::common::cbor::{ - Bytes, CborArrayDecoder, CborArrayEncoder, CborDecoder, CborDeserialize, CborEncoder, + self, Bytes, CborArrayDecoder, 
CborArrayEncoder, CborDecoder, CborDeserialize, CborEncoder, CborMapDecoder, CborMapEncoder, CborSerializationResult, CborSerialize, DataItemHeader, }; use anyhow::Context; @@ -91,7 +91,9 @@ impl CborDeserialize for Value { ), DataItemHeader::Array(_) => { let mut array_decoder = decoder.decode_array()?; - let mut vec = Vec::with_capacity(array_decoder.size().unwrap_or_default()); + let mut vec = Vec::with_capacity(cbor::cap_capacity::( + array_decoder.size().unwrap_or_default(), + )); while let Some(element) = array_decoder.deserialize_element()? { vec.push(element); } @@ -99,7 +101,10 @@ impl CborDeserialize for Value { } DataItemHeader::Map(_) => { let mut map_decoder = decoder.decode_map()?; - let mut vec = Vec::with_capacity(map_decoder.size().unwrap_or_default()); + let mut vec = Vec::with_capacity(cbor::cap_capacity::( + map_decoder.size().unwrap_or_default(), + )); + while let Some(entry) = map_decoder.deserialize_entry()? { vec.push(entry); } @@ -247,6 +252,29 @@ mod test { assert_eq!(value_decoded, value); } + /// Test huge array size. + /// Test that we don't try to allocate memory of the size. + #[test] + fn test_array_huge_length() { + let cbor = hex::decode("9b00ffffffffffffff0103").unwrap(); + let err = cbor_decode::(&cbor).unwrap_err(); + assert!( + err.to_string().contains("failed to fill whole buffer"), + "message: {}", + err.to_string() + ); + } + + /// Test large array size. + /// Test vector longer than allocation capacity cap. + #[test] + fn test_array_large_length() { + let value = Value::Array(vec![Value::Positive(1u64); 10000]); + let cbor = cbor_encode(&value).unwrap(); + let value_decoded: Value = cbor_decode(&cbor).unwrap(); + assert_eq!(value_decoded, value); + } + #[test] fn test_map() { let value = Value::Map(vec![ @@ -259,4 +287,27 @@ mod test { let value_decoded: Value = cbor_decode(&cbor).unwrap(); assert_eq!(value_decoded, value); } + + /// Test huge map size. + /// Test that we don't try to allocate memory of the size. 
+ #[test] + fn test_map_huge_length() { + let cbor = hex::decode("bb00ffffffffffffff01030204").unwrap(); + let err = cbor_decode::(&cbor).unwrap_err(); + assert!( + err.to_string().contains("failed to fill whole buffer"), + "message: {}", + err.to_string() + ); + } + + /// Test large map size. + /// Test vector longer than allocation capacity cap. + #[test] + fn test_map_large_length() { + let value = Value::Map(vec![(Value::Positive(1), Value::Positive(3)); 10000]); + let cbor = cbor_encode(&value).unwrap(); + let value_decoded: Value = cbor_decode(&cbor).unwrap(); + assert_eq!(value_decoded, value); + } }