Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions rust-src/concordium_base/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## Unreleased

- Memory pre-allocation is capped at `MAX_PRE_ALLOCATED_SIZE = 4096` bytes when CBOR-decoding vectors, maps, byte strings and text strings.

## 10.0.0 (2026-01-09)

- Introduce protocol version 10 variant `ProtocolVersion::P10`.
Expand Down
57 changes: 55 additions & 2 deletions rust-src/concordium_base/src/common/cbor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ pub(crate) mod serde;
pub mod value;

pub use composites::*;
use core::mem::size_of;
pub use decoder::*;
pub use encoder::*;
pub use primitives::*;
Expand All @@ -229,6 +230,19 @@ use std::{
hash::Hash,
};

/// Upper bound, in bytes, on the memory reserved up front when decoding or
/// deserializing CBOR vectors and maps.
///
/// Keeping the initial reservation small helps prevent excessive memory usage
/// for large or untrusted inputs.
const MAX_PRE_ALLOCATED_SIZE: usize = 4096;

/// Limit a requested element count so that the initial reservation for
/// elements of type `T` stays within `MAX_PRE_ALLOCATED_SIZE` bytes
/// (currently 4 KiB).
///
/// Returns `length` unchanged when it already fits within the budget. For
/// zero-sized `T` the limit is `MAX_PRE_ALLOCATED_SIZE` elements.
fn cap_capacity<T>(length: usize) -> usize {
    let max_elements = match size_of::<T>() {
        // Zero-sized elements: dividing by the element size is not possible,
        // so the byte budget itself is used as the element limit.
        0 => MAX_PRE_ALLOCATED_SIZE,
        element_size => MAX_PRE_ALLOCATED_SIZE / element_size,
    };
    length.min(max_elements)
}

/// Reexports and types for derive macros
#[doc(hidden)]
pub mod __private {
Expand Down Expand Up @@ -823,7 +837,8 @@ impl<T: CborDeserialize> CborDeserialize for Vec<T> {
Self: Sized,
{
let mut array_decoder = decoder.decode_array()?;
let mut vec = Vec::with_capacity(array_decoder.size().unwrap_or_default());
let mut vec =
Vec::with_capacity(cap_capacity::<T>(array_decoder.size().unwrap_or_default()));
while let Some(element) = array_decoder.deserialize_element()? {
vec.push(element);
}
Expand All @@ -849,7 +864,9 @@ impl<K: CborDeserialize + Eq + Hash, V: CborDeserialize> CborDeserialize for Has
Self: Sized,
{
let mut map_decoder = decoder.decode_map()?;
let mut map = HashMap::with_capacity(map_decoder.size().unwrap_or_default());
let mut map = HashMap::with_capacity(cap_capacity::<(K, V)>(
map_decoder.size().unwrap_or_default(),
));
while let Some((key, value)) = map_decoder.deserialize_entry()? {
map.insert(key, value);
}
Expand Down Expand Up @@ -1539,6 +1556,19 @@ mod test {
assert_eq!(bytes_decoded, vec);
}

/// Decode an array whose header declares a huge element count
/// (`0x00ffffffffffffff`) but which carries only two elements.
/// Decoding must fail on the missing input rather than try to allocate
/// memory for the declared size.
#[test]
fn test_vec_huge_length() {
    let encoded = hex::decode("9b00ffffffffffffff0102").unwrap();
    let message = cbor_decode::<Vec<u64>>(&encoded)
        .unwrap_err()
        .to_string();
    assert!(
        message.contains("failed to fill whole buffer"),
        "message: {}",
        message
    );
}

#[test]
fn test_map() {
let map: HashMap<u64, u64> = [(1, 2), (3, 4)].into_iter().collect();
Expand All @@ -1549,6 +1579,19 @@ mod test {
assert_eq!(bytes_decoded, map);
}

/// Decode a map whose header declares a huge entry count
/// (`0x00ffffffffffffff`) but which carries only two entries.
/// Decoding must fail on the missing input rather than try to allocate
/// memory for the declared size.
#[test]
fn test_map_huge_length() {
    let encoded = hex::decode("bb00ffffffffffffff01020304").unwrap();
    let message = cbor_decode::<HashMap<u64, u64>>(&encoded)
        .unwrap_err()
        .to_string();
    assert!(
        message.contains("failed to fill whole buffer"),
        "message: {}",
        message
    );
}

#[test]
fn test_map_empty() {
let map: HashMap<u64, u64> = [].into_iter().collect();
Expand Down Expand Up @@ -1583,6 +1626,16 @@ mod test {
assert_eq!(value_decoded, value);
}

/// Exercise `cap_capacity` below the cap, above the cap, and for a
/// zero-sized element type.
#[test]
fn test_cap_capacity() {
    // Requests that fit within the byte budget are returned unchanged.
    for requested in [0usize, 100] {
        assert_eq!(cap_capacity::<u32>(requested), requested);
    }
    // `u32` occupies 4 bytes, so the cap is 4096 / 4 = 1024 elements.
    assert_eq!(cap_capacity::<u32>(5000), 1024);
    // Zero-sized element type: a small request passes through untouched.
    assert_eq!(cap_capacity::<()>(10), 10);
}

/// Test that `cbor_decode` fails if there is remaining data
#[test]
fn test_remaining_data() {
Expand Down
113 changes: 84 additions & 29 deletions rust-src/concordium_base/src/common/cbor/decoder.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::common::cbor::{
CborArrayDecoder, CborDecoder, CborDeserialize, CborMapDecoder, CborSerializationError,
self, CborArrayDecoder, CborDecoder, CborDeserialize, CborMapDecoder, CborSerializationError,
CborSerializationResult, DataItemHeader, DataItemType, SerializationOptions,
MAX_PRE_ALLOCATED_SIZE,
};
use anyhow::anyhow;
use ciborium_ll::Header;
Expand Down Expand Up @@ -111,7 +112,7 @@ where
}
};

let bytes = Vec::with_capacity(size.unwrap_or_default());
let bytes = Vec::with_capacity(cbor::cap_capacity::<u8>(size.unwrap_or_default()));
let mut cursor = Cursor::new(bytes);
self.decode_bytes_impl(&mut cursor, size)?;
Ok(cursor.into_inner())
Expand All @@ -128,7 +129,7 @@ where
}
};

let mut bytes = Vec::with_capacity(size.unwrap_or_default());
let mut bytes = Vec::with_capacity(cbor::cap_capacity::<u8>(size.unwrap_or_default()));
self.decode_text_impl(&mut bytes, size)?;
Ok(bytes)
}
Expand Down Expand Up @@ -221,12 +222,14 @@ where
}

trait CursorExt {
/// Request to advance the position of the cursor by `len`,
/// and return the slice covering the advanced positions.
///
/// Cursors backed by dynamically sized collections like `Vec`
/// will always be able to advance but may advance less than the requested `len`
/// (to avoid large allocations in one go).
/// Cursors backed by a data structure that cannot be extended will advance as far
/// as possible only. If the cursor cannot advance
/// any further, an empty slice is returned.
///
/// Callers that need all `len` positions should therefore call `advance`
/// repeatedly until the remainder is consumed or an empty slice comes back.
fn advance(&mut self, len: usize) -> &mut [u8];
}

Expand All @@ -246,6 +249,8 @@ fn advance_vec<T: AsRef<Vec<u8>> + AsMut<Vec<u8>>>(
cursor: &mut Cursor<T>,
len: usize,
) -> &mut [u8] {
// cap pre allocation
let len = len.min(MAX_PRE_ALLOCATED_SIZE);
let old_position = cursor.position() as usize;
let new_position = old_position + len;
let old_len = cursor.get_ref().as_ref().len();
Expand Down Expand Up @@ -286,16 +291,17 @@ impl<R: Read> Decoder<R> {
{
let mut segments = self.inner.bytes(size);
while let Some(mut segment) = segments.pull()? {
let left = segment.left();
if left == 0 {
continue;
}
let advanced = dest.advance(left);
if advanced.len() != left {
return Err(anyhow!("fixed length byte string destination too short").into());
loop {
let left = segment.left();
if left == 0 {
break;
}
let advanced = dest.advance(left);
if advanced.is_empty() {
return Err(anyhow!("fixed size deserialization type too short").into());
}
segment.pull(advanced)?;
}
let read = segment.pull(advanced)?;
debug_assert_eq!(read.map(|bytes| bytes.len()), Some(left));
}

Ok(())
Expand All @@ -313,15 +319,16 @@ impl<R: Read> Decoder<R> {
let mut dest = Cursor::new(dest);
let mut segments = self.inner.text(size);
while let Some(mut segment) = segments.pull()? {
let left = segment.left();
if left == 0 {
continue;
}
let advanced = dest.advance(left);
debug_assert_eq!(advanced.len(), left);
segment.pull(advanced)?;
if segment.left() != 0 {
return Err(anyhow!("invalid UTF-8 in byte string").into());
loop {
let left = segment.left();
if left == 0 {
break;
}
let advanced = dest.advance(left);
if advanced.is_empty() {
return Err(anyhow!("fixed size deserialization type too short").into());
}
segment.pull(advanced)?;
}
}

Expand Down Expand Up @@ -623,7 +630,7 @@ mod test {
assert!(
error
.to_string()
.contains("byte string destination too short"),
.contains("fixed size deserialization type too short"),
"message: {}",
error.to_string()
);
Expand Down Expand Up @@ -701,6 +708,30 @@ mod test {
);
}

/// Decode a byte string whose header declares a huge length
/// (`0x00ffffffffffffff`) while only five content bytes follow.
/// Decoding must fail on the missing input rather than try to allocate
/// memory for the declared length.
#[test]
fn test_bytes_length_invalid_huge() {
    let encoded = hex::decode("5b00ffffffffffffff0102030405").unwrap();
    let mut decoder = Decoder::new(encoded.as_slice(), SerializationOptions::default());
    let message = decoder.decode_bytes().unwrap_err().to_string();
    assert!(
        message.contains("failed to fill whole buffer"),
        "message: {}",
        message
    );
}

/// Decode a well-formed byte string of `0xffff` bytes, which is longer than
/// the pre-allocation cap, and check that the whole payload is returned.
#[test]
fn test_bytes_length_above_allocation_capacity() {
    const PAYLOAD_LEN: usize = 0xffff;
    // Header declaring a byte string of `PAYLOAD_LEN` bytes, followed by
    // that many zero bytes.
    let encoded: Vec<u8> = hex::decode("5a0000ffff")
        .unwrap()
        .into_iter()
        .chain(iter::repeat_n(0, PAYLOAD_LEN))
        .collect();
    let mut decoder = Decoder::new(encoded.as_slice(), SerializationOptions::default());
    let decoded = decoder.decode_bytes().unwrap();
    assert_eq!(decoded.len(), PAYLOAD_LEN);
}

/// Test text string is longer than CBOR content
#[test]
fn test_text_length_invalid() {
Expand All @@ -714,6 +745,30 @@ mod test {
);
}

/// Decode a text string whose header declares a huge length
/// (`0x00ffffffffffffff`) while only four content bytes follow.
/// Decoding must fail on the missing input rather than try to allocate
/// memory for the declared length.
#[test]
fn test_text_length_invalid_huge() {
    let encoded = hex::decode("7b00ffffffffffffff61626364").unwrap();
    let mut decoder = Decoder::new(encoded.as_slice(), SerializationOptions::default());
    let message = decoder.decode_text().unwrap_err().to_string();
    assert!(
        message.contains("failed to fill whole buffer"),
        "message: {}",
        message
    );
}

/// Decode a well-formed text string of `0xffff` bytes, which is longer than
/// the pre-allocation cap, and check that the whole payload is returned.
#[test]
fn test_text_length_above_allocation_capacity() {
    const PAYLOAD_LEN: usize = 0xffff;
    // Header declaring a text string of `PAYLOAD_LEN` bytes, followed by
    // that many zero bytes.
    let encoded: Vec<u8> = hex::decode("7a0000ffff")
        .unwrap()
        .into_iter()
        .chain(iter::repeat_n(0, PAYLOAD_LEN))
        .collect();
    let mut decoder = Decoder::new(encoded.as_slice(), SerializationOptions::default());
    let decoded = decoder.decode_text().unwrap();
    assert_eq!(decoded.len(), PAYLOAD_LEN);
}

/// Test decode UTF-8 two byte code point c2bd
#[test]
fn test_text_two_byte_code_point() {
Expand All @@ -730,7 +785,7 @@ mod test {
let mut decoder = Decoder::new(cbor.as_slice(), SerializationOptions::default());
let error = decoder.decode_text().unwrap_err();
assert!(
error.to_string().contains("invalid UTF-8"),
error.to_string().contains("CBOR syntax error"),
"message: {}",
error.to_string()
);
Expand All @@ -743,7 +798,7 @@ mod test {
let mut decoder = Decoder::new(cbor.as_slice(), SerializationOptions::default());
let error = decoder.decode_text().unwrap_err();
assert!(
error.to_string().contains("invalid UTF-8"),
error.to_string().contains("CBOR syntax error"),
"message: {}",
error.to_string()
);
Expand Down
2 changes: 1 addition & 1 deletion rust-src/concordium_base/src/common/cbor/primitives.rs
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ mod test {

let err = cbor_decode::<[u8; 4]>(&cbor).unwrap_err().to_string();
assert!(
err.contains("fixed length byte string destination too short"),
err.contains("fixed size deserialization type too short"),
"err: {}",
err
);
Expand Down
Loading