Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions rust-src/concordium_base/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## Unreleased

- Memory pre-allocation is capped at `MAX_PRE_ALLOCATED_SIZE=4096` bytes when CBOR decoding vectors, maps, byte strings and text strings.

## 10.0.0 (2026-01-09)

- Introduce protocol version 10 variant `ProtocolVersion::P10`.
Expand Down
55 changes: 53 additions & 2 deletions rust-src/concordium_base/src/common/cbor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ pub(crate) mod serde;
pub mod value;

pub use composites::*;
use core::mem::size_of;
pub use decoder::*;
pub use encoder::*;
pub use primitives::*;
Expand All @@ -229,6 +230,17 @@ use std::{
hash::Hash,
};

/// Maximum number of bytes that is pre-allocated in one go while decoding.
const MAX_PRE_ALLOCATED_SIZE: usize = 4096;

/// Clamp a requested element count so that pre-allocating that many values of
/// type `T` stays within [`MAX_PRE_ALLOCATED_SIZE`] bytes (4 KiB currently).
///
/// Returns `length` unchanged when it already fits. Zero-sized types occupy no
/// memory, so for them the count itself is clamped to `MAX_PRE_ALLOCATED_SIZE`.
/// For a `T` larger than `MAX_PRE_ALLOCATED_SIZE` bytes the result is `0`.
fn cap_capacity<T>(length: usize) -> usize {
    let max_elements = match size_of::<T>() {
        // Avoid dividing by zero for zero-sized types.
        0 => MAX_PRE_ALLOCATED_SIZE,
        element_size => MAX_PRE_ALLOCATED_SIZE / element_size,
    };
    length.min(max_elements)
}

/// Reexports and types for derive macros
#[doc(hidden)]
pub mod __private {
Expand Down Expand Up @@ -823,7 +835,8 @@ impl<T: CborDeserialize> CborDeserialize for Vec<T> {
Self: Sized,
{
let mut array_decoder = decoder.decode_array()?;
let mut vec = Vec::with_capacity(array_decoder.size().unwrap_or_default());
let mut vec =
Vec::with_capacity(cap_capacity::<T>(array_decoder.size().unwrap_or_default()));
while let Some(element) = array_decoder.deserialize_element()? {
vec.push(element);
}
Expand All @@ -849,7 +862,9 @@ impl<K: CborDeserialize + Eq + Hash, V: CborDeserialize> CborDeserialize for Has
Self: Sized,
{
let mut map_decoder = decoder.decode_map()?;
let mut map = HashMap::with_capacity(map_decoder.size().unwrap_or_default());
let mut map = HashMap::with_capacity(cap_capacity::<(K, V)>(
map_decoder.size().unwrap_or_default(),
));
while let Some((key, value)) = map_decoder.deserialize_entry()? {
map.insert(key, value);
}
Expand Down Expand Up @@ -1539,6 +1554,19 @@ mod test {
assert_eq!(bytes_decoded, vec);
}

/// Test an array whose header declares a huge size while the input holds
/// only a few bytes. Decoding must fail on the truncated input instead of
/// trying to allocate memory for the declared size.
#[test]
fn test_vec_huge_length() {
    let encoded = hex::decode("9b00ffffffffffffff0102").unwrap();
    let message = cbor_decode::<Vec<u64>>(&encoded).unwrap_err().to_string();
    assert!(
        message.contains("failed to fill whole buffer"),
        "message: {}",
        message
    );
}

#[test]
fn test_map() {
let map: HashMap<u64, u64> = [(1, 2), (3, 4)].into_iter().collect();
Expand All @@ -1549,6 +1577,19 @@ mod test {
assert_eq!(bytes_decoded, map);
}

/// Test a map whose header declares a huge size while the input holds
/// only a few bytes. Decoding must fail on the truncated input instead of
/// trying to allocate memory for the declared size.
#[test]
fn test_map_huge_length() {
    let encoded = hex::decode("bb00ffffffffffffff01020304").unwrap();
    let message = cbor_decode::<HashMap<u64, u64>>(&encoded)
        .unwrap_err()
        .to_string();
    assert!(
        message.contains("failed to fill whole buffer"),
        "message: {}",
        message
    );
}

#[test]
fn test_map_empty() {
let map: HashMap<u64, u64> = [].into_iter().collect();
Expand Down Expand Up @@ -1583,6 +1624,16 @@ mod test {
assert_eq!(value_decoded, value);
}

/// Test `cap_capacity`: lengths within the cap are returned unchanged,
/// larger lengths are clamped to the number of elements that fit in the cap.
#[test]
fn test_cap_capacity() {
    assert_eq!(cap_capacity::<u32>(0), 0);
    assert_eq!(cap_capacity::<u32>(100), 100);
    // `u32` has size 4, so `cap_capacity` returns at most 4096 / 4 = 1024.
    assert_eq!(cap_capacity::<u32>(1024), 1024);
    assert_eq!(cap_capacity::<u32>(1025), 1024);
    assert_eq!(cap_capacity::<u32>(5000), 1024);
    // Zero-sized types cannot be divided by; the element count itself is capped at 4096.
    assert_eq!(cap_capacity::<()>(10), 10);
    assert_eq!(cap_capacity::<()>(5000), 4096);
    // A single element larger than the cap leaves no room for pre-allocation.
    assert_eq!(cap_capacity::<[u8; 8192]>(10), 0);
}

/// Test that `cbor_decode` fails if there is remaining data
#[test]
fn test_remaining_data() {
Expand Down
113 changes: 84 additions & 29 deletions rust-src/concordium_base/src/common/cbor/decoder.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::common::cbor::{
CborArrayDecoder, CborDecoder, CborDeserialize, CborMapDecoder, CborSerializationError,
self, CborArrayDecoder, CborDecoder, CborDeserialize, CborMapDecoder, CborSerializationError,
CborSerializationResult, DataItemHeader, DataItemType, SerializationOptions,
MAX_PRE_ALLOCATED_SIZE,
};
use anyhow::anyhow;
use ciborium_ll::Header;
Expand Down Expand Up @@ -111,7 +112,7 @@ where
}
};

let bytes = Vec::with_capacity(size.unwrap_or_default());
let bytes = Vec::with_capacity(cbor::cap_capacity::<u8>(size.unwrap_or_default()));
let mut cursor = Cursor::new(bytes);
self.decode_bytes_impl(&mut cursor, size)?;
Ok(cursor.into_inner())
Expand All @@ -128,7 +129,7 @@ where
}
};

let mut bytes = Vec::with_capacity(size.unwrap_or_default());
let mut bytes = Vec::with_capacity(cbor::cap_capacity::<u8>(size.unwrap_or_default()));
self.decode_text_impl(&mut bytes, size)?;
Ok(bytes)
}
Expand Down Expand Up @@ -221,12 +222,14 @@ where
}

trait CursorExt {
/// Advance the position of the cursor by `len`, or as many positions
/// as possible, and return the slice covering the advanced positions.
/// Request to advance the position of the cursor by `len`,
/// and return the slice covering the advanced positions.
/// Cursors backed by dynamically sized collections like `Vec`
/// will append to the collection as needed and will always advance
/// the requested `len`. Cursors that cannot append will advance as far
/// as possible only.
/// will always be able to advance but may advance less than the requested `len`
/// (to avoid large allocations in one go).
/// Cursors backed by a data structure that cannot be extended will advance as far
/// as possible only. If the cursor cannot advance
/// any further, an empty slice is returned.
fn advance(&mut self, len: usize) -> &mut [u8];
}

Expand All @@ -246,6 +249,8 @@ fn advance_vec<T: AsRef<Vec<u8>> + AsMut<Vec<u8>>>(
cursor: &mut Cursor<T>,
len: usize,
) -> &mut [u8] {
// cap pre allocation
let len = len.min(MAX_PRE_ALLOCATED_SIZE);
let old_position = cursor.position() as usize;
let new_position = old_position + len;
let old_len = cursor.get_ref().as_ref().len();
Expand Down Expand Up @@ -286,16 +291,17 @@ impl<R: Read> Decoder<R> {
{
let mut segments = self.inner.bytes(size);
while let Some(mut segment) = segments.pull()? {
let left = segment.left();
if left == 0 {
continue;
}
let advanced = dest.advance(left);
if advanced.len() != left {
return Err(anyhow!("fixed length byte string destination too short").into());
loop {
let left = segment.left();
if left == 0 {
break;
}
let advanced = dest.advance(left);
if advanced.is_empty() {
return Err(anyhow!("fixed size deserialization type too short").into());
}
segment.pull(advanced)?;
}
let read = segment.pull(advanced)?;
debug_assert_eq!(read.map(|bytes| bytes.len()), Some(left));
}

Ok(())
Expand All @@ -313,15 +319,16 @@ impl<R: Read> Decoder<R> {
let mut dest = Cursor::new(dest);
let mut segments = self.inner.text(size);
while let Some(mut segment) = segments.pull()? {
let left = segment.left();
if left == 0 {
continue;
}
let advanced = dest.advance(left);
debug_assert_eq!(advanced.len(), left);
segment.pull(advanced)?;
if segment.left() != 0 {
return Err(anyhow!("invalid UTF-8 in byte string").into());
loop {
let left = segment.left();
if left == 0 {
break;
}
let advanced = dest.advance(left);
if advanced.is_empty() {
return Err(anyhow!("fixed size deserialization type too short").into());
}
segment.pull(advanced)?;
}
}

Expand Down Expand Up @@ -623,7 +630,7 @@ mod test {
assert!(
error
.to_string()
.contains("byte string destination too short"),
.contains("fixed size deserialization type too short"),
"message: {}",
error.to_string()
);
Expand Down Expand Up @@ -701,6 +708,30 @@ mod test {
);
}

/// Test a byte string whose declared length is huge and exceeds the CBOR
/// content. Decoding must fail on the missing content and must not try to
/// allocate memory of the declared length.
#[test]
fn test_bytes_length_invalid_huge() {
    let encoded = hex::decode("5b00ffffffffffffff0102030405").unwrap();
    let mut cbor_decoder = Decoder::new(encoded.as_slice(), SerializationOptions::default());
    let message = cbor_decoder.decode_bytes().unwrap_err().to_string();
    assert!(
        message.contains("failed to fill whole buffer"),
        "message: {}",
        message
    );
}

/// Test that a byte string longer than the allocation capacity cap is
/// still decoded completely.
#[test]
fn test_bytes_length_above_allocation_capacity() {
    const PAYLOAD_LEN: usize = 0xffff;

    // Byte string header followed by `PAYLOAD_LEN` zero bytes of content.
    let mut encoded = hex::decode("5a0000ffff").unwrap();
    encoded.extend(iter::repeat_n(0, PAYLOAD_LEN));

    let mut cbor_decoder = Decoder::new(encoded.as_slice(), SerializationOptions::default());
    let decoded = cbor_decoder.decode_bytes().unwrap();
    assert_eq!(decoded.len(), PAYLOAD_LEN);
}

/// Test text string is longer than CBOR content
#[test]
fn test_text_length_invalid() {
Expand All @@ -714,6 +745,30 @@ mod test {
);
}

/// Test a text string whose declared length is huge and exceeds the CBOR
/// content. Decoding must fail on the missing content and must not try to
/// allocate memory of the declared length.
#[test]
fn test_text_length_invalid_huge() {
    let encoded = hex::decode("7b00ffffffffffffff61626364").unwrap();
    let mut cbor_decoder = Decoder::new(encoded.as_slice(), SerializationOptions::default());
    let message = cbor_decoder.decode_text().unwrap_err().to_string();
    assert!(
        message.contains("failed to fill whole buffer"),
        "message: {}",
        message
    );
}

/// Test that a text string longer than the allocation capacity cap is
/// still decoded completely.
#[test]
fn test_text_length_above_allocation_capacity() {
    const PAYLOAD_LEN: usize = 0xffff;

    // Text string header followed by `PAYLOAD_LEN` zero bytes of content.
    let mut encoded = hex::decode("7a0000ffff").unwrap();
    encoded.extend(iter::repeat_n(0, PAYLOAD_LEN));

    let mut cbor_decoder = Decoder::new(encoded.as_slice(), SerializationOptions::default());
    let decoded = cbor_decoder.decode_text().unwrap();
    assert_eq!(decoded.len(), PAYLOAD_LEN);
}

/// Test decode UTF-8 two byte code point c2bd
#[test]
fn test_text_two_byte_code_point() {
Expand All @@ -730,7 +785,7 @@ mod test {
let mut decoder = Decoder::new(cbor.as_slice(), SerializationOptions::default());
let error = decoder.decode_text().unwrap_err();
assert!(
error.to_string().contains("invalid UTF-8"),
error.to_string().contains("CBOR syntax error"),
"message: {}",
error.to_string()
);
Expand All @@ -743,7 +798,7 @@ mod test {
let mut decoder = Decoder::new(cbor.as_slice(), SerializationOptions::default());
let error = decoder.decode_text().unwrap_err();
assert!(
error.to_string().contains("invalid UTF-8"),
error.to_string().contains("CBOR syntax error"),
"message: {}",
error.to_string()
);
Expand Down
2 changes: 1 addition & 1 deletion rust-src/concordium_base/src/common/cbor/primitives.rs
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ mod test {

let err = cbor_decode::<[u8; 4]>(&cbor).unwrap_err().to_string();
assert!(
err.contains("fixed length byte string destination too short"),
err.contains("fixed size deserialization type too short"),
"err: {}",
err
);
Expand Down
37 changes: 34 additions & 3 deletions rust-src/concordium_base/src/common/cbor/value.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::common::cbor::{
Bytes, CborArrayDecoder, CborArrayEncoder, CborDecoder, CborDeserialize, CborEncoder,
self, Bytes, CborArrayDecoder, CborArrayEncoder, CborDecoder, CborDeserialize, CborEncoder,
CborMapDecoder, CborMapEncoder, CborSerializationResult, CborSerialize, DataItemHeader,
};
use anyhow::Context;
Expand Down Expand Up @@ -91,15 +91,20 @@ impl CborDeserialize for Value {
),
DataItemHeader::Array(_) => {
let mut array_decoder = decoder.decode_array()?;
let mut vec = Vec::with_capacity(array_decoder.size().unwrap_or_default());
let mut vec = Vec::with_capacity(cbor::cap_capacity::<Value>(
array_decoder.size().unwrap_or_default(),
));
while let Some(element) = array_decoder.deserialize_element()? {
vec.push(element);
}
Value::Array(vec)
}
DataItemHeader::Map(_) => {
let mut map_decoder = decoder.decode_map()?;
let mut vec = Vec::with_capacity(map_decoder.size().unwrap_or_default());
let mut vec = Vec::with_capacity(cbor::cap_capacity::<Value>(
map_decoder.size().unwrap_or_default(),
));

while let Some(entry) = map_decoder.deserialize_entry()? {
vec.push(entry);
}
Expand Down Expand Up @@ -247,6 +252,19 @@ mod test {
assert_eq!(value_decoded, value);
}

/// Test an array value whose header declares a huge size while the input
/// holds only a few bytes. Decoding must fail on the truncated input
/// instead of trying to allocate memory for the declared size.
#[test]
fn test_array_huge_length() {
    let encoded = hex::decode("9b00ffffffffffffff0103").unwrap();
    let message = cbor_decode::<Value>(&encoded).unwrap_err().to_string();
    assert!(
        message.contains("failed to fill whole buffer"),
        "message: {}",
        message
    );
}

#[test]
fn test_map() {
let value = Value::Map(vec![
Expand All @@ -259,4 +277,17 @@ mod test {
let value_decoded: Value = cbor_decode(&cbor).unwrap();
assert_eq!(value_decoded, value);
}

/// Test a map value whose header declares a huge size while the input
/// holds only a few bytes. Decoding must fail on the truncated input
/// instead of trying to allocate memory for the declared size.
#[test]
fn test_map_huge_length() {
    let encoded = hex::decode("bb00ffffffffffffff01030204").unwrap();
    let message = cbor_decode::<Value>(&encoded).unwrap_err().to_string();
    assert!(
        message.contains("failed to fill whole buffer"),
        "message: {}",
        message
    );
}
}