Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
253 changes: 221 additions & 32 deletions src/diff/algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,33 +232,113 @@ pub fn apply_diff_copy(original: &[u8], diffset: &DiffSet<'_>) -> Result<Vec<u8>
})
}

/// This function constructs destination by merging original with diff such that destination
/// becomes the changed version of the original.
/// Constructs destination by applying the diff to original, such that destination becomes the
/// post-diff state of the original.
///
/// Precondition:
/// - destination.len() == original.len()
pub fn merge_diff_copy(
destination: &mut [u8],
/// - destination.len() == diffset.changed_len()
/// - original.len() may differ from destination.len() to allow Solana
/// account resizing (shrink or expand).
/// Assumption:
/// - destination is assumed to be zero-initialized. That automatically holds true for freshly
/// allocated Solana account data. The function does NOT validate this assumption for performance reason.
/// Returns:
/// - Ok(&mut [u8]) where the slice contains the trailing unwritten bytes in destination and are
/// assumed to be already zero-initialized. Callers may write to those bytes or validate it.
/// Notes:
/// - Merge consists of:
/// - bytes covered by diff segments are written from diffset.
/// - unmodified regions are copied directly from original.
/// - In shrink case, extra trailing bytes from original are ignored.
/// - In expansion case, any remaining bytes beyond both the diff coverage
/// and original.len() stay unwritten and are assumed to be zero-initialized.
///
pub fn merge_diff_copy<'a>(
destination: &'a mut [u8],
original: &[u8],
diffset: &DiffSet<'_>,
) -> Result<(), ProgramError> {
if destination.len() != original.len() {
) -> Result<&'a mut [u8], ProgramError> {
if destination.len() != diffset.changed_len() {
return Err(DlpError::MergeDiffError.into());
}

let mut write_index = 0;
for item in diffset.iter() {
let (diff_segment, OffsetInData { start, end }) = item?;

if write_index < start {
if start > original.len() {
return Err(DlpError::InvalidDiff.into());
}
// copy the unchanged bytes
destination[write_index..start].copy_from_slice(&original[write_index..start]);
}

destination[start..end].copy_from_slice(diff_segment);
write_index = end;
}
if write_index < original.len() {
destination[write_index..].copy_from_slice(&original[write_index..]);
}
Ok(())

// Ensure we have overwritten all bytes in destination, otherwise "construction" of destination
// will be considered incomplete.
let num_bytes_written = match write_index.cmp(&destination.len()) {
Ordering::Equal => {
// It means the destination is fully constructed.
// Nothing to do here.

// It is possible that destination.len() <= original.len() i.e destination might have shrunk
// in which case we do not care about those bytes of original which are not part of
// destination anymore.
write_index
}
Ordering::Less => {
// destination is NOT fully constructed yet. Few bytes in the destination are still unwritten.
// Let's say the number of these unwritten bytes is: N.
//
// Now how do we construct these N unwritten bytes? We have already processed the
// diffset, so now where could the values for these N bytes come from?
//
// There are 3 scenarios:
// - All N bytes must be copied from remaining region of the original:
// - that means, destination.len() <= original.len()
// - and the destination might have shrunk, in which case we do not care about
// the extra bytes in the original: they're discarded.
// - Only (N-M) bytes come from original and the rest M bytes stay unwritten and are
// "assumed" to be already zero-initialized.
// - that means, destination.len() > original.len()
// - write_index + (N-M) == original.len()
// - and the destination has expanded.
// - None of these N bytes come from original. It's basically a special case of
// the second scenario: when M = N i.e all N bytes stay unwritten.
// - that means, destination.len() > original.len()
// - and also, write_index == original.len().
// - the destination has expanded just like the above case.
// - all N bytes are "assumed" to be already zero-initialized (by the caller)

if destination.len() <= original.len() {
// case: all n bytes come from original
let dest_len = destination.len();
destination[write_index..].copy_from_slice(&original[write_index..dest_len]);
dest_len
} else if write_index < original.len() {
// case: some bytes come from original and the rest are "assumed" to be
// zero-initialized (by the caller).
destination[write_index..original.len()].copy_from_slice(&original[write_index..]);
original.len()
} else {
// case: all N bytes are "assumed" to be zero-initialized (by the caller).
write_index
}
}
Ordering::Greater => {
// It is an impossible scenario. Even if the diff is corrupt, or the lengths of destinatiare are same
// or different, we'll not encounter this case. It only implies logic error.
return Err(DlpError::InfallibleError.into());
}
};

let (_, unwritten_bytes) = destination.split_at_mut(num_bytes_written);

Ok(unwritten_bytes)
}

// private function that does the actual work.
Expand Down Expand Up @@ -297,6 +377,58 @@ mod tests {
);
}

fn get_example_expected_diff(
changed_len: usize,
// additional_changes must apply after index 78 (index-in-data) !!
additional_changes: Vec<(u32, &[u8])>,
) -> Vec<u8> {
// expected: | 100 | 2 | 0 11 | 4 71 | 11 12 13 14 71 72 ... 78 |

let mut expected_diff = vec![];

// changed_len (u32)
expected_diff.extend_from_slice(&(changed_len as u32).to_le_bytes());

if additional_changes.is_empty() {
// 2 (u32)
expected_diff.extend_from_slice(&2u32.to_le_bytes());
} else {
expected_diff
.extend_from_slice(&(2u32 + additional_changes.len() as u32).to_le_bytes());
}

// -- offsets

// 0 11 (each u32)
expected_diff.extend_from_slice(&0u32.to_le_bytes());
expected_diff.extend_from_slice(&11u32.to_le_bytes());

// 4 71 (each u32)
expected_diff.extend_from_slice(&4u32.to_le_bytes());
expected_diff.extend_from_slice(&71u32.to_le_bytes());

let mut offset_in_diff = 12u32;
for (offset_in_data, diff) in additional_changes.iter() {
expected_diff.extend_from_slice(&offset_in_diff.to_le_bytes());
expected_diff.extend_from_slice(&offset_in_data.to_le_bytes());
offset_in_diff += diff.len() as u32;
}

// -- segments --

// 11 12 13 14 (each u8)
expected_diff.extend_from_slice(&0x01020304u32.to_le_bytes());
// 71 72 ... 78 (each u8)
expected_diff.extend_from_slice(&0x0102030405060708u64.to_le_bytes());

// append diff from additional_changes
for (_, diff) in additional_changes.iter() {
expected_diff.extend_from_slice(diff);
}

expected_diff
}

#[test]
fn test_using_example_data() {
let original = [0; 100];
Expand All @@ -311,42 +443,99 @@ mod tests {

let actual_diff = compute_diff(&original, &changed);
let actual_diffset = DiffSet::try_new(&actual_diff).unwrap();
let expected_diff = {
// expected: | 100 | 2 | 0 11 | 4 71 | 11 12 13 14 71 72 ... 78 |
let expected_diff = get_example_expected_diff(changed.len(), vec![]);

let mut serialized = vec![];
assert_eq!(actual_diff.len(), 4 + 4 + 8 + 8 + (4 + 8));
assert_eq!(actual_diff.as_slice(), expected_diff.as_slice());

// 100 (u32)
serialized.extend_from_slice(&(changed.len() as u32).to_le_bytes());
let expected_changed = apply_diff_copy(&original, &actual_diffset).unwrap();

// 2 (u32)
serialized.extend_from_slice(&2u32.to_le_bytes());
assert_eq!(changed.as_slice(), expected_changed.as_slice());

// 0 11 (each u32)
serialized.extend_from_slice(&0u32.to_le_bytes());
serialized.extend_from_slice(&11u32.to_le_bytes());
let expected_changed = {
let mut destination = vec![255; original.len()];
merge_diff_copy(&mut destination, &original, &actual_diffset).unwrap();
destination
};

assert_eq!(changed.as_slice(), expected_changed.as_slice());
}

#[test]
fn test_shrunk_account_data() {
// Note that changed_len cannot be lower than 79 because the last "changed" index is
// 78 in the diff.
const CHANGED_LEN: usize = 80;

// 4 71 (each u32)
serialized.extend_from_slice(&4u32.to_le_bytes());
serialized.extend_from_slice(&71u32.to_le_bytes());
let original = vec![0; 100];
let changed = {
let mut copy = original.clone();
copy.truncate(CHANGED_LEN);

// 11 12 13 14 (each u8)
serialized.extend_from_slice(&0x01020304u32.to_le_bytes());
// 71 72 ... 78 (each u8)
serialized.extend_from_slice(&0x0102030405060708u64.to_le_bytes());
serialized
// | 11 | 12 | 13 | 14 |
copy[11..=14].copy_from_slice(&0x01020304u32.to_le_bytes());
// | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 |
copy[71..=78].copy_from_slice(&0x0102030405060708u64.to_le_bytes());
copy
};

let actual_diff = compute_diff(&original, &changed);

let actual_diffset = DiffSet::try_new(&actual_diff).unwrap();

let expected_diff = get_example_expected_diff(CHANGED_LEN, vec![]);

assert_eq!(actual_diff.len(), 4 + 4 + 8 + 8 + (4 + 8));
assert_eq!(actual_diff.as_slice(), expected_diff.as_slice());

let expected_changed = apply_diff_copy(&original, &actual_diffset).unwrap();
let expected_changed = {
let mut destination = vec![255; CHANGED_LEN];
merge_diff_copy(&mut destination, &original, &actual_diffset).unwrap();
destination
};

assert_eq!(changed.as_slice(), expected_changed.as_slice());
}

#[test]
fn test_expanded_account_data() {
const CHANGED_LEN: usize = 120;

let original = vec![0; 100];
let changed = {
let mut copy = original.clone();
copy.resize(CHANGED_LEN, 0); // new bytes are zero-initialized

// | 11 | 12 | 13 | 14 |
copy[11..=14].copy_from_slice(&0x01020304u32.to_le_bytes());
// | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 |
copy[71..=78].copy_from_slice(&0x0102030405060708u64.to_le_bytes());
copy
};

let actual_diff = compute_diff(&original, &changed);

let actual_diffset = DiffSet::try_new(&actual_diff).unwrap();

// When an account expands, the extra bytes at the end become part of the diff, even if
// all of them are zeroes, that is why (100, &[0; 32]) is passed as additional_changes to
// the following function.
//
// TODO (snawaz): we could optimize compute_diff to not include the zero bytes which are
// part of the expansion.
let expected_diff = get_example_expected_diff(CHANGED_LEN, vec![(100, &[0; 20])]);

assert_eq!(actual_diff.len(), 4 + 4 + (8 + 8) + (4 + 8) + (4 + 4 + 20));
assert_eq!(actual_diff.as_slice(), expected_diff.as_slice());

let expected_changed = {
let mut destination = vec![255; original.len()];
merge_diff_copy(&mut destination, &original, &actual_diffset).unwrap();
let mut destination = vec![255; CHANGED_LEN];
let unwritten = merge_diff_copy(&mut destination, &original, &actual_diffset).unwrap();

// TODO (snawaz): unwritten == &mut [], is because currently the expanded bytes are part of the diff.
// Once compute_diff is optimized further, written must be &mut [0; 20].
assert_eq!(unwritten, &mut []);

destination
};

Expand Down
2 changes: 2 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ pub enum DlpError {
InvalidDiffAlignment = 16,
#[error("MergeDiff precondition did not meet")]
MergeDiffError = 17,
#[error("An infallible error is encountered possibly due to logic error")]
InfallibleError = 18,
}

impl From<DlpError> for ProgramError {
Expand Down