Skip to content

Commit bb607dc

Browse files
authored
Support deserializing bytes (#1820)
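## Which issue does this PR close?

- Closes #86

## What changes are included in this PR?

`RawLiteralEnum::Bytes` is now converted into a `Literal` for the `binary`, `fixed`, `uuid` and `decimal` primitive types instead of always returning an error; other target types still produce an error.

## Are these changes tested?

Yes. Unit tests feed Avro `Value::Bytes` through `RawLiteral` for each supported type, including wrong-length and unsupported-type error cases.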
1 parent 76d8e2d commit bb607dc

File tree

1 file changed

+297
-6
lines changed

1 file changed

+297
-6
lines changed

crates/iceberg/src/spec/values.rs

Lines changed: 297 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2682,12 +2682,71 @@ mod _serde {
26822682
Type::Primitive(PrimitiveType::String) => Ok(Some(Literal::string(v))),
26832683
_ => Err(invalid_err("string")),
26842684
},
2685-
// # TODO:https://github.com/apache/iceberg-rust/issues/86
2686-
// rust avro don't support deserialize any bytes representation now.
2687-
RawLiteralEnum::Bytes(_) => Err(invalid_err_with_reason(
2688-
"bytes",
2689-
"todo: rust avro doesn't support deserialize any bytes representation now",
2690-
)),
2685+
// Raw bytes: the target type `ty` selects how `v` is interpreted. Any type that is not
// listed below falls through to the final `invalid_err("bytes")` arm.
RawLiteralEnum::Bytes(v) => match ty {
2686+
// Binary: the bytes are copied into the literal unchanged, with no length check.
Type::Primitive(PrimitiveType::Binary) => Ok(Some(Literal::binary(v.to_vec()))),
2687+
// Fixed: the payload length must equal the length declared by the type.
Type::Primitive(PrimitiveType::Fixed(expected_len)) => {
2688+
if v.len() == *expected_len as usize {
2689+
Ok(Some(Literal::fixed(v.to_vec())))
2690+
} else {
2691+
Err(invalid_err_with_reason(
2692+
"bytes",
2693+
&format!(
2694+
"Fixed type must be exactly {} bytes, got {}",
2695+
expected_len,
2696+
v.len()
2697+
),
2698+
))
2699+
}
2700+
}
2701+
// Uuid: exactly 16 bytes are required and are handed to `uuid::Uuid::from_bytes`.
Type::Primitive(PrimitiveType::Uuid) => {
2702+
if v.len() == 16 {
2703+
// The length was checked just above, so this conversion is not expected to
// fail; its `map_err` reports the same message as the `else` branch.
let bytes: [u8; 16] = v.as_slice().try_into().map_err(|_| {
2704+
invalid_err_with_reason("bytes", "UUID must be exactly 16 bytes")
2705+
})?;
2706+
Ok(Some(Literal::uuid(uuid::Uuid::from_bytes(bytes))))
2707+
} else {
2708+
Err(invalid_err_with_reason(
2709+
"bytes",
2710+
"UUID must be exactly 16 bytes",
2711+
))
2712+
}
2713+
}
2714+
// Decimal: the payload must be exactly `Type::decimal_required_bytes(precision)` bytes.
// It is read as a big-endian two's-complement integer and widened to `i128`.
// The scale is not consulted here.
Type::Primitive(PrimitiveType::Decimal { precision, .. }) => {
2715+
let required_bytes = Type::decimal_required_bytes(*precision)? as usize;
2716+
2717+
if v.len() == required_bytes {
2718+
// Pad the bytes to 16 bytes (i128 size) with sign extension
2719+
let mut padded_bytes = [0u8; 16];
2720+
// NOTE(review): this subtraction assumes `required_bytes` is never larger than 16;
// confirm against `Type::decimal_required_bytes`.
let start_idx = 16 - v.len();
2721+
2722+
// Copy the input bytes to the end of the array
2723+
padded_bytes[start_idx..].copy_from_slice(&v);
2724+
2725+
// Sign extend if the number is negative (MSB is 1)
2726+
if !v.is_empty() && (v[0] & 0x80) != 0 {
2727+
// Fill the padding with 0xFF for negative numbers
2728+
for byte in &mut padded_bytes[..start_idx] {
2729+
*byte = 0xFF;
2730+
}
2731+
}
2732+
2733+
Ok(Some(Literal::Primitive(PrimitiveLiteral::Int128(
2734+
i128::from_be_bytes(padded_bytes),
2735+
))))
2736+
} else {
2737+
Err(invalid_err_with_reason(
2738+
"bytes",
2739+
&format!(
2740+
"Decimal with precision {} must be exactly {} bytes, got {}",
2741+
precision,
2742+
required_bytes,
2743+
v.len()
2744+
),
2745+
))
2746+
}
2747+
}
2748+
// Every other target type cannot be built from raw bytes.
_ => Err(invalid_err("bytes")),
2749+
},
26912750
RawLiteralEnum::List(v) => match ty {
26922751
Type::List(ty) => Ok(Some(Literal::List(
26932752
v.list
@@ -3309,6 +3368,238 @@ mod tests {
33093368
}
33103369
}
33113370

3371+
/// Test helper for the success path of bytes deserialization.
///
/// Wraps `input_bytes` in an Avro `Value::Bytes`, deserializes it into a
/// `_serde::RawLiteral`, converts that with `try_into(expected_type)` and asserts that
/// the result equals `Some(expected_literal)`.
///
/// Panics (through `unwrap`) if either the deserialization or the conversion fails.
fn check_raw_literal_bytes_serde_via_avro(
3372+
input_bytes: Vec<u8>,
3373+
expected_literal: Literal,
3374+
expected_type: &Type,
3375+
) {
3376+
use apache_avro::types::Value;
3377+
3378+
// Create an Avro bytes value and deserialize it through the RawLiteral path
3379+
let avro_value = Value::Bytes(input_bytes);
3380+
let raw_literal: _serde::RawLiteral = apache_avro::from_value(&avro_value).unwrap();
3381+
let result = raw_literal.try_into(expected_type).unwrap();
3382+
assert_eq!(result, Some(expected_literal));
3383+
}
3384+
3385+
/// Test helper for the failure path of bytes deserialization.
///
/// Wraps `input_bytes` in an Avro `Value::Bytes`, deserializes it into a
/// `_serde::RawLiteral` (this step must succeed, it is unwrapped) and asserts that
/// converting it with `try_into(expected_type)` returns an error.
fn check_raw_literal_bytes_error_via_avro(input_bytes: Vec<u8>, expected_type: &Type) {
3386+
use apache_avro::types::Value;
3387+
3388+
let avro_value = Value::Bytes(input_bytes);
3389+
let raw_literal: _serde::RawLiteral = apache_avro::from_value(&avro_value).unwrap();
3390+
// Only the type-directed conversion is expected to fail; the error itself is not inspected.
let result = raw_literal.try_into(expected_type);
3391+
assert!(result.is_err(), "Expected error but got: {:?}", result);
3392+
}
3393+
3394+
/// Five bytes targeted at `PrimitiveType::Binary` must come back as `Literal::binary`
/// holding the same bytes.
#[test]
3395+
fn test_raw_literal_bytes_binary() {
3396+
let bytes = vec![1u8, 2u8, 3u8, 4u8, 5u8];
3397+
check_raw_literal_bytes_serde_via_avro(
3398+
bytes.clone(),
3399+
Literal::binary(bytes),
3400+
&Type::Primitive(PrimitiveType::Binary),
3401+
);
3402+
}
3403+
3404+
/// An empty payload targeted at `PrimitiveType::Binary` must convert to an empty
/// `Literal::binary`.
#[test]
3405+
fn test_raw_literal_bytes_binary_empty() {
3406+
let bytes = vec![];
3407+
check_raw_literal_bytes_serde_via_avro(
3408+
bytes.clone(),
3409+
Literal::binary(bytes),
3410+
&Type::Primitive(PrimitiveType::Binary),
3411+
);
3412+
}
3413+
3414+
/// Four bytes targeted at `Fixed(4)` match the declared length and must convert to
/// `Literal::fixed` holding the same bytes.
#[test]
3415+
fn test_raw_literal_bytes_fixed_correct_length() {
3416+
let bytes = vec![1u8, 2u8, 3u8, 4u8];
3417+
check_raw_literal_bytes_serde_via_avro(
3418+
bytes.clone(),
3419+
Literal::fixed(bytes),
3420+
&Type::Primitive(PrimitiveType::Fixed(4)),
3421+
);
3422+
}
3423+
3424+
/// A payload whose length differs from the declared `Fixed` length must be rejected.
#[test]
3425+
fn test_raw_literal_bytes_fixed_wrong_length() {
3426+
let bytes = vec![1u8, 2u8, 3u8]; // 3 bytes, but expecting 4
3427+
check_raw_literal_bytes_error_via_avro(bytes, &Type::Primitive(PrimitiveType::Fixed(4)));
3428+
}
3429+
3430+
/// Boundary case: an empty payload targeted at `Fixed(0)` matches the declared length
/// and must convert to an empty `Literal::fixed`.
#[test]
3431+
fn test_raw_literal_bytes_fixed_empty_correct_length() {
3432+
let bytes = vec![];
3433+
check_raw_literal_bytes_serde_via_avro(
3434+
bytes.clone(),
3435+
Literal::fixed(bytes),
3436+
&Type::Primitive(PrimitiveType::Fixed(0)),
3437+
);
3438+
}
3439+
3440+
/// Sixteen bytes targeted at `PrimitiveType::Uuid` must convert to a
/// `PrimitiveLiteral::UInt128` whose value is the same sixteen bytes read big-endian.
#[test]
3441+
fn test_raw_literal_bytes_uuid_correct_length() {
3442+
let uuid_bytes = vec![
3443+
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab,
3444+
0xcd, 0xef,
3445+
];
3446+
// Same byte sequence as `uuid_bytes`, interpreted as one big-endian u128.
let expected_uuid = u128::from_be_bytes([
3447+
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab,
3448+
0xcd, 0xef,
3449+
]);
3450+
check_raw_literal_bytes_serde_via_avro(
3451+
uuid_bytes,
3452+
Literal::Primitive(PrimitiveLiteral::UInt128(expected_uuid)),
3453+
&Type::Primitive(PrimitiveType::Uuid),
3454+
);
3455+
}
3456+
3457+
/// A payload that is not 16 bytes long must be rejected for `PrimitiveType::Uuid`.
#[test]
3458+
fn test_raw_literal_bytes_uuid_wrong_length() {
3459+
let bytes = vec![1u8, 2u8, 3u8]; // 3 bytes, but UUID needs 16
3460+
check_raw_literal_bytes_error_via_avro(bytes, &Type::Primitive(PrimitiveType::Uuid));
3461+
}
3462+
3463+
/// A positive 2-byte payload (`0x04d2`) targeted at a decimal with precision 4 and scale 2
/// must convert to `PrimitiveLiteral::Int128(1234)`.
#[test]
3464+
fn test_raw_literal_bytes_decimal_precision_4_scale_2() {
3465+
// Precision 4 requires 2 bytes
3466+
let decimal_bytes = vec![0x04, 0xd2]; // 1234 in 2 bytes
3467+
let expected_decimal = 1234i128;
3468+
check_raw_literal_bytes_serde_via_avro(
3469+
decimal_bytes,
3470+
Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)),
3471+
&Type::Primitive(PrimitiveType::Decimal {
3472+
precision: 4,
3473+
scale: 2,
3474+
}),
3475+
);
3476+
}
3477+
3478+
/// A 2-byte payload with the high bit set (`0xfb2e`) targeted at a decimal with precision 4
/// must be sign-extended and convert to `PrimitiveLiteral::Int128(-1234)`.
#[test]
3479+
fn test_raw_literal_bytes_decimal_precision_4_negative() {
3480+
// Precision 4 requires 2 bytes, negative number
3481+
let decimal_bytes = vec![0xfb, 0x2e]; // -1234 in 2 bytes
3482+
let expected_decimal = -1234i128;
3483+
check_raw_literal_bytes_serde_via_avro(
3484+
decimal_bytes,
3485+
Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)),
3486+
&Type::Primitive(PrimitiveType::Decimal {
3487+
precision: 4,
3488+
scale: 2,
3489+
}),
3490+
);
3491+
}
3492+
3493+
/// A 4-byte payload (`0x0012d687`) targeted at a decimal with precision 9 and scale 2
/// must convert to `PrimitiveLiteral::Int128(1234567)`.
#[test]
3494+
fn test_raw_literal_bytes_decimal_precision_9_scale_2() {
3495+
// Precision 9 requires 4 bytes
3496+
let decimal_bytes = vec![0x00, 0x12, 0xd6, 0x87]; // 1234567 in 4 bytes
3497+
let expected_decimal = 1234567i128;
3498+
check_raw_literal_bytes_serde_via_avro(
3499+
decimal_bytes,
3500+
Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)),
3501+
&Type::Primitive(PrimitiveType::Decimal {
3502+
precision: 9,
3503+
scale: 2,
3504+
}),
3505+
);
3506+
}
3507+
3508+
/// An 8-byte payload with leading zero bytes targeted at a decimal with precision 18 and
/// scale 2 must convert to `PrimitiveLiteral::Int128(1234)`.
#[test]
3509+
fn test_raw_literal_bytes_decimal_precision_18_scale_2() {
3510+
// Precision 18 requires 8 bytes
3511+
let decimal_bytes = vec![0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xd2]; // 1234 in 8 bytes
3512+
let expected_decimal = 1234i128;
3513+
check_raw_literal_bytes_serde_via_avro(
3514+
decimal_bytes,
3515+
Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)),
3516+
&Type::Primitive(PrimitiveType::Decimal {
3517+
precision: 18,
3518+
scale: 2,
3519+
}),
3520+
);
3521+
}
3522+
3523+
/// A full 16-byte payload targeted at a decimal with precision 38 and scale 2 needs no
/// padding and must convert to `PrimitiveLiteral::Int128(1234)`.
#[test]
3524+
fn test_raw_literal_bytes_decimal_precision_38_scale_2() {
3525+
// Precision 38 requires 16 bytes (maximum precision)
3526+
let decimal_bytes = vec![
3527+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3528+
0x04, 0xd2, // 1234 in 16 bytes
3529+
];
3530+
let expected_decimal = 1234i128;
3531+
check_raw_literal_bytes_serde_via_avro(
3532+
decimal_bytes,
3533+
Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)),
3534+
&Type::Primitive(PrimitiveType::Decimal {
3535+
precision: 38,
3536+
scale: 2,
3537+
}),
3538+
);
3539+
}
3540+
3541+
/// A single positive byte (`0x07`) targeted at a decimal with precision 1 and scale 0
/// must convert to `PrimitiveLiteral::Int128(7)`.
#[test]
3542+
fn test_raw_literal_bytes_decimal_precision_1_scale_0() {
3543+
// Precision 1 requires 1 byte
3544+
let decimal_bytes = vec![0x07]; // 7 in 1 byte
3545+
let expected_decimal = 7i128;
3546+
check_raw_literal_bytes_serde_via_avro(
3547+
decimal_bytes,
3548+
Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)),
3549+
&Type::Primitive(PrimitiveType::Decimal {
3550+
precision: 1,
3551+
scale: 0,
3552+
}),
3553+
);
3554+
}
3555+
3556+
/// A single byte with the high bit set (`0xf9`) targeted at a decimal with precision 1
/// must be sign-extended and convert to `PrimitiveLiteral::Int128(-7)`.
#[test]
3557+
fn test_raw_literal_bytes_decimal_precision_1_negative() {
3558+
// Precision 1 requires 1 byte, negative number
3559+
let decimal_bytes = vec![0xf9]; // -7 in 1 byte (two's complement)
3560+
let expected_decimal = -7i128;
3561+
check_raw_literal_bytes_serde_via_avro(
3562+
decimal_bytes,
3563+
Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)),
3564+
&Type::Primitive(PrimitiveType::Decimal {
3565+
precision: 1,
3566+
scale: 0,
3567+
}),
3568+
);
3569+
}
3570+
3571+
/// A payload longer than the byte width required by the decimal precision must be rejected.
#[test]
3572+
fn test_raw_literal_bytes_decimal_wrong_length() {
3573+
// 3 bytes provided, but precision 4 requires 2 bytes
3574+
let bytes = vec![1u8, 2u8, 3u8];
3575+
check_raw_literal_bytes_error_via_avro(
3576+
bytes,
3577+
&Type::Primitive(PrimitiveType::Decimal {
3578+
precision: 4,
3579+
scale: 2,
3580+
}),
3581+
);
3582+
}
3583+
3584+
/// A payload shorter than the byte width required by the decimal precision must be rejected.
#[test]
3585+
fn test_raw_literal_bytes_decimal_wrong_length_too_few() {
3586+
// 1 byte provided, but precision 9 requires 4 bytes
3587+
let bytes = vec![0x42];
3588+
check_raw_literal_bytes_error_via_avro(
3589+
bytes,
3590+
&Type::Primitive(PrimitiveType::Decimal {
3591+
precision: 9,
3592+
scale: 2,
3593+
}),
3594+
);
3595+
}
3596+
3597+
/// Bytes targeted at a type with no bytes conversion (here `PrimitiveType::Int`) must be
/// rejected.
#[test]
3598+
fn test_raw_literal_bytes_unsupported_type() {
3599+
let bytes = vec![1u8, 2u8, 3u8, 4u8];
3600+
check_raw_literal_bytes_error_via_avro(bytes, &Type::Primitive(PrimitiveType::Int));
3601+
}
3602+
33123603
#[test]
33133604
fn avro_convert_test_int() {
33143605
check_convert_with_avro(

0 commit comments

Comments
 (0)