Skip to content

Add a JSON reader option to ignore type conflicts #7276

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion arrow-json/src/reader/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,16 @@ use crate::reader::tape::{Tape, TapeElement};
use crate::reader::ArrayDecoder;

#[derive(Default)]
pub struct BooleanArrayDecoder {}
/// Decoder that turns JSON tape elements into boolean array data.
///
/// `Null`, `True` and `False` tape elements map to null, `true` and `false`
/// respectively; how any other element is handled depends on
/// `ignore_type_conflicts`.
pub struct BooleanArrayDecoder {
/// When `true`, a tape element that is not null/true/false is appended as a
/// null entry; when `false`, such an element makes `decode` return an error.
ignore_type_conflicts: bool,
}
impl BooleanArrayDecoder {
/// Creates a boolean decoder.
///
/// `ignore_type_conflicts` is stored as-is; the decoder consults it for tape
/// elements that are not a JSON boolean or null (`true` => append null,
/// `false` => return an error).
pub fn new(ignore_type_conflicts: bool) -> Self {
Self {
ignore_type_conflicts,
}
}
}

impl ArrayDecoder for BooleanArrayDecoder {
fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
Expand All @@ -34,6 +43,7 @@ impl ArrayDecoder for BooleanArrayDecoder {
TapeElement::Null => builder.append_null(),
TapeElement::True => builder.append_value(true),
TapeElement::False => builder.append_value(false),
_ if self.ignore_type_conflicts => builder.append_null(),
_ => return Err(tape.error(*p, "boolean")),
}
}
Expand Down
47 changes: 22 additions & 25 deletions arrow-json/src/reader/decimal_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,17 @@ use crate::reader::ArrayDecoder;
/// Decoder that parses JSON tape elements into a decimal primitive array of
/// type `D`.
pub struct DecimalArrayDecoder<D: DecimalType> {
/// Precision handed to `parse_decimal` for every parsed value.
precision: u8,
/// Scale handed to `parse_decimal` for every parsed value.
scale: i8,
/// When `true`, a value that fails decimal parsing, or a tape element of a
/// kind the decoder does not handle, is appended as null instead of
/// returning an error.
ignore_type_conflicts: bool,
// Marker only: ties the decoder to `D` without storing a `D`.
// `fn(D) -> D` keeps the type invariant in `D` and `Send`.
phantom: PhantomData<fn(D) -> D>,
}

impl<D: DecimalType> DecimalArrayDecoder<D> {
pub fn new(precision: u8, scale: i8) -> Self {
pub fn new(precision: u8, scale: i8, ignore_type_conflicts: bool) -> Self {
Self {
precision,
scale,
ignore_type_conflicts,
phantom: PhantomData,
}
}
Expand All @@ -51,45 +53,40 @@ where
fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
let mut builder = PrimitiveBuilder::<D>::with_capacity(pos.len());

// Factor out this logic to simplify call sites below; the compiler will inline it,
// producing a highly predictable branch whose cost should be trivial compared to the
// expensive and unpredictably branchy string parse that immediately precedes each call.
let append = |builder: &mut PrimitiveBuilder<D>, value: &str| {
match parse_decimal::<D>(value, self.precision, self.scale) {
Ok(value) => builder.append_value(value),
Err(_) if self.ignore_type_conflicts => builder.append_null(),
Err(e) => return Err(e),
}
Ok(())
};

for p in pos {
match tape.get(*p) {
TapeElement::Null => builder.append_null(),
TapeElement::String(idx) => {
let s = tape.get_string(idx);
let value = parse_decimal::<D>(s, self.precision, self.scale)?;
builder.append_value(value)
}
TapeElement::Number(idx) => {
let s = tape.get_string(idx);
let value = parse_decimal::<D>(s, self.precision, self.scale)?;
builder.append_value(value)
}
TapeElement::String(idx) => append(&mut builder, tape.get_string(idx))?,
TapeElement::Number(idx) => append(&mut builder, tape.get_string(idx))?,
TapeElement::I64(high) => match tape.get(*p + 1) {
TapeElement::I32(low) => {
let val = (((high as i64) << 32) | (low as u32) as i64).to_string();
let value = parse_decimal::<D>(&val, self.precision, self.scale)?;
builder.append_value(value)
append(&mut builder, &val)?
}
_ => unreachable!(),
},
TapeElement::I32(val) => {
let s = val.to_string();
let value = parse_decimal::<D>(&s, self.precision, self.scale)?;
builder.append_value(value)
}
TapeElement::I32(val) => append(&mut builder, &val.to_string())?,
TapeElement::F64(high) => match tape.get(*p + 1) {
TapeElement::F32(low) => {
let val = f64::from_bits(((high as u64) << 32) | low as u64).to_string();
let value = parse_decimal::<D>(&val, self.precision, self.scale)?;
builder.append_value(value)
append(&mut builder, &val)?
}
_ => unreachable!(),
},
TapeElement::F32(val) => {
let s = f32::from_bits(val).to_string();
let value = parse_decimal::<D>(&s, self.precision, self.scale)?;
builder.append_value(value)
}
TapeElement::F32(val) => append(&mut builder, &f32::from_bits(val).to_string())?,
_ if self.ignore_type_conflicts => builder.append_null(),
_ => return Err(tape.error(*p, "decimal")),
}
}
Expand Down
8 changes: 8 additions & 0 deletions arrow-json/src/reader/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ pub struct ListArrayDecoder<O> {
data_type: DataType,
decoder: Box<dyn ArrayDecoder>,
phantom: PhantomData<O>,
ignore_type_conflicts: bool,
is_nullable: bool,
}

Expand All @@ -37,6 +38,7 @@ impl<O: OffsetSizeTrait> ListArrayDecoder<O> {
data_type: DataType,
coerce_primitive: bool,
strict_mode: bool,
ignore_type_conflicts: bool,
is_nullable: bool,
struct_mode: StructMode,
) -> Result<Self, ArrowError> {
Expand All @@ -49,6 +51,7 @@ impl<O: OffsetSizeTrait> ListArrayDecoder<O> {
field.data_type().clone(),
coerce_primitive,
strict_mode,
ignore_type_conflicts,
field.is_nullable(),
struct_mode,
)?;
Expand All @@ -57,6 +60,7 @@ impl<O: OffsetSizeTrait> ListArrayDecoder<O> {
data_type,
decoder,
phantom: Default::default(),
ignore_type_conflicts,
is_nullable,
})
}
Expand All @@ -83,6 +87,10 @@ impl<O: OffsetSizeTrait> ArrayDecoder for ListArrayDecoder<O> {
nulls.append(false);
*p + 1
}
(_, Some(nulls)) if self.ignore_type_conflicts => {
nulls.append(false);
*p + 1
}
_ => return Err(tape.error(*p, "[")),
};

Expand Down
9 changes: 9 additions & 0 deletions arrow-json/src/reader/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ pub struct MapArrayDecoder {
data_type: DataType,
keys: Box<dyn ArrayDecoder>,
values: Box<dyn ArrayDecoder>,
ignore_type_conflicts: bool,
is_nullable: bool,
}

Expand All @@ -36,6 +37,7 @@ impl MapArrayDecoder {
data_type: DataType,
coerce_primitive: bool,
strict_mode: bool,
ignore_type_conflicts: bool,
is_nullable: bool,
struct_mode: StructMode,
) -> Result<Self, ArrowError> {
Expand All @@ -60,13 +62,15 @@ impl MapArrayDecoder {
fields[0].data_type().clone(),
coerce_primitive,
strict_mode,
ignore_type_conflicts,
fields[0].is_nullable(),
struct_mode,
)?;
let values = make_decoder(
fields[1].data_type().clone(),
coerce_primitive,
strict_mode,
ignore_type_conflicts,
fields[1].is_nullable(),
struct_mode,
)?;
Expand All @@ -75,6 +79,7 @@ impl MapArrayDecoder {
data_type,
keys,
values,
ignore_type_conflicts,
is_nullable,
})
}
Expand Down Expand Up @@ -111,6 +116,10 @@ impl ArrayDecoder for MapArrayDecoder {
nulls.append(false);
p + 1
}
(_, Some(nulls)) if self.ignore_type_conflicts => {
nulls.append(false);
p + 1
}
_ => return Err(tape.error(p, "{")),
};

Expand Down
Loading
Loading