Skip to content

Commit 0a58593

Browse files
jorgecarleitao authored and kszucs committed
ARROW-11265: [Rust] Made bool not ArrowNativeType
This PR removes the risk of boolean values being converted to bytes via `ToByteSlice` by explicitly making `ArrowNativeType` be only used in types whose in-memory representation in Rust equates to the in-memory representation in Arrow. `bool` in Rust is a byte and in Arrow it is a bit. Overall, the direction of this PR is to have each trait represent one aspect of the type. In this case, `ArrowNativeType` is currently * a type that has the same in-memory representation (`ToByteSlice` is implemented for it) * a JSON-serializable type * something that can be cast to/from `usize`. This poses a problem because: 1. bools are serializable, not castable to usize, and have a different memory representation 2. fixed-size integers (iX, uX) are serializable, castable to usize, and have the same memory representation 3. floating-point types (f32, f64) are serializable, not castable to usize, and have the same memory representation; however, they all implement `ArrowNativeType`. This PR focuses on splitting out the JSON-serializable part of it. Closes #9212 from jorgecarleitao/fix_trait Authored-by: Jorge C. Leitao <[email protected]> Signed-off-by: Andrew Lamb <[email protected]>
1 parent 29cfed4 commit 0a58593

File tree

3 files changed

+52
-27
lines changed

3 files changed

+52
-27
lines changed

rust/arrow/src/compute/kernels/sort.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -385,7 +385,7 @@ where
385385
}
386386

387387
// insert valid and nan values in the correct order depending on the descending flag
388-
fn insert_valid_values<T: ArrowNativeType>(
388+
fn insert_valid_values<T>(
389389
result_slice: &mut [u32],
390390
offset: usize,
391391
valids: Vec<(u32, T)>,

rust/arrow/src/datatypes.rs

Lines changed: 49 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -199,11 +199,18 @@ pub struct Field {
199199
metadata: Option<BTreeMap<String, String>>,
200200
}
201201

202-
pub trait ArrowNativeType:
203-
fmt::Debug + Send + Sync + Copy + PartialOrd + FromStr + Default + 'static
204-
{
202+
/// Trait declaring any type that is serializable to JSON. This includes all primitive types (bool, i32, etc.).
203+
pub trait JsonSerializable: 'static {
205204
fn into_json_value(self) -> Option<Value>;
205+
}
206206

207+
/// Trait expressing a Rust type that has the same in-memory representation
208+
/// as Arrow. This includes `i16`, `f32`, but excludes `bool` (which in arrow is represented in bits).
209+
/// In little endian machines, types that implement [`ArrowNativeType`] can be memcopied to arrow buffers
210+
/// as is.
211+
pub trait ArrowNativeType:
212+
fmt::Debug + Send + Sync + Copy + PartialOrd + FromStr + Default + JsonSerializable
213+
{
207214
/// Convert native type from usize.
208215
fn from_usize(_: usize) -> Option<Self> {
209216
None
@@ -225,7 +232,8 @@ pub trait ArrowNativeType:
225232
}
226233
}
227234

228-
/// Trait indicating a primitive fixed-width type (bool, ints and floats).
235+
/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the
236+
/// static-typed nature of rust types ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`].
229237
pub trait ArrowPrimitiveType: 'static {
230238
/// Corresponding Rust native type for the primitive type.
231239
type Native: ArrowNativeType;
@@ -246,17 +254,19 @@ pub trait ArrowPrimitiveType: 'static {
246254
}
247255
}
248256

249-
impl ArrowNativeType for bool {
257+
impl JsonSerializable for bool {
250258
fn into_json_value(self) -> Option<Value> {
251259
Some(self.into())
252260
}
253261
}
254262

255-
impl ArrowNativeType for i8 {
263+
impl JsonSerializable for i8 {
256264
fn into_json_value(self) -> Option<Value> {
257-
Some(VNumber(Number::from(self)))
265+
Some(self.into())
258266
}
267+
}
259268

269+
impl ArrowNativeType for i8 {
260270
fn from_usize(v: usize) -> Option<Self> {
261271
num::FromPrimitive::from_usize(v)
262272
}
@@ -266,11 +276,13 @@ impl ArrowNativeType for i8 {
266276
}
267277
}
268278

269-
impl ArrowNativeType for i16 {
279+
impl JsonSerializable for i16 {
270280
fn into_json_value(self) -> Option<Value> {
271-
Some(VNumber(Number::from(self)))
281+
Some(self.into())
272282
}
283+
}
273284

285+
impl ArrowNativeType for i16 {
274286
fn from_usize(v: usize) -> Option<Self> {
275287
num::FromPrimitive::from_usize(v)
276288
}
@@ -280,11 +292,13 @@ impl ArrowNativeType for i16 {
280292
}
281293
}
282294

283-
impl ArrowNativeType for i32 {
295+
impl JsonSerializable for i32 {
284296
fn into_json_value(self) -> Option<Value> {
285-
Some(VNumber(Number::from(self)))
297+
Some(self.into())
286298
}
299+
}
287300

301+
impl ArrowNativeType for i32 {
288302
fn from_usize(v: usize) -> Option<Self> {
289303
num::FromPrimitive::from_usize(v)
290304
}
@@ -299,11 +313,13 @@ impl ArrowNativeType for i32 {
299313
}
300314
}
301315

302-
impl ArrowNativeType for i64 {
316+
impl JsonSerializable for i64 {
303317
fn into_json_value(self) -> Option<Value> {
304318
Some(VNumber(Number::from(self)))
305319
}
320+
}
306321

322+
impl ArrowNativeType for i64 {
307323
fn from_usize(v: usize) -> Option<Self> {
308324
num::FromPrimitive::from_usize(v)
309325
}
@@ -318,11 +334,13 @@ impl ArrowNativeType for i64 {
318334
}
319335
}
320336

321-
impl ArrowNativeType for u8 {
337+
impl JsonSerializable for u8 {
322338
fn into_json_value(self) -> Option<Value> {
323-
Some(VNumber(Number::from(self)))
339+
Some(self.into())
324340
}
341+
}
325342

343+
impl ArrowNativeType for u8 {
326344
fn from_usize(v: usize) -> Option<Self> {
327345
num::FromPrimitive::from_usize(v)
328346
}
@@ -332,11 +350,13 @@ impl ArrowNativeType for u8 {
332350
}
333351
}
334352

335-
impl ArrowNativeType for u16 {
353+
impl JsonSerializable for u16 {
336354
fn into_json_value(self) -> Option<Value> {
337-
Some(VNumber(Number::from(self)))
355+
Some(self.into())
338356
}
357+
}
339358

359+
impl ArrowNativeType for u16 {
340360
fn from_usize(v: usize) -> Option<Self> {
341361
num::FromPrimitive::from_usize(v)
342362
}
@@ -346,11 +366,13 @@ impl ArrowNativeType for u16 {
346366
}
347367
}
348368

349-
impl ArrowNativeType for u32 {
369+
impl JsonSerializable for u32 {
350370
fn into_json_value(self) -> Option<Value> {
351-
Some(VNumber(Number::from(self)))
371+
Some(self.into())
352372
}
373+
}
353374

375+
impl ArrowNativeType for u32 {
354376
fn from_usize(v: usize) -> Option<Self> {
355377
num::FromPrimitive::from_usize(v)
356378
}
@@ -360,11 +382,13 @@ impl ArrowNativeType for u32 {
360382
}
361383
}
362384

363-
impl ArrowNativeType for u64 {
385+
impl JsonSerializable for u64 {
364386
fn into_json_value(self) -> Option<Value> {
365-
Some(VNumber(Number::from(self)))
387+
Some(self.into())
366388
}
389+
}
367390

391+
impl ArrowNativeType for u64 {
368392
fn from_usize(v: usize) -> Option<Self> {
369393
num::FromPrimitive::from_usize(v)
370394
}
@@ -374,18 +398,21 @@ impl ArrowNativeType for u64 {
374398
}
375399
}
376400

377-
impl ArrowNativeType for f32 {
401+
impl JsonSerializable for f32 {
378402
fn into_json_value(self) -> Option<Value> {
379403
Number::from_f64(f64::round(self as f64 * 1000.0) / 1000.0).map(VNumber)
380404
}
381405
}
382406

383-
impl ArrowNativeType for f64 {
407+
impl JsonSerializable for f64 {
384408
fn into_json_value(self) -> Option<Value> {
385409
Number::from_f64(self).map(VNumber)
386410
}
387411
}
388412

413+
impl ArrowNativeType for f32 {}
414+
impl ArrowNativeType for f64 {}
415+
389416
// BooleanType is special: its bit-width is not the size of the primitive type, and its `index`
390417
// operation assumes bit-packing.
391418
#[derive(Debug)]

rust/arrow/src/util/integration_util.rs

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -449,12 +449,10 @@ impl ArrowJsonBatch {
449449
for i in 0..col.len() {
450450
if col.is_null(i) {
451451
validity.push(1);
452-
data.push(
453-
Int8Type::default_value().into_json_value().unwrap(),
454-
);
452+
data.push(0i8.into());
455453
} else {
456454
validity.push(0);
457-
data.push(col.value(i).into_json_value().unwrap());
455+
data.push(col.value(i).into());
458456
}
459457
}
460458

0 commit comments

Comments
 (0)