Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Date data types can cast to a time zone-specific timestamp #7141

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
274 changes: 242 additions & 32 deletions arrow-cast/src/cast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
}
(Timestamp(_, _), _) if to_type.is_numeric() => true,
(_, Timestamp(_, _)) if from_type.is_numeric() => true,
(Date64, Timestamp(_, None)) => true,
(Date32, Timestamp(_, None)) => true,
(Date64, Timestamp(_, _)) => true,
(Date32, Timestamp(_, _)) => true,
(
Timestamp(_, _),
Timestamp(_, _)
Expand Down Expand Up @@ -1806,44 +1806,63 @@ pub fn cast_with_options(
})?,
))
}
(Date64, Timestamp(TimeUnit::Second, None)) => Ok(Arc::new(
array
(Date64, Timestamp(TimeUnit::Second, _)) => {
let array = array
.as_primitive::<Date64Type>()
.unary::<_, TimestampSecondType>(|x| x / MILLISECONDS),
)),
(Date64, Timestamp(TimeUnit::Millisecond, None)) => {
cast_reinterpret_arrays::<Date64Type, TimestampMillisecondType>(array)
.unary::<_, TimestampSecondType>(|x| x / MILLISECONDS);

cast_with_options(&array, to_type, cast_options)
}
(Date64, Timestamp(TimeUnit::Microsecond, None)) => Ok(Arc::new(
array
(Date64, Timestamp(TimeUnit::Millisecond, _)) => {
let array = array
.as_primitive::<Date64Type>()
.unary::<_, TimestampMicrosecondType>(|x| x * (MICROSECONDS / MILLISECONDS)),
)),
(Date64, Timestamp(TimeUnit::Nanosecond, None)) => Ok(Arc::new(
array
.reinterpret_cast::<TimestampMillisecondType>();

cast_with_options(&array, to_type, cast_options)
}

(Date64, Timestamp(TimeUnit::Microsecond, _)) => {
let array = array
.as_primitive::<Date64Type>()
.unary::<_, TimestampNanosecondType>(|x| x * (NANOSECONDS / MILLISECONDS)),
)),
(Date32, Timestamp(TimeUnit::Second, None)) => Ok(Arc::new(
array
.unary::<_, TimestampMicrosecondType>(|x| x * (MICROSECONDS / MILLISECONDS));

cast_with_options(&array, to_type, cast_options)
}
(Date64, Timestamp(TimeUnit::Nanosecond, _)) => {
let array = array
.as_primitive::<Date64Type>()
.unary::<_, TimestampNanosecondType>(|x| x * (NANOSECONDS / MILLISECONDS));

cast_with_options(&array, to_type, cast_options)
}
(Date32, Timestamp(TimeUnit::Second, _)) => {
let array = array
.as_primitive::<Date32Type>()
.unary::<_, TimestampSecondType>(|x| (x as i64) * SECONDS_IN_DAY),
)),
(Date32, Timestamp(TimeUnit::Millisecond, None)) => Ok(Arc::new(
array
.unary::<_, TimestampSecondType>(|x| (x as i64) * SECONDS_IN_DAY);

cast_with_options(&array, to_type, cast_options)
}
(Date32, Timestamp(TimeUnit::Millisecond, _)) => {
let array = array
.as_primitive::<Date32Type>()
.unary::<_, TimestampMillisecondType>(|x| (x as i64) * MILLISECONDS_IN_DAY),
)),
(Date32, Timestamp(TimeUnit::Microsecond, None)) => Ok(Arc::new(
array
.unary::<_, TimestampMillisecondType>(|x| (x as i64) * MILLISECONDS_IN_DAY);

cast_with_options(&array, to_type, cast_options)
}
(Date32, Timestamp(TimeUnit::Microsecond, _)) => {
let array = array
.as_primitive::<Date32Type>()
.unary::<_, TimestampMicrosecondType>(|x| (x as i64) * MICROSECONDS_IN_DAY),
)),
(Date32, Timestamp(TimeUnit::Nanosecond, None)) => Ok(Arc::new(
array
.unary::<_, TimestampMicrosecondType>(|x| (x as i64) * MICROSECONDS_IN_DAY);

cast_with_options(&array, to_type, cast_options)
}
(Date32, Timestamp(TimeUnit::Nanosecond, _)) => {
let array = array
.as_primitive::<Date32Type>()
.unary::<_, TimestampNanosecondType>(|x| (x as i64) * NANOSECONDS_IN_DAY),
)),
.unary::<_, TimestampNanosecondType>(|x| (x as i64) * NANOSECONDS_IN_DAY);

cast_with_options(&array, to_type, cast_options)
}

(_, Duration(unit)) if from_type.is_numeric() => {
let array = cast_with_options(array, &Int64, cast_options)?;
Expand Down Expand Up @@ -5217,6 +5236,197 @@ mod tests {
}};
}

#[test]
fn test_cast_date32_to_timestamp_and_timestamp_with_timezone() {
    // Only the first slot holds a value: day 18628 since the epoch (2021-01-01).
    // The remaining two slots are null.
    let dates: ArrayRef = Arc::new(Date32Array::from(vec![Some(18628), None, None]));
    let offset = "+0545"; // UTC + 0545 is Asia/Kathmandu

    // Zone-aware target: the rendered string carries the +05:45 offset.
    let zoned_type = DataType::Timestamp(TimeUnit::Second, Some(offset.into()));
    let zoned = cast(&dates, &zoned_type).unwrap();
    let zoned_values = zoned.as_primitive::<TimestampSecondType>();
    let zoned_text = cast(&zoned_values, &DataType::Utf8).unwrap();
    let zoned_strings = zoned_text.as_string::<i32>();
    assert_eq!("2021-01-01T00:00:00+05:45", zoned_strings.value(0));

    // Zone-less target: same wall-clock reading, no offset suffix.
    let naive_type = DataType::Timestamp(TimeUnit::Second, None);
    let naive = cast(&dates, &naive_type).unwrap();
    let naive_values = naive.as_primitive::<TimestampSecondType>();
    let naive_text = cast(&naive_values, &DataType::Utf8).unwrap();
    let naive_strings = naive_text.as_string::<i32>();
    assert_eq!("2021-01-01T00:00:00", naive_strings.value(0));
}
Comment on lines +5239 to +5260
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Proof that casting a Date to a Timestamp with or without a time zone results in the same string.

I'm not the biggest fan of this test case; I would appreciate comments on whether we want a test case that compares the cast results (with and without a time zone).


#[test]
fn test_cast_date32_to_timestamp_with_timezone() {
let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu
let a = Date32Array::from(vec![Some(18628), Some(18993), None]); // 2021-1-1, 2022-1-1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the default tz of date is UTC, right?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the default tz of date is UTC, right?

Yes, but shouldn't we ensure Dates cast to timestamps with a particular timezone?

I'm also a bit confused by this comment. Are you implying that a is improperly constructed? (i.e. the third element shouldn't be None?) Here's a similar test case that tests for time zone aware timestamps:

let array_with_tz =
TimestampMillisecondArray::from(vec![Some(864000003005), Some(1545696002001), None])
.with_timezone(tz.to_string());
let expected = vec![
Some("1997-05-19 05:45:03.005000"),
Some("2018-12-25 05:45:02.001000"),
None,
];
assert_cast_timestamp_to_string!(
array_with_tz,
DataType::Utf8View,
StringViewArray,
cast_options,
expected
);

Maybe I'm misunderstanding something. Would appreciate some clarification. Thank you!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test is okay for me. Thanks!

I'm just curious about which particular timezone we should use. Should we depend on a time zone configuration or use the UTC directly?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm just curious about which particular timezone we should use. Should we depend on a time zone configuration or use the UTC directly?

Other test cases use random time zones like the +0545 mentioned above. I'd argue that for the sake of testing, we'd want to use a non-default time zone.

let array = Arc::new(a) as ArrayRef;
let b = cast(
&array,
&DataType::Timestamp(TimeUnit::Second, Some(tz.into())),
)
.unwrap();
let c = b.as_primitive::<TimestampSecondType>();
assert_eq!(1609438500, c.value(0));
assert_eq!(1640974500, c.value(1));
assert!(c.is_null(2));

let string_array = cast(&c, &DataType::Utf8).unwrap();
let result = string_array.as_string::<i32>();
assert_eq!("2021-01-01T00:00:00+05:45", result.value(0));
assert_eq!("2022-01-01T00:00:00+05:45", result.value(1));
}

#[test]
fn test_cast_date32_to_timestamp_with_timezone_ms() {
    // Days since the epoch: 18628 is 2021-01-01 and 18993 is 2022-01-01; the last slot is null.
    let dates: ArrayRef = Arc::new(Date32Array::from(vec![Some(18628), Some(18993), None]));
    let offset = "+0545"; // UTC + 0545 is Asia/Kathmandu
    let target = DataType::Timestamp(TimeUnit::Millisecond, Some(offset.into()));

    let casted = cast(&dates, &target).unwrap();
    let millis = casted.as_primitive::<TimestampMillisecondType>();

    // Expected values are midnight at +05:45 on each date, in milliseconds.
    assert_eq!(1609438500000, millis.value(0));
    assert_eq!(1640974500000, millis.value(1));
    assert!(millis.is_null(2));

    // Rendering back to text shows local midnight with the offset attached.
    let rendered = cast(&millis, &DataType::Utf8).unwrap();
    let strings = rendered.as_string::<i32>();
    assert_eq!("2021-01-01T00:00:00+05:45", strings.value(0));
    assert_eq!("2022-01-01T00:00:00+05:45", strings.value(1));
}

#[test]
fn test_cast_date32_to_timestamp_with_timezone_us() {
    // Days since the epoch: 18628 is 2021-01-01 and 18993 is 2022-01-01; the last slot is null.
    let dates: ArrayRef = Arc::new(Date32Array::from(vec![Some(18628), Some(18993), None]));
    let offset = "+0545"; // UTC + 0545 is Asia/Kathmandu
    let target = DataType::Timestamp(TimeUnit::Microsecond, Some(offset.into()));

    let casted = cast(&dates, &target).unwrap();
    let micros = casted.as_primitive::<TimestampMicrosecondType>();

    // Expected values are midnight at +05:45 on each date, in microseconds.
    assert_eq!(1609438500000000, micros.value(0));
    assert_eq!(1640974500000000, micros.value(1));
    assert!(micros.is_null(2));

    // Rendering back to text shows local midnight with the offset attached.
    let rendered = cast(&micros, &DataType::Utf8).unwrap();
    let strings = rendered.as_string::<i32>();
    assert_eq!("2021-01-01T00:00:00+05:45", strings.value(0));
    assert_eq!("2022-01-01T00:00:00+05:45", strings.value(1));
}

#[test]
fn test_cast_date32_to_timestamp_with_timezone_ns() {
    // Days since the epoch: 18628 is 2021-01-01 and 18993 is 2022-01-01; the last slot is null.
    let dates: ArrayRef = Arc::new(Date32Array::from(vec![Some(18628), Some(18993), None]));
    let offset = "+0545"; // UTC + 0545 is Asia/Kathmandu
    let target = DataType::Timestamp(TimeUnit::Nanosecond, Some(offset.into()));

    let casted = cast(&dates, &target).unwrap();
    let nanos = casted.as_primitive::<TimestampNanosecondType>();

    // Expected values are midnight at +05:45 on each date, in nanoseconds.
    assert_eq!(1609438500000000000, nanos.value(0));
    assert_eq!(1640974500000000000, nanos.value(1));
    assert!(nanos.is_null(2));

    // Rendering back to text shows local midnight with the offset attached.
    let rendered = cast(&nanos, &DataType::Utf8).unwrap();
    let strings = rendered.as_string::<i32>();
    assert_eq!("2021-01-01T00:00:00+05:45", strings.value(0));
    assert_eq!("2022-01-01T00:00:00+05:45", strings.value(1));
}

#[test]
fn test_cast_date64_to_timestamp_with_timezone() {
    let offset = "+0545"; // UTC + 0545 is Asia/Kathmandu
    // Two millisecond-resolution Date64 values followed by a null.
    let dates = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]);
    let target = DataType::Timestamp(TimeUnit::Second, Some(offset.into()));

    let casted = cast(&dates, &target).unwrap();
    let seconds = casted.as_primitive::<TimestampSecondType>();

    // At second resolution the trailing 5 ms / 1 ms of the inputs do not appear.
    assert_eq!(863979300, seconds.value(0));
    assert_eq!(1545675300, seconds.value(1));
    assert!(seconds.is_null(2));

    // Rendering back to text shows local midnight with the offset attached.
    let rendered = cast(&seconds, &DataType::Utf8).unwrap();
    let strings = rendered.as_string::<i32>();
    assert_eq!("1997-05-19T00:00:00+05:45", strings.value(0));
    assert_eq!("2018-12-25T00:00:00+05:45", strings.value(1));
}

#[test]
fn test_cast_date64_to_timestamp_with_timezone_ms() {
    let offset = "+0545"; // UTC + 0545 is Asia/Kathmandu
    // Two millisecond-resolution Date64 values followed by a null.
    let dates = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]);
    let target = DataType::Timestamp(TimeUnit::Millisecond, Some(offset.into()));

    let casted = cast(&dates, &target).unwrap();
    let millis = casted.as_primitive::<TimestampMillisecondType>();

    // The sub-second part of each input (5 ms and 1 ms) is kept.
    assert_eq!(863979300005, millis.value(0));
    assert_eq!(1545675300001, millis.value(1));
    assert!(millis.is_null(2));

    // Rendering back to text shows the local time with fraction and offset.
    let rendered = cast(&millis, &DataType::Utf8).unwrap();
    let strings = rendered.as_string::<i32>();
    assert_eq!("1997-05-19T00:00:00.005+05:45", strings.value(0));
    assert_eq!("2018-12-25T00:00:00.001+05:45", strings.value(1));
}

#[test]
fn test_cast_date64_to_timestamp_with_timezone_us() {
    let offset = "+0545"; // UTC + 0545 is Asia/Kathmandu
    // Two millisecond-resolution Date64 values followed by a null.
    let dates = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]);
    let target = DataType::Timestamp(TimeUnit::Microsecond, Some(offset.into()));

    let casted = cast(&dates, &target).unwrap();
    let micros = casted.as_primitive::<TimestampMicrosecondType>();

    // The sub-second part of each input (5 ms and 1 ms) is kept, scaled to microseconds.
    assert_eq!(863979300005000, micros.value(0));
    assert_eq!(1545675300001000, micros.value(1));
    assert!(micros.is_null(2));

    // Rendering back to text shows the local time with fraction and offset.
    let rendered = cast(&micros, &DataType::Utf8).unwrap();
    let strings = rendered.as_string::<i32>();
    assert_eq!("1997-05-19T00:00:00.005+05:45", strings.value(0));
    assert_eq!("2018-12-25T00:00:00.001+05:45", strings.value(1));
}

#[test]
fn test_cast_date64_to_timestamp_with_timezone_ns() {
    let offset = "+0545"; // UTC + 0545 is Asia/Kathmandu
    // Two millisecond-resolution Date64 values followed by a null.
    let dates = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]);
    let target = DataType::Timestamp(TimeUnit::Nanosecond, Some(offset.into()));

    let casted = cast(&dates, &target).unwrap();
    let nanos = casted.as_primitive::<TimestampNanosecondType>();

    // The sub-second part of each input (5 ms and 1 ms) is kept, scaled to nanoseconds.
    assert_eq!(863979300005000000, nanos.value(0));
    assert_eq!(1545675300001000000, nanos.value(1));
    assert!(nanos.is_null(2));

    // Rendering back to text shows the local time with fraction and offset.
    let rendered = cast(&nanos, &DataType::Utf8).unwrap();
    let strings = rendered.as_string::<i32>();
    assert_eq!("1997-05-19T00:00:00.005+05:45", strings.value(0));
    assert_eq!("2018-12-25T00:00:00.001+05:45", strings.value(1));
}

#[test]
fn test_cast_timestamp_to_strings() {
// "2018-12-25T00:00:02.001", "1997-05-19T00:00:03.005", None
Expand Down
Loading