Skip to content

Commit

Permalink
Store Timezone as Arc<str> (#3976)
Browse files Browse the repository at this point in the history
* Store Timezone as Arc<str>

* Fix serde

* Fix chrono-tz

* Format

* Add construction example
  • Loading branch information
tustvold authored Mar 30, 2023
1 parent be491b4 commit 9a4374f
Show file tree
Hide file tree
Showing 20 changed files with 76 additions and 92 deletions.
4 changes: 2 additions & 2 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1063,12 +1063,12 @@ impl<T: ArrowTimestampType> PrimitiveArray<T> {
}

/// Construct a timestamp array with an optional timezone
pub fn with_timezone_opt(&self, timezone: Option<String>) -> Self {
pub fn with_timezone_opt<S: Into<Arc<str>>>(&self, timezone: Option<S>) -> Self {
let array_data = unsafe {
self.data
.clone()
.into_builder()
.data_type(DataType::Timestamp(T::UNIT, timezone))
.data_type(DataType::Timestamp(T::UNIT, timezone.map(Into::into)))
.build_unchecked()
};
PrimitiveArray::from(array_data)
Expand Down
3 changes: 1 addition & 2 deletions arrow-array/src/builder/primitive_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -575,8 +575,7 @@ mod tests {
assert_eq!(array.precision(), 1);
assert_eq!(array.scale(), 2);

let data_type =
DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_string()));
let data_type = DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into()));
let mut builder =
TimestampNanosecondBuilder::new().with_data_type(data_type.clone());
builder.append_value(1);
Expand Down
2 changes: 1 addition & 1 deletion arrow-array/src/builder/struct_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ mod tests {
Field::new("f1", DataType::Decimal128(1, 2), false),
Field::new(
"f2",
DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".to_string())),
DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())),
false,
),
];
Expand Down
18 changes: 9 additions & 9 deletions arrow-cast/src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ macro_rules! cast_list_to_string {
fn make_timestamp_array(
array: &PrimitiveArray<Int64Type>,
unit: TimeUnit,
tz: Option<String>,
tz: Option<Arc<str>>,
) -> ArrayRef {
match unit {
TimeUnit::Second => Arc::new(
Expand Down Expand Up @@ -2635,7 +2635,7 @@ fn cast_string_to_timestamp<
TimestampType: ArrowTimestampType<Native = i64>,
>(
array: &dyn Array,
to_tz: &Option<String>,
to_tz: &Option<Arc<str>>,
cast_options: &CastOptions,
) -> Result<ArrayRef, ArrowError> {
let string_array = array
Expand Down Expand Up @@ -8034,7 +8034,7 @@ mod tests {

let b = cast(
&b,
&DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_string())),
&DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into())),
)
.unwrap();
let v = b.as_primitive::<TimestampNanosecondType>();
Expand All @@ -8044,7 +8044,7 @@ mod tests {

let b = cast(
&b,
&DataType::Timestamp(TimeUnit::Millisecond, Some("+02:00".to_string())),
&DataType::Timestamp(TimeUnit::Millisecond, Some("+02:00".into())),
)
.unwrap();
let v = b.as_primitive::<TimestampMillisecondType>();
Expand All @@ -8055,7 +8055,7 @@ mod tests {

#[test]
fn test_cast_utf8_to_timestamp() {
fn test_tz(tz: String) {
fn test_tz(tz: Arc<str>) {
let valid = StringArray::from(vec![
"2023-01-01 04:05:06.789000-08:00",
"2023-01-01 04:05:06.789000-07:00",
Expand Down Expand Up @@ -8091,8 +8091,8 @@ mod tests {
assert_eq!(1672531200000000000, c.value(8));
}

test_tz("+00:00".to_owned());
test_tz("+02:00".to_owned());
test_tz("+00:00".into());
test_tz("+02:00".into());
}

#[test]
Expand All @@ -8119,11 +8119,11 @@ mod tests {
let array = Arc::new(valid) as ArrayRef;
let b = cast(
&array,
&DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_owned())),
&DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into())),
)
.unwrap();

let expect = DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_owned()));
let expect = DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into()));

assert_eq!(b.data_type(), &expect);
let c = b
Expand Down
12 changes: 6 additions & 6 deletions arrow-csv/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1739,7 +1739,7 @@ mod tests {
}

fn test_parse_timestamp_impl<T: ArrowTimestampType>(
timezone: Option<String>,
timezone: Option<Arc<str>>,
expected: &[i64],
) {
let csv = [
Expand Down Expand Up @@ -1775,23 +1775,23 @@ mod tests {
&[0, 0, -7_200_000_000_000],
);
test_parse_timestamp_impl::<TimestampNanosecondType>(
Some("+00:00".to_string()),
Some("+00:00".into()),
&[0, 0, -7_200_000_000_000],
);
test_parse_timestamp_impl::<TimestampNanosecondType>(
Some("-05:00".to_string()),
Some("-05:00".into()),
&[18_000_000_000_000, 0, -7_200_000_000_000],
);
test_parse_timestamp_impl::<TimestampMicrosecondType>(
Some("-03".to_string()),
Some("-03".into()),
&[10_800_000_000, 0, -7_200_000_000],
);
test_parse_timestamp_impl::<TimestampMillisecondType>(
Some("-03".to_string()),
Some("-03".into()),
&[10_800_000, 0, -7_200_000],
);
test_parse_timestamp_impl::<TimestampSecondType>(
Some("-03".to_string()),
Some("-03".into()),
&[10_800, 0, -7_200],
);
}
Expand Down
2 changes: 1 addition & 1 deletion arrow-csv/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
let schema = Schema::new(vec![
Field::new(
"c1",
DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".to_string())),
DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())),
true,
),
Field::new("c2", DataType::Timestamp(TimeUnit::Millisecond, None), true),
Expand Down
2 changes: 1 addition & 1 deletion arrow-integration-test/src/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ pub fn data_type_from_json(json: &serde_json::Value) -> Result<DataType> {
};
let tz = match map.get("timezone") {
None => Ok(None),
Some(serde_json::Value::String(tz)) => Ok(Some(tz.clone())),
Some(Value::String(tz)) => Ok(Some(tz.as_str().into())),
_ => Err(ArrowError::ParseError(
"timezone must be a string".to_string(),
)),
Expand Down
8 changes: 4 additions & 4 deletions arrow-integration-test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1104,10 +1104,10 @@ mod tests {

#[test]
fn test_arrow_data_equality() {
let secs_tz = Some("Europe/Budapest".to_string());
let millis_tz = Some("America/New_York".to_string());
let micros_tz = Some("UTC".to_string());
let nanos_tz = Some("Africa/Johannesburg".to_string());
let secs_tz = Some("Europe/Budapest".into());
let millis_tz = Some("America/New_York".into());
let micros_tz = Some("UTC".into());
let nanos_tz = Some("Africa/Johannesburg".into());

let schema = Schema::new(vec![
Field::new("bools-with-metadata-map", DataType::Boolean, true).with_metadata(
Expand Down
4 changes: 2 additions & 2 deletions arrow-integration-test/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,14 @@ mod tests {
Field::new("c15", DataType::Timestamp(TimeUnit::Second, None), false),
Field::new(
"c16",
DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".to_string())),
DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".into())),
false,
),
Field::new(
"c17",
DataType::Timestamp(
TimeUnit::Microsecond,
Some("Africa/Johannesburg".to_string()),
Some("Africa/Johannesburg".into()),
),
false,
),
Expand Down
8 changes: 4 additions & 4 deletions arrow-ipc/src/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ pub(crate) fn get_data_type(field: crate::Field, may_be_dictionary: bool) -> Dat
}
crate::Type::Timestamp => {
let timestamp = field.type_as_timestamp().unwrap();
let timezone: Option<String> = timestamp.timezone().map(|tz| tz.to_string());
let timezone: Option<_> = timestamp.timezone().map(|tz| tz.into());
match timestamp.unit() {
crate::TimeUnit::SECOND => {
DataType::Timestamp(TimeUnit::Second, timezone)
Expand Down Expand Up @@ -636,8 +636,8 @@ pub(crate) fn get_fb_field_type<'a>(
}
}
Timestamp(unit, tz) => {
let tz = tz.clone().unwrap_or_default();
let tz_str = fbb.create_string(tz.as_str());
let tz = tz.as_deref().unwrap_or_default();
let tz_str = fbb.create_string(tz);
let mut builder = crate::TimestampBuilder::new(fbb);
let time_unit = match unit {
TimeUnit::Second => crate::TimeUnit::SECOND,
Expand Down Expand Up @@ -882,7 +882,7 @@ mod tests {
"timestamp[us]",
DataType::Timestamp(
TimeUnit::Microsecond,
Some("Africa/Johannesburg".to_string()),
Some("Africa/Johannesburg".into()),
),
false,
),
Expand Down
4 changes: 2 additions & 2 deletions arrow-json/src/raw/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -843,7 +843,7 @@ mod tests {
{"c": "1997-01-31T14:26:56.123-05:00", "d": "1997-01-31"}
"#;

let with_timezone = DataType::Timestamp(T::UNIT, Some("+08:00".to_string()));
let with_timezone = DataType::Timestamp(T::UNIT, Some("+08:00".into()));
let schema = Arc::new(Schema::new(vec![
Field::new("a", T::DATA_TYPE, true),
Field::new("b", T::DATA_TYPE, true),
Expand Down Expand Up @@ -1092,7 +1092,7 @@ mod tests {
do_test(DataType::Decimal128(2, 1));
do_test(DataType::Timestamp(
TimeUnit::Microsecond,
Some("+00:00".to_string()),
Some("+00:00".into()),
));
}
}
4 changes: 2 additions & 2 deletions arrow-row/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1533,12 +1533,12 @@ mod tests {
// Construct dictionary with a timezone
let dict = a.finish();
let values = TimestampNanosecondArray::from(dict.values().to_data());
let dict_with_tz = dict.with_values(&values.with_timezone("+02:00".to_string()));
let dict_with_tz = dict.with_values(&values.with_timezone("+02:00"));
let d = DataType::Dictionary(
Box::new(DataType::Int32),
Box::new(DataType::Timestamp(
TimeUnit::Nanosecond,
Some("+02:00".to_string()),
Some("+02:00".into()),
)),
);

Expand Down
2 changes: 1 addition & 1 deletion arrow-schema/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ path = "src/lib.rs"
bench = false

[dependencies]
serde = { version = "1.0", default-features = false, features = ["derive", "std"], optional = true }
serde = { version = "1.0", default-features = false, features = ["derive", "std", "rc"], optional = true }
bitflags = { version = "2.0.0", default-features = false, optional = true }

[features]
Expand Down
12 changes: 10 additions & 2 deletions arrow-schema/src/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.

use std::fmt;
use std::sync::Arc;

use crate::field::Field;

Expand Down Expand Up @@ -131,7 +132,14 @@ pub enum DataType {
/// empty to "Europe/Paris" would require converting the timestamp values
/// from "Europe/Paris" to "UTC", which seems counter-intuitive but is
/// nevertheless correct).
Timestamp(TimeUnit, Option<String>),
///
/// ```
/// # use arrow_schema::{DataType, TimeUnit};
/// DataType::Timestamp(TimeUnit::Second, None);
/// DataType::Timestamp(TimeUnit::Second, Some("literal".into()));
/// DataType::Timestamp(TimeUnit::Second, Some("string".to_string().into()));
/// ```
Timestamp(TimeUnit, Option<Arc<str>>),
/// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01)
/// in days (32 bits).
Date32,
Expand Down Expand Up @@ -476,7 +484,7 @@ impl DataType {
| DataType::Decimal128(_, _)
| DataType::Decimal256(_, _) => 0,
DataType::Timestamp(_, s) => {
s.as_ref().map(|s| s.capacity()).unwrap_or_default()
s.as_ref().map(|s| s.len()).unwrap_or_default()
}
DataType::List(field)
| DataType::FixedSizeList(field, _)
Expand Down
9 changes: 5 additions & 4 deletions arrow-schema/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
use crate::{ArrowError, DataType, Field, Schema, TimeUnit, UnionMode};
use bitflags::bitflags;
use std::sync::Arc;
use std::{
collections::HashMap,
ffi::{c_char, c_void, CStr, CString},
Expand Down Expand Up @@ -514,16 +515,16 @@ impl TryFrom<&FFI_ArrowSchema> for DataType {
["tsu", ""] => DataType::Timestamp(TimeUnit::Microsecond, None),
["tsn", ""] => DataType::Timestamp(TimeUnit::Nanosecond, None),
["tss", tz] => {
DataType::Timestamp(TimeUnit::Second, Some(tz.to_string()))
DataType::Timestamp(TimeUnit::Second, Some(Arc::from(*tz)))
}
["tsm", tz] => {
DataType::Timestamp(TimeUnit::Millisecond, Some(tz.to_string()))
DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from(*tz)))
}
["tsu", tz] => {
DataType::Timestamp(TimeUnit::Microsecond, Some(tz.to_string()))
DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from(*tz)))
}
["tsn", tz] => {
DataType::Timestamp(TimeUnit::Nanosecond, Some(tz.to_string()))
DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from(*tz)))
}
_ => {
return Err(ArrowError::CDataInterface(format!(
Expand Down
2 changes: 1 addition & 1 deletion arrow-select/src/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1364,7 +1364,7 @@ mod tests {
let result = take_impl(&input, &index, None).unwrap();
match result.data_type() {
DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
assert_eq!(tz.clone(), Some("UTC".to_owned()))
assert_eq!(tz.clone(), Some("UTC".into()))
}
_ => panic!(),
}
Expand Down
2 changes: 1 addition & 1 deletion arrow/tests/array_cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ fn create_decimal_array(
// Get a selection of datatypes to try and cast to
fn get_all_types() -> Vec<DataType> {
use DataType::*;
let tz_name = String::from("+08:00");
let tz_name: Arc<str> = Arc::from("+08:00");

let mut types = vec![
Null,
Expand Down
12 changes: 3 additions & 9 deletions arrow/tests/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,7 @@ fn test_export_csv_timestamps() {
let schema = Schema::new(vec![
Field::new(
"c1",
DataType::Timestamp(
TimeUnit::Millisecond,
Some("Australia/Sydney".to_string()),
),
DataType::Timestamp(TimeUnit::Millisecond, Some("Australia/Sydney".into())),
true,
),
Field::new("c2", DataType::Timestamp(TimeUnit::Millisecond, None), true),
Expand Down Expand Up @@ -68,10 +65,7 @@ fn test_export_csv_timestamps_using_rfc3339() {
let schema = Schema::new(vec![
Field::new(
"c1",
DataType::Timestamp(
TimeUnit::Millisecond,
Some("Australia/Sydney".to_string()),
),
DataType::Timestamp(TimeUnit::Millisecond, Some("Australia/Sydney".into())),
true,
),
Field::new("c2", DataType::Timestamp(TimeUnit::Millisecond, None), true),
Expand All @@ -85,7 +79,7 @@ fn test_export_csv_timestamps_using_rfc3339() {
//
vec![Some(1555584887378), Some(1635577147000)],
)
.with_timezone("Australia/Sydney".to_string());
.with_timezone("Australia/Sydney");
let c2 =
TimestampMillisecondArray::from(vec![Some(1555584887378), Some(1635577147000)]);
let batch =
Expand Down
Loading

0 comments on commit 9a4374f

Please sign in to comment.