From 9c36fbb1c2322455947c3016a2e3d742e7226b30 Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Fri, 10 Jan 2025 11:38:21 +0100 Subject: [PATCH 1/2] Update pyo3 --- Cargo.lock | 10 +++++----- Cargo.toml | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dd3943ce8c89..4c02fbcc0fdc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3674,7 +3674,7 @@ dependencies = [ [[package]] name = "pyo3" version = "0.23.3" -source = "git+https://github.com/bschoenmaeckers/pyo3.git?branch=release-0.23#f209b1a87708a03f2234b744bb64a9f9825ec768" +source = "git+https://github.com/pyo3/pyo3.git?branch=release-0.23.4#5dbfe615e8568f59491711cc1411b15cc7753430" dependencies = [ "cfg-if", "chrono", @@ -3693,7 +3693,7 @@ dependencies = [ [[package]] name = "pyo3-build-config" version = "0.23.3" -source = "git+https://github.com/bschoenmaeckers/pyo3.git?branch=release-0.23#f209b1a87708a03f2234b744bb64a9f9825ec768" +source = "git+https://github.com/pyo3/pyo3.git?branch=release-0.23.4#5dbfe615e8568f59491711cc1411b15cc7753430" dependencies = [ "once_cell", "target-lexicon", @@ -3702,7 +3702,7 @@ dependencies = [ [[package]] name = "pyo3-ffi" version = "0.23.3" -source = "git+https://github.com/bschoenmaeckers/pyo3.git?branch=release-0.23#f209b1a87708a03f2234b744bb64a9f9825ec768" +source = "git+https://github.com/pyo3/pyo3.git?branch=release-0.23.4#5dbfe615e8568f59491711cc1411b15cc7753430" dependencies = [ "libc", "pyo3-build-config", @@ -3711,7 +3711,7 @@ dependencies = [ [[package]] name = "pyo3-macros" version = "0.23.3" -source = "git+https://github.com/bschoenmaeckers/pyo3.git?branch=release-0.23#f209b1a87708a03f2234b744bb64a9f9825ec768" +source = "git+https://github.com/pyo3/pyo3.git?branch=release-0.23.4#5dbfe615e8568f59491711cc1411b15cc7753430" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -3722,7 +3722,7 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" version = "0.23.3" -source = "git+https://github.com/bschoenmaeckers/pyo3.git?branch=release-0.23#f209b1a87708a03f2234b744bb64a9f9825ec768" +source = "git+https://github.com/pyo3/pyo3.git?branch=release-0.23.4#5dbfe615e8568f59491711cc1411b15cc7753430" dependencies = [ "heck", "proc-macro2", diff --git a/Cargo.toml b/Cargo.toml index adf529a7b960..91c9ed39c6b4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -62,7 +62,7 @@ once_cell = "1" parking_lot = "0.12" percent-encoding = "2.3" pin-project-lite = "0.2" -pyo3 = { git = "https://github.com/bschoenmaeckers/pyo3.git", branch = "release-0.23" } +pyo3 = { git = "https://github.com/pyo3/pyo3.git", branch = "release-0.23.4" } rand = "0.8" rand_distr = "0.4" raw-cpuid = "11" @@ -136,8 +136,8 @@ features = [ [patch.crates-io] # packed_simd_2 = { git = "https://github.com/rust-lang/packed_simd", rev = "e57c7ba11386147e6d2cbad7c88f376aab4bdc86" } # simd-json = { git = "https://github.com/ritchie46/simd-json", branch = "alignment" } -pyo3 = { git = "https://github.com/bschoenmaeckers/pyo3.git", branch = "release-0.23" } -pyo3-ffi = { git = "https://github.com/bschoenmaeckers/pyo3.git", branch = "release-0.23" } +pyo3 = { git = "https://github.com/pyo3/pyo3.git", branch = "release-0.23.4" } +pyo3-ffi = { git = "https://github.com/pyo3/pyo3.git", branch = "release-0.23.4" } [profile.mindebug-dev] inherits = "dev" From 45d66f0fbe347cf49e258d1f6710fb8787b4fbb0 Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Wed, 11 Dec 2024 15:10:42 +0100 Subject: [PATCH 2/2] use rust to convert to/from python datetimes --- Cargo.lock | 2 + crates/polars-python/Cargo.toml | 3 +- .../polars-python/src/conversion/any_value.rs | 84 +++++++++---------- .../src/conversion/chunked_array.rs | 29 ++----- .../polars-python/src/conversion/datetime.rs | 34 +++++++- 5 files changed, 83 insertions(+), 69 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4c02fbcc0fdc..64f2f60e4e71 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3424,6 +3424,7 @@ dependencies = [ "bincode", "bytemuck", "bytes", + "chrono-tz", "either", "flate2", "itoa", @@ -3678,6 +3679,7 @@ source = "git+https://github.com/pyo3/pyo3.git?branch=release-0.23.4#5dbfe615e85 dependencies = [ "cfg-if", "chrono", + "chrono-tz", "indoc", "inventory", "libc", diff --git a/crates/polars-python/Cargo.toml b/crates/polars-python/Cargo.toml index 89e1af64fd1a..714fddb644dd 100644 --- a/crates/polars-python/Cargo.toml +++ b/crates/polars-python/Cargo.toml @@ -30,6 +30,7 @@ arboard = { workspace = true, optional = true } bincode = { workspace = true } bytemuck = { workspace = true } bytes = { workspace = true } +chrono-tz = { workspace = true } either = { workspace = true } flate2 = { workspace = true } itoa = { workspace = true } @@ -38,7 +39,7 @@ ndarray = { workspace = true } num-traits = { workspace = true } numpy = { workspace = true } once_cell = { workspace = true } -pyo3 = { workspace = true, features = ["abi3-py39", "chrono", "multiple-pymethods"] } +pyo3 = { workspace = true, features = ["abi3-py39", "chrono", "chrono-tz", "multiple-pymethods"] } recursive = { workspace = true } serde_json = { workspace = true, optional = true } thiserror = { workspace = true } diff --git a/crates/polars-python/src/conversion/any_value.rs b/crates/polars-python/src/conversion/any_value.rs index e986ab17fc09..18c133eb5a45 100644 --- a/crates/polars-python/src/conversion/any_value.rs +++ b/crates/polars-python/src/conversion/any_value.rs @@ -1,12 +1,16 @@ use std::borrow::{Borrow, Cow}; +use chrono_tz::Tz; #[cfg(feature = "object")] use polars::chunked_array::object::PolarsObjectSafe; #[cfg(feature = "object")] use polars::datatypes::OwnedObject; use polars::datatypes::{DataType, Field, PlHashMap, TimeUnit}; -use polars::prelude::{AnyValue, PlSmallStr, Series, TimeZone}; -use polars_core::export::chrono::{NaiveDate, NaiveDateTime, NaiveTime, TimeDelta, Timelike}; +use polars::export::chrono::{DateTime, FixedOffset}; +use polars::prelude::{AnyValue, PlSmallStr, Series}; +use polars_core::export::chrono::{ + Datelike, NaiveDate, NaiveDateTime, NaiveTime, TimeDelta, Timelike, +}; use polars_core::utils::any_values_to_supertype_and_n_dtypes; use polars_core::utils::arrow::temporal_conversions::date32_to_date; use pyo3::exceptions::{PyOverflowError, PyTypeError, PyValueError}; @@ -17,7 +21,7 @@ use pyo3::types::{ use pyo3::{intern, IntoPyObjectExt}; use super::datetime::{ - elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime, timestamp_to_naive_datetime, + datetime_to_py_object, elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime, }; use super::{decimal_to_digits, struct_dict, ObjectValue, Wrap}; use crate::error::PyPolarsErr; @@ -92,15 +96,11 @@ pub(crate) fn any_value_into_py_object<'py>( date.into_bound_py_any(py) }, AnyValue::Datetime(v, time_unit, time_zone) => { - datetime_to_py_object(py, utils, v, time_unit, time_zone) + datetime_to_py_object(py, v, time_unit, time_zone) + }, + AnyValue::DatetimeOwned(v, time_unit, time_zone) => { + datetime_to_py_object(py, v, time_unit, time_zone.as_ref().map(AsRef::as_ref)) }, - AnyValue::DatetimeOwned(v, time_unit, time_zone) => datetime_to_py_object( - py, - utils, - v, - time_unit, - time_zone.as_ref().map(AsRef::as_ref), - ), AnyValue::Duration(v, time_unit) => { let time_delta = elapsed_offset_to_timedelta(v, time_unit); time_delta.into_bound_py_any(py) @@ -142,28 +142,6 @@ pub(crate) fn any_value_into_py_object<'py>( } } -fn datetime_to_py_object<'py>( - py: Python<'py>, - utils: &Bound<'py, PyAny>, - v: i64, - tu: TimeUnit, - tz: Option<&TimeZone>, -) -> PyResult> { - if let Some(time_zone) = tz { - // When https://github.com/pola-rs/polars/issues/16199 is - // implemented, we'll switch to something like: - // - // let tz: chrono_tz::Tz = time_zone.parse().unwrap(); - // let datetime = tz.from_local_datetime(&naive_datetime).earliest().unwrap(); - // datetime.into_py(py) - let convert = utils.getattr(intern!(py, "to_py_datetime"))?; - let time_unit = tu.to_ascii(); - convert.call1((v, time_unit, time_zone.as_str())) - } else { - timestamp_to_naive_datetime(v, tu).into_pyobject(py) - } -} - /// Holds a Python type object and implements hashing / equality based on the pointer address of the /// type object. This is used as a hashtable key instead of only the `usize` pointer value, as we /// need to hold a ref to the Python type object to keep it alive. @@ -273,18 +251,34 @@ pub(crate) fn py_object_to_any_value<'py>( } fn get_datetime(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { - // Probably needs to wait for - // https://github.com/pola-rs/polars/issues/16199 to do it a faster way. - Python::with_gil(|py| { - let date = pl_utils(py) - .bind(py) - .getattr(intern!(py, "datetime_to_int")) - .unwrap() - .call1((ob, intern!(py, "us"))) - .unwrap(); - let v = date.extract::()?; - Ok(AnyValue::Datetime(v, TimeUnit::Microseconds, None)) - }) + let py = ob.py(); + let tzinfo = ob.getattr(intern!(py, "tzinfo"))?; + + let timestamp = if tzinfo.is_none() { + let datetime = ob.extract::()?; + let delta = datetime - NaiveDateTime::UNIX_EPOCH; + delta.num_microseconds().unwrap() + } else if tzinfo.hasattr(intern!(py, "key"))? { + let datetime = ob.extract::>()?; + if datetime.year() >= 2100 { + // chrono-tz does not support dates after 2100 + // https://github.com/chronotope/chrono-tz/issues/135 + pl_utils(py) + .bind(py) + .getattr(intern!(py, "datetime_to_int"))? + .call1((ob, intern!(py, "us")))? + .extract::()? + } else { + let delta = datetime.to_utc() - DateTime::UNIX_EPOCH; + delta.num_microseconds().unwrap() + } + } else { + let datetime = ob.extract::>()?; + let delta = datetime.to_utc() - DateTime::UNIX_EPOCH; + delta.num_microseconds().unwrap() + }; + + Ok(AnyValue::Datetime(timestamp, TimeUnit::Microseconds, None)) } fn get_timedelta(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { diff --git a/crates/polars-python/src/conversion/chunked_array.rs b/crates/polars-python/src/conversion/chunked_array.rs index de8ef187d4c4..8e5e56d61350 100644 --- a/crates/polars-python/src/conversion/chunked_array.rs +++ b/crates/polars-python/src/conversion/chunked_array.rs @@ -5,7 +5,7 @@ use pyo3::types::{PyBytes, PyList, PyNone, PyTuple}; use pyo3::{intern, BoundObject}; use super::datetime::{ - elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime, timestamp_to_naive_datetime, + datetime_to_py_object, elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime, }; use super::{decimal_to_digits, struct_dict}; use crate::prelude::*; @@ -78,27 +78,12 @@ impl<'py> IntoPyObject<'py> for &Wrap<&DatetimeChunked> { type Error = PyErr; fn into_pyobject(self, py: Python<'py>) -> Result { - let time_zone = self.0.time_zone(); - if time_zone.is_some() { - // Switch to more efficient code path in - // https://github.com/pola-rs/polars/issues/16199 - let utils = pl_utils(py).bind(py); - let convert = utils.getattr(intern!(py, "to_py_datetime"))?; - let time_unit = self.0.time_unit().to_ascii(); - let time_zone = time_zone.as_deref().into_pyobject(py)?; - let iter = self - .0 - .iter() - .map(|opt_v| opt_v.map(|v| convert.call1((v, time_unit, &time_zone)).unwrap())); - PyList::new(py, iter) - } else { - let time_unit = self.0.time_unit(); - let iter = self - .0 - .iter() - .map(|opt_v| opt_v.map(|v| timestamp_to_naive_datetime(v, time_unit))); - PyList::new(py, iter) - } + let time_zone = self.0.time_zone().as_ref(); + let time_unit = self.0.time_unit(); + let iter = self.0.iter().map(|opt_v| { + opt_v.map(|v| datetime_to_py_object(py, v, time_unit, time_zone).unwrap()) + }); + PyList::new(py, iter) } } diff --git a/crates/polars-python/src/conversion/datetime.rs b/crates/polars-python/src/conversion/datetime.rs index 4d7e6339c685..95273819b126 100644 --- a/crates/polars-python/src/conversion/datetime.rs +++ b/crates/polars-python/src/conversion/datetime.rs @@ -1,7 +1,16 @@ //! Utilities for converting dates, times, datetimes, and so on. +use std::str::FromStr; + +use chrono_tz::Tz; use polars::datatypes::TimeUnit; -use polars_core::export::chrono::{NaiveDateTime, NaiveTime, TimeDelta}; +use polars_core::datatypes::TimeZone; +use polars_core::export::chrono::{ + DateTime, FixedOffset, NaiveDateTime, NaiveTime, TimeDelta, TimeZone as _, +}; +use pyo3::{Bound, IntoPyObject, PyAny, PyResult, Python}; + +use crate::error::PyPolarsErr; pub fn elapsed_offset_to_timedelta(elapsed: i64, time_unit: TimeUnit) -> TimeDelta { let (in_second, nano_multiplier) = match time_unit { @@ -29,3 +38,26 @@ pub fn nanos_since_midnight_to_naivetime(nanos_since_midnight: i64) -> NaiveTime NaiveTime::from_hms_opt(0, 0, 0).unwrap() + elapsed_offset_to_timedelta(nanos_since_midnight, TimeUnit::Nanoseconds) } + +pub fn datetime_to_py_object<'py>( + py: Python<'py>, + v: i64, + tu: TimeUnit, + tz: Option<&TimeZone>, +) -> PyResult> { + if let Some(time_zone) = tz { + if let Ok(tz) = Tz::from_str(time_zone) { + let utc_datetime = DateTime::UNIX_EPOCH + elapsed_offset_to_timedelta(v, tu); + let datetime = utc_datetime.with_timezone(&tz); + datetime.into_pyobject(py) + } else if let Ok(tz) = FixedOffset::from_str(time_zone) { + let naive_datetime = timestamp_to_naive_datetime(v, tu); + let datetime = tz.from_utc_datetime(&naive_datetime); + datetime.into_pyobject(py) + } else { + Err(PyPolarsErr::Other(format!("Could not parse timezone: {time_zone}")).into()) + } + } else { + timestamp_to_naive_datetime(v, tu).into_pyobject(py) + } +}