Skip to content

Commit

Permalink
perf: Support datetime in predicate during hive partition pruning (#1…
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion authored Jul 11, 2024
1 parent fbc56ff commit 662696a
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
2 changes: 1 addition & 1 deletion crates/polars-io/src/predicates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ impl ColumnStats {

/// Returns whether the [`DataType`] supports minimum/maximum operations.
fn use_min_max(dtype: &DataType) -> bool {
- dtype.is_numeric()
+ dtype.to_physical().is_numeric()
|| matches!(
dtype,
DataType::String | DataType::Binary | DataType::Boolean
Expand Down
12 changes: 12 additions & 0 deletions py-polars/tests/unit/io/test_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,3 +729,15 @@ def test_hive_write_dates(tmp_path: Path) -> None:
lf.collect(),
df.with_columns(pl.col("date1", "date2").cast(pl.String)),
)


@pytest.mark.write_disk()
def test_hive_predicate_dates_14712(
    tmp_path: Path, monkeypatch: Any, capfd: Any
) -> None:
    """Check that a datetime predicate on a hive partition column skips files.

    A single-row frame is written to ``tmp_path`` partitioned on the datetime
    column ``a``. The dataset is then scanned with a filter that excludes that
    exact datetime, and the test asserts that stderr reports one file skipped
    by hive partitioning.
    """
    # The assertion below reads the skip message from stderr, so
    # POLARS_VERBOSE is set before any polars call is made.
    monkeypatch.setenv("POLARS_VERBOSE", "1")

    partition_value = datetime(2024, 1, 1)

    # Write one row, partitioned by the datetime column "a".
    source_frame = pl.DataFrame({"a": [partition_value]})
    source_frame.write_parquet_partitioned(tmp_path, ["a"])

    # The predicate rejects the only partition value that was written.
    excluding_query = pl.scan_parquet(tmp_path).filter(
        pl.col("a") != partition_value
    )
    excluding_query.collect()

    captured_stderr = capfd.readouterr().err
    assert "hive partitioning: skipped 1 files" in captured_stderr

0 comments on commit 662696a

Please sign in to comment.