From 4af49ae8fe9da3f8d7d077ace5ceb367f0916679 Mon Sep 17 00:00:00 2001 From: Gert Hulselmans Date: Fri, 10 Dec 2021 13:40:34 +0100 Subject: [PATCH] Rename "has_headers" to "has_header" and "stop_after_n_rows" to "n_rows". --- polars/benches/csv.rs | 2 +- polars/benches/groupby.rs | 2 +- polars/polars-io/src/csv.rs | 12 +- polars/polars-io/src/ipc.rs | 18 +-- polars/polars-io/src/lib.rs | 10 +- polars/polars-io/src/parquet.rs | 22 ++-- .../polars-lazy/src/datafusion/conversion.rs | 14 +-- polars/polars-lazy/src/frame.rs | 22 ++-- polars/polars-lazy/src/logical_plan/alp.rs | 6 +- .../src/logical_plan/conversion.rs | 8 +- polars/polars-lazy/src/logical_plan/mod.rs | 14 +-- .../optimizer/aggregate_pushdown.rs | 6 +- .../optimizer/aggregate_scan_projections.rs | 6 +- .../optimizer/predicate_pushdown/mod.rs | 4 +- .../optimizer/projection_pushdown.rs | 4 +- .../src/physical_plan/executors/mod.rs | 4 +- .../src/physical_plan/executors/scan.rs | 28 ++--- .../polars-lazy/src/physical_plan/planner.rs | 4 +- polars/src/docs/eager.rs | 2 +- py-polars/polars/internals/frame.py | 48 ++++---- py-polars/polars/internals/lazy_frame.py | 16 +-- py-polars/polars/io.py | 108 +++++++++--------- py-polars/src/dataframe.rs | 18 +-- py-polars/src/lazy/dataframe.rs | 12 +- py-polars/tests/test_df.py | 4 +- py-polars/tests/test_io.py | 2 +- 26 files changed, 187 insertions(+), 209 deletions(-) diff --git a/polars/benches/csv.rs b/polars/benches/csv.rs index 6cc504c3ccf4..21390a00ae13 100644 --- a/polars/benches/csv.rs +++ b/polars/benches/csv.rs @@ -6,7 +6,7 @@ fn prepare_reader() -> Result> { let path = std::env::var("CSV_SRC").expect("env var CSV_SRC pointing to the csv_file is not set"); - Ok(CsvReader::from_path(&path)?.with_stop_after_n_rows(Some(10000))) + Ok(CsvReader::from_path(&path)?.with_n_rows(Some(10000))) } fn csv_parsing_benchmark(c: &mut Criterion) { diff --git a/polars/benches/groupby.rs b/polars/benches/groupby.rs index f4ff73c781ea..70d767bee188 100644 --- a/polars/benches/groupby.rs +++ b/polars/benches/groupby.rs @@ -11,7 +11,7 @@ lazy_static! { let mut df = CsvReader::from_path(&path) .expect("could not read file") // 1M rows - .with_stop_after_n_rows(Some(1000000)) + .with_n_rows(Some(1000000)) .finish() .unwrap(); df.may_apply("id1", |s| s.cast(&DataType::Categorical)) diff --git a/polars/polars-io/src/csv.rs b/polars/polars-io/src/csv.rs index af14beee1eda..ce7af73d2c59 100644 --- a/polars/polars-io/src/csv.rs +++ b/polars/polars-io/src/csv.rs @@ -199,7 +199,7 @@ where /// Aggregates chunk afterwards to a single chunk. rechunk: bool, /// Stop reading from the csv after this number of rows is reached - stop_after_n_rows: Option, + n_rows: Option, // used by error ignore logic max_records: Option, skip_rows: usize, @@ -246,8 +246,8 @@ where /// Try to stop parsing when `n` rows are parsed. During multithreaded parsing the upper bound `n` cannot /// be guaranteed. 
- pub fn with_stop_after_n_rows(mut self, num_rows: Option) -> Self { - self.stop_after_n_rows = num_rows; + pub fn with_n_rows(mut self, num_rows: Option) -> Self { + self.n_rows = num_rows; self } @@ -415,7 +415,7 @@ where CsvReader { reader, rechunk: true, - stop_after_n_rows: None, + n_rows: None, max_records: Some(128), skip_rows: 0, projection: None, @@ -490,7 +490,7 @@ where let reader_bytes = get_reader_bytes(&mut self.reader)?; let mut csv_reader = CoreReader::new( reader_bytes, - self.stop_after_n_rows, + self.n_rows, self.skip_rows, self.projection, self.max_records, @@ -523,7 +523,7 @@ where let reader_bytes = get_reader_bytes(&mut self.reader)?; let mut csv_reader = CoreReader::new( reader_bytes, - self.stop_after_n_rows, + self.n_rows, self.skip_rows, self.projection, self.max_records, diff --git a/polars/polars-io/src/ipc.rs b/polars/polars-io/src/ipc.rs index a4eefd08af54..b528268ebd74 100644 --- a/polars/polars-io/src/ipc.rs +++ b/polars/polars-io/src/ipc.rs @@ -63,7 +63,7 @@ pub struct IpcReader { reader: R, /// Aggregates chunks afterwards to a single chunk. rechunk: bool, - stop_after_n_rows: Option, + n_rows: Option, projection: Option>, columns: Option>, } @@ -81,8 +81,8 @@ impl IpcReader { Ok(metadata.schema().clone()) } /// Stop reading when `n` rows are read. - pub fn with_stop_after_n_rows(mut self, num_rows: Option) -> Self { - self.stop_after_n_rows = num_rows; + pub fn with_n_rows(mut self, num_rows: Option) -> Self { + self.n_rows = num_rows; self } @@ -119,13 +119,7 @@ impl IpcReader { }), ); - finish_reader( - reader, - rechunk, - self.stop_after_n_rows, - predicate, - aggregate, - ) + finish_reader(reader, rechunk, self.n_rows, predicate, aggregate) } } @@ -150,7 +144,7 @@ where IpcReader { reader, rechunk: true, - stop_after_n_rows: None, + n_rows: None, columns: None, projection: None, } @@ -199,7 +193,7 @@ where } let ipc_reader = read::FileReader::new(&mut self.reader, metadata, self.projection); - finish_reader(ipc_reader, rechunk, self.stop_after_n_rows, None, None) + finish_reader(ipc_reader, rechunk, self.n_rows, None, None) } } diff --git a/polars/polars-io/src/lib.rs b/polars/polars-io/src/lib.rs index 5e99ad193c96..544d3cf2ebf1 100644 --- a/polars/polars-io/src/lib.rs +++ b/polars/polars-io/src/lib.rs @@ -68,18 +68,18 @@ pub trait ArrowReader { pub(crate) fn finish_reader( mut reader: R, rechunk: bool, - stop_after_n_rows: Option, + n_rows: Option, predicate: Option>, aggregate: Option<&[ScanAggregation]>, ) -> Result { use polars_core::utils::accumulate_dataframes_vertical; use std::convert::TryFrom; - let mut n_rows = 0; + let mut num_rows = 0; let mut parsed_dfs = Vec::with_capacity(1024); while let Some(batch) = reader.next_record_batch()? 
{ - n_rows += batch.num_rows(); + num_rows += batch.num_rows(); let mut df = DataFrame::try_from(batch)?; @@ -102,8 +102,8 @@ pub(crate) fn finish_reader( } parsed_dfs.push(df); - if let Some(n) = stop_after_n_rows { - if n_rows >= n { + if let Some(n) = n_rows { + if num_rows >= n { break; } } diff --git a/polars/polars-io/src/parquet.rs b/polars/polars-io/src/parquet.rs index bc2bcd3b6480..a6dc3983bde3 100644 --- a/polars/polars-io/src/parquet.rs +++ b/polars/polars-io/src/parquet.rs @@ -34,7 +34,7 @@ use std::sync::Arc; pub struct ParquetReader { reader: R, rechunk: bool, - stop_after_n_rows: Option, + n_rows: Option, columns: Option>, projection: Option>, } @@ -56,24 +56,18 @@ where let reader = read::RecordReader::try_new( &mut self.reader, projection.map(|x| x.to_vec()), - self.stop_after_n_rows, + self.n_rows, None, None, )?; - finish_reader( - reader, - rechunk, - self.stop_after_n_rows, - predicate, - aggregate, - ) + finish_reader(reader, rechunk, self.n_rows, predicate, aggregate) } /// Stop parsing when `n` rows are parsed. By settings this parameter the csv will be parsed /// sequentially. - pub fn with_stop_after_n_rows(mut self, num_rows: Option) -> Self { - self.stop_after_n_rows = num_rows; + pub fn with_n_rows(mut self, num_rows: Option) -> Self { + self.n_rows = num_rows; self } @@ -116,7 +110,7 @@ where ParquetReader { reader, rechunk: false, - stop_after_n_rows: None, + n_rows: None, columns: None, projection: None, } @@ -145,11 +139,11 @@ where let reader = read::RecordReader::try_new( &mut self.reader, self.projection, - self.stop_after_n_rows, + self.n_rows, None, None, )?; - finish_reader(reader, rechunk, self.stop_after_n_rows, None, None) + finish_reader(reader, rechunk, self.n_rows, None, None) } } diff --git a/polars/polars-lazy/src/datafusion/conversion.rs b/polars/polars-lazy/src/datafusion/conversion.rs index fa5e2a410d5c..0c00b024612a 100644 --- a/polars/polars-lazy/src/datafusion/conversion.rs +++ b/polars/polars-lazy/src/datafusion/conversion.rs @@ -291,7 +291,7 @@ pub fn to_datafusion_lp(lp: LogicalPlan) -> Result { delimiter, ignore_errors, skip_rows, - stop_after_n_rows, + n_rows, .. } => { let schema = schema.to_arrow(); @@ -300,24 +300,20 @@ pub fn to_datafusion_lp(lp: LogicalPlan) -> Result { .delimiter(delimiter) .schema(&schema); if ignore_errors || skip_rows > 0 { - return Err(PolarsError::ComputeError("DataFusion does not support `ignore_errors`, `skip_rows`, `stop_after_n_rows`, `with_columns`".into())); + return Err(PolarsError::ComputeError("DataFusion does not support `ignore_errors`, `skip_rows`, `n_rows`, `with_columns`".into())); } let builder = LogicalPlanBuilder::scan_csv(try_path_to_str(&path)?, options, None).unwrap(); - match stop_after_n_rows { + match n_rows { Some(n) => builder.limit(n).unwrap().build().unwrap(), None => builder.build().unwrap(), } } #[cfg(feature = "parquet")] - ParquetScan { - path, - stop_after_n_rows, - .. - } => { + ParquetScan { path, n_rows, .. 
} => { let builder = LogicalPlanBuilder::scan_parquet(try_path_to_str(&path)?, None, 8).unwrap(); - match stop_after_n_rows { + match n_rows { Some(n) => builder.limit(n).unwrap().build().unwrap(), None => builder.build().unwrap(), } diff --git a/polars/polars-lazy/src/frame.rs b/polars/polars-lazy/src/frame.rs index 932c7fa417e7..6a943bd78f3f 100644 --- a/polars/polars-lazy/src/frame.rs +++ b/polars/polars-lazy/src/frame.rs @@ -39,7 +39,7 @@ pub struct LazyCsvReader<'a> { has_header: bool, ignore_errors: bool, skip_rows: usize, - stop_after_n_rows: Option, + n_rows: Option, cache: bool, schema: Option, schema_overwrite: Option<&'a Schema>, @@ -59,7 +59,7 @@ impl<'a> LazyCsvReader<'a> { has_header: true, ignore_errors: false, skip_rows: 0, - stop_after_n_rows: None, + n_rows: None, cache: true, schema: None, schema_overwrite: None, @@ -73,8 +73,8 @@ impl<'a> LazyCsvReader<'a> { /// Try to stop parsing when `n` rows are parsed. During multithreaded parsing the upper bound `n` cannot /// be guaranteed. - pub fn with_stop_after_n_rows(mut self, num_rows: Option) -> Self { - self.stop_after_n_rows = num_rows; + pub fn with_n_rows(mut self, num_rows: Option) -> Self { + self.n_rows = num_rows; self } @@ -184,7 +184,7 @@ impl<'a> LazyCsvReader<'a> { self.has_header, self.ignore_errors, self.skip_rows, - self.stop_after_n_rows, + self.n_rows, self.cache, self.schema, self.schema_overwrite, @@ -295,12 +295,8 @@ impl LazyFrame { /// Create a LazyFrame directly from a parquet scan. #[cfg(feature = "parquet")] - pub fn scan_parquet( - path: String, - stop_after_n_rows: Option, - cache: bool, - ) -> Result { - let mut lf: LazyFrame = LogicalPlanBuilder::scan_parquet(path, stop_after_n_rows, cache)? + pub fn scan_parquet(path: String, n_rows: Option, cache: bool) -> Result { + let mut lf: LazyFrame = LogicalPlanBuilder::scan_parquet(path, n_rows, cache)? .build() .into(); lf.opt_state.agg_scan_projection = true; @@ -309,9 +305,9 @@ impl LazyFrame { /// Create a LazyFrame directly from a ipc scan. #[cfg(feature = "ipc")] - pub fn scan_ipc(path: String, stop_after_n_rows: Option, cache: bool) -> Result { + pub fn scan_ipc(path: String, n_rows: Option, cache: bool) -> Result { let options = IpcOptions { - stop_after_n_rows, + n_rows, cache, with_columns: None, }; diff --git a/polars/polars-lazy/src/logical_plan/alp.rs b/polars/polars-lazy/src/logical_plan/alp.rs index e77ddbc1cd96..4a2cd732e1a2 100644 --- a/polars/polars-lazy/src/logical_plan/alp.rs +++ b/polars/polars-lazy/src/logical_plan/alp.rs @@ -60,7 +60,7 @@ pub enum ALogicalPlan { with_columns: Option>, predicate: Option, aggregate: Vec, - stop_after_n_rows: Option, + n_rows: Option, cache: bool, }, DataFrameScan { @@ -382,7 +382,7 @@ impl ALogicalPlan { output_schema, with_columns, predicate, - stop_after_n_rows, + n_rows, cache, .. 
} => { @@ -398,7 +398,7 @@ impl ALogicalPlan { with_columns: with_columns.clone(), predicate: new_predicate, aggregate: exprs, - stop_after_n_rows: *stop_after_n_rows, + n_rows: *n_rows, cache: *cache, } } diff --git a/polars/polars-lazy/src/logical_plan/conversion.rs b/polars/polars-lazy/src/logical_plan/conversion.rs index 9c594210f88d..aedc60117a5e 100644 --- a/polars/polars-lazy/src/logical_plan/conversion.rs +++ b/polars/polars-lazy/src/logical_plan/conversion.rs @@ -228,7 +228,7 @@ pub(crate) fn to_alp( with_columns, predicate, aggregate, - stop_after_n_rows, + n_rows, cache, } => ALogicalPlan::ParquetScan { path, @@ -240,7 +240,7 @@ pub(crate) fn to_alp( .into_iter() .map(|expr| to_aexpr(expr, expr_arena)) .collect(), - stop_after_n_rows, + n_rows, cache, }, LogicalPlan::DataFrameScan { @@ -695,7 +695,7 @@ pub(crate) fn node_to_lp( with_columns, predicate, aggregate, - stop_after_n_rows, + n_rows, cache, } => LogicalPlan::ParquetScan { path, @@ -703,7 +703,7 @@ pub(crate) fn node_to_lp( with_columns, predicate: predicate.map(|n| node_to_exp(n, expr_arena)), aggregate: nodes_to_exprs(&aggregate, expr_arena), - stop_after_n_rows, + n_rows, cache, }, ALogicalPlan::DataFrameScan { diff --git a/polars/polars-lazy/src/logical_plan/mod.rs b/polars/polars-lazy/src/logical_plan/mod.rs index d128de4230b5..ebce02d68095 100644 --- a/polars/polars-lazy/src/logical_plan/mod.rs +++ b/polars/polars-lazy/src/logical_plan/mod.rs @@ -141,7 +141,7 @@ impl LiteralValue { #[derive(Clone, Debug)] #[cfg(feature = "ipc")] pub struct IpcOptions { - pub(crate) stop_after_n_rows: Option, + pub(crate) n_rows: Option, pub(crate) with_columns: Option>, pub(crate) cache: bool, } @@ -153,7 +153,7 @@ pub struct CsvParserOptions { pub(crate) quote_char: Option, pub(crate) has_header: bool, pub(crate) skip_rows: usize, - pub(crate) stop_after_n_rows: Option, + pub(crate) n_rows: Option, pub(crate) with_columns: Option>, pub(crate) low_memory: bool, pub(crate) ignore_errors: bool, @@ -193,7 +193,7 @@ pub enum LogicalPlan { with_columns: Option>, predicate: Option, aggregate: Vec, - stop_after_n_rows: Option, + n_rows: Option, cache: bool, }, #[cfg(feature = "ipc")] @@ -819,7 +819,7 @@ impl LogicalPlanBuilder { #[cfg_attr(docsrs, doc(cfg(feature = "parquet")))] pub fn scan_parquet>( path: P, - stop_after_n_rows: Option, + n_rows: Option, cache: bool, ) -> Result { let path = path.into(); @@ -829,7 +829,7 @@ impl LogicalPlanBuilder { Ok(LogicalPlan::ParquetScan { path, schema, - stop_after_n_rows, + n_rows, with_columns: None, predicate: None, aggregate: vec![], @@ -863,7 +863,7 @@ impl LogicalPlanBuilder { has_header: bool, ignore_errors: bool, mut skip_rows: usize, - stop_after_n_rows: Option, + n_rows: Option, cache: bool, schema: Option>, schema_overwrite: Option<&Schema>, @@ -907,7 +907,7 @@ impl LogicalPlanBuilder { delimiter, ignore_errors, skip_rows, - stop_after_n_rows, + n_rows, with_columns: None, low_memory, cache, diff --git a/polars/polars-lazy/src/logical_plan/optimizer/aggregate_pushdown.rs b/polars/polars-lazy/src/logical_plan/optimizer/aggregate_pushdown.rs index 029d7e4d9b7c..3149081f2f02 100644 --- a/polars/polars-lazy/src/logical_plan/optimizer/aggregate_pushdown.rs +++ b/polars/polars-lazy/src/logical_plan/optimizer/aggregate_pushdown.rs @@ -155,7 +155,7 @@ impl OptimizationRule for AggregatePushdown { with_columns, predicate, aggregate, - stop_after_n_rows, + n_rows, cache, } => match self.accumulated_projections.is_empty() { true => { @@ -168,7 +168,7 @@ impl OptimizationRule for AggregatePushdown 
{ with_columns, predicate, aggregate, - stop_after_n_rows, + n_rows, cache, }, ); @@ -183,7 +183,7 @@ impl OptimizationRule for AggregatePushdown { with_columns, predicate, aggregate, - stop_after_n_rows, + n_rows, cache, }) } diff --git a/polars/polars-lazy/src/logical_plan/optimizer/aggregate_scan_projections.rs b/polars/polars-lazy/src/logical_plan/optimizer/aggregate_scan_projections.rs index a497749a8a36..d32c78513941 100644 --- a/polars/polars-lazy/src/logical_plan/optimizer/aggregate_scan_projections.rs +++ b/polars/polars-lazy/src/logical_plan/optimizer/aggregate_scan_projections.rs @@ -149,7 +149,7 @@ impl OptimizationRule for AggScanProjection { predicate, aggregate, with_columns, - stop_after_n_rows, + n_rows, cache, } = lp { @@ -166,7 +166,7 @@ impl OptimizationRule for AggScanProjection { predicate, aggregate, with_columns, - stop_after_n_rows, + n_rows, cache, }; lp_arena.replace(node, lp); @@ -180,7 +180,7 @@ impl OptimizationRule for AggScanProjection { with_columns: new_with_columns, predicate, aggregate, - stop_after_n_rows, + n_rows, cache, }; Some(self.finish_rewrite(lp, expr_arena, lp_arena, &path, with_columns)) diff --git a/polars/polars-lazy/src/logical_plan/optimizer/predicate_pushdown/mod.rs b/polars/polars-lazy/src/logical_plan/optimizer/predicate_pushdown/mod.rs index 4d987931121c..b8d392937b7f 100644 --- a/polars/polars-lazy/src/logical_plan/optimizer/predicate_pushdown/mod.rs +++ b/polars/polars-lazy/src/logical_plan/optimizer/predicate_pushdown/mod.rs @@ -207,7 +207,7 @@ impl PredicatePushDown { with_columns, predicate, aggregate, - stop_after_n_rows, + n_rows, cache, } => { let predicate = predicate_at_scan(acc_predicates, predicate, expr_arena); @@ -219,7 +219,7 @@ impl PredicatePushDown { with_columns, predicate, aggregate, - stop_after_n_rows, + n_rows, cache, }; Ok(lp) diff --git a/polars/polars-lazy/src/logical_plan/optimizer/projection_pushdown.rs b/polars/polars-lazy/src/logical_plan/optimizer/projection_pushdown.rs index 5c06281ec603..1c16e249b713 100644 --- a/polars/polars-lazy/src/logical_plan/optimizer/projection_pushdown.rs +++ b/polars/polars-lazy/src/logical_plan/optimizer/projection_pushdown.rs @@ -348,7 +348,7 @@ impl ProjectionPushDown { schema, predicate, aggregate, - stop_after_n_rows, + n_rows, cache, .. 
} => { @@ -370,7 +370,7 @@ impl ProjectionPushDown { with_columns, predicate, aggregate, - stop_after_n_rows, + n_rows, cache, }; Ok(lp) diff --git a/polars/polars-lazy/src/physical_plan/executors/mod.rs b/polars/polars-lazy/src/physical_plan/executors/mod.rs index f7f24d38a12b..4fec3a3948ad 100644 --- a/polars/polars-lazy/src/physical_plan/executors/mod.rs +++ b/polars/polars-lazy/src/physical_plan/executors/mod.rs @@ -21,10 +21,10 @@ use std::path::PathBuf; const POLARS_VERBOSE: &str = "POLARS_VERBOSE"; -fn set_n_rows(stop_after_n_rows: Option) -> Option { +fn set_n_rows(n_rows: Option) -> Option { let fetch_rows = FETCH_ROWS.with(|fetch_rows| fetch_rows.get()); match fetch_rows { - None => stop_after_n_rows, + None => n_rows, Some(n) => Some(n), } } diff --git a/polars/polars-lazy/src/physical_plan/executors/scan.rs b/polars/polars-lazy/src/physical_plan/executors/scan.rs index f7294171f17f..a666b0cf18d9 100644 --- a/polars/polars-lazy/src/physical_plan/executors/scan.rs +++ b/polars/polars-lazy/src/physical_plan/executors/scan.rs @@ -39,7 +39,7 @@ fn prepare_scan_args<'a>( predicate: &Option>, with_columns: &mut Option>, schema: &mut SchemaRef, - stop_after_n_rows: Option, + n_rows: Option, aggregate: &'a [ScanAggregation], ) -> (File, Projection, StopNRows, Aggregation<'a>, Predicate) { let file = std::fs::File::open(&path).unwrap(); @@ -54,7 +54,7 @@ fn prepare_scan_args<'a>( .collect() }); - let stop_after_n_rows = set_n_rows(stop_after_n_rows); + let n_rows = set_n_rows(n_rows); let aggregate = if aggregate.is_empty() { None } else { @@ -64,7 +64,7 @@ fn prepare_scan_args<'a>( .clone() .map(|expr| Arc::new(PhysicalIoHelper { expr }) as Arc); - (file, projection, stop_after_n_rows, aggregate, predicate) + (file, projection, n_rows, aggregate, predicate) } #[cfg(feature = "ipc")] @@ -83,16 +83,16 @@ impl Executor for IpcExec { if let Some(df) = cached { return Ok(df); } - let (file, projection, stop_after_n_rows, aggregate, predicate) = prepare_scan_args( + let (file, projection, n_rows, aggregate, predicate) = prepare_scan_args( &self.path, &self.predicate, &mut self.options.with_columns, &mut self.schema, - self.options.stop_after_n_rows, + self.options.n_rows, &self.aggregate, ); let df = IpcReader::new(file) - .with_stop_after_n_rows(stop_after_n_rows) + .with_n_rows(n_rows) .finish_with_scan_ops( predicate, aggregate, @@ -117,7 +117,7 @@ pub struct ParquetExec { with_columns: Option>, predicate: Option>, aggregate: Vec, - stop_after_n_rows: Option, + n_rows: Option, cache: bool, } @@ -129,7 +129,7 @@ impl ParquetExec { with_columns: Option>, predicate: Option>, aggregate: Vec, - stop_after_n_rows: Option, + n_rows: Option, cache: bool, ) -> Self { ParquetExec { @@ -138,7 +138,7 @@ impl ParquetExec { with_columns, predicate, aggregate, - stop_after_n_rows, + n_rows, cache, } } @@ -151,17 +151,17 @@ impl Executor for ParquetExec { if let Some(df) = cached { return Ok(df); } - let (file, projection, stop_after_n_rows, aggregate, predicate) = prepare_scan_args( + let (file, projection, n_rows, aggregate, predicate) = prepare_scan_args( &self.path, &self.predicate, &mut self.with_columns, &mut self.schema, - self.stop_after_n_rows, + self.n_rows, &self.aggregate, ); let df = ParquetReader::new(file) - .with_stop_after_n_rows(stop_after_n_rows) + .with_n_rows(n_rows) .finish_with_scan_ops( predicate, aggregate, @@ -208,7 +208,7 @@ impl Executor for CsvExec { if projected_len == 0 { with_columns = None; } - let stop_after_n_rows = set_n_rows(self.options.stop_after_n_rows); + let 
n_rows = set_n_rows(self.options.n_rows); let predicate = self .predicate .clone() @@ -227,7 +227,7 @@ impl Executor for CsvExec { .with_delimiter(self.options.delimiter) .with_ignore_parser_errors(self.options.ignore_errors) .with_skip_rows(self.options.skip_rows) - .with_stop_after_n_rows(stop_after_n_rows) + .with_n_rows(n_rows) .with_columns(with_columns) .low_memory(self.options.low_memory) .with_null_values(self.options.null_values.clone()) diff --git a/polars/polars-lazy/src/physical_plan/planner.rs b/polars/polars-lazy/src/physical_plan/planner.rs index 26121785f7e6..87398948024a 100644 --- a/polars/polars-lazy/src/physical_plan/planner.rs +++ b/polars/polars-lazy/src/physical_plan/planner.rs @@ -179,7 +179,7 @@ impl DefaultPlanner { with_columns, predicate, aggregate, - stop_after_n_rows, + n_rows, cache, } => { let predicate = predicate @@ -193,7 +193,7 @@ impl DefaultPlanner { with_columns, predicate, aggregate, - stop_after_n_rows, + n_rows, cache, ))) } diff --git a/polars/src/docs/eager.rs b/polars/src/docs/eager.rs index 9f372a616d42..d77a35dda443 100644 --- a/polars/src/docs/eager.rs +++ b/polars/src/docs/eager.rs @@ -606,7 +606,7 @@ //! //! // write DataFrame to file //! CsvWriter::new(&mut file) -//! .has_headers(true) +//! .has_header(true) //! .with_delimiter(b',') //! .finish(df); //! # Ok(()) diff --git a/py-polars/polars/internals/frame.py b/py-polars/polars/internals/frame.py index 93299a92e22e..79e603bafc9c 100644 --- a/py-polars/polars/internals/frame.py +++ b/py-polars/polars/internals/frame.py @@ -367,9 +367,9 @@ def read_csv( file: Union[str, BinaryIO, bytes], infer_schema_length: Optional[int] = 100, batch_size: int = 64, - has_headers: bool = True, + has_header: bool = True, ignore_errors: bool = False, - stop_after_n_rows: Optional[int] = None, + n_rows: Optional[int] = None, skip_rows: int = 0, projection: Optional[tp.List[int]] = None, sep: str = ",", @@ -396,12 +396,12 @@ def read_csv( If set to `None`, a full table scan will be done (slow). batch_size Number of lines to read into the buffer at once. Modify this to change performance. - has_headers + has_header Indicate if first row of dataset is header or not. If set to False first row will be set to `column_x`, `x` being an enumeration over every column in the dataset. ignore_errors Try to keep reading lines if some lines yield errors. - stop_after_n_rows + n_rows After n rows are read from the CSV, it stops reading. During multi-threaded parsing, an upper bound of `n` rows cannot be guaranteed. @@ -445,7 +445,7 @@ def read_csv( -------- >>> df = pl.read_csv( - ... "file.csv", sep=";", stop_after_n_rows=25 + ... "file.csv", sep=";", n_rows=25 ... ) # doctest: +SKIP """ @@ -479,9 +479,9 @@ def read_csv( file, infer_schema_length, batch_size, - has_headers, + has_header, ignore_errors, - stop_after_n_rows, + n_rows, skip_rows, projection, sep, @@ -503,9 +503,9 @@ def read_csv( @staticmethod def read_parquet( file: Union[str, BinaryIO], - columns: Optional[tp.List[str]] = None, + columns: Optional[Union[tp.List[int], tp.List[str]]] = None, projection: Optional[tp.List[int]] = None, - stop_after_n_rows: Optional[int] = None, + n_rows: Optional[int] = None, ) -> "DataFrame": """ Read into a DataFrame from a parquet file. @@ -515,45 +515,45 @@ def read_parquet( file Path to a file or a file like object. Any valid filepath can be used. columns - Columns to select. + Columns to select. Accepts a list of column indices (starting at zero) or a list of column names. projection Indices of columns to select. 
Note that column indices start at zero. - stop_after_n_rows - Only read specified number of rows of the dataset. After `n` stops reading. + n_rows + Stop reading from parquet file after reading ``n_rows``. """ self = DataFrame.__new__(DataFrame) self._df = PyDataFrame.read_parquet( - file, columns, projection, stop_after_n_rows + file, columns, projection, n_rows ) return self @staticmethod def read_ipc( file: Union[str, BinaryIO], - columns: Optional[tp.List[str]] = None, + columns: Optional[tp.List[int], tp.List[str]]] = None, projection: Optional[tp.List[int]] = None, - stop_after_n_rows: Optional[int] = None, + n_rows: Optional[int] = None, ) -> "DataFrame": """ - Read into a DataFrame from Arrow IPC stream format. This is also called the feather format. + Read into a DataFrame from Arrow IPC stream format. This is also called the Feather (v2) format. Parameters ---------- file Path to a file or a file like object. columns - Columns to select. + Columns to select. Accepts a list of column indices (starting at zero) or a list of column names. projection Indices of columns to select. Note that column indices start at zero. - stop_after_n_rows - Only read specified number of rows of the dataset. After `n` stops reading. + n_rows + Stop reading from IPC file after reading ``n_rows``. Returns ------- DataFrame """ self = DataFrame.__new__(DataFrame) - self._df = PyDataFrame.read_ipc(file, columns, projection, stop_after_n_rows) + self._df = PyDataFrame.read_ipc(file, columns, projection, n_rows) return self @staticmethod @@ -778,7 +778,7 @@ def to_pandas( def to_csv( self, file: Optional[Union[TextIO, BytesIO, str, Path]] = None, - has_headers: bool = True, + has_header: bool = True, sep: str = ",", ) -> Optional[str]: """ @@ -788,7 +788,7 @@ def to_csv( ---------- file File path to which the file should be written. - has_headers + has_header Whether or not to include header in the CSV output. sep Separate CSV fields with this symbol. 
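# Minimal usage sketch of the renamed eager-API parameters; the example frame
# and the path "out.csv" are hypothetical and not taken from the diff.
import polars as pl

df = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

# `has_headers` is now `has_header` on DataFrame.to_csv and pl.read_csv.
df.to_csv("out.csv", has_header=True, sep=",")

# `stop_after_n_rows` is now `n_rows`; reading stops after two rows here.
df2 = pl.read_csv("out.csv", has_header=True, n_rows=2)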
@@ -808,13 +808,13 @@ def to_csv( """ if file is None: buffer = BytesIO() - self._df.to_csv(buffer, has_headers, ord(sep)) + self._df.to_csv(buffer, has_header, ord(sep)) return str(buffer.getvalue(), encoding="utf-8") if isinstance(file, Path): file = str(file) - self._df.to_csv(file, has_headers, ord(sep)) + self._df.to_csv(file, has_header, ord(sep)) return None def to_ipc( diff --git a/py-polars/polars/internals/lazy_frame.py b/py-polars/polars/internals/lazy_frame.py index 0c08cfffca11..2045fc336137 100644 --- a/py-polars/polars/internals/lazy_frame.py +++ b/py-polars/polars/internals/lazy_frame.py @@ -42,11 +42,11 @@ def _from_pyldf(ldf: "PyLazyFrame") -> "LazyFrame": def scan_csv( file: str, infer_schema_length: Optional[int] = 100, - has_headers: bool = True, + has_header: bool = True, ignore_errors: bool = False, sep: str = ",", skip_rows: int = 0, - stop_after_n_rows: Optional[int] = None, + n_rows: Optional[int] = None, cache: bool = True, dtype: Optional[Dict[str, Type[DataType]]] = None, low_memory: bool = False, @@ -69,10 +69,10 @@ def scan_csv( self._ldf = PyLazyFrame.new_from_csv( file, sep, - has_headers, + has_header, ignore_errors, skip_rows, - stop_after_n_rows, + n_rows, cache, dtype_list, low_memory, @@ -86,26 +86,26 @@ def scan_csv( @staticmethod def scan_parquet( - file: str, stop_after_n_rows: Optional[int] = None, cache: bool = True + file: str, n_rows: Optional[int] = None, cache: bool = True ) -> "LazyFrame": """ See Also: `pl.scan_parquet` """ self = LazyFrame.__new__(LazyFrame) - self._ldf = PyLazyFrame.new_from_parquet(file, stop_after_n_rows, cache) + self._ldf = PyLazyFrame.new_from_parquet(file, n_rows, cache) return self @staticmethod def scan_ipc( - file: str, stop_after_n_rows: Optional[int] = None, cache: bool = True + file: str, n_rows: Optional[int] = None, cache: bool = True ) -> "LazyFrame": """ See Also: `pl.scan_ipc` """ self = LazyFrame.__new__(LazyFrame) - self._ldf = PyLazyFrame.new_from_ipc(file, stop_after_n_rows, cache) + self._ldf = PyLazyFrame.new_from_ipc(file, n_rows, cache) return self def pipe(self, func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any: diff --git a/py-polars/polars/io.py b/py-polars/polars/io.py index 1f5288bc88d9..85764e64258b 100644 --- a/py-polars/polars/io.py +++ b/py-polars/polars/io.py @@ -137,9 +137,9 @@ def read_csv( file: Union[str, TextIO, BytesIO, Path, BinaryIO, bytes], infer_schema_length: Optional[int] = 100, batch_size: int = 8192, - has_headers: bool = True, + has_header: bool = True, ignore_errors: bool = False, - stop_after_n_rows: Optional[int] = None, + n_rows: Optional[int] = None, skip_rows: int = 0, projection: Optional[List[int]] = None, sep: str = ",", @@ -174,13 +174,13 @@ def read_csv( If set to `None`, a full table scan will be done (slow). batch_size Number of lines to read into the buffer at once. Modify this to change performance. - has_headers + has_header Indicate if first row of dataset is header or not. If set to False first row will be set to `column_x`, `x` being an enumeration over every column in the dataset starting at 1. ignore_errors Try to keep reading lines if some lines yield errors. - stop_after_n_rows - After n rows are read from the CSV, it stops reading. + n_rows + Stop reading from CSV file after reading ``n_rows``. During multi-threaded parsing, an upper bound of `n` rows cannot be guaranteed. 
skip_rows @@ -238,7 +238,7 @@ def read_csv( storage_options = storage_options or {} - if columns and not has_headers: + if columns and not has_header: for column in columns: if not column.startswith("column_"): raise ValueError( @@ -254,7 +254,7 @@ def read_csv( if ( use_pyarrow and dtypes is None - and stop_after_n_rows is None + and n_rows is None and n_threads is None and encoding == "utf8" and not low_memory @@ -264,7 +264,7 @@ def read_csv( include_columns = None if columns: - if not has_headers: + if not has_header: # Convert 'column_1', 'column_2', ... column names to 'f0', 'f1', ... column names for pyarrow, # if CSV file does not contain a header. include_columns = [f"f{int(column[7:]) - 1}" for column in columns] @@ -279,7 +279,7 @@ def read_csv( tbl = pa.csv.read_csv( data, pa.csv.ReadOptions( - skip_rows=skip_rows, autogenerate_column_names=not has_headers + skip_rows=skip_rows, autogenerate_column_names=not has_header ), pa.csv.ParseOptions(delimiter=sep), pa.csv.ConvertOptions( @@ -289,7 +289,7 @@ def read_csv( ), ) - if not has_headers: + if not has_header: # Rename 'f0', 'f1', ... columns names autogenated by pyarrow to 'column_1', 'column_2', ... tbl = tbl.rename_columns( [f"column_{int(column[1:]) + 1}" for column in tbl.column_names] @@ -313,7 +313,7 @@ def read_csv( # Get column names of requested columns. current_columns = columns[0 : len(new_columns)] - elif not has_headers: + elif not has_header: # When there are no header, column names are autogenerated (and known). if projection: @@ -363,9 +363,9 @@ def read_csv( file=data, infer_schema_length=infer_schema_length, batch_size=batch_size, - has_headers=has_headers, + has_header=has_header, ignore_errors=ignore_errors, - stop_after_n_rows=stop_after_n_rows, + n_rows=n_rows, skip_rows=skip_rows, projection=projection, sep=sep, @@ -389,11 +389,11 @@ def read_csv( def scan_csv( file: Union[str, Path], infer_schema_length: Optional[int] = 100, - has_headers: bool = True, + has_header: bool = True, ignore_errors: bool = False, sep: str = ",", skip_rows: int = 0, - stop_after_n_rows: Optional[int] = None, + n_rows: Optional[int] = None, cache: bool = True, dtype: Optional[Dict[str, Type[DataType]]] = None, low_memory: bool = False, @@ -414,7 +414,7 @@ def scan_csv( Path to a file. infer_schema_length The number of rows Polars will read to try to determine the schema. - has_headers + has_header If the CSV file has headers or not. ignore_errors Try to keep reading lines if some lines yield errors. @@ -422,8 +422,8 @@ def scan_csv( Delimiter/ value separator. skip_rows Start reading after `skip_rows`. - stop_after_n_rows - After n rows are read from the CSV, it stops reading. + n_rows + Stop reading from IPC file after reading ``n_rows``. During multi-threaded parsing, an upper bound of `n` rows cannot be guaranteed. cache Cache the result after reading. @@ -489,11 +489,11 @@ def scan_csv( file = str(file) return LazyFrame.scan_csv( file=file, - has_headers=has_headers, + has_header=has_header, sep=sep, ignore_errors=ignore_errors, skip_rows=skip_rows, - stop_after_n_rows=stop_after_n_rows, + n_rows=n_rows, cache=cache, dtype=dtype, low_memory=low_memory, @@ -507,11 +507,11 @@ def scan_csv( def scan_ipc( file: Union[str, Path], - stop_after_n_rows: Optional[int] = None, + n_rows: Optional[int] = None, cache: bool = True, ) -> LazyFrame: """ - Lazily read from an IPC file. + Lazily read from an Arrow IPC (Feather v2) file. 
This allows the query optimizer to push down predicates and projections to the scan level, thereby potentially reducing memory overhead. @@ -519,22 +519,22 @@ def scan_ipc( Parameters ---------- file - Path to a file. - stop_after_n_rows - After n rows are read from the parquet, it stops reading. + Path to a IPC file. + n_rows + Stop reading from IPC file after reading ``n_rows``. cache Cache the result after reading. """ if isinstance(file, Path): file = str(file) return LazyFrame.scan_ipc( - file=file, stop_after_n_rows=stop_after_n_rows, cache=cache + file=file, n_rows=n_rows, cache=cache ) def scan_parquet( file: Union[str, Path], - stop_after_n_rows: Optional[int] = None, + n_rows: Optional[int] = None, cache: bool = True, ) -> LazyFrame: """ @@ -547,15 +547,15 @@ def scan_parquet( ---------- file Path to a file. - stop_after_n_rows - After n rows are read from the parquet, it stops reading. + n_rows + Stop reading from parquet file after reading ``n_rows``. cache Cache the result after reading. """ if isinstance(file, Path): file = str(file) return LazyFrame.scan_parquet( - file=file, stop_after_n_rows=stop_after_n_rows, cache=cache + file=file, n_rows=n_rows, cache=cache ) @@ -570,7 +570,6 @@ def read_ipc_schema( file Path to a file or a file like object. - Returns ------- Dictionary mapping column names to datatypes @@ -580,32 +579,31 @@ def read_ipc_schema( def read_ipc( file: Union[str, BinaryIO, BytesIO, Path, bytes], - columns: Optional[List[str]] = None, + columns: Optional[Union[List[int], List[str]]] = None, projection: Optional[List[int]] = None, - stop_after_n_rows: Optional[int] = None, + n_rows: Optional[int] = None, use_pyarrow: bool = _PYARROW_AVAILABLE, memory_map: bool = True, storage_options: Optional[Dict] = None, ) -> DataFrame: """ - Read into a DataFrame from Arrow IPC stream format. This is also called the feather format. + Read into a DataFrame from Arrow IPC (Feather v2) file. Parameters ---------- file Path to a file or a file like object. - If ``fsspec`` is installed, it will be used to open remote files + If ``fsspec`` is installed, it will be used to open remote files. columns - Columns to select. - projection - Indices of columns to select. Note that column indices start at zero. - stop_after_n_rows - Only read specified number of rows of the dataset. After `n` stops reading. + Columns to select. Accepts a list of column indices (starting at zero) or a list of column names. + n_rows + Stop reading from IPC file after reading ``n_rows``. + Only valid when `use_pyarrow=False`. use_pyarrow Use pyarrow or the native rust reader. memory_map Memory map underlying file. This will likely increase performance. - Only used when 'use_pyarrow=True' + Only used when ``use_pyarrow=True``. storage_options Extra options that make sense for ``fsspec.open()`` or a particular storage connection, e.g. host, port, username, password, etc. @@ -614,9 +612,9 @@ def read_ipc( DataFrame """ if use_pyarrow: - if stop_after_n_rows: + if n_rows: raise ValueError( - "'stop_after_n_rows' cannot be used with 'use_pyarrow=True'." + "``n_rows`` cannot be used with ``use_pyarrow=True``." 
) storage_options = storage_options or {} @@ -641,15 +639,15 @@ def read_ipc( data, columns=columns, projection=projection, - stop_after_n_rows=stop_after_n_rows, + n_rows=n_rows, ) def read_parquet( source: Union[str, List[str], Path, BinaryIO, BytesIO, bytes], - columns: Optional[List[str]] = None, + columns: Optional[Union[List[int], List[str]]] = None, projection: Optional[List[int]] = None, - stop_after_n_rows: Optional[int] = None, + n_rows: Optional[int] = None, use_pyarrow: bool = _PYARROW_AVAILABLE, memory_map: bool = True, storage_options: Optional[Dict] = None, @@ -663,19 +661,19 @@ def read_parquet( source Path to a file, list of files, or a file like object. If the path is a directory, that directory will be used as partition aware scan. - If ``fsspec`` is installed, it will be used to open remote files + If ``fsspec`` is installed, it will be used to open remote files. columns - Columns to select. + Columns to select. Accepts a list of column indices (starting at zero) or a list of column names. projection Indices of columns to select. Note that column indices start at zero. - stop_after_n_rows - After n rows are read from the parquet, it stops reading. - Only valid when 'use_pyarrow=False' + n_rows + Stop reading from parquet file after reading ``n_rows``. + Only valid when `use_pyarrow=False`. use_pyarrow Use pyarrow instead of the rust native parquet reader. The pyarrow reader is more stable. memory_map Memory map underlying file. This will likely increase performance. - Only used when 'use_pyarrow=True' + Only used when ``use_pyarrow=True``. storage_options Extra options that make sense for ``fsspec.open()`` or a particular storage connection, e.g. host, port, username, password, etc. **kwargs @@ -686,9 +684,9 @@ def read_parquet( DataFrame """ if use_pyarrow: - if stop_after_n_rows: + if n_rows: raise ValueError( - "'stop_after_n_rows' cannot be used with 'use_pyarrow=True'." + "``n_rows`` cannot be used with ``use_pyarrow=True``." 
) storage_options = storage_options or {} @@ -717,7 +715,7 @@ def read_parquet( source_prep, columns=columns, projection=projection, - stop_after_n_rows=stop_after_n_rows, + n_rows=n_rows, ) diff --git a/py-polars/src/dataframe.rs b/py-polars/src/dataframe.rs index 168923ec3d37..b759352ab11e 100644 --- a/py-polars/src/dataframe.rs +++ b/py-polars/src/dataframe.rs @@ -84,7 +84,7 @@ impl PyDataFrame { chunk_size: usize, has_header: bool, ignore_errors: bool, - stop_after_n_rows: Option, + n_rows: Option, skip_rows: usize, projection: Option>, sep: &str, @@ -149,7 +149,7 @@ impl PyDataFrame { let df = CsvReader::new(mmap_bytes_r) .infer_schema(infer_schema_length) .has_header(has_header) - .with_stop_after_n_rows(stop_after_n_rows) + .with_n_rows(n_rows) .with_delimiter(sep.as_bytes()[0]) .with_skip_rows(skip_rows) .with_ignore_parser_errors(ignore_errors) @@ -178,7 +178,7 @@ impl PyDataFrame { py_f: PyObject, columns: Option>, projection: Option>, - stop_after_n_rows: Option, + n_rows: Option, ) -> PyResult { use EitherRustPythonFile::*; @@ -188,13 +188,13 @@ impl PyDataFrame { ParquetReader::new(buf) .with_projection(projection) .with_columns(columns) - .with_stop_after_n_rows(stop_after_n_rows) + .with_n_rows(n_rows) .finish() } Rust(f) => ParquetReader::new(f) .with_projection(projection) .with_columns(columns) - .with_stop_after_n_rows(stop_after_n_rows) + .with_n_rows(n_rows) .finish(), }; let df = result.map_err(PyPolarsEr::from)?; @@ -207,13 +207,13 @@ impl PyDataFrame { py_f: PyObject, columns: Option>, projection: Option>, - stop_after_n_rows: Option, + n_rows: Option, ) -> PyResult { let file = get_file_like(py_f, false)?; let df = IpcReader::new(file) .with_projection(projection) .with_columns(columns) - .with_stop_after_n_rows(stop_after_n_rows) + .with_n_rows(n_rows) .finish() .map_err(PyPolarsEr::from)?; Ok(PyDataFrame::new(df)) @@ -264,10 +264,10 @@ impl PyDataFrame { Ok(pydf) } - pub fn to_csv(&self, py_f: PyObject, has_headers: bool, sep: u8) -> PyResult<()> { + pub fn to_csv(&self, py_f: PyObject, has_header: bool, sep: u8) -> PyResult<()> { let mut buf = get_file_like(py_f, true)?; CsvWriter::new(&mut buf) - .has_header(has_headers) + .has_header(has_header) .with_delimiter(sep) .finish(&self.df) .map_err(PyPolarsEr::from)?; diff --git a/py-polars/src/lazy/dataframe.rs b/py-polars/src/lazy/dataframe.rs index a07d05499396..7c1ddfeff3a4 100644 --- a/py-polars/src/lazy/dataframe.rs +++ b/py-polars/src/lazy/dataframe.rs @@ -125,7 +125,7 @@ impl PyLazyFrame { has_header: bool, ignore_errors: bool, skip_rows: usize, - stop_after_n_rows: Option, + n_rows: Option, cache: bool, overwrite_dtype: Option>, low_memory: bool, @@ -157,7 +157,7 @@ impl PyLazyFrame { .has_header(has_header) .with_ignore_parser_errors(ignore_errors) .with_skip_rows(skip_rows) - .with_stop_after_n_rows(stop_after_n_rows) + .with_n_rows(n_rows) .with_cache(cache) .with_dtype_overwrite(overwrite_dtype.as_ref()) .low_memory(low_memory) @@ -197,21 +197,21 @@ impl PyLazyFrame { #[cfg(feature = "parquet")] pub fn new_from_parquet( path: String, - stop_after_n_rows: Option, + n_rows: Option, cache: bool, ) -> PyResult { let lf = - LazyFrame::scan_parquet(path, stop_after_n_rows, cache).map_err(PyPolarsEr::from)?; + LazyFrame::scan_parquet(path, n_rows, cache).map_err(PyPolarsEr::from)?; Ok(lf.into()) } #[staticmethod] pub fn new_from_ipc( path: String, - stop_after_n_rows: Option, + n_rows: Option, cache: bool, ) -> PyResult { - let lf = LazyFrame::scan_ipc(path, stop_after_n_rows, 
cache).map_err(PyPolarsEr::from)?; + let lf = LazyFrame::scan_ipc(path, n_rows, cache).map_err(PyPolarsEr::from)?; Ok(lf.into()) } diff --git a/py-polars/tests/test_df.py b/py-polars/tests/test_df.py index 5921fd2480c0..c4c0fd5cd7ff 100644 --- a/py-polars/tests/test_df.py +++ b/py-polars/tests/test_df.py @@ -496,7 +496,7 @@ def test_file_buffer() -> None: f = BytesIO() f.write(b"1,2,3,4,5,6\n7,8,9,10,11,12") f.seek(0) - df = pl.DataFrame.read_csv(f, has_headers=False) + df = pl.DataFrame.read_csv(f, has_header=False) assert df.shape == (2, 6) f.seek(0) @@ -700,7 +700,7 @@ def test_read_csv_categorical() -> None: f = BytesIO() f.write(b"col1,col2,col3,col4,col5,col6\n'foo',2,3,4,5,6\n'bar',8,9,10,11,12") f.seek(0) - df = pl.DataFrame.read_csv(f, has_headers=True, dtypes={"col1": pl.Categorical}) + df = pl.DataFrame.read_csv(f, has_header=True, dtypes={"col1": pl.Categorical}) assert df["col1"].dtype == pl.Categorical diff --git a/py-polars/tests/test_io.py b/py-polars/tests/test_io.py index cc34a86f4faf..ff2bab721296 100644 --- a/py-polars/tests/test_io.py +++ b/py-polars/tests/test_io.py @@ -238,7 +238,7 @@ def test_column_rename_and_dtype_overwrite() -> None: f, new_columns=["A", "B", "C"], dtypes={"A": pl.Utf8, "C": pl.Float32}, - has_headers=False, + has_header=False, ) assert df.dtypes == [pl.Utf8, pl.Int64, pl.Float32]
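A minimal sketch of the renamed lazy/scan entry points, assuming the post-rename API shown in this patch; the file paths below are hypothetical:

import polars as pl

# `stop_after_n_rows` is now `n_rows` for scan_csv / scan_parquet / scan_ipc,
# and `has_headers` is now `has_header` for the CSV readers. During
# multithreaded CSV parsing the `n_rows` limit cannot be strictly guaranteed.
lf = pl.scan_csv("data.csv", has_header=True, n_rows=1000)

# Parquet and IPC scans take the same renamed parameter.
lf_pq = pl.scan_parquet("data.parquet", n_rows=500)

# The eager IPC reader only honours `n_rows` with the native (non-pyarrow) reader.
df = pl.read_ipc("data.ipc", n_rows=10, use_pyarrow=False)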