Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding quote option to writeCSV #174

Merged
merged 6 commits into from
Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions __tests__/dataframe.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1560,12 +1560,21 @@ describe("io", () => {
const expected = "fooXbar\n1X6\n2X2\n9X8\n";
expect(actual).toEqual(expected);
});
test("writeCSV:string:quote", () => {
  // Every `bar` value contains the default separator (","), so each one must
  // come back wrapped in the custom quote character instead of the default.
  const frame = pl.DataFrame({
    bar: ["a,b,c", "d,e,f", "g,h,i"],
    foo: [1, 2, 3],
  });
  const csv = frame.writeCSV({ quote: "^" }).toString();
  expect(csv).toEqual("bar,foo\n^a,b,c^,1.0\n^d,e,f^,2.0\n^g,h,i^,3.0\n");
});
test("writeCSV:string:header", () => {
const actual = df
.clone()
.writeCSV({ sep: "X", includeHeader: false })
.writeCSV({ sep: "X", includeHeader: false, lineTerminator: "|" })
.toString();
const expected = "1X6\n2X2\n9X8\n";
const expected = "1X6|2X2|9X8|";
expect(actual).toEqual(expected);
});
test("writeCSV:stream", (done) => {
Expand Down
24 changes: 20 additions & 4 deletions polars/dataframe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,25 @@ interface WriteMethods {
* If no options are specified, it will return a new string containing the contents
* ___
* @param dest file or stream to write to
* @param options
* @param options.hasHeader - Whether or not to include header in the CSV output.
* @param options.sep - Separate CSV fields with this symbol. _defaults to `,`_
* @param options.includeBom - Whether to include UTF-8 BOM in the CSV output.
* @param options.lineTerminator - String used to end each row.
* @param options.includeHeader - Whether or not to include header in the CSV output.
* @param options.sep - Separate CSV fields with this symbol. _defaults to `,`_
* @param options.quote - Character to use for quoting. Default: \". Note: it will not be used when sep is used.
* @param options.batchSize - Number of rows that will be processed per thread.
* @param options.datetimeFormat - A format string, with the specifiers defined by the
* `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
* Rust crate. If no format specified, the default fractional-second
* precision is inferred from the maximum timeunit found in the frame's
* Datetime cols (if any).
* @param options.dateFormat - A format string, with the specifiers defined by the
* `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
* Rust crate.
* @param options.timeFormat - A format string, with the specifiers defined by the
* `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
* Rust crate.
* @param options.floatPrecision - Number of decimal places to write, applied to both `Float32` and `Float64` datatypes.
* @param options.nullValue - A string representing null values (defaulting to the empty string).
* @example
* ```
* > const df = pl.DataFrame({
Expand All @@ -86,7 +102,7 @@ interface WriteMethods {
* ... callback(null);
* ... }
* ... });
* > df.head(1).writeCSV(writeStream, {hasHeader: false});
* > df.head(1).writeCSV(writeStream, {includeHeader: false});
* writeStream: '1,6,a'
* ```
* @category IO
Expand Down
9 changes: 9 additions & 0 deletions polars/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,17 @@ export interface ConcatOptions {
* @category Options
*/
export interface WriteCsvOptions {
/** Whether to include a UTF-8 BOM in the CSV output. */
includeBom?: boolean;
/** Whether or not to include the header row in the CSV output. */
includeHeader?: boolean;
/** Symbol used to separate CSV fields (defaults to `,`). */
sep?: string;
/** Character used for quoting fields (defaults to `"`). */
quote?: string;
/** String used to end each row. */
lineTerminator?: string;
/** Number of rows that will be processed per thread. */
batchSize?: number;
/** chrono-style format string applied to Datetime columns. */
datetimeFormat?: string;
/** chrono-style format string applied to Date columns. */
dateFormat?: string;
/** chrono-style format string applied to Time columns. */
timeFormat?: string;
/** Number of decimal places to write for `Float32` and `Float64` columns. */
floatPrecision?: number;
/** String used to represent null values (defaults to the empty string). */
nullValue?: string;
}
/**
* Options for @see {@link LazyDataFrame.sinkCSV}
Expand Down
9 changes: 8 additions & 1 deletion src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -558,15 +558,22 @@ impl From<JsRowCount> for RowIndex {

/// Options object passed from JavaScript to the DataFrame CSV writer.
///
/// Every field is optional; when a field is `None` the writer code that
/// consumes this struct substitutes its own default.
#[napi(object)]
pub struct WriteCsvOptions {
/// Whether to include a UTF-8 BOM in the output (consumer default: `false`).
pub include_bom: Option<bool>,
/// Whether to write the header row (consumer default: `true`).
pub include_header: Option<bool>,
/// Field separator (consumer default: `,`).
/// NOTE(review): the consumer reads only `as_bytes()[0]`, so an empty string
/// would panic and only the first byte is honored — confirm intended.
pub sep: Option<String>,
/// Quote character (consumer default: `"`).
/// NOTE(review): as with `sep`, only the first byte is read via `as_bytes()[0]`.
pub quote: Option<String>,
/// String used to end each row (consumer default: `"\n"`).
pub line_terminator: Option<String>,
/// Number of rows processed per thread (consumer default: `1024`).
/// NOTE(review): the consumer casts this to `usize` and unwraps a
/// `NonZeroUsize`, so `0` would panic — confirm whether validation is needed.
pub batch_size: Option<i64>,
/// chrono-style format string for Datetime columns; forwarded as-is.
pub datetime_format: Option<String>,
/// chrono-style format string for Date columns; forwarded as-is.
pub date_format: Option<String>,
/// chrono-style format string for Time columns; forwarded as-is.
pub time_format: Option<String>,
/// Number of decimal places written for float columns; cast to `usize` by the consumer.
pub float_precision: Option<i64>,
/// String written for null values; the consumer falls back to
/// `SerializeOptions::default().null` when unset.
pub null_value: Option<String>
}

#[napi(object)]
pub struct SinkCsvOptions {
pub include_header: Option<bool>,
pub quote: Option<String>,
pub include_bom: Option<bool>,
pub separator: Option<String>,
pub line_terminator: Option<String>,
Expand Down
32 changes: 27 additions & 5 deletions src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1312,11 +1312,17 @@ impl JsDataFrame {
env: Env,
) -> napi::Result<()> {
let include_header = options.include_header.unwrap_or(true);
let sep = options.sep.unwrap_or(",".to_owned());
let sep = sep.as_bytes()[0];
let quote = options.quote.unwrap_or(",".to_owned());
let quote = quote.as_bytes()[0];

let sep = options.sep.unwrap_or(",".to_owned()).as_bytes()[0];
let quote = options.quote.unwrap_or("\"".to_owned()).as_bytes()[0];
let include_bom = options.include_bom.unwrap_or(false);
let line_terminator = options.line_terminator.unwrap_or("\n".to_owned());
let batch_size = NonZeroUsize::new(options.batch_size.unwrap_or(1024) as usize);
let date_format = options.date_format;
let time_format = options.time_format;
let datetime_format = options.datetime_format;
let float_precision: Option<usize> = options.float_precision.map(|fp| fp as usize);
let null_value = options.null_value.unwrap_or(SerializeOptions::default().null);

match path_or_buffer.get_type()? {
ValueType::String => {
let path: napi::JsString = unsafe { path_or_buffer.cast() };
Expand All @@ -1325,8 +1331,16 @@ impl JsDataFrame {
let f = std::fs::File::create(path).unwrap();
let f = BufWriter::new(f);
CsvWriter::new(f)
.include_bom(include_bom)
.include_header(include_header)
.with_separator(sep)
.with_line_terminator(line_terminator)
.with_batch_size(batch_size.unwrap())
.with_datetime_format(datetime_format)
.with_date_format(date_format)
.with_time_format(time_format)
.with_float_precision(float_precision)
.with_null_value(null_value)
.with_quote_char(quote)
.finish(&mut self.df)
.map_err(JsPolarsErr::from)?;
Expand All @@ -1336,8 +1350,16 @@ impl JsDataFrame {
let writeable = JsWriteStream { inner, env: &env };

CsvWriter::new(writeable)
.include_bom(include_bom)
.include_header(include_header)
.with_separator(sep)
.with_line_terminator(line_terminator)
.with_batch_size(batch_size.unwrap())
.with_datetime_format(datetime_format)
.with_date_format(date_format)
.with_time_format(time_format)
.with_float_precision(float_precision)
.with_null_value(null_value)
.with_quote_char(quote)
.finish(&mut self.df)
.map_err(JsPolarsErr::from)?;
Expand Down