Skip to content

Commit

Permalink
Refactor CSV parsing options and assertions (#168)
Browse files Browse the repository at this point in the history
* Refactor CSV parsing options and assertions

* Fix quotation and delimiter validation in assertCommonOptions

* Add changeset
  • Loading branch information
kamiazya authored Mar 18, 2024
1 parent 4a0077c commit 7fad0d1
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 123 deletions.
5 changes: 5 additions & 0 deletions .changeset/lazy-geckos-double.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"web-csv-toolbox": minor
---

Refactor CSV parsing options and assertions
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ A CSV Toolbox utilizing Web Standard APIs.
- 🧩 Parse CSVs directly from `string`s, `ReadableStream`s, or `Response` objects.
- ⚙️ **Advanced Parsing Options**: Customize your experience with various delimiters and quotation marks.
- 🔄 Defaults to `,` and `"` respectively.
- 🛠️ Use multi-character/multi-byte delimiters and quotations.
- 💾 **Specialized Binary CSV Parsing**: Leverage Stream-based processing for versatility and strength.
- 🔄 Flexible BOM handling.
- 🗜️ Supports various compression formats.
Expand Down Expand Up @@ -294,8 +293,7 @@ You can use WebAssembly to parse CSV data for high performance.
- Parsing with WebAssembly is faster than parsing with JavaScript,
but it takes time to load the WebAssembly module.
- Supports only UTF-8 encoding csv data.
- Delimiter characters are limited to single-byte characters.
- Quotation characters is only `"`. (Double quotation mark)
- Quotation characters are only `"`. (Double quotation mark)
- If you pass a different character, it will throw an error.

```ts
Expand Down
16 changes: 0 additions & 16 deletions src/Lexer.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -195,22 +195,6 @@ describe("class Lexer", () => {
expect(actual).toStrictEqual(expected);
},
),
{
examples: [
[
{
csv: "QfQQff0Qf0Qf",
data: [["Q", "0"]],
options: { delimiter: "f0", quotation: "Qf" },
expected: [
{ type: Field, value: "Q" },
FieldDelimiter,
{ type: Field, value: "0" },
],
},
],
],
},
);
});

Expand Down
6 changes: 5 additions & 1 deletion src/__tests__/helper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,10 @@ export namespace FC {
}
const { excludes = [], ...constraints }: DelimiterConstraints = options;
return text({
minLength: 1,
...constraints,
minLength: 1,
maxLength: 1,
kindExcludes: ["string16bits", "unicode"],
})
.filter(_excludeFilter([...CRLF]))
.filter(_excludeFilter(excludes));
Expand All @@ -132,6 +134,8 @@ export namespace FC {
return text({
...constraints,
minLength: 1,
maxLength: 1,
kindExcludes: ["string16bits", "unicode"],
})
.filter(_excludeFilter([...CRLF]))
.filter(_excludeFilter(excludes));
Expand Down
127 changes: 48 additions & 79 deletions src/assertCommonOptions.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ import { fc } from "@fast-check/vitest";
import { describe, expect, it } from "vitest";
import { FC } from "./__tests__/helper.ts";
import { assertCommonOptions } from "./assertCommonOptions.ts";
import { COMMA, CRLF, DOUBLE_QUOTE } from "./constants.ts";
import { COMMA, CR, CRLF, DOUBLE_QUOTE, LF } from "./constants.ts";

describe("function assertCommonOptions", () => {
it("should be throw error if quotation is a empty character", () => {
it("should throw an error if quotation is an empty character", () => {
expect(() =>
assertCommonOptions({
quotation: "",
Expand All @@ -14,7 +14,7 @@ describe("function assertCommonOptions", () => {
).toThrow("quotation must not be empty");
});

it("should be throw error if delimiter is a empty character", () => {
it("should throw an error if delimiter is an empty character", () => {
expect(() =>
assertCommonOptions({
quotation: COMMA,
Expand All @@ -23,88 +23,57 @@ describe("function assertCommonOptions", () => {
).toThrow("delimiter must not be empty");
});

it("should be throw error if quotation includes CR or LF", () =>
it("should throw an error if delimiter is the same as quotation", async () => {
fc.assert(
fc.property(
fc.gen().map((g) => {
const EOL = g(() => fc.constantFrom("\n", "\r"));
const prefix = g(FC.text);
const sufix = g(FC.text);
return prefix + EOL + sufix;
}),
(invalidQuotation) => {
FC.text({ minLength: 1, maxLength: 1, excludes: [...CRLF] }).filter(
(v) => v.length === 1,
),
(value) => {
expect(() =>
assertCommonOptions({
quotation: invalidQuotation,
delimiter: DOUBLE_QUOTE,
}),
).toThrow("quotation must not include CR or LF");
assertCommonOptions({ quotation: value, delimiter: value }),
).toThrow(
"delimiter must not be the same as quotation, use different characters",
);
},
),
{
examples: [
// "\n" is included
["\n"],
// "\r" is included
["\r"],
// "\n" and "\r" are included
["\n\r"],
],
},
));
);
});

it("should be throw error if delimiter includes CR or LF", () =>
fc.assert(
fc.property(
fc.gen().map((g) => {
const EOL = g(() => fc.constantFrom("\n", "\r"));
const prefix = g(FC.text);
const sufix = g(FC.text);
return prefix + EOL + sufix;
it("should throw an error if quotation is CR or LF", () => {
for (const quotation of [CR, LF]) {
expect(() =>
assertCommonOptions({
quotation: quotation,
delimiter: DOUBLE_QUOTE,
}),
(invalidDelimiter) => {
expect(() =>
assertCommonOptions({
quotation: COMMA,
delimiter: invalidDelimiter,
}),
).toThrow("delimiter must not include CR or LF");
},
),
{
examples: [
// "\n" is included
["\n"],
// "\r" is included
["\r"],
// "\n" and "\r" are included
["\n\r"],
],
},
));

it("should be throw error if delimiter and quotation include each other as a substring", () =>
fc.assert(
fc.property(
fc.gen().map((g) => {
const excludes = [...CRLF];
const A = g(FC.text, { minLength: 1, excludes });
// B is a string that includes A as a substring.
const B = g(FC.text, { excludes }) + A + g(FC.text, { excludes });
return { A, B };
).toThrow("quotation must not include CR or LF");
}
for (const delimiter of [CR, LF]) {
expect(() =>
assertCommonOptions({
quotation: COMMA,
delimiter: delimiter,
}),
({ A, B }) => {
expect(() =>
assertCommonOptions({ quotation: A, delimiter: B }),
).toThrow(
"delimiter and quotation must not include each other as a substring",
);
expect(() =>
assertCommonOptions({ quotation: B, delimiter: A }),
).toThrow(
"delimiter and quotation must not include each other as a substring",
);
},
),
));
).toThrow("delimiter must not include CR or LF");
}
});

it("should throw an error if quotation is not a string", () => {
expect(() =>
assertCommonOptions({
quotation: 1 as unknown as string,
delimiter: DOUBLE_QUOTE,
}),
).toThrow("quotation must be a string");
});

it("should throw an error if delimiter is not a string", () => {
expect(() =>
assertCommonOptions({
quotation: COMMA,
delimiter: 1 as unknown as string,
}),
).toThrow("delimiter must be a string");
});
});
68 changes: 49 additions & 19 deletions src/assertCommonOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,59 @@ import type { CommonOptions } from "./common/types.ts";
import { CR, LF } from "./constants.ts";

/**
* Assert that the options are valid.
*
* @param options The options to assert.
* Asserts that the provided value is a string and satisfies certain conditions.
* @param value - The value to be checked.
* @param name - The name of the option.
* @throws {Error} If the value is not a string or does not satisfy the conditions.
*/
export function assertCommonOptions(options: Required<CommonOptions>): void {
if (typeof options.quotation === "string" && options.quotation.length === 0) {
throw new Error("quotation must not be empty");
}
if (typeof options.delimiter === "string" && options.delimiter.length === 0) {
throw new Error("delimiter must not be empty");
}
if (options.quotation.includes(LF) || options.quotation.includes(CR)) {
throw new Error("quotation must not include CR or LF");
function assertOptionValue(
value: string,
name: string,
): asserts value is string {
if (typeof value === "string") {
switch (true) {
case value.length === 0:
throw new Error(`${name} must not be empty`);
case value.length > 1:
throw new Error(`${name} must be a single character`);
case value === LF:
case value === CR:
throw new Error(`${name} must not include CR or LF`);
default:
break;
}
} else {
throw new Error(`${name} must be a string`);
}
if (options.delimiter.includes(LF) || options.delimiter.includes(CR)) {
throw new Error("delimiter must not include CR or LF");
}

/**
* Asserts that the provided options object contains all the required properties.
* Throws an error if any required property is missing
* or if the delimiter or the quotation is not a single character,
* or if the delimiter is the same as the quotation.
*
* @example
*
* ```ts
* assertCommonOptions({
* quotation: '"',
* delimiter: ',',
* });
* ```
*
* @param options - The options object to be validated.
* @throws {Error} If any required property is missing or if the delimiter is the same as the quotation.
*/
export function assertCommonOptions(
options: Required<CommonOptions>,
): asserts options is Required<CommonOptions> {
for (const [name, value] of Object.entries(options)) {
assertOptionValue(value, name);
}
if (
options.delimiter.includes(options.quotation) ||
options.quotation.includes(options.delimiter)
) {
if (options.delimiter === options.quotation) {
throw new Error(
"delimiter and quotation must not include each other as a substring",
"delimiter must not be the same as quotation, use different characters",
);
}
}
13 changes: 8 additions & 5 deletions src/common/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,23 @@ export type Token = FieldToken | typeof FieldDelimiter | typeof RecordDelimiter;
export interface CommonOptions {
/**
* CSV field delimiter.
* If you want to parse TSV, specify `'\t'`.
*
* @remarks
* If you want to parse TSV, specify `'\t'`.
* Detailed restrictions are as follows:
*
* - Must not be empty
* - Must be a single character
* - Multi-byte characters are not supported
* - Must not include CR or LF
* - Must not be the same as the quotation
*
* This library supports multi-character delimiters.
* @default ','
*/
delimiter?: string;
/**
* CSV field quotation.
*
* @remarks
* This library supports multi-character quotations.
*
* @default '"'
*/
quotation?: string;
Expand Down

0 comments on commit 7fad0d1

Please sign in to comment.