Skip to content

Commit

Permalink
fix: properly parse js regex patterns (#92)
Browse files Browse the repository at this point in the history
  • Loading branch information
universalmind303 authored Aug 9, 2023
1 parent a438d31 commit e6c1edb
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 3 deletions.
21 changes: 19 additions & 2 deletions __tests__/expr.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -947,6 +947,23 @@ describe("expr.str", () => {
expect(actual).toFrameEqual(expected);
expect(seriesActual).toSeriesEqual(expected.getColumn("isLinux"));
});

// A RegExp carrying the case-insensitive `i` flag is passed to `str.contains`;
// every casing of "foo" in column `a` is expected to match, both through the
// Series API and through the expression API.
test("contains:regex", () => {
  const input = pl.DataFrame({
    a: ["Foo", "foo", "FoO"],
  });
  const pattern = new RegExp("foo", "i");

  // Same column as the input plus the boolean match result for each row.
  const want = pl.DataFrame({
    a: ["Foo", "foo", "FoO"],
    contains: [true, true, true],
  });

  // Series path: call `contains` directly on the column and name the result.
  const fromSeries = input
    .getColumn("a")
    .str.contains(pattern)
    .rename("contains");

  // Expression path: append the match result as a new column on the frame.
  const containsExpr = col("a").str.contains(pattern).as("contains");
  const fromExpr = input.withColumn(containsExpr);

  expect(fromExpr).toFrameEqual(want);
  expect(fromSeries).toSeriesEqual(want.getColumn("contains"));
});

test("split", () => {
const df = pl.DataFrame({ a: ["ab,cd", "e,fg", "h"] });
const expected = pl.DataFrame({
Expand Down Expand Up @@ -976,12 +993,12 @@ describe("expr.str", () => {

const seriesActual = df
.getColumn("a")
.str.extract(/candidate=(\w+)/g, 1)
.str.extract(/candidate=(\w+)/, 1)
.rename("candidate")
.toFrame();

const actual = df.select(
col("a").str.extract(/candidate=(\w+)/g, 1).as("candidate"),
col("a").str.extract(/candidate=(\w+)/, 1).as("candidate"),
);
expect(actual).toFrameEqual(expected);
expect(seriesActual).toFrameEqual(expected);
Expand Down
9 changes: 8 additions & 1 deletion polars/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,16 @@ export const isExprArray = (ty: any): ty is Expr[] =>
Array.isArray(ty) && Expr.isExpr(ty[0]);
export const isIterator = <T>(ty: any): ty is Iterable<T> =>
ty !== null && typeof ty[Symbol.iterator] === "function";

export const regexToString = (r: string | RegExp): string => {
if (isRegExp(r)) {
return r.source;
if (r.flags.includes("g")) {
throw new Error("global flag is not supported");
}
if (r.flags.includes("y")) {
throw new Error("sticky flag is not supported");
}
return r.flags ? `(?${r.flags})${r.source}` : r.source;
}

return r;
Expand Down

0 comments on commit e6c1edb

Please sign in to comment.