Skip to content

Commit

Permalink
Adding replace expression (#223)
Browse files Browse the repository at this point in the history
Adding `replace` expression to close #222

---------

Co-authored-by: Cory Grinstead <universalmind.candy@gmail.com>
  • Loading branch information
Bidek56 and universalmind303 authored Jun 12, 2024
1 parent 97a2ae0 commit e107597
Show file tree
Hide file tree
Showing 4 changed files with 206 additions and 3 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ features = [
"timezones",
"peaks",
"string_pad",
"replace",
"cov",
"http",
"cloud",
Expand Down
33 changes: 31 additions & 2 deletions __tests__/expr.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1135,7 +1135,7 @@ describe("expr.str", () => {
expect(actual).toFrameEqual(expected);
expect(seriesActual).toFrameEqual(expected);
});
test("replace", () => {
test("str.replace", () => {
const df = pl.DataFrame({
os: ["kali-linux", "debian-linux", "ubuntu-linux", "mac-sierra"],
});
Expand All @@ -1151,7 +1151,7 @@ describe("expr.str", () => {
expect(actual).toFrameEqual(expected);
expect(seriesActual).toFrameEqual(expected);
});
test("replaceAll", () => {
test("str.replaceAll", () => {
const df = pl.DataFrame({
os: [
"kali-linux-2021.3a",
Expand All @@ -1177,6 +1177,35 @@ describe("expr.str", () => {
expect(actual).toFrameEqual(expected);
expect(seriesActual).toFrameEqual(expected);
});
test("expr.replace", () => {
  // Single-column frame shared by every scenario below.
  const source = pl.DataFrame({ a: [1, 2, 2, 3] });
  // Mapping shorthand: keys are the values to replace, values are the replacements.
  const lookup = { 2: 100, 3: 200 };
  // Expected output always keeps `a` untouched and appends `replaced`.
  const expectedWith = (replaced: number[]) =>
    pl.DataFrame({ a: [1, 2, 2, 3], replaced });

  // Positional scalars: unmatched values keep their original value.
  const scalarResult = source.withColumns(
    pl.col("a").replace(2, 100).alias("replaced"),
  );
  expect(scalarResult).toFrameEqual(expectedWith([1, 100, 100, 3]));

  // Positional sequences with a default and an explicit return dtype.
  const sequenceResult = source.withColumns(
    pl.col("a").replace([2, 3], [100, 200], -1, pl.Float64).alias("replaced"),
  );
  expect(sequenceResult).toFrameEqual(expectedWith([-1, 100, 100, 200]));

  // Options object carrying a mapping, a default and a return dtype.
  const mappedWithDefault = source.withColumns(
    pl
      .col("a")
      .replace({ old: lookup, default_: -1, returnDtype: pl.Int64 })
      .alias("replaced"),
  );
  expect(mappedWithDefault).toFrameEqual(expectedWith([-1, 100, 100, 200]));

  // Options object with only a mapping: unmatched values are kept.
  const mappedOnly = source.withColumns(
    pl.col("a").replace({ old: lookup }).alias("replaced"),
  );
  expect(mappedOnly).toFrameEqual(expectedWith([1, 100, 100, 200]));
});
test("slice", () => {
const df = pl.DataFrame({
os: ["linux-kali", "linux-debian", "windows-vista"],
Expand Down
162 changes: 162 additions & 0 deletions polars/lazy/expr/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,150 @@ export interface Expr
* The column will be coerced to UInt32. Give this dtype to make the coercion a no-op.
*/
repeatBy(by: Expr | string): Expr;
/**
* Replace values by different values.
* @param old - Value or sequence of values to replace.
Accepts expression input. Sequences are parsed as Series, other non-expression inputs are parsed as literals.
* @param new_ - Value or sequence of values to replace by.
Accepts expression input. Sequences are parsed as Series, other non-expression inputs are parsed as literals.
Length must match the length of `old` or have length 1.
* @param default_ - Set values that were not replaced to this value.
Defaults to keeping the original value.
Accepts expression input. Non-expression inputs are parsed as literals.
* @param returnDtype - The data type of the resulting expression. If left `undefined` (default), the data type is determined automatically based on the other inputs.
* @see {@link str.replace}
* @example
* Replace a single value by another value. Values that were not replaced remain unchanged.
* ```
>>> const df = pl.DataFrame({"a": [1, 2, 2, 3]});
>>> df.withColumns(pl.col("a").replace(2, 100).alias("replaced"));
shape: (4, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪══════════╡
│ 1 ┆ 1 │
│ 2 ┆ 100 │
│ 2 ┆ 100 │
│ 3 ┆ 3 │
└─────┴──────────┘
* ```
* Replace multiple values by passing sequences to the `old` and `new_` parameters.
* ```
>>> df.withColumns(pl.col("a").replace([2, 3], [100, 200]).alias("replaced"));
shape: (4, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪══════════╡
│ 1 ┆ 1 │
│ 2 ┆ 100 │
│ 2 ┆ 100 │
│ 3 ┆ 200 │
└─────┴──────────┘
* ```
* Passing a mapping with replacements is also supported as syntactic sugar.
Specify a default to set all values that were not matched.
* ```
>>> const mapping = {2: 100, 3: 200};
>>> df.withColumns(pl.col("a").replace({ old: mapping, default_: -1, returnDtype: pl.Int64 }).alias("replaced"));
shape: (4, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪══════════╡
│ 1 ┆ -1 │
│ 2 ┆ 100 │
│ 2 ┆ 100 │
│ 3 ┆ 200 │
└─────┴──────────┘
* ```
Replacing by values of a different data type sets the return type based on
a combination of the `new` data type and either the original data type or the
default data type if it was set.
* ```
>>> const df = pl.DataFrame({"a": ["x", "y", "z"]});
>>> const mapping = {"x": 1, "y": 2, "z": 3};
>>> df.withColumns(pl.col("a").replace({ old: mapping }).alias("replaced"));
shape: (3, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ str ┆ str │
╞═════╪══════════╡
│ x ┆ 1 │
│ y ┆ 2 │
│ z ┆ 3 │
└─────┴──────────┘
>>> df.withColumns(pl.col("a").replace({ old: mapping, default_: None }).alias("replaced"));
shape: (3, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ str ┆ i64 │
╞═════╪══════════╡
│ x ┆ 1 │
│ y ┆ 2 │
│ z ┆ 3 │
└─────┴──────────┘
* ```
Set the `returnDtype` parameter to control the resulting data type directly.
* ```
>>> df.withColumns(pl.col("a").replace({ old: mapping, returnDtype: pl.UInt8 }).alias("replaced"));
shape: (3, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ str ┆ u8 │
╞═════╪══════════╡
│ x ┆ 1 │
│ y ┆ 2 │
│ z ┆ 3 │
└─────┴──────────┘
* ```
* Expression input is supported for all parameters.
* ```
>>> const df = pl.DataFrame({"a": [1, 2, 2, 3], "b": [1.5, 2.5, 5.0, 1.0]});
>>> df.withColumns(
... pl.col("a").replace({
... old: pl.col("a").max(),
... new_: pl.col("b").sum(),
... default_: pl.col("b"),
... }).alias("replaced")
... );
shape: (4, 3)
┌─────┬─────┬──────────┐
│ a ┆ b ┆ replaced │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ f64 │
╞═════╪═════╪══════════╡
│ 1 ┆ 1.5 ┆ 1.5 │
│ 2 ┆ 2.5 ┆ 2.5 │
│ 2 ┆ 5.0 ┆ 5.0 │
│ 3 ┆ 1.0 ┆ 10.0 │
└─────┴─────┴──────────┘
* ```
*/
replace(
old: Expr | number | number[],
new_: Expr | number | number[],
default_?: Expr | number | number[],
returnDtype?: DataType,
): Expr;
/**
 * Options-object form of `replace`: the same arguments as the positional
 * overload, passed as named properties of a single object.
 * @param options.old - Value or sequence of values to replace, or a mapping
 * object whose keys are the values to replace and whose values are the
 * replacements. Note that the declared union contains `unknown`, which
 * absorbs the other members, so the compiler accepts any value here.
 * @param options.new_ - Value or sequence of values to replace by. Optional;
 * it can be left out when `old` is a mapping.
 * @param options.default_ - Value for entries that were not replaced. Optional.
 * @param options.returnDtype - Data type of the resulting expression. Optional.
 */
replace({
old,
new_,
default_,
returnDtype,
}: {
old: unknown | Expr | number | number[];
new_?: Expr | number | number[];
default_?: Expr | number | number[];
returnDtype?: DataType;
}): Expr;
/** Reverse the arrays in the list */
reverse(): Expr;
/**
Expand Down Expand Up @@ -1421,6 +1565,24 @@ export const _Expr = (_expr: any): Expr => {

return _Expr(_expr.repeatBy(e));
},
/**
 * Replace values by different values.
 *
 * Accepts either positional arguments
 * `(old, newValue, defaultValue, returnDtype)` or a single options object
 * `{ old, new_, default_, returnDtype }` as the first argument. In the
 * options form, when `new_` is omitted, `old` is treated as a mapping whose
 * keys are the values to replace and whose values are the replacements.
 *
 * @param old - Value(s) to replace, or the options object.
 * @param newValue - Replacement value(s) (positional form only).
 * @param defaultValue - Value for entries that were not replaced (positional form only).
 * @param returnDtype - Data type of the result (positional form only).
 * @returns A new expression wrapping the native `replace` call.
 */
replace(old, newValue, defaultValue, returnDtype) {
  let oldIn: any = old;
  let newIn: any = newValue;
  let defIn: any = defaultValue;
  let dtypeIn: any = returnDtype;
  // An options object is recognised by its `old` key. Checking only for
  // "non-array object" is not enough: an Expr passed positionally is also a
  // non-array object and must be forwarded as-is, not unpacked.
  const isOptions =
    old !== null &&
    typeof old === "object" &&
    !Array.isArray(old) &&
    "old" in old;
  if (isOptions) {
    oldIn = old["old"];
    newIn = old["new_"];
    defIn = old["default_"];
    dtypeIn = old["returnDtype"];
    if (newIn === undefined) {
      // Mapping shorthand: split `{ from: to, ... }` into parallel sequences.
      // Values must be read before `oldIn` is overwritten with the keys.
      newIn = Object.values(oldIn);
      oldIn = Object.keys(oldIn);
    }
  }
  // Test against undefined/null rather than truthiness so that falsy
  // defaults such as 0 or "" are still forwarded.
  const hasDefault = defIn !== undefined && defIn !== null;
  return _Expr(
    _expr.replace(
      exprToLitOrExpr(oldIn)._expr,
      exprToLitOrExpr(newIn)._expr,
      hasDefault ? exprToLitOrExpr(defIn)._expr : undefined,
      dtypeIn,
    ),
  );
},
reverse() {
return _Expr(_expr.reverse());
},
Expand Down
13 changes: 12 additions & 1 deletion src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -954,7 +954,18 @@ impl JsExpr {
.split_exact_inclusive(by.0, n as usize)
.into()
}

/// Replace values in this expression by other values.
///
/// Forwards to the inner expression's `replace`, passing clones of the `old`
/// and `new` expressions, the optional `default` expression, and the optional
/// unwrapped `return_dtype`.
#[napi(catch_unwind)]
pub fn replace(
    &self,
    old: &JsExpr,
    new: &JsExpr,
    default: Option<&JsExpr>,
    return_dtype: Option<Wrap<DataType>>,
) -> JsExpr {
    // Unwrap the JS-facing wrappers into the values the inner call expects.
    let fallback = default.map(|expr| expr.inner.clone());
    let dtype = return_dtype.map(|wrapped| wrapped.0);

    let replaced = self
        .inner
        .clone()
        .replace(old.inner.clone(), new.inner.clone(), fallback, dtype);
    replaced.into()
}
#[napi(catch_unwind)]
pub fn year(&self) -> JsExpr {
self.clone().inner.dt().year().into()
Expand Down

0 comments on commit e107597

Please sign in to comment.