Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into s3-cloud
Browse files Browse the repository at this point in the history
  • Loading branch information
Bidek56 committed Mar 18, 2024
2 parents f9cfb12 + 31e7741 commit a2b9e14
Show file tree
Hide file tree
Showing 14 changed files with 558 additions and 29 deletions.
71 changes: 67 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ crate-type = ["cdylib", "lib"]
[dependencies]
ahash = "0.8.7"
bincode = "1.3.3"
napi = { version = "2.14.2", default-features = false, features = [
"napi8",
"serde-json",
napi = { version = "2.16.0", default-features = false, features = [
"napi8",
"serde-json",
] }
napi-derive = { version = "2.14.6", default-features = false }
napi-derive = { version = "2.16.0", default-features = false }
polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "3cf4897e679b056d17a235d48867035265d43cdc", default-features = false }
polars-io = { git = "https://github.com/pola-rs/polars.git", rev = "3cf4897e679b056d17a235d48867035265d43cdc", default-features = false }
polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "3cf4897e679b056d17a235d48867035265d43cdc", default-features = false }
Expand All @@ -30,6 +30,69 @@ either = "1.9"

[dependencies.polars]
features = [
"binary_encoding",
"rolling_window",
"json",
"dynamic_group_by",
"zip_with",
"simd",
"lazy",
"strings",
"temporal",
"random",
"object",
"fmt",
"performant",
"dtype-full",
"rows",
"round_series",
"is_unique",
"is_in",
"is_first_distinct",
"asof_join",
"cross_join",
"dot_product",
"concat_str",
"row_hash",
"reinterpret",
"mode",
"extract_jsonpath",
"cum_agg",
"rolling_window",
"repeat_by",
"interpolate",
"ewma",
"rank",
"propagate_nans",
"diff",
"pct_change",
"moment",
"diagonal_concat",
"abs",
"dot_diagram",
"dataframe_arithmetic",
"json",
"string_encoding",
"product",
"ndarray",
"unique_counts",
"log",
"serde-lazy",
"partition_by",
"pivot",
"semi_anti_join",
"parquet",
"to_dummies",
"ipc",
"avro",
"list_eval",
"arg_where",
"timezones",
"peaks",
"string_pad",
"cov",
"group_by_list",
"sql",
"binary_encoding",
"rolling_window",
"json",
Expand Down
30 changes: 27 additions & 3 deletions __tests__/dataframe.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1243,7 +1243,7 @@ describe("dataframe", () => {
expect(actual).toFrameEqual(expected);
});
test("pivot", () => {
const df = pl.DataFrame({
let df = pl.DataFrame({
a: pl.Series([1, 2, 3]).cast(pl.Int32),
b: pl
.Series([
Expand All @@ -1254,7 +1254,7 @@ describe("dataframe", () => {
.cast(pl.List(pl.Int32)),
});

const expected = pl
let expected = pl
.DataFrame({
a: pl.Series([1, 2, 3]).cast(pl.Int32),
"1": pl.Series([[1, 1], null, null]).cast(pl.List(pl.Int32)),
Expand All @@ -1263,14 +1263,38 @@ describe("dataframe", () => {
})
.select("a", "1", "2", "3");

const actual = df.pivot("b", {
let actual = df.pivot("b", {
index: "a",
columns: "a",
aggregateFunc: "first",
sortColumns: true,
});

expect(actual).toFrameEqual(expected, true);

df = pl.DataFrame({
a: ["beep", "bop"],
b: ["a", "b"],
c: ["s", "f"],
d: [7, 8],
e: ["x", "y"],
});
actual = df.pivot(["a", "e"], {
index: "b",
columns: ["c"],
aggregateFunc: "first",
separator: "|",
maintainOrder: true,
});

expected = pl.DataFrame({
b: ["a", "b"],
"a|c|s": ["beep", null],
"a|c|f": [null, "bop"],
"e|c|s": ["x", null],
"e|c|f": [null, "y"],
});
expect(actual).toFrameEqual(expected, true);
});
});
describe("join", () => {
Expand Down
81 changes: 81 additions & 0 deletions __tests__/sql.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import pl from "@polars";
describe("sql", () => {
  // The same query is run lazily (then collected) and eagerly; both results
  // must equal the registered frame.
  test("execute", () => {
    const df = pl.DataFrame({
      values: [
        ["aa", "bb"],
        [null, "cc"],
        ["dd", null],
      ],
    });
    const context = pl.SQLContext({ df });

    const lazyResult = context.execute("SELECT * FROM df");
    const collected = lazyResult.collectSync();
    expect(collected).toFrameEqual(df);

    const eagerResult = context.execute("SELECT * FROM df", { eager: true });
    expect(eagerResult).toFrameEqual(df);
  });

  // A frame registered after construction is queryable under its name, and
  // registering `null` yields an empty frame when queried.
  test("register and query dataframe", () => {
    const source = pl.DataFrame({ hello: ["world"] });
    const context = pl.SQLContext();
    context.register("frame_data", source);

    const queried = context.execute("SELECT * FROM frame_data", {
      eager: true,
    });
    expect(queried).toFrameEqual(pl.DataFrame({ hello: ["world"] }));

    context.register("null_frame", null);
    const queriedNull = context.execute("SELECT * FROM null_frame", {
      eager: true,
    });
    expect(queriedNull).toFrameEqual(pl.DataFrame());
  });

  test("register many", () => {
    const first = pl.DataFrame({ a: [1, 2, 3], b: ["m", "n", "o"] });
    const second = pl.DataFrame({ a: [2, 3, 4], c: ["p", "q", "r"] });

    // Both frames are registered with a single call.
    const context = pl.SQLContext().registerMany({ tbl1: first, tbl2: second });

    expect(context.tables()).toEqual(expect.arrayContaining(["tbl1", "tbl2"]));
  });

  // Invokes the Node.js custom-inspect hook directly and checks its rendering.
  test("inspect", () => {
    const df = pl.DataFrame({
      a: [1, 2, 3],
      b: ["m", "n", "o"],
    });
    const context = pl.SQLContext({ df });

    const rendered = context[Symbol.for("nodejs.util.inspect.custom")]();

    expect(rendered).toEqual("SQLContext: {df}");
  });

  test("constructor with LazyFrames", () => {
    const first = pl.DataFrame({ a: [1, 2, 3], b: ["m", "n", "o"] }).lazy();
    const second = pl.DataFrame({ a: [2, 3, 4], c: ["p", "q", "r"] }).lazy();

    const context = pl.SQLContext({ tbl1: first, tbl2: second });

    expect(context.tables()).toEqual(expect.arrayContaining(["tbl1", "tbl2"]));
  });

  // Unregistering accepts either a single name or a list of names.
  test("unregister", () => {
    const one = pl.DataFrame({ hello: ["world"] });
    const two = pl.DataFrame({ hello: ["world"] });
    const three = pl.DataFrame({ hello: ["world"] });
    const context = pl.SQLContext({ df: one, df2: two, df3: three });

    context.unregister("df");
    const afterSingle = context.tables();
    expect(afterSingle).toEqual(["df2", "df3"]);

    context.unregister(["df2", "df3"]);
    const afterList = context.tables();
    expect(afterList).toEqual([]);
  });
});
Binary file modified bun.lockb
Binary file not shown.
18 changes: 15 additions & 3 deletions polars/dataframe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1008,6 +1008,7 @@ export interface DataFrame
* Defaults to "first"
* @param options.maintainOrder Sort the grouped keys so that the output order is predictable.
* @param options.sortColumns Sort the transposed columns by name. Default is by order of discovery.
* @param options.separator Used as separator/delimiter in generated column names.
* @example
* ```
* > const df = pl.DataFrame(
Expand All @@ -1017,12 +1018,12 @@ export interface DataFrame
* ... "baz": [1, 2, 3, 4, 5, 6],
* ... }
* ... );
* > df.pivot({values:"baz", index:"foo", columns:"bar"});
* > df.pivot("baz", {index:"foo", columns:"bar"});
* shape: (2, 4)
* ┌─────┬─────┬─────┬─────┐
* │ foo ┆ A ┆ B ┆ C │
* │ --- ┆ --- ┆ --- ┆ --- │
* │ str ┆ i64 ┆ i64 ┆ i64 │
* │ str ┆ f64 ┆ f64 ┆ f64 │
* ╞═════╪═════╪═════╪═════╡
* │ one ┆ 1 ┆ 2 ┆ 3 │
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
Expand All @@ -1047,6 +1048,7 @@ export interface DataFrame
| Expr;
maintainOrder?: boolean;
sortColumns?: boolean;
separator?: string;
},
): DataFrame;
pivot(options: {
Expand All @@ -1065,6 +1067,7 @@ export interface DataFrame
| Expr;
maintainOrder?: boolean;
sortColumns?: boolean;
separator?: string;
}): DataFrame;
// TODO!
// /**
Expand Down Expand Up @@ -2188,6 +2191,7 @@ export const _DataFrame = (_df: any): DataFrame => {
maintainOrder = true,
sortColumns = false,
aggregateFunc = "first",
separator,
} = options;
values = values_ ?? values;
values = typeof values === "string" ? [values] : values;
Expand Down Expand Up @@ -2216,7 +2220,15 @@ export const _DataFrame = (_df: any): DataFrame => {
}

return _DataFrame(
_df.pivotExpr(values, index, columns, fn, maintainOrder, sortColumns),
_df.pivotExpr(
values,
index,
columns,
fn,
maintainOrder,
sortColumns,
separator,
),
);
},
quantile(quantile) {
Expand Down
16 changes: 16 additions & 0 deletions polars/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ export * from "./lazy/dataframe";
export * from "./lazy";
import * as lazy from "./lazy";
export * from "./types";
import * as sql from "./sql";
export type { SQLContext } from "./sql";

export type { GroupBy } from "./groupby";
export namespace pl {
export import Expr = lazy.Expr;
Expand Down Expand Up @@ -109,6 +112,19 @@ export namespace pl {
export import list = lazy.list;
export import when = lazy.when;
export const version = pli.version();

/**
 * Create a context for running SQL queries against DataFrame/LazyFrame data.
 *
 * @warning This functionality is considered **unstable**, although it is close to being
 * considered stable. It may be changed at any point without it being considered
 * a breaking change.
 *
 * @param frames - Optional frames, keyed by name, passed straight to the
 * `sql.SQLContext` constructor.
 * @returns The newly constructed `sql.SQLContext`.
 */
export function SQLContext(
  frames?: Record<string, DataFrame | LazyDataFrame>,
): sql.SQLContext {
  const context = new sql.SQLContext(frames);
  return context;
}
}
// eslint-disable-next-line no-undef
export default pl;
Loading

0 comments on commit a2b9e14

Please sign in to comment.