diff --git a/Cargo.lock b/Cargo.lock index 1a674cd..a747639 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,12 +19,11 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "ahash" -version = "0.8.6" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", - "const-random", "getrandom", "once_cell", "version_check", @@ -78,9 +77,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.4" +version = "0.6.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44" +checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" dependencies = [ "anstyle", "anstyle-parse", @@ -92,280 +91,106 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.4" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" [[package]] name = "anstyle-parse" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317b9a89c1868f5ea6ff1d9539a69f45dffc21ce321ac1fd1160dfa48c8e2140" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.1" +version = "3.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" dependencies = [ "anstyle", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] name = "anyhow" -version = "1.0.75" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" - -[[package]] -name = "arrayref" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" - -[[package]] -name = "arrayvec" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" - -[[package]] -name = "arrow" -version = "47.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fab9e93ba8ce88a37d5a30dce4b9913b75413dc1ac56cb5d72e5a840543f829" -dependencies = [ - "ahash", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", -] - -[[package]] -name = "arrow-arith" -version = "47.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc1d4e368e87ad9ee64f28b9577a3834ce10fe2703a26b28417d485bbbdff956" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "half", - "num", -] - -[[package]] -name = "arrow-array" -version = "47.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d02efa7253ede102d45a4e802a129e83bcc3f49884cab795b1ac223918e4318d" -dependencies = [ - "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "chrono-tz", - "half", - "hashbrown", - "num", -] - -[[package]] -name = "arrow-buffer" -version = "47.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fda119225204141138cb0541c692fbfef0e875ba01bfdeaed09e9d354f9d6195" -dependencies = [ - "bytes", - "half", - "num", -] - -[[package]] -name = "arrow-cast" -version = "47.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d825d51b9968868d50bc5af92388754056796dbc62a4e25307d588a1fc84dee" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "chrono", - "comfy-table", - "half", - "lexical-core", - "num", -] - -[[package]] -name = "arrow-csv" -version = "47.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43ef855dc6b126dc197f43e061d4de46b9d4c033aa51c2587657f7508242cef1" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "csv", - "csv-core", - "lazy_static", - "lexical-core", - "regex", -] - -[[package]] -name = "arrow-data" -version = "47.0.0" +version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "475a4c3699c8b4095ca61cecf15da6f67841847a5f5aac983ccb9a377d02f73a" -dependencies = [ - "arrow-buffer", - "arrow-schema", - "half", - "num", -] +checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" [[package]] -name = "arrow-ipc" -version = "47.0.0" +name = "argminmax" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1248005c8ac549f869b7a840859d942bf62471479c1a2d82659d453eebcd166a" +checksum = "202108b46429b765ef483f8a24d5c46f48c14acfdacc086dd4ab6dddf6bcdbd2" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "flatbuffers", + "num-traits", ] [[package]] -name = "arrow-json" -version = "47.0.0" +name = "array-init-cursor" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f03d7e3b04dd688ccec354fe449aed56b831679f03e44ee2c1cfc4045067b69c" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "half", - "indexmap", - "lexical-core", - "num", - "serde", - "serde_json", -] +checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" [[package]] -name = "arrow-ord" -version = "47.0.0" +name = "async-stream" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03b87aa408ea6a6300e49eb2eba0c032c88ed9dc19e0a9948489c55efdca71f4" +checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "half", - "num", + "async-stream-impl", + "futures-core", + "pin-project-lite", ] [[package]] -name = "arrow-row" -version = "47.0.0" +name = "async-stream-impl" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "114a348ab581e7c9b6908fcab23cb39ff9f060eb19e72b13f8fb8eaa37f65d22" +checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "half", - "hashbrown", + "proc-macro2", + "quote", + "syn 2.0.52", ] [[package]] -name = "arrow-schema" -version = "47.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d1d179c117b158853e0101bfbed5615e86fe97ee356b4af901f1c5001e1ce4b" - -[[package]] -name = "arrow-select" -version = "47.0.0" +name = "async-trait" +version = "0.1.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5c71e003202e67e9db139e5278c79f5520bb79922261dfe140e4637ee8b6108" +checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "num", + "proc-macro2", + "quote", + "syn 2.0.52", ] [[package]] -name = "arrow-string" -version = "47.0.0" +name = "atoi" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4cebbb282d6b9244895f4a9a912e55e57bce112554c7fa91fcec5459cb421ab" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "num", - "regex", - "regex-syntax 0.7.5", + "num-traits", ] [[package]] -name = "async-trait" -version = "0.1.74" +name = "atoi_simd" +version = "0.15.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.38", -] +checksum = "9ae037714f313c1353189ead58ef9eec30a8e8dc101b2622d461418fd59e28a9" [[package]] name = "autocfg" @@ -390,9 +215,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.5" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "bitflags" @@ -402,44 +227,13 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.1" +version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" dependencies = [ "serde", ] -[[package]] -name = "blake2" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" -dependencies = [ - "digest", -] - -[[package]] -name = "blake3" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0231f06152bf547e9c2b5194f247cd97aacf6dcd8b15d8e5ec0663f64580da87" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", -] - -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - [[package]] name = "brotli" version = "3.4.0" @@ -463,15 +257,29 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.14.0" +version = "3.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea184aa71bb362a1157c896979544cc23974e08fd265f29ea96b59f0b4a555b" + +[[package]] +name = "bytemuck" +version = "1.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +checksum = "a2ef034f05691a48569bd920a96c81b9d91bbad1ab5ac7c4616c1f6ef36cb79f" +dependencies = [ + "bytemuck_derive", +] [[package]] -name = "byteorder" +name = "bytemuck_derive" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +checksum = "965ab7eb5f8f97d2a083c799f3a1b994fc397b2fe2da5d1da1626ce15a39f2b1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.52", +] [[package]] name = "bytes" @@ -481,11 +289,10 @@ checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" [[package]] name = "cc" -version = "1.0.83" +version = "1.0.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +checksum = "02f341c093d19155a6e41631ce5971aac4e9a868262212153124c15fa22d1cdc" dependencies = [ - "jobserver", "libc", ] @@ -497,21 +304,22 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.31" +version = "0.4.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b" dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", - "windows-targets", + "serde", + "windows-targets 0.52.4", ] [[package]] name = "chrono-tz" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e23185c0e21df6ed832a12e2bda87c7d1def6842881fb634a8511ced741b0d76" +checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" dependencies = [ "chrono", "chrono-tz-build", @@ -531,9 +339,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.7" +version = "4.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac495e00dcec98c83465d5ad66c5c4fabd652fd6686e7c6269b117e729a6f17b" +checksum = "c918d541ef2913577a0f9566e9ce27cb35b6df072075769e0b26cb5a554520da" dependencies = [ "clap_builder", "clap_derive", @@ -541,9 +349,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.7" +version = "4.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c77ed9a32a62e6ca27175d00d29d05ca32e396ea1eb5fb01d8256b669cec7663" +checksum = "9f3e7391dad68afb0c2ede1bf619f579a3dc9c2ec67f089baa397123a2f3d1eb" dependencies = [ "anstream", "anstyle", @@ -553,21 +361,21 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.4.7" +version = "4.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" +checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.52", ] [[package]] name = "clap_lex" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" [[package]] name = "colorchoice" @@ -588,62 +396,70 @@ dependencies = [ ] [[package]] -name = "const-random" -version = "0.1.17" +name = "core-foundation-sys" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aaf16c9c2c612020bcfd042e170f6e32de9b9d75adb5277cdbbd2e2c8c8299a" -dependencies = [ - "const-random-macro", -] +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" [[package]] -name = "const-random-macro" -version = "0.1.16" +name = "crc32fast" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" dependencies = [ - "getrandom", - "once_cell", - "tiny-keccak", + "cfg-if", ] [[package]] -name = "constant_time_eq" -version = "0.3.0" +name = "crossbeam-channel" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" +checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95" +dependencies = [ + "crossbeam-utils", +] [[package]] -name = "core-foundation-sys" -version = "0.8.4" +name = "crossbeam-deque" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] [[package]] -name = "cpufeatures" -version = "0.2.11" +name = "crossbeam-epoch" +version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ - "libc", + "crossbeam-utils", ] [[package]] -name = "crc32fast" -version = "1.3.2" +name = "crossbeam-queue" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" dependencies = [ - "cfg-if", + "crossbeam-utils", ] +[[package]] +name = "crossbeam-utils" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + [[package]] name = "crossterm" version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "crossterm_winapi", "libc", "mio", @@ -664,369 +480,121 @@ dependencies = [ ] [[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +name = "dply" +version = "0.3.0" dependencies = [ - "generic-array", - "typenum", + "anyhow", + "clap", + "comfy-table", + "home", + "indoc", + "lru", + "nom", + "polars", + "reedline", + "regex", + "thiserror", ] [[package]] -name = "csv" -version = "1.3.0" +name = "dyn-clone" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" -dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", -] +checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" [[package]] -name = "csv-core" -version = "0.1.11" +name = "either" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" -dependencies = [ - "memchr", -] +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] -name = "dashmap" -version = "5.5.3" +name = "enum_dispatch" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +checksum = "8f33313078bb8d4d05a2733a94ac4c2d8a0df9a2b84424ebf4f33bfc224a890e" dependencies = [ - "cfg-if", - "hashbrown", - "lock_api", "once_cell", - "parking_lot_core", + "proc-macro2", + "quote", + "syn 2.0.52", ] [[package]] -name = "datafusion" -version = "32.0.0" +name = "equivalent" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7014432223f4d721cb9786cd88bb89e7464e0ba984d4a7f49db7787f5f268674" -dependencies = [ - "ahash", - "arrow", - "arrow-array", - "arrow-schema", - "async-trait", - "bytes", - "chrono", - "dashmap", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-sql", - "futures", - "glob", - "half", - "hashbrown", - "indexmap", - "itertools 0.11.0", - "log", - "num_cpus", - "object_store", - "parking_lot", - "parquet", - "percent-encoding", - "pin-project-lite", - "rand", - "sqlparser", - "tempfile", - "tokio", - "tokio-util", - "url", - "uuid", -] +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] -name = "datafusion-common" -version = "32.0.0" +name = "errno" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb3903ed8f102892f17b48efa437f3542159241d41c564f0d1e78efdc5e663aa" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ - "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", - "chrono", - "half", - "num_cpus", - "object_store", - "parquet", - "sqlparser", + "libc", + "windows-sys 0.52.0", ] [[package]] -name = "datafusion-execution" -version = "32.0.0" +name = "ethnum" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "780b73b2407050e53f51a9781868593f694102c59e622de9a8aafc0343c4f237" -dependencies = [ - "arrow", - "chrono", - "dashmap", - "datafusion-common", - "datafusion-expr", - "futures", - "hashbrown", - "log", - "object_store", - "parking_lot", - "rand", - "tempfile", - "url", -] +checksum = "b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c" [[package]] -name = "datafusion-expr" -version = "32.0.0" +name = "fallible-streaming-iterator" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24c382676338d8caba6c027ba0da47260f65ffedab38fda78f6d8043f607557c" -dependencies = [ - "ahash", - "arrow", - "arrow-array", - "datafusion-common", - "sqlparser", - "strum", - "strum_macros", -] +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] -name = "datafusion-optimizer" -version = "32.0.0" +name = "fast-float" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f2904a432f795484fd45e29ded4537152adb60f636c05691db34fcd94c92c96" -dependencies = [ - "arrow", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", - "hashbrown", - "itertools 0.11.0", - "log", - "regex-syntax 0.7.5", -] +checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" [[package]] -name = "datafusion-physical-expr" -version = "32.0.0" +name = "fd-lock" +version = "3.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57b4968e9a998dc0476c4db7a82f280e2026b25f464e4aa0c3bb9807ee63ddfd" +checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5" dependencies = [ - "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", - "base64", - "blake2", - "blake3", - "chrono", - "datafusion-common", - "datafusion-expr", - "half", - "hashbrown", - "hex", - "indexmap", - "itertools 0.11.0", - "libc", - "log", - "md-5", - "paste", - "petgraph", - "rand", - "regex", - "sha2", - "unicode-segmentation", - "uuid", + "cfg-if", + "rustix", + "windows-sys 0.48.0", ] [[package]] -name = "datafusion-physical-plan" -version = "32.0.0" +name = "flate2" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd0d1fe54e37a47a2d58a1232c22786f2c28ad35805fdcd08f0253a8b0aaa90" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" dependencies = [ - "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "futures", - "half", - "hashbrown", - "indexmap", - "itertools 0.11.0", - "log", - "once_cell", - "parking_lot", - "pin-project-lite", - "rand", - "tokio", - "uuid", + "crc32fast", + "miniz_oxide", ] [[package]] -name = "datafusion-sql" -version = "32.0.0" +name = "float-cmp" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b568d44c87ead99604d704f942e257c8a236ee1bbf890ee3e034ad659dcb2c21" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" dependencies = [ - "arrow", - "arrow-schema", - "datafusion-common", - "datafusion-expr", - "log", - "sqlparser", + "num-traits", ] [[package]] -name = "digest" -version = "0.10.7" +name = "foreign_vec" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - -[[package]] -name = "doc-comment" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" - -[[package]] -name = "dply" -version = "0.2.1" -dependencies = [ - "anyhow", - "chrono", - "clap", - "comfy-table", - "datafusion", - "futures", - "hashbrown", - "home", - "indoc", - "lru", - "nom", - "num-traits", - "parking_lot", - "reedline", - "regex", - "thiserror", - "tokio", -] - -[[package]] -name = "either" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" - -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - -[[package]] -name = "errno" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" -dependencies = [ - "libc", - "windows-sys", -] - -[[package]] -name = "fastrand" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" - -[[package]] -name = "fd-lock" -version = "3.0.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5" -dependencies = [ - "cfg-if", - "rustix", - "windows-sys", -] - -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - -[[package]] -name = "flatbuffers" -version = "23.5.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" -dependencies = [ - "bitflags 1.3.2", - "rustc_version", -] - -[[package]] -name = "flate2" -version = "1.0.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - -[[package]] -name = "form_urlencoded" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" -dependencies = [ - "percent-encoding", -] +checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" [[package]] name = "futures" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0290714b38af9b4a7b094b8a37086d1b4e61f2df9122c3cad2577669145335" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" dependencies = [ "futures-channel", "futures-core", @@ -1039,9 +607,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", "futures-sink", @@ -1049,15 +617,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" [[package]] name = "futures-executor" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f4fb8693db0cf099eadcca0efe2a5a22e4550f98ed16aba6c48700da29597bc" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" dependencies = [ "futures-core", "futures-task", @@ -1066,38 +634,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf34a163b5c4c52d0478a4d757da8fb65cabef42ba90515efee0f6f9fa45aaa" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" [[package]] name = "futures-macro" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.52", ] [[package]] name = "futures-sink" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" [[package]] name = "futures-task" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] name = "futures-util" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-channel", "futures-core", @@ -1111,32 +679,24 @@ dependencies = [ "slab", ] -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] name = "gimli" -version = "0.28.0" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" [[package]] name = "glob" @@ -1145,24 +705,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] -name = "half" -version = "2.3.1" +name = "halfbrown" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" +checksum = "5681137554ddff44396e5f149892c769d45301dd9aa19c51602a89ee214cb0ec" dependencies = [ - "cfg-if", - "crunchy", - "num-traits", + "hashbrown 0.13.2", + "serde", +] + +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash", ] [[package]] name = "hashbrown" -version = "0.14.2" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ "ahash", "allocator-api2", + "rayon", ] [[package]] @@ -1173,9 +742,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.3" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "hex" @@ -1185,24 +754,18 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "home" -version = "0.5.5" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] -[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" - [[package]] name = "iana-time-zone" -version = "0.1.58" +version = "0.1.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1221,24 +784,14 @@ dependencies = [ "cc", ] -[[package]] -name = "idna" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - [[package]] name = "indexmap" -version = "2.0.2" +version = "2.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" +checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.14.3", ] [[package]] @@ -1247,59 +800,46 @@ version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" -[[package]] -name = "integer-encoding" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" - [[package]] name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ "either", ] [[package]] name = "itoa" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] -name = "jobserver" -version = "0.1.27" +name = "itoap" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" -dependencies = [ - "libc", -] +checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" [[package]] name = "js-sys" -version = "0.3.64" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +checksum = "406cda4b368d531c842222cf9d2600a9a4acce8d29423695379c6868a143a9ee" dependencies = [ "wasm-bindgen", ] [[package]] -name = "lazy_static" -version = "1.4.0" +name = "jsonpath_lib_polars_vendor" +version = "0.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "f4bd9354947622f7471ff713eacaabdb683ccb13bba4edccaab9860abf480b7d" +dependencies = [ + "log", + "serde", + "serde_json", +] [[package]] name = "lexical-core" @@ -1367,9 +907,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.149" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "libm" @@ -1379,9 +919,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "linux-raw-sys" -version = "0.4.10" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "lock_api" @@ -1395,17 +935,17 @@ dependencies = [ [[package]] name = "log" -version = "0.4.20" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "lru" -version = "0.12.0" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1efa59af2ddfad1854ae27d75009d538d0998b4b2fd47083e743ac1a10e46c60" +checksum = "d3262e75e648fce39813cb56ac41f3c3e3f65217ebf3844d818d1f9398cfb0dc" dependencies = [ - "hashbrown", + "hashbrown 0.14.3", ] [[package]] @@ -1429,20 +969,19 @@ dependencies = [ ] [[package]] -name = "md-5" -version = "0.10.6" +name = "memchr" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if", - "digest", -] +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" [[package]] -name = "memchr" -version = "2.6.4" +name = "memmap2" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" +dependencies = [ + "libc", +] [[package]] name = "minimal-lexical" @@ -1452,116 +991,89 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" dependencies = [ "adler", ] [[package]] name = "mio" -version = "0.8.9" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc", "log", "wasi", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] -name = "nom" -version = "7.1.3" +name = "multiversion" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +checksum = "b2c7b9d7fe61760ce5ea19532ead98541f6b4c495d87247aff9826445cf6872a" dependencies = [ - "memchr", - "minimal-lexical", + "multiversion-macros", + "target-features", ] [[package]] -name = "nu-ansi-term" -version = "0.49.0" +name = "multiversion-macros" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c073d3c1930d0751774acf49e66653acecb416c3a54c6ec095a9b11caddb5a68" +checksum = "26a83d8500ed06d68877e9de1dde76c1dbb83885dcdbda4ef44ccbc3fbda2ac8" dependencies = [ - "windows-sys", -] - -[[package]] -name = "num" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - -[[package]] -name = "num-bigint" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", + "proc-macro2", + "quote", + "syn 1.0.109", + "target-features", ] [[package]] -name = "num-complex" -version = "0.4.4" +name = "nom" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" dependencies = [ - "num-traits", + "memchr", + "minimal-lexical", ] [[package]] -name = "num-integer" -version = "0.1.45" +name = "now" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "6d89e9874397a1f0a52fc1f197a8effd9735223cb2390e9dcc83ac6cd02923d0" dependencies = [ - "autocfg", - "num-traits", + "chrono", ] [[package]] -name = "num-iter" -version = "0.1.43" +name = "ntapi" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" dependencies = [ - "autocfg", - "num-integer", - "num-traits", + "winapi", ] [[package]] -name = "num-rational" -version = "0.4.1" +name = "nu-ansi-term" +version = "0.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +checksum = "dd2800e1520bdc966782168a627aa5d1ad92e33b984bf7c7615d31280c83ff14" dependencies = [ - "autocfg", - "num-bigint", - "num-integer", - "num-traits", + "windows-sys 0.48.0", ] [[package]] name = "num-traits" -version = "0.2.17" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg", "libm", @@ -1579,48 +1091,18 @@ dependencies = [ [[package]] name = "object" -version = "0.32.1" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ "memchr", ] -[[package]] -name = "object_store" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f930c88a43b1c3f6e776dfe495b4afab89882dbc81530c632db2ed65451ebcb4" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures", - "humantime", - "itertools 0.11.0", - "parking_lot", - "percent-encoding", - "snafu", - "tokio", - "tracing", - "url", - "walkdir", -] - [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" - -[[package]] -name = "ordered-float" -version = "2.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" -dependencies = [ - "num-traits", -] +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "parking_lot" @@ -1642,41 +1124,17 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-targets", + "windows-targets 0.48.5", ] [[package]] -name = "parquet" -version = "47.0.0" +name = "parquet-format-safe" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0463cc3b256d5f50408c49a4be3a16674f4c8ceef60941709620a062b1f6bf4d" +checksum = "1131c54b167dd4e4799ce762e1ab01549ebb94d5bdd13e6ec1b467491c378e1f" dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", - "base64", - "brotli", - "bytes", - "chrono", - "flate2", + "async-trait", "futures", - "hashbrown", - "lz4", - "num", - "num-bigint", - "object_store", - "paste", - "seq-macro", - "snap", - "thrift", - "tokio", - "twox-hash", - "zstd", ] [[package]] @@ -1688,27 +1146,11 @@ dependencies = [ "regex", ] -[[package]] -name = "paste" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" - [[package]] name = "percent-encoding" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" - -[[package]] -name = "petgraph" -version = "0.6.4" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" -dependencies = [ - "fixedbitset", - "indexmap", -] +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "phf" @@ -1762,9 +1204,408 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.27" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "planus" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1691dd09e82f428ce8d6310bd6d5da2557c82ff17694d2a32cad7242aea89f" +dependencies = [ + "array-init-cursor", +] + +[[package]] +name = "polars" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a26ef94cfedd5915da990a0b4740cca17b5854bd44a8e8c741fe732c02aac37" +dependencies = [ + "getrandom", + "polars-arrow", + "polars-core", + "polars-error", + "polars-io", + "polars-lazy", + "polars-ops", + "polars-parquet", + "polars-sql", + "polars-time", + "polars-utils", + "version_check", +] + +[[package]] +name = "polars-arrow" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e71d30a9fa503bc3baaff3b4c48f08d402c442a50ea7fb9d475ce7b575425a" +dependencies = [ + "ahash", + "atoi", + "atoi_simd", + "bytemuck", + "chrono", + "chrono-tz", + "dyn-clone", + "either", + "ethnum", + "fast-float", + "foreign_vec", + "futures", + "getrandom", + "hashbrown 0.14.3", + "itoa", + "itoap", + "lz4", + "multiversion", + "num-traits", + "polars-arrow-format", + "polars-error", + "polars-utils", + "ryu", + "simdutf8", + "streaming-iterator", + "strength_reduce", + "version_check", + "zstd", +] + +[[package]] +name = "polars-arrow-format" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b0ef2474af9396b19025b189d96e992311e6a47f90c53cd998b36c4c64b84c" +dependencies = [ + "planus", + "serde", +] + +[[package]] +name = "polars-compute" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26342dea46502e8a3322f484062869c2fa49185d512bce4fb44f350b559b4eae" +dependencies = [ + "bytemuck", + "either", + "num-traits", + "polars-arrow", + "polars-error", + "polars-utils", + "strength_reduce", + "version_check", +] + +[[package]] +name = "polars-core" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99e0885a8f1bd1f4d928f5eaa852825bf647b6b5e21e171b6af838f77b6565f3" +dependencies = [ + "ahash", + "bitflags 2.4.2", + "bytemuck", + "chrono", + "chrono-tz", + "comfy-table", + "either", + "hashbrown 0.14.3", + "indexmap", + "num-traits", + "once_cell", + "polars-arrow", + "polars-compute", + "polars-error", + "polars-row", + "polars-utils", + "rand", + "rand_distr", + "rayon", + "regex", + "smartstring", + "thiserror", + "version_check", + "xxhash-rust", +] + +[[package]] +name = "polars-error" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d259d905c17d8e8b2de1eadc94dc4186bf1d325f1be81b4087afea22a6f753d6" +dependencies = [ + "polars-arrow-format", + "regex", + "simdutf8", + "thiserror", +] + +[[package]] +name = "polars-io" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45f694b918ba2ee7e6f13e8415598f94009c390a9e61c95e6b9c26c8fe1a1a54" +dependencies = [ + "ahash", + "async-trait", + "atoi_simd", + "bytes", + "chrono", + "chrono-tz", + "fast-float", + "futures", + "home", + "itoa", + "memchr", + "memmap2", + "num-traits", + "once_cell", + "percent-encoding", + "polars-arrow", + "polars-core", + "polars-error", + "polars-json", + "polars-parquet", + "polars-time", + "polars-utils", + "rayon", + "regex", + "ryu", + "serde_json", + "simd-json", + "simdutf8", + "smartstring", + "tokio", + "tokio-util", +] + +[[package]] +name = "polars-json" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd7c4d33540a22cd49ccb9b4da77999604ba986707d4568be57fde2f719954ae" +dependencies = [ + "ahash", + "chrono", + "fallible-streaming-iterator", + "hashbrown 0.14.3", + "indexmap", + "itoa", + "num-traits", + "polars-arrow", + "polars-error", + "polars-utils", + "ryu", + "simd-json", + "streaming-iterator", +] + +[[package]] +name = "polars-lazy" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e3b40272d24142bcecb2979b19ec8d8c1a14036cb3cea09ce8fb8a4a43bcde" +dependencies = [ + "ahash", + "bitflags 2.4.2", + "glob", + "once_cell", + "polars-arrow", + "polars-core", + "polars-io", + "polars-json", + "polars-ops", + "polars-pipe", + "polars-plan", + "polars-time", + "polars-utils", + "rayon", + "smartstring", + "version_check", +] + +[[package]] +name = "polars-ops" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd5bad61c2fa1977eb65bb719f12d4f68b908edf1106b91b3ab9615f9df8843e" +dependencies = [ + "ahash", + "argminmax", + "base64", + "bytemuck", + "chrono", + "chrono-tz", + "either", + "hashbrown 0.14.3", + "hex", + "indexmap", + "jsonpath_lib_polars_vendor", + "memchr", + "num-traits", + "polars-arrow", + "polars-compute", + "polars-core", + "polars-error", + "polars-json", + "polars-utils", + "rayon", + "regex", + "serde_json", + "smartstring", + "unicode-reverse", + "version_check", +] + +[[package]] +name = "polars-parquet" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06d84fb9b005a19ca523406df371d9329466ae87df48922d0d3d8955072502a4" +dependencies = [ + "ahash", + "async-stream", + "base64", + "brotli", + "ethnum", + "flate2", + "futures", + "lz4", + "num-traits", + "parquet-format-safe", + "polars-arrow", + "polars-error", + "polars-utils", + "seq-macro", + "simdutf8", + "snap", + "streaming-decompression", + "zstd", +] + +[[package]] +name = "polars-pipe" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +checksum = "58097bef7208a5b833c4d832d948026854917b4a219d55ab1779eb36b59fac0f" +dependencies = [ + "crossbeam-channel", + "crossbeam-queue", + "enum_dispatch", + "hashbrown 0.14.3", + "num-traits", + "polars-arrow", + "polars-compute", + "polars-core", + "polars-io", + "polars-ops", + "polars-plan", + "polars-row", + "polars-utils", + "rayon", + "smartstring", + "uuid", + "version_check", +] + +[[package]] +name = "polars-plan" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56493c0e13aaccfcae59985db34da30cd4893e57edc9715d8688c96d7e911d47" +dependencies = [ + "ahash", + "bytemuck", + "chrono-tz", + "once_cell", + "percent-encoding", + "polars-arrow", + "polars-core", + "polars-io", + "polars-json", + "polars-ops", + "polars-parquet", + "polars-time", + "polars-utils", + "rayon", + "regex", + "smartstring", + "strum_macros", + "version_check", +] + +[[package]] +name = "polars-row" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7def6f9fc14fbfc0550bad615a757f3e1d86c00983c5ff23166fcdf205438d51" +dependencies = [ + "bytemuck", + "polars-arrow", + "polars-error", + "polars-utils", +] + +[[package]] +name = "polars-sql" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f9d7de9dca8170a20b6c4cb7bafaf724abe88e807646bc3c2e98f13a34a7c4c" +dependencies = [ + "hex", + "polars-arrow", + "polars-core", + "polars-error", + "polars-lazy", + "polars-plan", + "rand", + "serde", + "serde_json", + "sqlparser", +] + +[[package]] +name = "polars-time" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162c815c3cb0f859da40f056c8a0a9c4247900e1275702ae399192ea60acac2a" +dependencies = [ + "atoi", + "chrono", + "chrono-tz", + "now", + "once_cell", + "polars-arrow", + "polars-core", + "polars-error", + "polars-ops", + "polars-utils", + "regex", + "smartstring", +] + +[[package]] +name = "polars-utils" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fca7938ee789314ac92a0bf6c1c4e5eaeb5e428241df2519fd70f21dba49194" +dependencies = [ + "ahash", + "bytemuck", + "hashbrown 0.14.3", + "indexmap", + "num-traits", + "once_cell", + "polars-error", + "raw-cpuid", + "rayon", + "smartstring", + "sysinfo", + "version_check", +] [[package]] name = "ppv-lite86" @@ -1774,18 +1615,18 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.69" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.33" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -1794,30 +1635,69 @@ dependencies = [ name = "rand" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "raw-cpuid" +version = "11.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d86a7c4638d42c44551f4791a20e687dbb4c3de1f33c43dd71e355cd429def1" dependencies = [ - "libc", - "rand_chacha", - "rand_core", + "bitflags 2.4.2", ] [[package]] -name = "rand_chacha" -version = "0.3.1" +name = "rayon" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd" dependencies = [ - "ppv-lite86", - "rand_core", + "either", + "rayon-core", ] [[package]] -name = "rand_core" -version = "0.6.4" +name = "rayon-core" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ - "getrandom", + "crossbeam-deque", + "crossbeam-utils", ] [[package]] @@ -1831,14 +1711,14 @@ dependencies = [ [[package]] name = "reedline" -version = "0.25.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7dc1d1d369c194cf79acc204397aca1fecc4248df3e1c1eabb15e5ef2d16991" +checksum = "9e01ebfbdb1a88963121d3c928c97be7f10fec7795bec8b918c8cda1db7c29e6" dependencies = [ "chrono", "crossterm", "fd-lock", - "itertools 0.10.5", + "itertools", "nu-ansi-term", "serde", "strip-ansi-escapes", @@ -1849,35 +1729,49 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "ref-cast" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4846d4c50d1721b1a3bef8af76924eef20d5e723647333798c1b519b3a9473f" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fddb4f8d99b0a2ebafc65a87a69a7b9875e4b1ae1f00db265d300ef7f28bccc" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.52", +] + [[package]] name = "regex" -version = "1.10.2" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", "regex-automata", - "regex-syntax 0.8.2", + "regex-syntax", ] [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.2", + "regex-syntax", ] -[[package]] -name = "regex-syntax" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" - [[package]] name = "regex-syntax" version = "0.8.2" @@ -1890,26 +1784,17 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" -[[package]] -name = "rustc_version" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" -dependencies = [ - "semver", -] - [[package]] name = "rustix" -version = "0.38.21" +version = "0.38.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b426b0506e5d50a7d8dafcf2e81471400deb602392c7dd110815afb4eaf02a3" +checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -1920,18 +1805,9 @@ checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = "ryu" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "scopeguard" @@ -1939,12 +1815,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "semver" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" - [[package]] name = "seq-macro" version = "0.3.5" @@ -1953,46 +1823,36 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.190" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91d3c334ca1ee894a2c6f6ad698fe8c435b76d504b13d436f0685d648d6d96f7" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.190" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67c5609f394e5c2bd7fc51efda478004ea80ef42fee983d5c67a65e34f32c0e3" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.52", ] [[package]] name = "serde_json" -version = "1.0.108" +version = "1.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" +checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" dependencies = [ + "indexmap", "itoa", "ryu", "serde", ] -[[package]] -name = "sha2" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - [[package]] name = "signal-hook" version = "0.3.17" @@ -2023,6 +1883,30 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-json" +version = "0.13.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2faf8f101b9bc484337a6a6b0409cf76c139f2fb70a9e3aee6b6774be7bfbf76" +dependencies = [ + "ahash", + "getrandom", + "halfbrown", + "lexical-core", + "once_cell", + "ref-cast", + "serde", + "serde_json", + "simdutf8", + "value-trait", +] + +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + [[package]] name = "siphasher" version = "0.3.11" @@ -2040,64 +1924,72 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.1" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] -name = "snafu" -version = "0.7.5" +name = "smartstring" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" +checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" dependencies = [ - "doc-comment", - "snafu-derive", + "autocfg", + "static_assertions", + "version_check", ] [[package]] -name = "snafu-derive" -version = "0.7.5" +name = "snap" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 1.0.109", -] +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] -name = "snap" -version = "1.1.0" +name = "socket2" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" +checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] [[package]] name = "sqlparser" -version = "0.38.0" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0272b7bb0a225320170c99901b4b5fb3a4384e255a7f2cc228f61e2ba3893e75" +checksum = "743b4dc2cbde11890ccb254a8fc9d537fa41b36da00de2a1c5e9848c9bc42bd7" dependencies = [ "log", - "sqlparser_derive", ] [[package]] -name = "sqlparser_derive" -version = "0.1.1" +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "streaming-decompression" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e" +checksum = "bf6cc3b19bfb128a8ad11026086e31d3ce9ad23f8ea37354b31383a187c44cf3" dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", + "fallible-streaming-iterator", ] [[package]] -name = "static_assertions" -version = "1.1.0" +name = "streaming-iterator" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "strength_reduce" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" [[package]] name = "strip-ansi-escapes" @@ -2110,18 +2002,15 @@ dependencies = [ [[package]] name = "strsim" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" [[package]] name = "strum" version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" -dependencies = [ - "strum_macros", -] [[package]] name = "strum_macros" @@ -2133,15 +2022,9 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.38", + "syn 2.0.52", ] -[[package]] -name = "subtle" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" - [[package]] name = "syn" version = "1.0.109" @@ -2155,9 +2038,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.38" +version = "2.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" +checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" dependencies = [ "proc-macro2", "quote", @@ -2165,96 +2048,59 @@ dependencies = [ ] [[package]] -name = "tempfile" -version = "3.8.1" +name = "sysinfo" +version = "0.30.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" +checksum = "6746919caf9f2a85bff759535664c060109f21975c5ac2e8652e60102bd4d196" dependencies = [ "cfg-if", - "fastrand", - "redox_syscall", - "rustix", - "windows-sys", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "windows", ] +[[package]] +name = "target-features" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfb5fa503293557c5158bd215fdc225695e567a77e453f5d4452a50a193969bd" + [[package]] name = "thiserror" -version = "1.0.50" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" +checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.50" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" +checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", -] - -[[package]] -name = "thrift" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" -dependencies = [ - "byteorder", - "integer-encoding", - "ordered-float", -] - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - -[[package]] -name = "tinyvec" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" -dependencies = [ - "tinyvec_macros", + "syn 2.0.52", ] -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - [[package]] name = "tokio" -version = "1.33.0" +version = "1.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f38200e3ef7995e5ef13baec2f432a6da0aa9ac495b2c0e8f3b7eec2c92d653" +checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" dependencies = [ "backtrace", "bytes", + "libc", + "mio", "num_cpus", - "parking_lot", "pin-project-lite", - "tokio-macros", -] - -[[package]] -name = "tokio-macros" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.38", + "socket2", + "windows-sys 0.48.0", ] [[package]] @@ -2270,59 +2116,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tracing" -version = "0.1.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" -dependencies = [ - "pin-project-lite", - "tracing-attributes", - "tracing-core", -] - -[[package]] -name = "tracing-attributes" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.38", -] - -[[package]] -name = "tracing-core" -version = "0.1.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" -dependencies = [ - "once_cell", -] - -[[package]] -name = "twox-hash" -version = "1.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" -dependencies = [ - "cfg-if", - "static_assertions", -] - -[[package]] -name = "typenum" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" - -[[package]] -name = "unicode-bidi" -version = "0.3.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" - [[package]] name = "unicode-ident" version = "1.0.12" @@ -2330,19 +2123,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] -name = "unicode-normalization" -version = "0.1.22" +name = "unicode-reverse" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +checksum = "0bea5dacebb0d2d0a69a6700a05b59b3908bf801bf563a49bd27a1b60122962c" dependencies = [ - "tinyvec", + "unicode-segmentation", ] [[package]] name = "unicode-segmentation" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" [[package]] name = "unicode-width" @@ -2350,17 +2143,6 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" -[[package]] -name = "url" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", -] - [[package]] name = "utf8parse" version = "0.2.1" @@ -2369,13 +2151,25 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.5.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ad59a7560b41a70d191093a945f0b87bc1deeda46fb237479708a1d6b6cdfc" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" dependencies = [ "getrandom", ] +[[package]] +name = "value-trait" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dad8db98c1e677797df21ba03fca7d3bf9bec3ca38db930954e4fe6e1ea27eb4" +dependencies = [ + "float-cmp", + "halfbrown", + "itoa", + "ryu", +] + [[package]] name = "version_check" version = "0.9.4" @@ -2402,16 +2196,6 @@ dependencies = [ "quote", ] -[[package]] -name = "walkdir" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" -dependencies = [ - "same-file", - "winapi-util", -] - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -2420,9 +2204,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.87" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +checksum = "c1e124130aee3fb58c5bdd6b639a0509486b0338acaaae0c84a5124b0f588b7f" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -2430,24 +2214,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.87" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +checksum = "c9e7e1900c352b609c8488ad12639a311045f40a35491fb69ba8c12f758af70b" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.52", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.87" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +checksum = "b30af9e2d358182b5c7449424f017eba305ed32a7010509ede96cdc4696c46ed" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2455,22 +2239,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.87" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.52", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.87" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838" [[package]] name = "winapi" @@ -2488,28 +2272,29 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -[[package]] -name = "winapi-util" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" -dependencies = [ - "winapi", -] - [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +dependencies = [ + "windows-core", + "windows-targets 0.52.4", +] + [[package]] name = "windows-core" -version = "0.51.1" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets", + "windows-targets 0.52.4", ] [[package]] @@ -2518,7 +2303,16 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.4", ] [[package]] @@ -2527,13 +2321,28 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +dependencies = [ + "windows_aarch64_gnullvm 0.52.4", + "windows_aarch64_msvc 0.52.4", + "windows_i686_gnu 0.52.4", + "windows_i686_msvc 0.52.4", + "windows_x86_64_gnu 0.52.4", + "windows_x86_64_gnullvm 0.52.4", + "windows_x86_64_msvc 0.52.4", ] [[package]] @@ -2542,78 +2351,125 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" + [[package]] name = "windows_i686_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +[[package]] +name = "windows_i686_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" + [[package]] name = "windows_i686_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +[[package]] +name = "windows_i686_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" + +[[package]] +name = "xxhash-rust" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "927da81e25be1e1a2901d59b81b37dd2efd1fc9c9345a55007f09bf5a2d3ee03" + [[package]] name = "zerocopy" -version = "0.7.20" +version = "0.7.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd66a62464e3ffd4e37bd09950c2b9dd6c4f8767380fabba0d523f9a775bc85a" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.20" +version = "0.7.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "255c4596d41e6916ced49cfafea18727b24d67878fa180ddfd69b9df34fd1726" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.52", ] [[package]] name = "zstd" -version = "0.12.4" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "6.0.6" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" +checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" dependencies = [ - "libc", "zstd-sys", ] diff --git a/Cargo.toml b/Cargo.toml index 0524cc2..b766d31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,26 +5,40 @@ repository = "https://github.com/vincev/dply-rs" edition = "2021" license = "Apache-2.0" name = "dply" -version = "0.2.1" +version = "0.3.0" rust-version = "1.70.0" [dependencies] anyhow = "1.0" -chrono = { version = "0.4.26", default-features = false } clap = { version = "4.2", features = ["derive"] } comfy-table = "7" -datafusion = { version = "32", default-features = false } -futures = "0.3.28" -hashbrown = "0.14.0" home = "0.5" -lru = "0.12" +lru = "0.12.0" nom = "7" -num-traits = "0.2.15" -parking_lot = "0.12.1" -reedline = "0.25" +reedline = "0.29" regex = "1.9.4" thiserror = "1.0" -tokio = { version = "1.29.1", features = ["rt-multi-thread", "macros", "sync"] } + +[dependencies.polars] +version = "0.38.0" +default-features = false +features = [ + "cross_join", + "csv", + "dtype-full", + "fmt", + "is_in", + "json", + "lazy", + "parquet", + "semi_anti_join", + "strings", + "timezones" +] [dev-dependencies] indoc = "2" + +[profile.dev] +opt-level = 0 +debug = 2 \ No newline at end of file diff --git a/README.md b/README.md index ade6146..46bb462 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ dply is a command line tool for viewing, querying, and writing csv and parquet -files, inspired by [dplyr](https://dplyr.tidyverse.org/index.html) and powered by -[DataFusion](https://github.com/apache/arrow-datafusion). +files, inspired by [dplyr](https://dplyr.tidyverse.org/index.html). ## Usage overview diff --git a/src/engine.rs b/src/engine.rs index 644a6af..45d761b 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -2,13 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 //! Evaluate pipeline functions. -use anyhow::{bail, Result}; -use datafusion::{ - execution::context::SessionContext, logical_expr::LogicalPlan, physical_plan::ExecutionPlan, - prelude::Expr as DFExpr, -}; -use std::{collections::HashMap, future::Future, sync::Arc}; -use tokio::runtime; +use anyhow::{anyhow, bail, Result}; +use polars::prelude::*; +use std::collections::HashMap; use crate::{completions::Completions, config::FormatConfig, parser::Expr}; @@ -23,7 +19,6 @@ mod fmt; mod glimpse; mod group_by; mod head; -mod io; mod joins; mod json; mod mutate; @@ -35,13 +30,14 @@ mod show; mod summarize; mod unnest; +#[derive(Default)] pub struct Context { - /// Named logical plans. - vars: HashMap, - /// Logical plan passed from one pipeline step to the next. - plan: Option, - /// Columns passed to aggregate functions. - group: Option>, + /// Named data frames. + vars: HashMap, + /// Input dataframe passed from one pipeline step to the next. + df: Option, + /// Group passed to aggregate functions. + group: Option, /// Dataframe columns. columns: Vec, /// Optional output used for testing. @@ -50,30 +46,6 @@ pub struct Context { format_config: FormatConfig, /// Completions lru completions: Completions, - /// Tokio runtime to run async tasks. - runtime: runtime::Runtime, - /// Datafusion context - session: SessionContext, -} - -impl Default for Context { - fn default() -> Self { - let runtime = runtime::Builder::new_multi_thread() - .enable_all() - .build() - .unwrap(); - Self { - vars: Default::default(), - plan: Default::default(), - group: Default::default(), - columns: Default::default(), - output: Default::default(), - format_config: Default::default(), - completions: Default::default(), - runtime, - session: Default::default(), - } - } } impl Context { @@ -87,78 +59,46 @@ impl Context { self.vars.keys().cloned().collect() } - /// Returns the plan associated with the given variable. - fn get_plan(&self, name: &str) -> Option { - self.vars.get(name).cloned() - } - /// Returns the active dataframe or group columns. fn columns(&self) -> &Vec { &self.columns } - /// Returns datafusion context - fn session(&self) -> &SessionContext { - &self.session - } - - /// Returns the current format configuration - fn format_config(&self) -> &FormatConfig { - &self.format_config - } - - async fn create_physical_plan( - &self, - logical_plan: &LogicalPlan, - ) -> Result> { - let plan = self - .session - .state() - .create_physical_plan(logical_plan) - .await?; - Ok(plan) - } - - /// Returns datafusion context - fn block_on(&self, future: F) -> F::Output { - self.runtime.block_on(future) - } - /// Clear the context removing the active group and dataframe. fn clear(&mut self) { - self.plan = None; + self.df = None; self.group = None; } /// Returns and consume the input dataframe. - fn take_plan(&mut self) -> Option { - self.plan.take() + fn take_df(&mut self) -> Option { + self.df.take() } /// Sets the dataframe to be used in pipeline steps. - fn set_plan(&mut self, plan: LogicalPlan) { + fn set_df(&mut self, df: LazyFrame) -> Result<()> { assert!(self.group.is_none()); - // Get unqualified column names. - self.columns = plan + self.columns = df .schema() - .fields() - .iter() - .map(|f| f.name().to_owned()) - .collect(); + .map_err(|e| anyhow!("Schema error: {e}"))? + .iter_names() + .map(|s| s.to_string()) + .collect::>(); self.update_completions(); - self.plan = Some(plan); + self.df = Some(df); + Ok(()) } - /// Sets the grouping columns used for aggregation. - fn set_group(&mut self, plan: LogicalPlan, group: Vec) { - self.set_plan(plan); - self.group = Some(group); + /// Returns the dataframe associated to the given variable. + fn get_df(&self, name: &str) -> Option<&LazyFrame> { + self.vars.get(name) } - fn take_group(&mut self) -> Option> { + /// Returns and consume the active group. + fn take_group(&mut self) -> Option { self.group.take() } @@ -167,30 +107,67 @@ impl Context { self.group.is_some() } + /// Sets the active group. + fn set_group(&mut self, group: LazyGroupBy) -> Result<()> { + assert!(self.df.is_none()); + + self.columns = group + .logical_plan + .schema() + .map_err(|e| anyhow!("Schema error: {e}"))? + .iter_names() + .map(|s| s.to_string()) + .collect::>(); + + self.update_completions(); + + self.group = Some(group); + Ok(()) + } + fn update_completions(&mut self) { self.completions.add(&self.columns); } - fn show(&mut self, plan: LogicalPlan) -> Result<()> { - if let Some(mut output) = self.output.take() { - self.runtime.block_on(fmt::test(self, plan, &mut output))?; - self.output = Some(output); - Ok(()) + /// Print results to the context output. + fn print(&mut self, df: DataFrame) -> Result<()> { + self.set_fmt(); + + if let Some(write) = self.output.as_mut() { + fmt::df_test(write, df)?; } else { - self.runtime.block_on(fmt::show(self, plan)) + println!("{df}"); } + Ok(()) } - fn glimpse(&mut self, plan: LogicalPlan) -> Result<()> { - if let Some(mut output) = self.output.take() { - self.runtime - .block_on(fmt::glimpse(self, plan, &mut output))?; - self.output = Some(output); - Ok(()) + /// Show a glimpse view of the datafrmae. + fn glimpse(&mut self, df: LazyFrame) -> Result<()> { + self.set_fmt(); + + if let Some(write) = self.output.as_mut() { + fmt::glimpse(write, df)?; } else { - let output = &mut std::io::stdout(); - self.runtime.block_on(fmt::glimpse(self, plan, output)) + fmt::glimpse(&mut std::io::stdout(), df)?; } + + Ok(()) + } + + fn set_fmt(&self) { + if let Some(w) = self.format_config.max_table_width { + std::env::set_var("POLARS_TABLE_WIDTH", w.to_string()); + } + + std::env::set_var( + "POLARS_FMT_MAX_COLS", + self.format_config.max_columns.to_string(), + ); + + std::env::set_var( + "POLARS_FMT_STR_LEN", + self.format_config.max_column_width.to_string(), + ); } } @@ -205,7 +182,7 @@ pub fn eval_to_string(exprs: &[Expr]) -> Result { let mut ctx = Context { output: Some(Default::default()), format_config: FormatConfig { - max_table_width: Some(82), + max_column_width: 82, ..Default::default() }, ..Default::default() @@ -233,22 +210,22 @@ fn eval_pipelines(exprs: &[Expr], ctx: &mut Context) -> Result<()> { fn eval_pipeline_step(expr: &Expr, ctx: &mut Context) -> Result<()> { match expr { Expr::Function(name, args) => match name.as_str() { - "anti_join" => joins::eval(args, ctx, joins::JoinType::Anti)?, + "anti_join" => joins::eval(args, ctx, JoinType::Anti)?, "arrange" => arrange::eval(args, ctx)?, "config" => config::eval(args, ctx)?, "count" => count::eval(args, ctx)?, - "cross_join" => joins::eval(args, ctx, joins::JoinType::Cross)?, + "cross_join" => joins::eval(args, ctx, JoinType::Cross)?, "csv" => csv::eval(args, ctx)?, "distinct" => distinct::eval(args, ctx)?, "filter" => filter::eval(args, ctx)?, "glimpse" => glimpse::eval(args, ctx)?, "group_by" => group_by::eval(args, ctx)?, "head" => head::eval(args, ctx)?, - "inner_join" => joins::eval(args, ctx, joins::JoinType::Inner)?, + "inner_join" => joins::eval(args, ctx, JoinType::Inner)?, "json" => json::eval(args, ctx)?, - "left_join" => joins::eval(args, ctx, joins::JoinType::Left)?, + "left_join" => joins::eval(args, ctx, JoinType::Left)?, "mutate" => mutate::eval(args, ctx)?, - "outer_join" => joins::eval(args, ctx, joins::JoinType::Outer)?, + "outer_join" => joins::eval(args, ctx, JoinType::Outer { coalesce: true })?, "parquet" => parquet::eval(args, ctx)?, "relocate" => relocate::eval(args, ctx)?, "rename" => rename::eval(args, ctx)?, @@ -260,11 +237,11 @@ fn eval_pipeline_step(expr: &Expr, ctx: &mut Context) -> Result<()> { }, Expr::Identifier(name) => { // If there is an input assign it to the variable. - if let Some(plan) = ctx.take_plan() { + if let Some(plan) = ctx.take_df() { ctx.vars.insert(name.to_owned(), plan.clone()); - ctx.set_plan(plan); + ctx.set_df(plan)?; } else if let Some(plan) = ctx.vars.get(name) { - ctx.set_plan(plan.clone()); + ctx.set_df(plan.clone())?; } else if ctx.is_grouping() { bail!("Cannot assign a group to variable '{name}'"); } else { diff --git a/src/engine/args.rs b/src/engine/args.rs index 62441e2..af75192 100644 --- a/src/engine/args.rs +++ b/src/engine/args.rs @@ -1,8 +1,9 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 use anyhow::{anyhow, bail, Result}; -use chrono::{NaiveDate, NaiveDateTime}; -use datafusion::{common::DFSchema, logical_expr::Expr as DFExpr, prelude::*}; +use polars::export::chrono::prelude::*; +use polars::lazy::dsl::Expr as PolarsExpr; +use polars::prelude::*; use std::str::FromStr; use crate::parser::{Expr, Operator}; @@ -17,16 +18,6 @@ pub fn string(expr: &Expr) -> String { } } -/// Returns the value from a number expression. -/// -/// Panics if the expression is not a number. -pub fn number(expr: &Expr) -> f64 { - match expr { - Expr::Number(s) => *s, - _ => panic!("{expr} is not a number expression"), - } -} - /// Returns the string from an identifier expression. /// /// Panics if the expression is not an identifier. @@ -37,43 +28,29 @@ pub fn identifier(expr: &Expr) -> String { } } -/// Returns a datafusion column expression and quotes the name. -/// -/// The `col` function in datafusion makes identifiers lower case, this function -/// quotes the name so that it preserves case. -pub fn str_to_col(s: impl Into) -> DFExpr { - DFExpr::Column(Column::new_unqualified(s)) -} - -/// Returns a datafusion column if it is in the schema. -pub fn expr_to_col(expr: &Expr, schema: &DFSchema) -> Result { +/// Returns a Polars column if it is in the schema. +pub fn column(expr: &Expr, schema: &Schema) -> Result { let column = identifier(expr); - if schema.has_column_with_unqualified_name(&column) { - Ok(str_to_col(column)) - } else { - Err(anyhow!("Unknown column '{expr}'")) - } + schema + .get(&column) + .map(|_| col(&column)) + .ok_or_else(|| anyhow!("Unknown column '{expr}'")) } -/// Returns a datafusion qualified column if it is in the schema. +/// Returns the value from a number expression. /// -/// This is needed for when window function expressions are transformed to a -/// column expression as their name needs the table. -pub fn expr_to_qualified_col(expr: &Expr, schema: &DFSchema) -> Result { - let column = identifier(expr); - - if let Ok(field) = schema.field_with_unqualified_name(&column) { - let qualifier = field.qualifier().cloned(); - Ok(DFExpr::Column(Column::new(qualifier, field.name()))) - } else { - Err(anyhow!("Unknown column '{expr}'")) +/// Panics if the expression is not a number. +pub fn number(expr: &Expr) -> f64 { + match expr { + Expr::Number(s) => *s, + _ => panic!("{expr} is not a number expression"), } } /// Returns a date time from a string. /// /// Returns an error if the string is not a valid date time. -pub fn timestamp(expr: &Expr) -> Result { +pub fn timestamp(expr: &Expr) -> Result { let ts = string(expr); let ts = ts.trim(); @@ -85,23 +62,22 @@ pub fn timestamp(expr: &Expr) -> Result { }) .map_err(|e| anyhow!("Invalid timestamp string {ts}: {e}"))?; - Ok(lit_timestamp_nano(dt.timestamp_nanos_opt().unwrap())) + Ok(dt) } -/// Returns the value of a named boolean variable like `overwrite = false`. -pub fn named_bool(args: &[Expr], name: &str) -> bool { +pub fn named_bool(args: &[Expr], name: &str) -> Result { for arg in args { if let Expr::BinaryOp(lhs, Operator::Assign, rhs) = arg { match (lhs.as_ref(), rhs.as_ref()) { (Expr::Identifier(lhs), Expr::Identifier(rhs)) if lhs == name => { - return bool::from_str(rhs).unwrap_or(false); + return Ok(bool::from_str(rhs)?); } _ => {} } } } - false + Ok(false) } /// Returns the value of a named integer variable like `schema_rows = 2000`. diff --git a/src/engine/arrange.rs b/src/engine/arrange.rs index ebb8713..cba9b5c 100644 --- a/src/engine/arrange.rs +++ b/src/engine/arrange.rs @@ -1,7 +1,7 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 use anyhow::{bail, Result}; -use datafusion::logical_expr::LogicalPlanBuilder; +use polars::prelude::*; use crate::parser::Expr; @@ -11,9 +11,10 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { - if let Some(plan) = ctx.take_plan() { + if let Some(df) = ctx.take_df() { let schema_cols = ctx.columns(); let mut columns = Vec::with_capacity(args.len()); + let mut descending = Vec::with_capacity(args.len()); for arg in args { match arg { @@ -24,7 +25,8 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { bail!("arrange error: Unknown column {column}"); } - columns.push(args::str_to_col(&column).sort(false, false)); + columns.push(col(&column)); + descending.push(true); } Expr::Identifier(column) => { // arrange(column) @@ -32,14 +34,14 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { bail!("arrange error: Unknown column {column}"); } - columns.push(args::str_to_col(column).sort(true, false)); + columns.push(col(column)); + descending.push(false); } _ => {} } } - let plan = LogicalPlanBuilder::from(plan).sort(columns)?.build()?; - ctx.set_plan(plan); + ctx.set_df(df.sort_by_exprs(columns, descending, true, false))?; } else if ctx.is_grouping() { bail!("arrange error: must call summarize after a group_by"); } else { diff --git a/src/engine/count.rs b/src/engine/count.rs index d5eddfd..a288238 100644 --- a/src/engine/count.rs +++ b/src/engine/count.rs @@ -1,7 +1,7 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 use anyhow::{bail, Result}; -use datafusion::logical_expr::{self, Expr as DFExpr, LogicalPlan, LogicalPlanBuilder}; +use polars::prelude::*; use crate::parser::Expr; @@ -11,7 +11,7 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { - if let Some(plan) = ctx.take_plan() { + if let Some(df) = ctx.take_df() { let schema_cols = ctx.columns(); let mut columns = Vec::new(); @@ -21,7 +21,7 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { bail!("count error: Unknown column {column}"); } - let expr = args::str_to_col(column); + let expr = col(column); if !columns.contains(&expr) { columns.push(expr); } @@ -30,21 +30,25 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { let agg_col = find_agg_column(schema_cols.as_slice()); - let plan = if !columns.is_empty() { - let plan = count(plan, columns.clone(), &agg_col)?; + let df = if !columns.is_empty() { + let ncol = columns.last().unwrap().clone(); + let df = df + .group_by(&columns) + .agg([ncol.is_not_null().count().alias(&agg_col)]); - if args::named_bool(args, "sort") { - let mut sort_cols = vec![args::str_to_col(&agg_col).sort(false, false)]; - sort_cols.extend(columns.into_iter().map(|c| c.sort(true, false))); - LogicalPlanBuilder::from(plan).sort(sort_cols)?.build()? - } else { - plan + let mut sort_mask = vec![false; columns.len()]; + + if args::named_bool(args, "sort")? { + columns.insert(0, col(&agg_col)); + sort_mask.insert(0, true); } + + df.sort_by_exprs(columns, sort_mask, false, false) } else { - count(plan, vec![], &agg_col)? + df.select(&[col(&schema_cols[0]).count().alias(&agg_col)]) }; - ctx.set_plan(plan); + ctx.set_df(df)?; } else if ctx.is_grouping() { bail!("count error: must call summarize after a group_by"); } else { @@ -54,14 +58,6 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { Ok(()) } -pub fn count(plan: LogicalPlan, group: Vec, name: &str) -> Result { - let agg_col = logical_expr::count(logical_expr::lit(1u8)).alias(name); - let plan = LogicalPlanBuilder::from(plan) - .aggregate(group, vec![agg_col])? - .build()?; - Ok(plan) -} - /// If there is a column named `n` use `nn`, or `nnn`, etc. fn find_agg_column(cols: &[String]) -> String { let mut col = "n".to_string(); diff --git a/src/engine/csv.rs b/src/engine/csv.rs index 266d56e..b73ae56 100644 --- a/src/engine/csv.rs +++ b/src/engine/csv.rs @@ -1,17 +1,8 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 use anyhow::{anyhow, bail, Result}; -use datafusion::{ - arrow::csv, - common::DEFAULT_CSV_EXTENSION, - datasource::{ - file_format::csv::CsvFormat, - listing::{ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl}, - provider_as_source, - }, - logical_expr::{LogicalPlanBuilder, UNNAMED_TABLE}, -}; -use std::{num::NonZeroUsize, path::Path, sync::Arc}; +use polars::prelude::*; +use std::path::PathBuf; use crate::parser::Expr; @@ -21,59 +12,30 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { - let path = args::string(&args[0]); - let overwrite = args::named_bool(args, "overwrite"); + // csv("nyctaxi.csv") + let path = PathBuf::from(args::string(&args[0])); + // csv("nyctaxi.csv", overwrite = true) + let overwrite = args::named_bool(args, "overwrite")?; // If there is an input dataframe save it to disk. - if let Some(plan) = ctx.take_plan() { - if !overwrite && Path::new(&path).exists() { - bail!("csv error: file '{}' already exists.", path); + if let Some(df) = ctx.take_df() { + if !overwrite && path.exists() { + bail!("csv error: file '{}' already exists", path.display()); } - ctx.set_plan(plan.clone()); - - let (_, mut rx) = io::execute_plan(plan, ctx)?; - let file = std::fs::File::create(&path) - .map_err(|e| anyhow!("csv error: cannot create file '{}' {e}", path))?; - let mut writer = csv::Writer::new(file); - - while let Some(batch) = rx.blocking_recv() { - writer.write(&batch?)?; - } - } else { - // Read the data frame and set it as input for the next task. - let table_path = ListingTableUrl::parse(&path)?; - - let num_cpus = std::thread::available_parallelism() - .unwrap_or(NonZeroUsize::new(2).unwrap()) - .get(); + .map_err(|e| anyhow!("csv error: cannot create file '{}' {e}", path.display()))?; - let file_format = CsvFormat::default(); + let mut out_df = df.clone().collect()?; + ctx.set_df(df)?; - // Use default extension for recursive loading. - let extension = if Path::new(&path).is_dir() { - DEFAULT_CSV_EXTENSION - } else { - "" - }; - - let listing_options = ListingOptions::new(Arc::new(file_format)) - .with_file_extension(extension) - .with_target_partitions(num_cpus); - - let resolved_schema = - ctx.block_on(listing_options.infer_schema(&ctx.session().state(), &table_path))?; - - let config = ListingTableConfig::new(table_path) - .with_listing_options(listing_options) - .with_schema(resolved_schema); - - let table_provider = ListingTable::try_new(config)?; - let table_source = provider_as_source(Arc::new(table_provider)); - let plan = LogicalPlanBuilder::scan(UNNAMED_TABLE, table_source, None)?.build()?; - - ctx.set_plan(plan); + CsvWriter::new(file).finish(&mut out_df)?; + } else { + let reader = LazyCsvReader::new(&path).with_infer_schema_length(Some(1000)); + let df = reader + .finish() + .map_err(|e| anyhow!("csv error: cannot read file '{}' {e}", path.display()))?; + ctx.set_df(df)?; } Ok(()) diff --git a/src/engine/distinct.rs b/src/engine/distinct.rs index abce53e..59e08ef 100644 --- a/src/engine/distinct.rs +++ b/src/engine/distinct.rs @@ -1,7 +1,7 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 use anyhow::{bail, Result}; -use datafusion::logical_expr::LogicalPlanBuilder; +use polars::prelude::*; use crate::parser::Expr; @@ -11,7 +11,7 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { - if let Some(plan) = ctx.take_plan() { + if let Some(df) = ctx.take_df() { let schema_cols = ctx.columns(); let mut select_columns = Vec::new(); @@ -26,21 +26,15 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { } } - let plan = if !select_columns.is_empty() { - let columns = select_columns - .iter() - .map(args::str_to_col) - .collect::>(); - - LogicalPlanBuilder::from(plan) - .project(columns)? - .distinct()? - .build()? + let df = if !select_columns.is_empty() { + let columns = select_columns.iter().map(|c| col(c)).collect::>(); + df.select(&columns) + .unique_stable(Some(select_columns), UniqueKeepStrategy::First) } else { - LogicalPlanBuilder::from(plan).distinct()?.build()? + df.unique_stable(None, UniqueKeepStrategy::First) }; - ctx.set_plan(plan); + ctx.set_df(df)?; } else if ctx.is_grouping() { bail!("distinct error: must call summarize after a group_by"); } else { diff --git a/src/engine/filter.rs b/src/engine/filter.rs index 7292b3d..5006676 100644 --- a/src/engine/filter.rs +++ b/src/engine/filter.rs @@ -1,17 +1,21 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 -use anyhow::{anyhow, bail, Result}; -use datafusion::{ - arrow::{ - array::{ArrayRef, BooleanArray}, - datatypes::*, - }, - common::cast::{as_list_array, as_primitive_array, as_string_array}, - common::DFSchema, - logical_expr::{create_udf, lit, Expr as DFExpr, LogicalPlanBuilder, Volatility}, - physical_plan::functions::make_scalar_function, -}; -use std::sync::Arc; +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +use anyhow::{bail, Result}; +use polars::export::regex; +use polars::lazy::dsl::Expr as PolarsExpr; +use polars::prelude::*; use crate::parser::{Expr, Operator}; @@ -21,13 +25,17 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { - if let Some(mut plan) = ctx.take_plan() { + if let Some(mut df) = ctx.take_df() { for arg in args { - let expr = eval_expr(arg, plan.schema())?; - plan = LogicalPlanBuilder::from(plan).filter(expr)?.build()?; + let expr = df + .schema() + .map_err(anyhow::Error::from) + .and_then(|schema| eval_expr(arg, &schema)) + .map_err(|e| anyhow!("filter error: {e}"))?; + df = df.filter(expr); } - ctx.set_plan(plan); + ctx.set_df(df)?; } else if ctx.is_grouping() { bail!("filter error: must call summarize after a group_by"); } else { @@ -37,7 +45,7 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { Ok(()) } -fn eval_expr(expr: &Expr, schema: &DFSchema) -> Result { +fn eval_expr(expr: &Expr, schema: &Schema) -> Result { match expr { Expr::BinaryOp(lhs, op, rhs) => { let lhs = eval_expr(lhs, schema)?; @@ -45,7 +53,7 @@ fn eval_expr(expr: &Expr, schema: &DFSchema) -> Result { let result = match op { Operator::Eq => lhs.eq(rhs), - Operator::NotEq => lhs.not_eq(rhs), + Operator::NotEq => lhs.neq(rhs), Operator::Lt => lhs.lt(rhs), Operator::LtEq => lhs.lt_eq(rhs), Operator::Gt => lhs.gt(rhs), @@ -57,200 +65,96 @@ fn eval_expr(expr: &Expr, schema: &DFSchema) -> Result { Ok(result) } - Expr::Identifier(_) => args::expr_to_col(expr, schema), + Expr::Identifier(_) => args::column(expr, schema), Expr::String(s) => Ok(lit(s.clone())), Expr::Number(n) => Ok(lit(*n)), - Expr::Function(name, args) if name == "dt" => Ok(args::timestamp(&args[0])?), - Expr::UnaryOp(Operator::Not, expr) => { - eval_predicate(expr, schema).map(|expr| DFExpr::Not(expr.into())) + Expr::Function(name, args) if name == "dt" => { + let ts = args::timestamp(&args[0])?; + Ok(lit(ts)) } + Expr::UnaryOp(Operator::Not, expr) => eval_predicate(expr, schema).map(|expr| expr.not()), Expr::Function(_, _) => eval_predicate(expr, schema), _ => panic!("Unexpected filter expression {expr}"), } } -fn eval_predicate(expr: &Expr, schema: &DFSchema) -> Result { +fn eval_predicate(expr: &Expr, schema: &Schema) -> Result { match expr { Expr::Function(name, args) if name == "contains" => { let column = args::identifier(&args[0]); let column_type = schema - .field_with_unqualified_name(&column) - .map(|f| f.data_type()) - .map_err(|_| anyhow!("Unknown `contains` column '{column}'"))?; + .get(&column) + .ok_or_else(|| anyhow!("Unknown contains column '{column}'"))?; match column_type { - lt @ DataType::List(_) - | lt @ DataType::LargeList(_) - | lt @ DataType::FixedSizeList(_, _) => list_contains(&column, &args[1], lt), - DataType::Utf8 | DataType::LargeUtf8 => string_contains(&column, &args[1]), + DataType::List(elem_type) => list_contains(&column, &args[1], elem_type), + DataType::String => string_contains(&column, &args[1]), _ => Err(anyhow!("Column '{column}' must be a str or a list")), } } Expr::Function(name, args) if name == "is_null" => { - args::expr_to_col(&args[0], schema).map(|c| c.is_null()) + let r = args::column(&args[0], schema).map(|c| c.is_null()); + println!("{r:?}"); + r } _ => panic!("Unexpected filter expression {expr}"), } } -fn list_contains(column: &str, key: &Expr, list_type: &DataType) -> Result { - let elem_type = match list_type { - DataType::List(elem) | DataType::LargeList(elem) | DataType::FixedSizeList(elem, _) => { - elem.data_type() - } - _ => bail!("Unsopperted list type"), - }; +fn list_contains(column: &str, pattern: &Expr, elem_type: &DataType) -> Result { + println!("contain {elem_type}"); + use DataType::*; + + match (elem_type, pattern) { + (Int8, Expr::Number(n)) => Ok(col(column).list().contains(lit(*n as i8))), + (Int16, Expr::Number(n)) => Ok(col(column).list().contains(lit(*n as i16))), + (Int32, Expr::Number(n)) => Ok(col(column).list().contains(lit(*n as i32))), + (Int64, Expr::Number(n)) => Ok(col(column).list().contains(lit(*n as i64))), + (UInt8, Expr::Number(n)) => Ok(col(column).list().contains(lit(*n as u8))), + (UInt16, Expr::Number(n)) => Ok(col(column).list().contains(lit(*n as u16))), + (UInt32, Expr::Number(n)) => Ok(col(column).list().contains(lit(*n as u32))), + (UInt64, Expr::Number(n)) => Ok(col(column).list().contains(lit(*n as u64))), + (Float32, Expr::Number(n)) => Ok(col(column).list().contains(lit(*n as f32))), + (Float64, Expr::Number(n)) => Ok(col(column).list().contains(lit(*n))), + (String, Expr::String(s)) => { + let re = regex::Regex::new(s) + .map_err(|_| anyhow!("invalid contains regex '{s}' for column '{column}'"))?; + + let function = move |s: Series| { + let ca = s.list()?; + let mut bools = Vec::with_capacity(ca.len()); + + ca.into_iter().for_each(|arr| { + let found = if let Some(s) = arr { + s.str() + .map(|ca| { + ca.into_iter() + .any(|s| s.map(|s| re.is_match(s)).unwrap_or(false)) + }) + .unwrap_or_default() + } else { + false + }; + + bools.push(found); + }); + + Ok(Some(BooleanChunked::new(ca.name(), bools).into_series())) + }; - match (elem_type, key) { - (DataType::Int8, Expr::Number(key)) => { - list_contains_number::(column, *key, list_type) - } - (DataType::Int16, Expr::Number(key)) => { - list_contains_number::(column, *key, list_type) - } - (DataType::Int32, Expr::Number(key)) => { - list_contains_number::(column, *key, list_type) - } - (DataType::Int64, Expr::Number(key)) => { - list_contains_number::(column, *key, list_type) - } - (DataType::UInt8, Expr::Number(key)) => { - list_contains_number::(column, *key, list_type) - } - (DataType::UInt16, Expr::Number(key)) => { - list_contains_number::(column, *key, list_type) - } - (DataType::UInt32, Expr::Number(key)) => { - list_contains_number::(column, *key, list_type) - } - (DataType::UInt64, Expr::Number(key)) => { - list_contains_number::(column, *key, list_type) - } - (DataType::Float16, Expr::Number(key)) => { - list_contains_number::(column, *key, list_type) - } - (DataType::Float32, Expr::Number(key)) => { - list_contains_number::(column, *key, list_type) - } - (DataType::Float64, Expr::Number(key)) => { - list_contains_number::(column, *key, list_type) - } - (DataType::Utf8, Expr::String(pattern)) | (DataType::LargeUtf8, Expr::String(pattern)) => { - list_contains_utf8(column, pattern, list_type) + // Using apply avoid crash with debug build. + Ok(col(column).apply(function, GetOutput::from_type(DataType::Boolean))) } _ => bail!("contains error: invalid type {elem_type} for column '{column}'"), } } -fn list_contains_number(column: &str, key: f64, list_type: &DataType) -> Result -where - T: ArrowPrimitiveType, - T::Native: num_traits::NumCast, -{ - let matcher_udf = move |args: &[ArrayRef]| { - assert_eq!(args.len(), 1); - - let key = num_traits::NumCast::from(key).unwrap_or_default(); - let result = as_list_array(&args[0])? - .iter() - .map(|list| match list { - Some(array) => { - let numbers = as_primitive_array::(&array).ok()?; - Some( - numbers - .iter() - .any(|v| v.map(|n| n.is_eq(key)).unwrap_or(false)), - ) - } - None => Some(false), - }) - .collect::(); - Ok(Arc::new(result) as ArrayRef) - }; - - let matcher_udf = make_scalar_function(matcher_udf); - - let matcher_udf = create_udf( - "matcher", - // Expects a list of utf8 - vec![list_type.clone()], - // Returns boolean. - Arc::new(DataType::Boolean), - Volatility::Immutable, - matcher_udf, - ); - - Ok(matcher_udf.call(vec![args::str_to_col(column)])) -} - -fn list_contains_utf8(column: &str, pattern: &str, list_type: &DataType) -> Result { - let re = regex::Regex::new(pattern) - .map_err(|_| anyhow!("invalid contains regex '{pattern}' for column '{column}'"))?; - - let matcher_udf = move |args: &[ArrayRef]| { - assert_eq!(args.len(), 1); - - let result = as_list_array(&args[0])? - .iter() - .map(|list| match list { - Some(array) => { - let strings = as_string_array(&array).ok()?; - Some( - strings - .iter() - .any(|v| v.map(|s| re.is_match(s)).unwrap_or(false)), - ) - } - None => Some(false), - }) - .collect::(); - Ok(Arc::new(result) as ArrayRef) - }; - - let matcher_udf = make_scalar_function(matcher_udf); - - let matcher_udf = create_udf( - "matcher", - // Expects a list of utf8 - vec![list_type.clone()], - // Returns boolean. - Arc::new(DataType::Boolean), - Volatility::Immutable, - matcher_udf, - ); - - Ok(matcher_udf.call(vec![args::str_to_col(column)])) -} - -fn string_contains(column: &str, pattern: &Expr) -> Result { +fn string_contains(column: &str, pattern: &Expr) -> Result { if let Expr::String(re) = pattern { - let re = regex::Regex::new(re) + regex::Regex::new(re) .map_err(|_| anyhow!("invalid contains regex '{re}' for column '{column}'"))?; - let matcher_udf = move |args: &[ArrayRef]| { - // Mathes on only one string argument. - assert_eq!(args.len(), 1); - - let result = as_string_array(&args[0])? - .iter() - .map(|v| v.map(|s| re.is_match(s)).or(Some(false))) - .collect::(); - Ok(Arc::new(result) as ArrayRef) - }; - - let matcher_udf = make_scalar_function(matcher_udf); - - let matcher_udf = create_udf( - "matcher", - // Expects an array of strings. - vec![DataType::Utf8], - // Returns boolean. - Arc::new(DataType::Boolean), - Volatility::Immutable, - matcher_udf, - ); - - Ok(matcher_udf.call(vec![args::str_to_col(column)])) + Ok(col(column).str().contains(lit(re.to_owned()), false)) } else { Err(anyhow!( "contains predicate for column '{column}' must be a regex" diff --git a/src/engine/fmt.rs b/src/engine/fmt.rs index 1b2c11b..c39d1b4 100644 --- a/src/engine/fmt.rs +++ b/src/engine/fmt.rs @@ -1,197 +1,76 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 use anyhow::Result; -use comfy_table::presets::*; -use comfy_table::*; -use datafusion::{ - arrow::datatypes::{DataType, IntervalUnit, TimeUnit}, - arrow::{ - array::{ - Array, DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray, - DurationSecondArray, Float16Array, Float32Array, Float64Array, Int64Array, - IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, - }, - record_batch::RecordBatch, - util::display::{ArrayFormatter as ArrowArrayFormatter, FormatOptions}, - }, - execution::context::TaskContext, - logical_expr::{LogicalPlan, LogicalPlanBuilder}, -}; -use futures::TryStreamExt; -use std::{io::Write, sync::Arc, time::Instant}; - -use super::{count, Context}; - -/// Prints the plan results. -pub async fn show(ctx: &Context, plan: LogicalPlan) -> Result<()> { - // Get column types before consuming the dataframe so that we can show them - // even if the dataframe is empty. - let format_config = ctx.format_config(); - let num_cols = plan.schema().fields().len(); - let truncate_cols = format_config.max_columns < num_cols; - - let mut fields = plan - .schema() - .fields() - .iter() - .take(format_config.max_columns) - .map(|f| format!("{}\n---\n{}", f.name(), fmt_data_type(f.data_type()))) - .collect::>(); - - if truncate_cols { - fields.push("...".to_string()); - } - - let constraints = fields - .iter() - .map(|f| { - let w = f.len().clamp(5, 16); - ColumnConstraint::LowerBoundary(Width::Fixed(w as u16)) - }) - .collect::>(); - - let mut table = Table::new(); - table - .load_preset(UTF8_FULL_CONDENSED) - .set_content_arrangement(ContentArrangement::Dynamic); - table.set_header(fields); - table.set_constraints(constraints); - - if let Some(cols) = format_config.max_table_width { - table.set_width(cols as u16); - } - - let fmt_opts = fmt_opts(); - let mut num_rows = 0; - - let start = Instant::now(); - - for_each_batch(ctx, plan, |batch| { - num_rows += batch.num_rows(); - let formatters = batch - .columns() - .iter() - .take(format_config.max_columns) - .map(|c| ArrayFormatter::try_new(c.as_ref(), &fmt_opts)) - .collect::, _>>()?; - - for row in 0..batch.num_rows() { - let mut cells = formatters - .iter() - .map(|f| Cell::new(fmt_value(f.value(row), format_config.max_column_width))) - .collect::>(); - - if truncate_cols { - cells.push(Cell::new("...")); - } - - table.add_row(cells); - } - - Ok(()) - }) - .await?; - - println!( - "shape: ({}, {}) elapsed: {:.3}s", - fmt_usize(num_rows), - fmt_usize(num_cols), - start.elapsed().as_millis() as f64 / 1000.0 - ); - println!("{}", table); - - Ok(()) -} +use comfy_table::presets; +use comfy_table::{ColumnConstraint, ContentArrangement, Width}; +use comfy_table::{Row, Table}; +use polars::prelude::*; +use std::{env, io::Write}; /// Prints a dataframe in test format, used for test comparisons. -pub async fn test(ctx: &Context, plan: LogicalPlan, output: &mut dyn Write) -> Result<()> { - // Get column types before consuming the dataframe so that we can show them - // even if the dataframe is empty. - let col_names = plan - .schema() - .fields() - .iter() - .map(|f| f.name().to_owned()) - .collect::>(); +pub fn df_test(out: &mut dyn Write, df: DataFrame) -> Result<()> { + env::set_var("POLARS_FMT_TABLE_CELL_LIST_LEN", "6"); - let col_types = plan - .schema() - .fields() - .iter() - .map(|f| fmt_data_type(f.data_type())) - .collect::>(); - - let mut batches = Vec::new(); - for_each_batch(ctx, plan, |batch| { - batches.push(batch); - Ok(()) - }) - .await?; + let height = df.height(); - let num_rows = batches.iter().map(|b| b.num_rows()).sum::(); - - writeln!(output, "shape: ({}, {})", num_rows, col_names.len())?; + let (row, cols) = df.shape(); + writeln!(out, "shape: ({}, {})", row, cols)?; // Write columns - writeln!(output, "{}", col_names.join("|"))?; + let row = df + .fields() + .into_iter() + .map(|f| f.name().to_string()) + .collect::>() + .join("|"); + writeln!(out, "{row}")?; // Write columns types - writeln!(output, "{}", col_types.join("|"))?; + let row = df + .fields() + .into_iter() + .map(|f| f.data_type().to_string()) + .collect::>() + .join("|"); + writeln!(out, "{row}")?; // Header separator - writeln!(output, "---")?; + writeln!(out, "---")?; // Write values - let fmt_opts = fmt_opts(); - - for batch in batches { - let formatters = batch - .columns() + for i in 0..height { + let row = df + .get_columns() .iter() - .map(|c| ArrayFormatter::try_new(c.as_ref(), &fmt_opts)) - .collect::, _>>()?; - - for row in 0..batch.num_rows() { - let values = formatters - .iter() - .map(|f| fmt_value(f.value(row), 1024)) - .collect::>(); - writeln!(output, "{}", values.join("|"))?; - } + .map(|s| s.str_value(i).unwrap()) + .collect::>() + .join("|"); + writeln!(out, "{row}")?; } // Data separator - writeln!(output, "---")?; + writeln!(out, "---")?; Ok(()) } /// Prints a dataframe in glimpse format. -pub async fn glimpse(ctx: &Context, plan: LogicalPlan, output: &mut dyn Write) -> Result<()> { - let mut num_rows = 0; - let count_plan = count::count(plan.clone(), vec![], "n")?; - for_each_batch(ctx, count_plan, |batch| { - num_rows = *batch - .columns() - .first() - .and_then(|c| c.as_any().downcast_ref::()) - .and_then(|a| a.values().first()) - .unwrap_or(&0) as usize; - Ok(()) - }) - .await?; - - let num_cols = plan.schema().fields().len(); - - let format_config = ctx.format_config(); +pub fn glimpse(w: &mut dyn Write, df: LazyFrame) -> Result<()> { + let num_rows = df + .clone() + .count() + .collect()? + .max_horizontal()? + .unwrap_or_default() + .max::()? + .unwrap_or_default(); + + let df = df.fetch(100)?; + let num_cols = df.get_columns().len(); let mut table = Table::new(); table.set_content_arrangement(ContentArrangement::DynamicFullWidth); - table.load_preset(UTF8_FULL_CONDENSED); - - if let Some(cols) = format_config.max_table_width { - table.set_width(cols as u16); - } + table.load_preset(presets::UTF8_FULL_CONDENSED); let info = format!( "Rows: {}\nCols: {}", @@ -200,41 +79,25 @@ pub async fn glimpse(ctx: &Context, plan: LogicalPlan, output: &mut dyn Write) - ); table.set_header(vec![info, "Type".into(), "Values".into()]); - const NUM_VALUES: usize = 100; - - let plan = LogicalPlanBuilder::from(plan) - .limit(0, Some(NUM_VALUES))? - .build()?; - - let fmt_opts = fmt_opts(); - - for_each_batch(ctx, plan, |batch| { - let columns = batch.columns().iter(); - - for (fld, col) in batch.schema().fields().into_iter().zip(columns) { - let mut row = Row::new(); - row.add_cell(fld.name().into()); - row.add_cell(fmt_data_type(fld.data_type()).into()); - - let fmt = ArrayFormatter::try_new(col.as_ref(), &fmt_opts)?; - let mut values = Vec::with_capacity(NUM_VALUES); - - for idx in 0..col.len() { - values.push(fmt_value( - fmt.value(idx).to_string(), - format_config.max_column_width, - )); - } + if let Ok(slen) = std::env::var("POLARS_FMT_STR_LEN") { + table.set_width(slen.parse()?); + } - row.add_cell(values.join(", ").into()); - row.max_height(1); + for col in df.get_columns() { + let mut row = Row::new(); + row.add_cell(col.name().into()); + row.add_cell(format!("{}", col.dtype()).into()); - table.add_row(row); + let mut values = Vec::with_capacity(10); + for value in col.iter() { + values.push(format!("{}", value)); } - Ok(()) - }) - .await?; + row.add_cell(values.join(", ").into()); + row.max_height(1); + + table.add_row(row); + } table.set_constraints(vec![ ColumnConstraint::LowerBoundary(Width::Fixed(10)), @@ -242,128 +105,10 @@ pub async fn glimpse(ctx: &Context, plan: LogicalPlan, output: &mut dyn Write) - ColumnConstraint::UpperBoundary(Width::Percentage(90)), ]); - writeln!(output, "{table}")?; - Ok(()) -} - -enum ArrayFormatter<'a> { - Arrow(ArrowArrayFormatter<'a>), - Float16(&'a Float16Array), - Float32(&'a Float32Array), - Float64(&'a Float64Array), - Duration(TimeUnit, &'a dyn Array), - Interval(IntervalUnit, &'a dyn Array), -} - -impl<'a> ArrayFormatter<'a> { - pub fn try_new(array: &'a dyn Array, options: &FormatOptions<'a>) -> Result { - let formatter = match array.data_type() { - DataType::Float16 => { - ArrayFormatter::Float16(array.as_any().downcast_ref::().unwrap()) - } - DataType::Float32 => { - ArrayFormatter::Float32(array.as_any().downcast_ref::().unwrap()) - } - DataType::Float64 => { - ArrayFormatter::Float64(array.as_any().downcast_ref::().unwrap()) - } - DataType::Duration(tu) => ArrayFormatter::Duration(tu.clone(), array), - DataType::Interval(iu) => ArrayFormatter::Interval(iu.clone(), array), - _ => ArrayFormatter::Arrow(ArrowArrayFormatter::try_new(array, options)?), - }; - - Ok(formatter) - } - - fn value(&self, idx: usize) -> String { - match &self { - ArrayFormatter::Arrow(f) => f.value(idx).to_string(), - ArrayFormatter::Float16(a) => { - if a.is_null(idx) { - "null".to_string() - } else { - fmt_float(f64::from(a.value(idx))) - } - } - ArrayFormatter::Float32(a) => { - if a.is_null(idx) { - "null".to_string() - } else { - fmt_float(f64::from(a.value(idx))) - } - } - ArrayFormatter::Float64(a) => { - if a.is_null(idx) { - "null".to_string() - } else { - fmt_float(a.value(idx)) - } - } - ArrayFormatter::Duration(tu, a) => fmt_duration(tu, *a, idx), - ArrayFormatter::Interval(iu, a) => fmt_interval(iu, *a, idx), - } - } -} - -/// Invoke function for each record batch generated by the plan. -/// -/// This funcion consume the stream for each partition without bringing all the -/// results in memory. -async fn for_each_batch(ctx: &Context, plan: LogicalPlan, mut f: F) -> Result<()> -where - F: FnMut(RecordBatch) -> Result<()>, -{ - let plan = ctx.create_physical_plan(&plan).await?; - - let task_context = Arc::new(TaskContext::from(ctx.session())); - for partition in 0..plan.output_partitioning().partition_count() { - let mut stream = plan.execute(partition, task_context.clone())?; - while let Some(batch) = stream.try_next().await? { - f(batch)?; - } - } - + writeln!(w, "{table}")?; Ok(()) } -fn fmt_opts<'a>() -> FormatOptions<'a> { - FormatOptions::default() - .with_display_error(true) - .with_null("null") -} - -fn fmt_value(v: String, max_len: usize) -> String { - if v.chars().count() <= max_len { - v - } else { - let last_idx = v - .char_indices() - .take(max_len) - .map(|(idx, _)| idx) - .last() - .unwrap_or(0); - format!("{}...", &v[..last_idx]) - } -} - -fn fmt_float(v: f64) -> String { - if v.fract() == 0.0 { - format!("{:>.1}", v) - } else { - let mut s = format!("{:>.6}", v); - - while s.ends_with('0') { - s.pop(); - } - - if s.ends_with('.') { - s.push('0'); - } - - s - } -} - fn fmt_usize(n: usize) -> String { // Colon separated groups of 3. let mut s = n.to_string(); @@ -374,255 +119,3 @@ fn fmt_usize(n: usize) -> String { s } - -fn fmt_duration(tu: &TimeUnit, array: &dyn Array, idx: usize) -> String { - if array.is_null(idx) { - "null".to_string() - } else { - let (secs, nsecs) = match tu { - TimeUnit::Second => { - let secs = array - .as_any() - .downcast_ref::() - .unwrap() - .value(idx); - (secs, 0) - } - TimeUnit::Millisecond => { - let msecs = array - .as_any() - .downcast_ref::() - .unwrap() - .value(idx); - (msecs / 1_000, (msecs % 1_000) * 1_000_000) - } - TimeUnit::Microsecond => { - let musecs = array - .as_any() - .downcast_ref::() - .unwrap() - .value(idx); - (musecs / 1_000_000, (musecs % 1_000_000) * 1_000) - } - TimeUnit::Nanosecond => { - let nsecs = array - .as_any() - .downcast_ref::() - .unwrap() - .value(idx); - (nsecs / 1_000_000_000, nsecs % 1_000_000_000) - } - }; - - let days = secs / 86_400; - let hours = (secs % 86_400) / 3600; - let mins = (secs % 3_600) / 60; - let secs = secs % 60; - - let dhm = if days > 0 { - format!("{days}d {hours}h {mins}m ") - } else if hours > 0 { - format!("{hours}h {mins}m ") - } else if mins > 0 { - format!("{mins}m ") - } else { - "".to_string() - }; - - if nsecs % 1_000 != 0 { - format!("{dhm}{secs}.{nsecs}s") - } else if nsecs % 1_000_000 != 0 { - format!("{dhm}{secs}.{}s", nsecs / 1_000) - } else if nsecs % 1_000_000_000 != 0 { - format!("{dhm}{secs}.{}s", nsecs / 1_000_000) - } else { - format!("{dhm}{secs}s") - } - } -} - -fn fmt_interval(iu: &IntervalUnit, array: &dyn Array, idx: usize) -> String { - if array.is_null(idx) { - "null".to_string() - } else { - match iu { - IntervalUnit::YearMonth => { - let interval = array - .as_any() - .downcast_ref::() - .unwrap() - .value(idx) as f64; - let years = (interval / 12_f64).floor(); - let month = interval - (years * 12_f64); - format!("{years}Y {month}M") - } - IntervalUnit::DayTime => { - let value = array - .as_any() - .downcast_ref::() - .unwrap() - .value(idx) as u64; - - let days: i32 = ((value & 0xFFFFFFFF00000000) >> 32) as i32; - let ms_part: i32 = (value & 0xFFFFFFFF) as i32; - let secs = ms_part / 1_000; - let mins = secs / 60; - let hours = mins / 60; - let secs = secs - (mins * 60); - let mins = mins - (hours * 60); - let ms = ms_part % 1_000; - let sign = if secs < 0 || ms < 0 { "-" } else { "" }; - - if days != 0 { - format!( - "{}D {}h {}m {}{}.{:03}s", - days, - hours, - mins, - sign, - secs.abs(), - ms.abs(), - ) - } else if hours != 0 { - format!( - "{}h {}m {}{}.{:03}s", - hours, - mins, - sign, - secs.abs(), - ms.abs(), - ) - } else if mins != 0 { - format!("{}m {}{}.{:03}s", mins, sign, secs.abs(), ms.abs(),) - } else { - format!("{}{}.{:03}s", sign, secs.abs(), ms.abs()) - } - } - IntervalUnit::MonthDayNano => { - let value = array - .as_any() - .downcast_ref::() - .unwrap() - .value(idx) as u128; - - let months: i32 = ((value & 0xFFFFFFFF000000000000000000000000) >> 96) as i32; - let days: i32 = ((value & 0xFFFFFFFF0000000000000000) >> 64) as i32; - let ns_part: i64 = (value & 0xFFFFFFFFFFFFFFFF) as i64; - let secs = ns_part / 1_000_000_000; - let mins = secs / 60; - let hours = mins / 60; - let secs = secs - (mins * 60); - let mins = mins - (hours * 60); - let ns = ns_part % 1_000_000_000; - let secs_sign = if secs < 0 || ns < 0 { "-" } else { "" }; - - let ns = if ns == 0 { - "s".to_string() - } else { - format!(".{:09}s", ns.abs()) - }; - - if months != 0 { - format!( - "{}M {}D {}h {}m {}{}{ns}", - months, - days, - hours, - mins, - secs_sign, - secs.abs(), - ) - } else if days != 0 { - format!( - "{}D {}h {}m {}{}{ns}", - days, - hours, - mins, - secs_sign, - secs.abs(), - ) - } else if hours != 0 { - format!("{}h {}m {}{}{ns}", hours, mins, secs_sign, secs.abs(),) - } else if mins != 0 { - format!("{}m {}{}{ns}", mins, secs_sign, secs.abs()) - } else { - format!("{}{}{ns}", secs_sign, secs.abs()) - } - } - } - } -} - -fn fmt_data_type(dt: &DataType) -> String { - let s = match dt { - DataType::Null => "null", - DataType::Boolean => "bool", - DataType::Int8 => "i8", - DataType::Int16 => "i16", - DataType::Int32 => "i32", - DataType::Int64 => "i64", - DataType::UInt8 => "u8", - DataType::UInt16 => "u16", - DataType::UInt32 => "u32", - DataType::UInt64 => "u64", - DataType::Float16 => "f16", - DataType::Float32 => "f32", - DataType::Float64 => "f64", - DataType::Timestamp(tu, tz) => { - return match tz { - Some(tz) => format!("datetime[{}, {}]", fmt_time_unit(tu), tz), - None => format!("datetime[{}]", fmt_time_unit(tu)), - } - } - DataType::Date32 => "date32", - DataType::Date64 => "date64", - DataType::Time32(tu) | DataType::Time64(tu) => { - return format!("time[{}]", fmt_time_unit(tu)) - } - DataType::Duration(tu) => return format!("duration[{}]", fmt_time_unit(tu)), - DataType::Interval(iu) => return format!("interval[{}]", fmt_interval_unit(iu)), - DataType::Binary => "binary", - DataType::FixedSizeBinary(_) => "binary", - DataType::LargeBinary => "binary", - DataType::Utf8 => "str", - DataType::LargeUtf8 => "str", - DataType::List(inner) | DataType::FixedSizeList(inner, _) | DataType::LargeList(inner) => { - return format!("list[{}]", fmt_data_type(inner.data_type())); - } - DataType::Struct(fields) => return format!("struct[{}]", fields.len()), - DataType::Union(fields, _) => return format!("union[{}]", fields.len()), - DataType::Dictionary(k, v) => { - return format!("dict[{}, {}]", fmt_data_type(k), fmt_data_type(v)) - } - DataType::Decimal128(p, s) | DataType::Decimal256(p, s) => { - return format!("decimal[.{p},{s}]") - } - DataType::Map(v, _) => return format!("map[{}]", fmt_data_type(v.data_type())), - DataType::RunEndEncoded(rf, vf) => { - return format!( - "ree[{}, {}]", - fmt_data_type(rf.data_type()), - fmt_data_type(vf.data_type()) - ) - } - }; - - s.to_string() -} - -fn fmt_time_unit(tu: &TimeUnit) -> &str { - match tu { - TimeUnit::Second => "s", - TimeUnit::Millisecond => "ms", - TimeUnit::Microsecond => "μs", - TimeUnit::Nanosecond => "ns", - } -} - -fn fmt_interval_unit(iu: &IntervalUnit) -> &str { - match iu { - IntervalUnit::YearMonth => "ym", - IntervalUnit::DayTime => "dt", - IntervalUnit::MonthDayNano => "mdn", - } -} diff --git a/src/engine/glimpse.rs b/src/engine/glimpse.rs index 19d6d46..96f5dfb 100644 --- a/src/engine/glimpse.rs +++ b/src/engine/glimpse.rs @@ -10,8 +10,8 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(_args: &[Expr], ctx: &mut Context) -> Result<()> { - if let Some(plan) = ctx.take_plan() { - ctx.glimpse(plan)?; + if let Some(df) = ctx.take_df() { + ctx.glimpse(df)?; } else if ctx.is_grouping() { bail!("glimpse error: must call summarize after a group_by"); } else { diff --git a/src/engine/group_by.rs b/src/engine/group_by.rs index 35e1a73..2dd1ff2 100644 --- a/src/engine/group_by.rs +++ b/src/engine/group_by.rs @@ -1,6 +1,7 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 use anyhow::{bail, Result}; +use polars::prelude::*; use crate::parser::Expr; @@ -10,7 +11,7 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { - if let Some(plan) = ctx.take_plan() { + if let Some(df) = ctx.take_df() { let schema_cols = ctx.columns(); let mut columns = Vec::new(); @@ -20,14 +21,14 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { bail!("group_by error: Unknown column {column}"); } - let expr = args::str_to_col(column); + let expr = col(column); if !columns.contains(&expr) { columns.push(expr); } } } - ctx.set_group(plan, columns); + ctx.set_group(df.group_by_stable(&columns))?; } else { bail!("group_by error: missing input dataframe"); } diff --git a/src/engine/head.rs b/src/engine/head.rs index 0a40ffa..580652f 100644 --- a/src/engine/head.rs +++ b/src/engine/head.rs @@ -1,7 +1,6 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 use anyhow::{bail, Result}; -use datafusion::logical_expr::LogicalPlanBuilder; use crate::parser::Expr; @@ -11,18 +10,15 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { - if let Some(plan) = ctx.take_plan() { + if let Some(df) = ctx.take_df() { let limit = if !args.is_empty() { - args::number(&args[0]) as usize + args::number(&args[0]) as u32 } else { 10 }; - let plan = LogicalPlanBuilder::from(plan) - .limit(0, Some(limit))? - .build()?; - - ctx.show(plan)?; + let df = df.limit(limit).collect()?; + ctx.print(df)?; } else if ctx.is_grouping() { bail!("head error: must call summarize after a group_by"); } else { diff --git a/src/engine/io.rs b/src/engine/io.rs deleted file mode 100644 index e8a339b..0000000 --- a/src/engine/io.rs +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (C) 2023 Vince Vasta -// SPDX-License-Identifier: Apache-2.0 -use anyhow::{anyhow, Result}; -use datafusion::{ - arrow::{datatypes::SchemaRef, record_batch::RecordBatch}, - execution::context::TaskContext, - logical_expr::LogicalPlan, -}; -use futures::StreamExt; -use std::sync::Arc; -use tokio::sync::mpsc; - -use super::*; - -pub fn execute_plan( - plan: LogicalPlan, - ctx: &mut Context, -) -> Result<(SchemaRef, mpsc::Receiver>)> { - ctx.block_on(async { - let plan = ctx.create_physical_plan(&plan).await?; - let task_context = Arc::new(TaskContext::from(ctx.session())); - - let num_partitions = plan.output_partitioning().partition_count(); - let (tx, rx) = mpsc::channel::>(num_partitions * 16); - - for partition in 0..plan.output_partitioning().partition_count() { - tokio::task::spawn({ - let plan = plan.clone(); - let sender = tx.clone(); - let task_context = task_context.clone(); - async move { - match plan.execute(partition, task_context) { - Ok(mut s) => { - while let Some(batch) = s.next().await { - sender - .send(batch.map_err(anyhow::Error::from)) - .await - .unwrap(); - } - } - Err(e) => sender - .send(Err(anyhow!("Plan exec error: {e}"))) - .await - .unwrap(), - } - } - }); - } - - Ok::<_, anyhow::Error>((plan.schema(), rx)) - }) - .map_err(anyhow::Error::from) -} diff --git a/src/engine/joins.rs b/src/engine/joins.rs index ae5d585..f0155ad 100644 --- a/src/engine/joins.rs +++ b/src/engine/joins.rs @@ -1,83 +1,44 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 use anyhow::{bail, Result}; -use datafusion::{ - common::{Column, JoinType as DFJoinType}, - logical_expr::LogicalPlanBuilder, -}; +use polars::prelude::*; use std::collections::HashSet; use crate::parser::{Expr, Operator}; use super::*; -/// Join type -pub enum JoinType { - /// Anti left join. - Anti, - /// Cross join - Cross, - /// Inner join - Inner, - /// Left join - Left, - /// Outer join - Outer, -} - -const LHS_TABLE: &str = "lhs"; -const RHS_TABLE: &str = "rhs"; - /// Evaluates a join call. /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context, join_type: JoinType) -> Result<()> { - if let Some(lhs_plan) = ctx.take_plan() { + if let Some(lhs_df) = ctx.take_df() { let rhs_df_name = args::identifier(&args[0]); - if let Some(rhs_plan) = ctx.get_plan(&rhs_df_name) { - // Assign table names to the left and right sides to avoid - // collisions when tables have columns with the same name. - let lhs_plan = LogicalPlanBuilder::from(lhs_plan) - .alias(LHS_TABLE)? - .build()?; - - let rhs_plan = LogicalPlanBuilder::from(rhs_plan) - .alias(RHS_TABLE)? - .build()?; - - let lhs_schema = lhs_plan.schema(); - let rhs_schema = rhs_plan.schema(); + if let Some(rhs_df) = ctx.get_df(&rhs_df_name) { + let lhs_schema = lhs_df.schema().map_err(|e| anyhow!("join error: {e}"))?; + let rhs_schema = rhs_df.schema().map_err(|e| anyhow!("join error: {e}"))?; let lhs_schema_cols = lhs_schema - .fields() - .iter() - .map(|f| f.name().to_owned()) + .iter_names() + .map(|s| s.to_string()) .collect::>(); let rhs_schema_cols = rhs_schema - .fields() - .iter() - .map(|f| f.name().to_owned()) + .iter_names() + .map(|s| s.to_string()) .collect::>(); // If no join columns are specified use common columns let (lhs_cols, rhs_cols) = if args.len() == 1 { let common_cols = lhs_schema_cols .intersection(&rhs_schema_cols) - .map(|s| s.to_string()) + .map(|s| col(s)) .collect::>(); if common_cols.is_empty() { bail!("join error: Missing join columns for '{rhs_df_name}'"); } - common_cols - .into_iter() - .map(|s| { - ( - Column::new(Some(LHS_TABLE), s.clone()), - Column::new(Some(RHS_TABLE), s), - ) - }) - .unzip() + + (common_cols.clone(), common_cols) } else { let mut lhs_cols = Vec::with_capacity(args.len()); let mut rhs_cols = Vec::with_capacity(args.len()); @@ -88,24 +49,18 @@ pub fn eval(args: &[Expr], ctx: &mut Context, join_type: JoinType) -> Result<()> if !lhs_schema_cols.contains(&lhs_col) { bail!("join error: Unknown column '{lhs_col}'"); } - lhs_cols.push(Column::new(Some(LHS_TABLE), lhs_col.clone())); + lhs_cols.push(col(&lhs_col)); let rhs_col = args::identifier(rhs); if !rhs_schema_cols.contains(&rhs_col) { bail!("join error: Unknown column '{rhs_col}'"); } - rhs_cols.push(Column::new(Some(RHS_TABLE), rhs_col.clone())); - - let lhs_type = lhs_schema - .field_with_unqualified_name(&lhs_col) - .map(|f| f.data_type()); + rhs_cols.push(col(&rhs_col)); - let rhs_type = rhs_schema - .field_with_unqualified_name(&rhs_col) - .map(|f| f.data_type()); - - let have_same_type = lhs_type - .and_then(|lt| rhs_type.map(|rt| lt == rt)) + let have_same_type = lhs_schema + .get(&lhs_col) + .zip(rhs_schema.get(&rhs_col)) + .map(|(ldt, rdt)| ldt == rdt) .unwrap_or(false); if !have_same_type { bail!( @@ -118,32 +73,8 @@ pub fn eval(args: &[Expr], ctx: &mut Context, join_type: JoinType) -> Result<()> (lhs_cols, rhs_cols) }; - let plan = if let JoinType::Cross = join_type { - LogicalPlanBuilder::from(lhs_plan) - .cross_join(rhs_plan)? - .build()? - } else { - let join_type = match join_type { - JoinType::Inner => DFJoinType::Inner, - JoinType::Left => DFJoinType::Left, - JoinType::Anti => DFJoinType::LeftAnti, - _ => DFJoinType::Full, - }; - - LogicalPlanBuilder::from(lhs_plan) - .join(rhs_plan, join_type, (lhs_cols, rhs_cols.clone()), None)? - .build()? - }; - - // Remove righ table columns for inner and left join. - let plan = match join_type { - JoinType::Inner | JoinType::Left => remove_rhs_columns(plan, rhs_cols)?, - _ => plan, - }; - - let plan = rename_duplicate_columns(plan)?; - - ctx.set_plan(plan); + let join_args = JoinArgs::new(join_type); + ctx.set_df(lhs_df.join(rhs_df.clone(), lhs_cols, rhs_cols, join_args))?; } else { bail!("join error: undefined dataframe variable '{rhs_df_name}'"); } @@ -155,52 +86,3 @@ pub fn eval(args: &[Expr], ctx: &mut Context, join_type: JoinType) -> Result<()> Ok(()) } - -fn remove_rhs_columns(plan: LogicalPlan, rhs_cols: Vec) -> Result { - let columns = plan - .schema() - .fields() - .iter() - .map(|f| f.qualified_column()) - .filter(|c| !rhs_cols.contains(c)) - .map(DFExpr::Column) - .collect::>(); - let plan = LogicalPlanBuilder::from(plan).project(columns)?.build()?; - Ok(plan) -} - -fn rename_duplicate_columns(plan: LogicalPlan) -> Result { - let mut duplicates = HashSet::new(); - let mut found = HashSet::new(); - - for field in plan.schema().fields() { - if found.contains(field.name()) { - duplicates.insert(field.name().to_owned()); - } else { - found.insert(field.name().to_owned()); - } - } - - let columns = plan - .schema() - .fields() - .iter() - .map(|f| { - let column = f.qualified_column(); - let is_rhs = column - .relation - .as_ref() - .map(|r| r.table() == RHS_TABLE) - .unwrap_or(false); - let expr = DFExpr::Column(column.clone()); - if is_rhs && duplicates.contains(f.name()) { - expr.alias(format!("{}_rhs", column.name)) - } else { - expr - } - }) - .collect::>(); - - let plan = LogicalPlanBuilder::from(plan).project(columns)?.build()?; - Ok(plan) -} diff --git a/src/engine/json.rs b/src/engine/json.rs index 0648976..0febba1 100644 --- a/src/engine/json.rs +++ b/src/engine/json.rs @@ -1,17 +1,8 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 use anyhow::{anyhow, bail, Result}; -use datafusion::{ - arrow::json, - common::DEFAULT_JSON_EXTENSION, - datasource::{ - file_format::json::JsonFormat, - listing::{ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl}, - provider_as_source, - }, - logical_expr::{LogicalPlanBuilder, UNNAMED_TABLE}, -}; -use std::{num::NonZeroUsize, path::Path, sync::Arc}; +use polars::prelude::*; +use std::path::PathBuf; use crate::parser::Expr; @@ -21,61 +12,31 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { - let path = args::string(&args[0]); - let overwrite = args::named_bool(args, "overwrite"); + let path = PathBuf::from(args::string(&args[0])); + let overwrite = args::named_bool(args, "overwrite")?; // If there is an input dataframe save it to disk. - if let Some(plan) = ctx.take_plan() { - if !overwrite && Path::new(&path).exists() { - bail!("json error: file '{}' already exists.", path); + if let Some(df) = ctx.take_df() { + if !overwrite && path.exists() { + bail!("json error: file '{}' already exists.", path.display()); } - ctx.set_plan(plan.clone()); - - let (_, mut rx) = io::execute_plan(plan, ctx)?; - let file = std::fs::File::create(&path) - .map_err(|e| anyhow!("json error: cannot create file '{}' {e}", path))?; - let mut writer = json::LineDelimitedWriter::new(file); + .map_err(|e| anyhow!("parquet error: cannot create file '{}' {e}", path.display()))?; - while let Some(batch) = rx.blocking_recv() { - writer.write(&batch?)?; - } + let mut out_df = df.clone().collect()?; + ctx.set_df(df)?; + + JsonWriter::new(file) + .with_json_format(JsonFormat::JsonLines) + .finish(&mut out_df)?; } else { // Read the data frame and set it as input for the next task. - let table_path = ListingTableUrl::parse(&path)?; - - let num_cpus = std::thread::available_parallelism() - .unwrap_or(NonZeroUsize::new(2).unwrap()) - .get(); - - let schema_infer_rows = args::named_usize(args, "schema_rows")?; - - let file_format = JsonFormat::default().with_schema_infer_max_rec(schema_infer_rows); - - // Use default extension for recursive loading. - let extension = if Path::new(&path).is_dir() { - DEFAULT_JSON_EXTENSION - } else { - "" - }; - - let listing_options = ListingOptions::new(Arc::new(file_format)) - .with_file_extension(extension) - .with_target_partitions(num_cpus); - - let resolved_schema = - ctx.block_on(listing_options.infer_schema(&ctx.session().state(), &table_path))?; - - let config = ListingTableConfig::new(table_path) - .with_listing_options(listing_options) - .with_schema(resolved_schema); - - let table_provider = ListingTable::try_new(config)?; - let table_source = provider_as_source(Arc::new(table_provider)); - let plan = LogicalPlanBuilder::scan(UNNAMED_TABLE, table_source, None)?.build()?; - - ctx.set_plan(plan); + let df = LazyJsonLineReader::new(&path) + .with_infer_schema_length(Some(1000)) + .finish() + .map_err(|e| anyhow!("json error: cannot read file '{}' {e}", path.display()))?; + ctx.set_df(df)?; } Ok(()) diff --git a/src/engine/mutate.rs b/src/engine/mutate.rs index 4b5c02a..66bdfbf 100644 --- a/src/engine/mutate.rs +++ b/src/engine/mutate.rs @@ -1,19 +1,21 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 -use anyhow::{anyhow, bail, Result}; -use datafusion::{ - arrow::{array::ArrayRef, compute::kernels, datatypes::*}, - common::{ - tree_node::{Transformed, TreeNode}, - DFSchema, - }, - logical_expr::{ - aggregate_function::AggregateFunction, cast, create_udf, expr, expr_fn, lit, utils, - window_frame::WindowFrame, BuiltInWindowFunction, Expr as DFExpr, LogicalPlanBuilder, - Volatility, WindowFunction, - }, - physical_plan::functions::make_scalar_function, -}; +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +use anyhow::{bail, Result}; +use polars::lazy::dsl::{duration, DurationArgs, Expr as PolarsExpr, StrptimeOptions}; +use polars::prelude::*; +use std::collections::HashSet; use crate::parser::{Expr, Operator}; @@ -23,53 +25,31 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { - if let Some(mut plan) = ctx.take_plan() { + if let Some(mut df) = ctx.take_df() { + let mut used_aliases = HashSet::new(); + for arg in args { match arg { Expr::BinaryOp(lhs, Operator::Assign, rhs) => { - // Save current plan columns for projection - let schema_cols = plan - .schema() - .fields() - .iter() - .map(|f| f.name().to_owned()) - .collect::>(); - let alias = args::identifier(lhs); - let expr = eval_expr(rhs, &plan) - .map_err(|e| anyhow!("mutate error: {e}"))? - .alias(&alias); - - // Extract window functions for evaluation before project. - let window_exprs = utils::find_window_exprs(&[expr.clone()]); - plan = LogicalPlanBuilder::window_plan(plan, window_exprs)?; - - // Transform window functions expression to column expressions - // so that we can use them in the final projection plan. - let expr = expr.transform(&|expr| { - if matches!(expr, DFExpr::WindowFunction { .. }) { - let expr = utils::expr_as_column_expr(&expr, &plan)?; - Ok(Transformed::Yes(expr)) - } else { - Ok(Transformed::No(expr)) - } - })?; - - // Replace or append evaluated expression for projection. - let mut columns = schema_cols.iter().map(args::str_to_col).collect::>(); - if let Some(idx) = schema_cols.iter().position(|c| c == &alias) { - columns[idx] = expr; + if used_aliases.contains(&alias) { + bail!("mutate error: duplicate alias '{alias}'"); } else { - columns.push(expr); - }; + used_aliases.insert(alias.clone()); + } - plan = LogicalPlanBuilder::from(plan).project(columns)?.build()?; + let expr = df + .schema() + .map_err(anyhow::Error::from) + .and_then(|schema| eval_expr(rhs, &schema)) + .map_err(|e| anyhow!("mutate error: {e}"))?; + df = df.with_column(expr.alias(&alias)); } _ => panic!("Unexpected mutate expression: {arg}"), } } - ctx.set_plan(plan); + ctx.set_df(df)?; } else if ctx.is_grouping() { bail!("mutate error: must call summarize after a group_by"); } else { @@ -79,172 +59,110 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { Ok(()) } -fn eval_expr(expr: &Expr, plan: &LogicalPlan) -> Result { - let schema = plan.schema(); +fn eval_expr(expr: &Expr, schema: &Schema) -> Result { match expr { Expr::BinaryOp(lhs, op, rhs) => { - let lhs = eval_expr(lhs, plan)?; - let rhs = eval_expr(rhs, plan)?; + let lhs = eval_expr(lhs, schema)?; + let rhs = eval_expr(rhs, schema)?; let result = match op { Operator::Plus => lhs + rhs, Operator::Minus => lhs - rhs, Operator::Multiply => lhs * rhs, Operator::Divide => lhs / rhs, - Operator::Mod => lhs % expr_fn::cast(rhs, DataType::UInt64), + Operator::Mod => lhs % rhs.cast(DataType::UInt64), _ => panic!("Unexpected mutate operator {op}"), }; Ok(result) } - Expr::Identifier(_) => args::expr_to_col(expr, plan.schema()), + Expr::Identifier(_) => args::column(expr, schema), Expr::String(s) => Ok(lit(s.clone())), Expr::Number(n) => Ok(lit(*n)), Expr::Function(name, args) if name == "ymd_hms" => { - args::expr_to_col(&args[0], schema).map(expr_fn::to_timestamp_millis) - } - Expr::Function(name, args) if name == "dnanos" => { - args::expr_to_col(&args[0], schema).map(|e| to_duration(e, TimeUnit::Nanosecond)) - } + args::column(&args[0], schema).map(|c| { + c.str().to_datetime( + Some(TimeUnit::Nanoseconds), + None, + StrptimeOptions::default(), + lit("raise"), + ) + }) + } + Expr::Function(name, args) if name == "dnanos" => args::column(&args[0], schema).map(|c| { + duration(DurationArgs { + nanoseconds: c, + ..Default::default() + }) + }), Expr::Function(name, args) if name == "dmicros" => { - args::expr_to_col(&args[0], schema).map(|e| to_duration(e, TimeUnit::Microsecond)) + args::column(&args[0], schema).map(|c| { + duration(DurationArgs { + microseconds: c, + ..Default::default() + }) + }) } Expr::Function(name, args) if name == "dmillis" => { - args::expr_to_col(&args[0], schema).map(|e| to_duration(e, TimeUnit::Millisecond)) - } - Expr::Function(name, args) if name == "dsecs" => { - args::expr_to_col(&args[0], schema).map(|e| to_duration(e, TimeUnit::Second)) - } + args::column(&args[0], schema).map(|c| { + duration(DurationArgs { + milliseconds: c, + ..Default::default() + }) + }) + } + Expr::Function(name, args) if name == "dsecs" => args::column(&args[0], schema).map(|c| { + duration(DurationArgs { + seconds: c, + ..Default::default() + }) + }), Expr::Function(name, args) if name == "nanos" => { - duration_to_i64(&args[0], schema, TimeUnit::Nanosecond) + args::column(&args[0], schema).map(|c| c.dt().total_nanoseconds()) } Expr::Function(name, args) if name == "micros" => { - duration_to_i64(&args[0], schema, TimeUnit::Microsecond) + args::column(&args[0], schema).map(|c| c.dt().total_microseconds()) } Expr::Function(name, args) if name == "millis" => { - duration_to_i64(&args[0], schema, TimeUnit::Millisecond) + args::column(&args[0], schema).map(|c| c.dt().total_milliseconds()) } Expr::Function(name, args) if name == "secs" => { - duration_to_i64(&args[0], schema, TimeUnit::Second) + args::column(&args[0], schema).map(|c| c.dt().total_seconds()) } Expr::Function(name, args) if name == "field" => { let field_name = args::identifier(&args[1]); - args::expr_to_qualified_col(&args[0], schema).map(|e| e.field(field_name)) - } - Expr::Function(name, args) if name == "len" => { - let column = args::identifier(&args[0]); - match schema - .field_with_unqualified_name(&column) - .map(|f| f.data_type()) - { - Ok(dt @ DataType::List(_) | dt @ DataType::Utf8) => list_len(&column, dt), - Ok(_) => Err(anyhow!("`len` column '{column}' must be a list or string")), - Err(_) => Err(anyhow!("Unknown column '{column}'")), - } + args::column(&args[0], schema).map(|c| c.struct_().field_by_name(&field_name)) } Expr::Function(name, args) if name == "mean" => { - args::expr_to_qualified_col(&args[0], schema) - .map(|e| window_fn(e, AggregateFunction::Avg)) + args::column(&args[0], schema).map(|c| c.mean()) } Expr::Function(name, args) if name == "median" => { - args::expr_to_qualified_col(&args[0], schema) - .map(|e| window_fn(e, AggregateFunction::Median)) + args::column(&args[0], schema).map(|c| c.median()) } Expr::Function(name, args) if name == "min" => { - args::expr_to_qualified_col(&args[0], schema) - .map(|e| window_fn(e, AggregateFunction::Min)) + args::column(&args[0], schema).map(|c| c.min()) } Expr::Function(name, args) if name == "max" => { - args::expr_to_qualified_col(&args[0], schema) - .map(|e| window_fn(e, AggregateFunction::Max)) + args::column(&args[0], schema).map(|c| c.max()) + } + Expr::Function(name, args) if name == "len" => { + let column = args::identifier(&args[0]); + match schema.get(&column) { + Some(DataType::List(_)) => Ok(col(&column).list().len()), + Some(DataType::String) => Ok(col(&column).str().len_chars()), + Some(_) => Err(anyhow!("`len` column '{column}' must be list or String")), + None => Err(anyhow!("Unknown column '{column}'")), + } + } + Expr::Function(name, _args) if name == "row" => { + let (col_name, _) = schema + .get_at_index(0) + .ok_or_else(|| anyhow!("No columns found"))?; + Ok(col(col_name).map( + |s| Ok(Some(Series::from_iter(1..=(s.len() as u64)))), + GetOutput::from_type(DataType::UInt64), + )) } - Expr::Function(name, _args) if name == "row" => Ok(row_fn()), _ => panic!("Unexpected mutate expression {expr}"), } } - -fn to_duration(expr: DFExpr, time_unit: TimeUnit) -> DFExpr { - let i64_expr = cast(expr, DataType::Int64); - cast(i64_expr, DataType::Duration(time_unit)) -} - -fn duration_to_i64(expr: &Expr, schema: &DFSchema, to_unit: TimeUnit) -> Result { - let col_expr = args::expr_to_col(expr, schema)?; - let col_name = args::identifier(expr); - - let data_type = schema - .field_with_unqualified_name(&col_name) - .map(|f| f.data_type()) - .map_err(|_| anyhow!("Unknown column {col_name}"))?; - - if let DataType::Duration(duration_unit) = data_type { - let units = cast(col_expr, DataType::Int64); - let result = match (duration_unit, to_unit) { - (TimeUnit::Second, TimeUnit::Second) - | (TimeUnit::Millisecond, TimeUnit::Millisecond) - | (TimeUnit::Microsecond, TimeUnit::Microsecond) - | (TimeUnit::Nanosecond, TimeUnit::Nanosecond) => units, - - (TimeUnit::Second, TimeUnit::Millisecond) - | (TimeUnit::Microsecond, TimeUnit::Nanosecond) - | (TimeUnit::Millisecond, TimeUnit::Microsecond) => units * lit(1_000), - - (TimeUnit::Second, TimeUnit::Microsecond) - | (TimeUnit::Millisecond, TimeUnit::Nanosecond) => units * lit(1_000_000), - - (TimeUnit::Second, TimeUnit::Nanosecond) => units * lit(1_000_000_000), - - (TimeUnit::Millisecond, TimeUnit::Second) - | (TimeUnit::Microsecond, TimeUnit::Millisecond) - | (TimeUnit::Nanosecond, TimeUnit::Microsecond) => units / lit(1_000.0), - - (TimeUnit::Microsecond, TimeUnit::Second) - | (TimeUnit::Nanosecond, TimeUnit::Millisecond) => units / lit(1_000_000.0), - (TimeUnit::Nanosecond, TimeUnit::Second) => units / lit(1_000_000_000.0), - }; - - Ok(cast(result, DataType::Int64)) - } else { - Err(anyhow!("Column '{col_name}' must be a duration type")) - } -} - -fn window_fn(expr: DFExpr, agg: AggregateFunction) -> DFExpr { - DFExpr::WindowFunction(expr::WindowFunction::new( - WindowFunction::AggregateFunction(agg), - vec![expr], - vec![], - vec![], - WindowFrame::new(false), - )) -} - -fn row_fn() -> DFExpr { - DFExpr::WindowFunction(expr::WindowFunction::new( - WindowFunction::BuiltInWindowFunction(BuiltInWindowFunction::RowNumber), - vec![], - vec![], - vec![], - WindowFrame::new(false), - )) -} - -fn list_len(column: &str, list_type: &DataType) -> Result { - let len_udf = move |args: &[ArrayRef]| { - assert_eq!(args.len(), 1); - let result = kernels::length::length(&args[0])?; - Ok(result) - }; - - let len_udf = make_scalar_function(len_udf); - - let len_udf = create_udf( - "len", - vec![list_type.clone()], - Arc::new(DataType::Int32), - Volatility::Immutable, - len_udf, - ); - - Ok(len_udf.call(vec![args::str_to_col(column)])) -} diff --git a/src/engine/parquet.rs b/src/engine/parquet.rs index 57846c0..aa4c6f9 100644 --- a/src/engine/parquet.rs +++ b/src/engine/parquet.rs @@ -1,21 +1,8 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 use anyhow::{anyhow, bail, Result}; -use datafusion::{ - common::DEFAULT_PARQUET_EXTENSION, - datasource::{ - file_format::parquet::ParquetFormat, - listing::{ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl}, - provider_as_source, - }, - logical_expr::{LogicalPlanBuilder, UNNAMED_TABLE}, - parquet::{ - arrow::arrow_writer::ArrowWriter, - basic::{Compression, ZstdLevel}, - file::properties::WriterProperties, - }, -}; -use std::{num::NonZeroUsize, path::Path, sync::Arc}; +use polars::prelude::*; +use std::path::PathBuf; use crate::parser::Expr; @@ -25,63 +12,29 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { - let path = args::string(&args[0]); - let overwrite = args::named_bool(args, "overwrite"); + // parquet("nyctaxi.parquet") + let path = PathBuf::from(args::string(&args[0])); + // parquet("nyctaxi.parquet", overwrite = true) + let overwrite = args::named_bool(args, "overwrite")?; // If there is an input dataframe save it to disk. - if let Some(plan) = ctx.take_plan() { - if !overwrite && Path::new(&path).exists() { - bail!("parquet error: file '{}' already exists.", path); + if let Some(df) = ctx.take_df() { + if !overwrite && path.exists() { + bail!("parquet error: file '{}' already exists.", path.display()); } - ctx.set_plan(plan.clone()); - - let (schema, mut rx) = io::execute_plan(plan, ctx)?; - let file = std::fs::File::create(&path) - .map_err(|e| anyhow!("parquet error: cannot create file '{}' {e}", path))?; - let props = WriterProperties::builder() - .set_compression(Compression::ZSTD(ZstdLevel::default())) - .build(); - let mut writer = ArrowWriter::try_new(file, schema, Some(props))?; + .map_err(|e| anyhow!("parquet error: cannot create file '{}' {e}", path.display()))?; - while let Some(batch) = rx.blocking_recv() { - writer.write(&batch?)?; - } + let mut out_df = df.clone().collect()?; + ctx.set_df(df)?; - writer.close()?; + ParquetWriter::new(file).finish(&mut out_df)?; } else { // Read the data frame and set it as input for the next task. - let table_path = ListingTableUrl::parse(&path)?; - - let num_cpus = std::thread::available_parallelism() - .unwrap_or(NonZeroUsize::new(2).unwrap()) - .get(); - - // Use default extension for recursive loading. - let extension = if Path::new(&path).is_dir() { - DEFAULT_PARQUET_EXTENSION - } else { - "" - }; - - let file_format = ParquetFormat::new(); - let listing_options = ListingOptions::new(Arc::new(file_format)) - .with_file_extension(extension) - .with_target_partitions(num_cpus); - - let resolved_schema = - ctx.block_on(listing_options.infer_schema(&ctx.session().state(), &table_path))?; - - let config = ListingTableConfig::new(table_path) - .with_listing_options(listing_options) - .with_schema(resolved_schema); - - let table_provider = ListingTable::try_new(config)?; - let table_source = provider_as_source(Arc::new(table_provider)); - let plan = LogicalPlanBuilder::scan(UNNAMED_TABLE, table_source, None)?.build()?; - - ctx.set_plan(plan); + let df = LazyFrame::scan_parquet(&path, ScanArgsParquet::default()) + .map_err(|e| anyhow!("parquet error: cannot read file '{}' {e}", path.display()))?; + ctx.set_df(df)?; } Ok(()) diff --git a/src/engine/relocate.rs b/src/engine/relocate.rs index e5b5138..16232d4 100644 --- a/src/engine/relocate.rs +++ b/src/engine/relocate.rs @@ -1,7 +1,7 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 use anyhow::{bail, Result}; -use datafusion::logical_expr::LogicalPlanBuilder; +use polars::prelude::*; use crate::parser::{Expr, Operator}; @@ -21,9 +21,9 @@ enum RelocateTo { /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { - if let Some(plan) = ctx.take_plan() { - let mut schema_cols = ctx.columns().clone(); - let mut relocate_cols = Vec::new(); + if let Some(df) = ctx.take_df() { + let schema_cols = ctx.columns(); + let mut relocate_cols = Vec::<&str>::new(); let mut relocate_to = RelocateTo::Default; for arg in args { @@ -48,14 +48,15 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { bail!("relocate error: Unknown column {column}"); } - if !relocate_cols.contains(column) { - relocate_cols.push(column.to_owned()); + if !relocate_cols.contains(&column.as_str()) { + relocate_cols.push(column); } } _ => {} } } + let mut schema_cols = schema_cols.iter().map(|s| s.as_str()).collect::>(); match relocate_to { RelocateTo::Default => { // Relocate columns to the left. @@ -76,14 +77,8 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { } }; - let columns = schema_cols - .into_iter() - .map(args::str_to_col) - .collect::>(); - - let plan = LogicalPlanBuilder::from(plan).project(columns)?.build()?; - - ctx.set_plan(plan); + let columns = schema_cols.into_iter().map(col).collect::>(); + ctx.set_df(df.select(&columns))?; } else if ctx.is_grouping() { bail!("relocate error: must call summarize after a group_by"); } else { diff --git a/src/engine/rename.rs b/src/engine/rename.rs index 1b51c9c..8b414d6 100644 --- a/src/engine/rename.rs +++ b/src/engine/rename.rs @@ -1,7 +1,7 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 use anyhow::{bail, Result}; -use datafusion::logical_expr::LogicalPlanBuilder; +use polars::prelude::*; use crate::parser::{Expr, Operator}; @@ -11,19 +11,15 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { - if let Some(plan) = ctx.take_plan() { - let mut schema_cols = ctx - .columns() - .iter() - .map(args::str_to_col) - .collect::>(); + if let Some(df) = ctx.take_df() { + let mut schema_cols = ctx.columns().iter().map(|c| col(c)).collect::>(); for arg in args { if let Expr::BinaryOp(lhs, Operator::Assign, rhs) = arg { let alias = args::identifier(lhs); - let column = args::str_to_col(args::identifier(rhs)); + let column = args::identifier(rhs); - if let Some(idx) = schema_cols.iter().position(|c| c == &column) { + if let Some(idx) = schema_cols.iter().position(|c| c == &col(&column)) { schema_cols[idx] = schema_cols[idx].clone().alias(&alias); } else { bail!("rename error: Unknown column {column}"); @@ -31,10 +27,7 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { } } - let plan = LogicalPlanBuilder::from(plan) - .project(schema_cols)? - .build()?; - ctx.set_plan(plan); + ctx.set_df(df.select(&schema_cols))?; } else if ctx.is_grouping() { bail!("rename error: must call summarize after a group_by"); } else { diff --git a/src/engine/select.rs b/src/engine/select.rs index 9171aa0..8f2c53c 100644 --- a/src/engine/select.rs +++ b/src/engine/select.rs @@ -1,7 +1,8 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 use anyhow::{bail, Result}; -use datafusion::logical_expr::{Expr as DFExpr, LogicalPlanBuilder}; +use polars::lazy::dsl::Expr as PolarsExpr; +use polars::prelude::*; use crate::parser::{Expr, Operator}; @@ -11,7 +12,7 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { - if let Some(plan) = ctx.take_plan() { + if let Some(df) = ctx.take_df() { let schema_cols = ctx.columns(); let mut select_columns = Vec::new(); @@ -31,7 +32,7 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { // select(alias = column) let alias = args::identifier(lhs); let column = args::identifier(rhs); - let expr = args::str_to_col(&column).alias(&alias); + let expr = col(&column).alias(&alias); if !select_columns.contains(&expr) { select_columns.push(expr); @@ -43,7 +44,7 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { bail!("select error: Unknown column {column}"); } - let expr = args::str_to_col(column); + let expr = col(column); if !select_columns.contains(&expr) { select_columns.push(expr); } @@ -52,10 +53,7 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { } } - let plan = LogicalPlanBuilder::from(plan) - .project(select_columns)? - .build()?; - ctx.set_plan(plan); + ctx.set_df(df.select(&select_columns))?; } else if ctx.is_grouping() { bail!("select error: must call summarize after a group_by"); } else { @@ -65,7 +63,7 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { Ok(()) } -fn filter_columns(expr: &Expr, schema_cols: &[String], negate: bool) -> Vec { +fn filter_columns(expr: &Expr, schema_cols: &[String], negate: bool) -> Vec { match expr { Expr::Function(name, args) if name == "starts_with" => { // select(starts_with("pattern")) @@ -73,7 +71,7 @@ fn filter_columns(expr: &Expr, schema_cols: &[String], negate: bool) -> Vec { @@ -82,7 +80,7 @@ fn filter_columns(expr: &Expr, schema_cols: &[String], negate: bool) -> Vec { @@ -91,7 +89,7 @@ fn filter_columns(expr: &Expr, schema_cols: &[String], negate: bool) -> Vec Vec::new(), diff --git a/src/engine/show.rs b/src/engine/show.rs index 491750b..a97c654 100644 --- a/src/engine/show.rs +++ b/src/engine/show.rs @@ -10,8 +10,9 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(_args: &[Expr], ctx: &mut Context) -> Result<()> { - if let Some(plan) = ctx.take_plan() { - ctx.show(plan)?; + if let Some(df) = ctx.take_df() { + let df = df.collect()?; + ctx.print(df)?; } else if ctx.is_grouping() { bail!("show error: must call summarize after a group_by"); } else { diff --git a/src/engine/summarize.rs b/src/engine/summarize.rs index c5d7d84..3eae0e3 100644 --- a/src/engine/summarize.rs +++ b/src/engine/summarize.rs @@ -1,26 +1,21 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 -use anyhow::{anyhow, bail, Result}; -use datafusion::{ - arrow::{ - array::ArrayRef, - compute::{self, SortOptions}, - datatypes::DataType, - }, - common::DFSchema, - error::{DataFusionError, Result as DFResult}, - logical_expr::{ - aggregate_function::AggregateFunction, create_udaf, expr, expr_fn, lit, Accumulator, - Expr as DFExpr, LogicalPlanBuilder, Volatility, - }, - scalar::ScalarValue, -}; -use hashbrown::{HashMap, HashSet}; -use parking_lot::Mutex; -use std::sync::{ - atomic::{AtomicU64, Ordering}, - Arc, OnceLock, -}; +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +use anyhow::{bail, Result}; +use polars::lazy::dsl::Expr as PolarsExpr; +use polars::prelude::*; +use std::collections::HashSet; use crate::parser::{Expr, Operator}; @@ -30,18 +25,22 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { - if let Some(plan) = ctx.take_plan() { - let schema = plan.schema(); - - let exprs = eval_args(args, schema).map_err(|e| anyhow!("summarize error: {e}"))?; - - let group = ctx.take_group().unwrap_or_default(); - - let plan = LogicalPlanBuilder::from(plan) - .aggregate(group, exprs)? - .build()?; - - ctx.set_plan(plan); + if let Some(group) = ctx.take_group() { + let columns = group + .logical_plan + .schema() + .map(|s| s.into_owned()) + .map_err(anyhow::Error::from) + .and_then(|schema| eval_args(args, ctx, &schema, true)) + .map_err(|e| anyhow!("summarize error: {e}"))?; + ctx.set_df(group.agg(&columns))?; + } else if let Some(df) = ctx.take_df() { + let columns = df + .schema() + .map_err(anyhow::Error::from) + .and_then(|schema| eval_args(args, ctx, &schema, false)) + .map_err(|e| anyhow!("summarize error: {e}"))?; + ctx.set_df(df.select(&columns))?; } else { bail!("summarize error: missing input group or dataframe"); } @@ -49,7 +48,13 @@ pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { Ok(()) } -fn eval_args(args: &[Expr], schema: &DFSchema) -> Result> { +fn eval_args( + args: &[Expr], + ctx: &mut Context, + schema: &Schema, + grouping: bool, +) -> Result> { + let schema_cols = ctx.columns(); let mut aliases = HashSet::new(); let mut columns = Vec::new(); @@ -64,44 +69,34 @@ fn eval_args(args: &[Expr], schema: &DFSchema) -> Result> { aliases.insert(alias.clone()); let column = match rhs.as_ref() { - Expr::Function(name, _) if name == "n" => Ok(expr_fn::count(lit(1))), - Expr::Function(name, args) if name == "list" => { - args::expr_to_col(&args[0], schema).map(expr_fn::array_agg) - } + Expr::Function(name, _) if name == "n" => Ok(col(&schema_cols[0]).count()), + Expr::Function(name, args) if name == "list" => args::column(&args[0], schema) + .map(|c| if grouping { c } else { c.implode() }), Expr::Function(name, args) if name == "max" => { - args::expr_to_col(&args[0], schema).map(expr_fn::max) + args::column(&args[0], schema).map(|c| c.max()) } Expr::Function(name, args) if name == "mean" => { - args::expr_to_col(&args[0], schema).map(expr_fn::avg) + args::column(&args[0], schema).map(|c| c.mean()) } Expr::Function(name, args) if name == "median" => { - args::expr_to_col(&args[0], schema).map(expr_fn::median) + args::column(&args[0], schema).map(|c| c.median()) } Expr::Function(name, args) if name == "min" => { - args::expr_to_col(&args[0], schema).map(expr_fn::min) + args::column(&args[0], schema).map(|c| c.min()) } Expr::Function(name, args) if name == "quantile" => { - let column = args::identifier(&args[0]); - let pct = args::number(&args[1]); - - if (0.0..=1.0).contains(&pct) { - let dt = schema - .field_with_unqualified_name(&column) - .map(|f| f.data_type()) - .map_err(|_| anyhow!("quantile: unknown column {column}"))?; - Ok(quantile(args::str_to_col(column), pct, dt)) - } else { - Err(anyhow!("quantile: Quantile value must [0, 1]")) - } + let quantile = args::number(&args[1]); + args::column(&args[0], schema) + .map(|c| c.quantile(lit(quantile), QuantileInterpolOptions::Linear)) } Expr::Function(name, args) if name == "sd" => { - args::expr_to_col(&args[0], schema).map(expr_fn::stddev) + args::column(&args[0], schema).map(|c| c.std(1)) } Expr::Function(name, args) if name == "sum" => { - args::expr_to_col(&args[0], schema).map(expr_fn::sum) + args::column(&args[0], schema).map(|c| c.sum()) } Expr::Function(name, args) if name == "var" => { - args::expr_to_col(&args[0], schema).map(var) + args::column(&args[0], schema).map(|c| c.var(1)) } _ => panic!("Unexpected summarize expression {rhs}"), }?; @@ -114,135 +109,3 @@ fn eval_args(args: &[Expr], schema: &DFSchema) -> Result> { Ok(columns) } - -fn var(expr: DFExpr) -> DFExpr { - DFExpr::AggregateFunction(expr::AggregateFunction::new( - AggregateFunction::Variance, - vec![expr], - false, - None, - None, - )) -} - -// This function implement an exact quantile as DataFusion only provide only -// approximate quantile. -fn quantile(expr: DFExpr, quantile: f64, data_type: &DataType) -> DFExpr { - static LAST_CALL: AtomicU64 = AtomicU64::new(0); - - // We need to give a different name to each udaf to make sure that different - // calls to quantile produce different results. - let quantile = create_udaf( - &format!("quantile-{}", LAST_CALL.fetch_add(1, Ordering::Relaxed)), - vec![data_type.clone()], - Arc::new(data_type.clone()), - Volatility::Immutable, - Arc::new(move |dt| Ok(Box::new(Quantile::new(dt, quantile)))), - Arc::new(vec![DataType::UInt64]), - ); - - quantile.call(vec![expr]) -} - -type QuantileStates = Arc>>>; - -#[derive(Debug)] -struct Quantile { - quantile: f64, - state_id: u64, - shared_states: QuantileStates, - data_type: DataType, - arrays: Vec, -} - -impl Quantile { - fn new(data_type: &DataType, quantile: f64) -> Self { - static LAST_STATE_ID: AtomicU64 = AtomicU64::new(0); - static STATES_HASH: OnceLock = OnceLock::new(); - - let hash = STATES_HASH.get_or_init(|| Arc::new(Mutex::new(HashMap::default()))); - - let state_id = LAST_STATE_ID.fetch_add(1, Ordering::Relaxed); - Self { - quantile, - state_id, - shared_states: hash.clone(), - data_type: data_type.clone(), - arrays: Default::default(), - } - } -} - -impl Accumulator for Quantile { - fn state(&self) -> DFResult> { - let mut states = self.shared_states.lock(); - states.insert(self.state_id, self.arrays.clone()); - - Ok(vec![ScalarValue::UInt64(Some(self.state_id))]) - } - - fn update_batch(&mut self, values: &[ArrayRef]) -> DFResult<()> { - assert_eq!(values.len(), 1); - let array = &values[0]; - - assert_eq!(array.data_type(), &self.data_type); - self.arrays.push(array.clone()); - - Ok(()) - } - - fn merge_batch(&mut self, states: &[ArrayRef]) -> DFResult<()> { - assert_eq!(states.len(), 1); - - let array = &states[0]; - assert!(matches!(array.data_type(), DataType::UInt64)); - for index in 0..array.len() { - match ScalarValue::try_from_array(array, index)? { - ScalarValue::UInt64(Some(id)) => { - let mut states = self.shared_states.lock(); - if let Some(arrays) = states.remove(&id) { - self.arrays.extend(arrays); - } else { - // If this happens something is broken. - panic!("No state found for id {id}"); - } - } - ScalarValue::UInt64(_) => {} - v => { - return Err(DataFusionError::Internal(format!( - "Unexpected state in quantile aggregator: {v:?}" - ))) - } - } - } - - Ok(()) - } - - fn evaluate(&self) -> DFResult { - let arrays = self.arrays.iter().map(|a| a.as_ref()).collect::>(); - let values = compute::concat(&arrays)?; - let length = values.len() - values.null_count(); - - if length == 0 { - return ScalarValue::try_from(values.data_type()); - } - - let options = SortOptions { - descending: false, - nulls_first: false, - }; - - let idx = ((length - 1) as f64 * self.quantile).floor() as usize; - let limit = (idx + 1).min(length); - let sorted = compute::sort_limit(&values, Some(options), Some(limit))?; - ScalarValue::try_from_array(&sorted, idx) - } - - fn size(&self) -> usize { - let arrays_size: usize = self.arrays.iter().map(|a| a.len()).sum(); - - std::mem::size_of_val(self) + arrays_size + self.data_type.size() - - std::mem::size_of_val(&self.data_type) - } -} diff --git a/src/engine/unnest.rs b/src/engine/unnest.rs index 51995a2..9f90876 100644 --- a/src/engine/unnest.rs +++ b/src/engine/unnest.rs @@ -1,8 +1,21 @@ // Copyright (C) 2023 Vince Vasta // SPDX-License-Identifier: Apache-2.0 -use crate::parser::Expr; +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. use anyhow::{bail, Result}; -use datafusion::{arrow::datatypes::*, logical_expr::LogicalPlanBuilder}; +use polars::prelude::*; + +use crate::parser::Expr; use super::*; @@ -10,30 +23,24 @@ use super::*; /// /// Parameters are checked before evaluation by the typing module. pub fn eval(args: &[Expr], ctx: &mut Context) -> Result<()> { - if let Some(mut plan) = ctx.take_plan() { + if let Some(mut df) = ctx.take_df() { for arg in args { let column = args::identifier(arg); - let schema = plan.schema(); + let schema = df.schema().map_err(|e| anyhow!("unnest error: {e}"))?; - match schema - .field_with_unqualified_name(&column) - .map(|f| f.data_type()) - { - Ok(DataType::List(_)) => { - plan = LogicalPlanBuilder::from(plan) - .unnest_column(column)? - .build()?; + match schema.get(&column) { + Some(DataType::List(_)) => { + df = df.explode(vec![col(&column)]); + } + Some(DataType::Struct(_)) => { + df = df.unnest([&column]); } - // TODO: This needs changes to DataFusion to work, or a plan extension. - // Some(DataType::Struct(_)) => { - // df = df.unnest([&column]); - // } - Ok(_) => bail!("unnest error: '{column}' is not a list or struct type"), - Err(_) => bail!("unnest error: unknown column '{column}'"), + Some(_) => bail!("unnest error: '{column}' is not a list or struct type"), + None => bail!("unnest error: unknown column '{column}'"), } } - ctx.set_plan(plan); + ctx.set_df(df)?; } else { bail!("unnest error: missing input dataframe"); } diff --git a/src/parser.rs b/src/parser.rs index 2a66c6e..a6341c9 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -391,7 +391,7 @@ pub fn parse(input: &str) -> Result> { let input = input .lines() .filter(|line| comment(line).is_err()) - .map(|line| format!("{line}\n")) + .map(|line| line.to_string() + "\n") .collect::(); match root(input.trim().trim_end_matches(';')) { diff --git a/src/repl.rs b/src/repl.rs index 4f6411c..47318a7 100644 --- a/src/repl.rs +++ b/src/repl.rs @@ -153,6 +153,7 @@ impl Completer for CustomCompleter { .map(|value| Suggestion { value, description: None, + style: None, extra: None, span: Span::new(prefix_pos, pos), append_whitespace: false, @@ -170,6 +171,7 @@ impl Completer for CustomCompleter { .map(|value| Suggestion { value, description: None, + style: None, extra: None, span: Span::new(prefix_pos, pos), append_whitespace: false, diff --git a/tests/functions/count.rs b/tests/functions/count.rs index 4728c8f..5bd931e 100644 --- a/tests/functions/count.rs +++ b/tests/functions/count.rs @@ -20,7 +20,7 @@ fn count_column() -> Result<()> { r#" shape: (5, 2) payment_type|n - str|i64 + str|u32 --- Cash|53 Credit card|185 @@ -49,7 +49,7 @@ fn count_sorted() -> Result<()> { r#" shape: (5, 2) payment_type|n - str|i64 + str|u32 --- Credit card|185 Cash|53 @@ -80,7 +80,7 @@ fn count_agg_column_name() -> Result<()> { r#" shape: (5, 2) payment_type|nn - str|i64 + str|u32 --- Credit card|185 Cash|53 @@ -111,7 +111,7 @@ fn count_multi_cols() -> Result<()> { r#" shape: (16, 3) payment_type|passenger_count|n - str|i64|i64 + str|i64|u32 --- Cash|1|36 Cash|2|7 @@ -152,7 +152,7 @@ fn count_multi_cols_sorted() -> Result<()> { r#" shape: (16, 3) payment_type|passenger_count|n - str|i64|i64 + str|i64|u32 --- Credit card|1|144 Cash|1|36 @@ -193,7 +193,7 @@ fn count_no_cols() -> Result<()> { r#" shape: (1, 1) n - i64 + u32 --- 250 --- diff --git a/tests/functions/df_var.rs b/tests/functions/df_var.rs index 7e54a42..0785adf 100644 --- a/tests/functions/df_var.rs +++ b/tests/functions/df_var.rs @@ -39,18 +39,18 @@ fn df_variable() -> Result<()> { --- shape: (10, 5) tpep_pickup_datetime|tpep_dropoff_datetime|passenger_count|trip_distance|total_amount - datetime[μs]|datetime[μs]|i64|f64|f64 + datetime[ns]|datetime[ns]|i64|f64|f64 --- - 2022-11-22T19:27:01|2022-11-22T19:45:53|1|3.14|22.56 - 2022-11-27T16:43:26|2022-11-27T16:50:06|2|1.06|9.8 - 2022-11-12T16:58:37|2022-11-12T17:12:31|1|2.36|17.76 - 2022-11-30T22:24:08|2022-11-30T22:39:16|1|5.2|26.16 - 2022-11-26T23:03:41|2022-11-26T23:23:48|3|0.0|19.55 - 2022-11-30T14:46:43|2022-11-30T15:17:39|1|2.39|22.3 - 2022-11-22T14:36:34|2022-11-22T14:46:38|2|1.52|11.8 - 2022-11-28T09:54:14|2022-11-28T10:02:07|1|0.51|11.3 - 2022-11-09T17:39:58|2022-11-09T17:58:30|1|0.98|19.56 - 2022-11-20T00:33:58|2022-11-20T00:42:35|2|2.14|15.36 + 2022-11-22 19:27:01|2022-11-22 19:45:53|1|3.14|22.56 + 2022-11-27 16:43:26|2022-11-27 16:50:06|2|1.06|9.8 + 2022-11-12 16:58:37|2022-11-12 17:12:31|1|2.36|17.76 + 2022-11-30 22:24:08|2022-11-30 22:39:16|1|5.2|26.16 + 2022-11-26 23:03:41|2022-11-26 23:23:48|3|0.0|19.55 + 2022-11-30 14:46:43|2022-11-30 15:17:39|1|2.39|22.3 + 2022-11-22 14:36:34|2022-11-22 14:46:38|2|1.52|11.8 + 2022-11-28 09:54:14|2022-11-28 10:02:07|1|0.51|11.3 + 2022-11-09 17:39:58|2022-11-09 17:58:30|1|0.98|19.56 + 2022-11-20 00:33:58|2022-11-20 00:42:35|2|2.14|15.36 --- "# ) diff --git a/tests/functions/filter.rs b/tests/functions/filter.rs index deda078..e83b355 100644 --- a/tests/functions/filter.rs +++ b/tests/functions/filter.rs @@ -335,17 +335,17 @@ fn filter_dates() -> Result<()> { r#" shape: (9, 2) tpep_pickup_datetime|tpep_dropoff_datetime - datetime[μs]|datetime[μs] - --- - 2022-11-01T07:31:16|2022-11-01T08:19:44 - 2022-11-01T10:45:13|2022-11-01T10:53:56 - 2022-11-01T11:17:08|2022-11-01T12:08:15 - 2022-11-01T11:33:46|2022-11-01T12:03:15 - 2022-11-01T16:18:07|2022-11-01T16:27:30 - 2022-11-01T17:43:51|2022-11-01T17:52:45 - 2022-11-01T17:48:38|2022-11-01T17:59:55 - 2022-11-01T19:25:41|2022-11-01T19:32:33 - 2022-11-01T19:39:09|2022-11-01T19:45:10 + datetime[ns]|datetime[ns] + --- + 2022-11-01 07:31:16|2022-11-01 08:19:44 + 2022-11-01 10:45:13|2022-11-01 10:53:56 + 2022-11-01 11:17:08|2022-11-01 12:08:15 + 2022-11-01 11:33:46|2022-11-01 12:03:15 + 2022-11-01 16:18:07|2022-11-01 16:27:30 + 2022-11-01 17:43:51|2022-11-01 17:52:45 + 2022-11-01 17:48:38|2022-11-01 17:59:55 + 2022-11-01 19:25:41|2022-11-01 19:32:33 + 2022-11-01 19:39:09|2022-11-01 19:45:10 --- "# ) @@ -368,12 +368,12 @@ fn filter_dates() -> Result<()> { r#" shape: (4, 2) tpep_pickup_datetime|tpep_dropoff_datetime - datetime[μs]|datetime[μs] + datetime[ns]|datetime[ns] --- - 2022-11-01T17:43:51|2022-11-01T17:52:45 - 2022-11-01T17:48:38|2022-11-01T17:59:55 - 2022-11-01T19:25:41|2022-11-01T19:32:33 - 2022-11-01T19:39:09|2022-11-01T19:45:10 + 2022-11-01 17:43:51|2022-11-01 17:52:45 + 2022-11-01 17:48:38|2022-11-01 17:59:55 + 2022-11-01 19:25:41|2022-11-01 19:32:33 + 2022-11-01 19:39:09|2022-11-01 19:45:10 --- "# ) @@ -396,12 +396,12 @@ fn filter_dates() -> Result<()> { r#" shape: (4, 2) tpep_pickup_datetime|tpep_dropoff_datetime - datetime[μs]|datetime[μs] + datetime[ns]|datetime[ns] --- - 2022-11-02T02:02:12|2022-11-02T02:02:19 - 2022-11-02T10:17:58|2022-11-02T10:36:07 - 2022-11-02T10:40:38|2022-11-02T10:43:58 - 2022-11-02T11:06:01|2022-11-02T11:35:00 + 2022-11-02 02:02:12|2022-11-02 02:02:19 + 2022-11-02 10:17:58|2022-11-02 10:36:07 + 2022-11-02 10:40:38|2022-11-02 10:43:58 + 2022-11-02 11:06:01|2022-11-02 11:35:00 --- "# ) @@ -481,16 +481,16 @@ fn filter_list_contains() -> Result<()> { tags list[str] --- - [tag7] - [tag2, tag4, tag7] - [tag5, tag6, tag7, tag7] - [tag2, tag3, tag7, tag8] - [tag4, tag7, tag8] - [tag2, tag2, tag2, tag7] - [tag7] - [tag5, tag7] - [tag6, tag7] - [tag5, tag6, tag7, tag9] + ["tag7"] + ["tag2", "tag4", "tag7"] + ["tag5", "tag6", "tag7", "tag7"] + ["tag2", "tag3", "tag7", "tag8"] + ["tag4", "tag7", "tag8"] + ["tag2", "tag2", "tag2", "tag7"] + ["tag7"] + ["tag5", "tag7"] + ["tag6", "tag7"] + ["tag5", "tag6", "tag7", "tag9"] --- "# ) @@ -546,11 +546,11 @@ fn filter_list_not_contains() -> Result<()> { tags list[str] --- - [tag2, tag5, tag8, tag8] - [tag9] - [tag5] - [tag7] - [tag2, tag3, tag4] + ["tag2", "tag5", "tag8", "tag8"] + ["tag9"] + ["tag5"] + ["tag7"] + ["tag2", "tag3", "tag4"] --- "# ) @@ -636,16 +636,16 @@ fn filter_is_null() -> Result<()> { ints|tags list[u32]|list[str] --- - null|[tag1, tag3, tag5] - null|[tag1, tag4, tag5, tag5] - null|[tag1, tag3, tag7, tag8] - null|[tag1, tag2, tag8] - null|[tag1, tag9] - null|[tag1, tag2, tag7] - null|[tag1, tag2, tag6] - null|[tag1, tag3] - null|[tag1, tag7, tag9, tag9] - null|[tag1, tag8] + null|["tag1", "tag3", "tag5"] + null|["tag1", "tag4", "tag5", "tag5"] + null|["tag1", "tag3", "tag7", "tag8"] + null|["tag1", "tag2", "tag8"] + null|["tag1", "tag9"] + null|["tag1", "tag2", "tag7"] + null|["tag1", "tag2", "tag6"] + null|["tag1", "tag3"] + null|["tag1", "tag7", "tag9", "tag9"] + null|["tag1", "tag8"] --- "# ) @@ -672,16 +672,16 @@ fn filter_is_not_null() -> Result<()> { ints|tags list[u32]|list[str] --- - [6]|[tag1, tag3, tag6, tag9] - [9, 23, 38, 92]|[tag1, tag5, tag9, tag9] - [4]|[tag1, tag5, tag9] - [8, 46, 49, 88]|[tag1] - [11, 49]|[tag1, tag4, tag8, tag8] - [47]|[tag1, tag6, tag9] - [34, 77]|[tag1, tag7] - [21, 28, 94]|[tag1, tag3, tag9] - [17, 43]|[tag1, tag2, tag5, tag9] - [26, 62]|[tag1, tag4, tag6] + [6]|["tag1", "tag3", "tag6", "tag9"] + [9, 23, 38, 92]|["tag1", "tag5", "tag9", "tag9"] + [4]|["tag1", "tag5", "tag9"] + [8, 46, 49, 88]|["tag1"] + [11, 49]|["tag1", "tag4", "tag8", "tag8"] + [47]|["tag1", "tag6", "tag9"] + [34, 77]|["tag1", "tag7"] + [21, 28, 94]|["tag1", "tag3", "tag9"] + [17, 43]|["tag1", "tag2", "tag5", "tag9"] + [26, 62]|["tag1", "tag4", "tag6"] --- "# ) diff --git a/tests/functions/glimpse.rs b/tests/functions/glimpse.rs index 42d2e79..06a844c 100644 --- a/tests/functions/glimpse.rs +++ b/tests/functions/glimpse.rs @@ -18,15 +18,15 @@ fn glimpse_parquet() -> Result<()> { │ Cols: 19 ┆ ┆ │ ╞═══════════════════════╪══════════════╪═════════════════════════════════════════╡ │ VendorID ┆ i64 ┆ 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, ... │ - │ tpep_pickup_datetime ┆ datetime[μs] ┆ 2022-11-22T19:27:01,... │ - │ tpep_dropoff_datetime ┆ datetime[μs] ┆ 2022-11-22T19:45:53,... │ + │ tpep_pickup_datetime ┆ datetime[ns] ┆ 2022-11-22 19:27:01, 2022-11-27... │ + │ tpep_dropoff_datetime ┆ datetime[ns] ┆ 2022-11-22 19:45:53, 2022-11-27... │ │ passenger_count ┆ i64 ┆ 1, 2, 1, 1, 3, 1, 2, 1, 1, 2, 2, 1, ... │ │ trip_distance ┆ f64 ┆ 3.14, 1.06, 2.36, 5.2, 0.0, 2.39, 1.... │ - │ rate_code ┆ str ┆ Standard, Standard, Standard, Standa... │ - │ store_and_fwd_flag ┆ str ┆ N, N, N, N, N, N, N, N, N, N, N, N, ... │ + │ rate_code ┆ str ┆ "Standard", "Standard", "Standard",... │ + │ store_and_fwd_flag ┆ str ┆ "N", "N", "N", "N", "N", "N", "N", "... │ │ PULocationID ┆ i64 ┆ 234, 48, 142, 79, 237, 137, 107, 229... │ │ DOLocationID ┆ i64 ┆ 141, 142, 236, 75, 230, 140, 162, 16... │ - │ payment_type ┆ str ┆ Credit card, Cash, Credit card, Cred... │ + │ payment_type ┆ str ┆ "Credit card", "Cash", "Credit card"... │ │ fare_amount ┆ f64 ┆ 14.5, 6.5, 11.5, 18.0, 12.5, 19.0, 8... │ │ extra ┆ f64 ┆ 1.0, 0.0, 0.0, 0.5, 3.0, 0.0, 0.0, 0... │ │ mta_tax ┆ f64 ┆ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0... │ @@ -37,7 +37,7 @@ fn glimpse_parquet() -> Result<()> { │ congestion_surcharge ┆ f64 ┆ 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2... │ │ airport_fee ┆ f64 ┆ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0... │ └───────────────────────┴──────────────┴─────────────────────────────────────────┘ - "# + "# ) ); @@ -52,31 +52,31 @@ fn glimpse_csv() -> Result<()> { input, indoc!( r#" - ┌───────────────────────┬──────────────┬─────────────────────────────────────────┐ - │ Rows: 250 ┆ Type ┆ Values │ - │ Cols: 19 ┆ ┆ │ - ╞═══════════════════════╪══════════════╪═════════════════════════════════════════╡ - │ VendorID ┆ i64 ┆ 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, ... │ - │ tpep_pickup_datetime ┆ datetime[ns] ┆ 2022-11-22T19:27:01,... │ - │ tpep_dropoff_datetime ┆ datetime[ns] ┆ 2022-11-22T19:45:53,... │ - │ passenger_count ┆ i64 ┆ 1, 2, 1, 1, 3, 1, 2, 1, 1, 2, 2, 1, ... │ - │ trip_distance ┆ f64 ┆ 3.14, 1.06, 2.36, 5.2, 0.0, 2.39, 1.... │ - │ rate_code ┆ str ┆ Standard, Standard, Standard, Standa... │ - │ store_and_fwd_flag ┆ str ┆ N, N, N, N, N, N, N, N, N, N, N, N, ... │ - │ PULocationID ┆ i64 ┆ 234, 48, 142, 79, 237, 137, 107, 229... │ - │ DOLocationID ┆ i64 ┆ 141, 142, 236, 75, 230, 140, 162, 16... │ - │ payment_type ┆ str ┆ Credit card, Cash, Credit card, Cred... │ - │ fare_amount ┆ f64 ┆ 14.5, 6.5, 11.5, 18.0, 12.5, 19.0, 8... │ - │ extra ┆ f64 ┆ 1.0, 0.0, 0.0, 0.5, 3.0, 0.0, 0.0, 0... │ - │ mta_tax ┆ f64 ┆ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0... │ - │ tip_amount ┆ f64 ┆ 3.76, 0.0, 2.96, 4.36, 3.25, 0.0, 0.... │ - │ tolls_amount ┆ f64 ┆ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0... │ - │ improvement_surcharge ┆ f64 ┆ 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0... │ - │ total_amount ┆ f64 ┆ 22.56, 9.8, 17.76, 26.16, 19.55, 22.... │ - │ congestion_surcharge ┆ f64 ┆ 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2... │ - │ airport_fee ┆ f64 ┆ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0... │ - └───────────────────────┴──────────────┴─────────────────────────────────────────┘ - "# + ┌───────────────────────┬────────┬───────────────────────────────────────────────┐ + │ Rows: 250 ┆ Type ┆ Values │ + │ Cols: 19 ┆ ┆ │ + ╞═══════════════════════╪════════╪═══════════════════════════════════════════════╡ + │ VendorID ┆ i64 ┆ 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, ... │ + │ tpep_pickup_datetime ┆ str ┆ "2022-11-22T19:27:01.000000000",... │ + │ tpep_dropoff_datetime ┆ str ┆ "2022-11-22T19:45:53.000000000",... │ + │ passenger_count ┆ i64 ┆ 1, 2, 1, 1, 3, 1, 2, 1, 1, 2, 2, 1, 1, 1, ... │ + │ trip_distance ┆ f64 ┆ 3.14, 1.06, 2.36, 5.2, 0.0, 2.39, 1.52, 0.... │ + │ rate_code ┆ str ┆ "Standard", "Standard", "Standard",... │ + │ store_and_fwd_flag ┆ str ┆ "N", "N", "N", "N", "N", "N", "N", "N", "N... │ + │ PULocationID ┆ i64 ┆ 234, 48, 142, 79, 237, 137, 107, 229, 162,... │ + │ DOLocationID ┆ i64 ┆ 141, 142, 236, 75, 230, 140, 162, 161, 186... │ + │ payment_type ┆ str ┆ "Credit card", "Cash", "Credit card", "Cre... │ + │ fare_amount ┆ f64 ┆ 14.5, 6.5, 11.5, 18.0, 12.5, 19.0, 8.5, 6.... │ + │ extra ┆ f64 ┆ 1.0, 0.0, 0.0, 0.5, 3.0, 0.0, 0.0, 0.0, 1.... │ + │ mta_tax ┆ f64 ┆ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.... │ + │ tip_amount ┆ f64 ┆ 3.76, 0.0, 2.96, 4.36, 3.25, 0.0, 0.0, 2.0... │ + │ tolls_amount ┆ f64 ┆ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.... │ + │ improvement_surcharge ┆ f64 ┆ 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.... │ + │ total_amount ┆ f64 ┆ 22.56, 9.8, 17.76, 26.16, 19.55, 22.3, 11.... │ + │ congestion_surcharge ┆ f64 ┆ 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.... │ + │ airport_fee ┆ f64 ┆ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.... │ + └───────────────────────┴────────┴───────────────────────────────────────────────┘ + "# ) ); diff --git a/tests/functions/group_by.rs b/tests/functions/group_by.rs index 4534bab..620f92e 100644 --- a/tests/functions/group_by.rs +++ b/tests/functions/group_by.rs @@ -26,7 +26,7 @@ fn group_by_mean_sd_var() -> Result<()> { r#" shape: (5, 5) payment_type|mean_price|std_price|var_price|n - str|f64|f64|f64|i64 + str|f64|f64|f64|u32 --- Credit card|22.378757|16.095337|259.059865|185 Cash|18.458491|12.545236|157.382955|53 @@ -61,7 +61,7 @@ fn group_by_min_max() -> Result<()> { r#" shape: (5, 4) payment_type|min_price|max_price|n - str|f64|f64|i64 + str|f64|f64|u32 --- Credit card|8.5|84.36|185 Cash|3.3|63.1|53 @@ -99,12 +99,12 @@ fn group_by_median_quantile() -> Result<()> { r#" shape: (5, 7) payment_type|median_price|q25_price|q50_price|q75_price|q95_price|n - str|f64|f64|f64|f64|f64|i64 + str|f64|f64|f64|f64|f64|u32 --- - Credit card|16.56|12.43|16.56|23.76|56.09|185 - Cash|14.8|11.8|14.8|22.3|41.55|53 - Unknown|22.72|18.17|22.72|28.39|45.5|9 - Dispute|-0.5|-8.3|-8.3|-8.3|-8.3|2 + Credit card|16.56|12.43|16.56|23.76|64.114|185 + Cash|14.8|11.8|14.8|22.3|49.67|53 + Unknown|22.72|18.17|22.72|28.39|50.882|9 + Dispute|-0.5|-4.4|-0.5|3.4|6.52|2 No charge|8.8|8.8|8.8|8.8|8.8|1 --- "# @@ -137,9 +137,9 @@ fn summarize_median_quantile() -> Result<()> { r#" shape: (1, 6) median|q25|q50|q75|q95|n - u32|u32|u32|u32|u32|i64 + f64|f64|f64|f64|f64|u32 --- - 50|25|50|75|95|100 + 50.5|25.75|50.5|75.25|95.05|100 --- "# ) @@ -240,7 +240,7 @@ fn summarize_list() -> Result<()> { r#" shape: (1, 3) amounts|fares|n - list[f64]|list[f64]|i64 + list[f64]|list[f64]|u32 --- [3.3, 7.3, 8.3]|[2.5, 4.0, 5.0]|3 --- @@ -267,7 +267,7 @@ fn summarize_list() -> Result<()> { r#" shape: (9, 3) amounts|fares|n - f64|f64|i64 + f64|f64|u32 --- 3.3|2.5|3 3.3|4.0|3 diff --git a/tests/functions/join.rs b/tests/functions/join.rs index 5732945..05242ec 100644 --- a/tests/functions/join.rs +++ b/tests/functions/join.rs @@ -66,7 +66,7 @@ fn left_join() -> Result<()> { indoc!( r#" shape: (10, 4) - shape_id|left_key|shape_id_rhs|right_val + shape_id|left_key|shape_id_right|right_val u32|f64|u32|f64 --- 1|2.0|null|null @@ -144,7 +144,7 @@ fn outer_join() -> Result<()> { filter(shape_id < 8) | mutate(left_val = shape_id * 2) | outer_join(right_df) | - arrange(shape_id, left_val, shape_id_rhs, right_val) | + arrange(shape_id, left_val, right_val) | head() "#}; @@ -152,20 +152,20 @@ fn outer_join() -> Result<()> { input, indoc!( r#" - shape: (10, 4) - shape_id|left_val|shape_id_rhs|right_val - u32|f64|u32|f64 + shape: (10, 3) + shape_id|left_val|right_val + u32|f64|f64 --- - 1|2.0|null|null - 2|4.0|null|null - 3|6.0|null|null - 4|8.0|null|null - 5|10.0|5|10.0 - 6|12.0|6|12.0 - 7|14.0|7|14.0 - null|null|8|16.0 - null|null|9|18.0 - null|null|10|20.0 + 1|2.0|null + 2|4.0|null + 3|6.0|null + 4|8.0|null + 5|10.0|10.0 + 6|12.0|12.0 + 7|14.0|14.0 + 8|null|16.0 + 9|null|18.0 + 10|null|20.0 --- "# ) @@ -197,7 +197,7 @@ fn cross_join() -> Result<()> { indoc!( r#" shape: (6, 4) - shape_id|left_val|shape_id_rhs|right_val + shape_id|left_val|shape_id_right|right_val u32|f64|u32|f64 --- 21|42.0|1|2.0 diff --git a/tests/functions/json.rs b/tests/functions/json.rs index 3e488a5..f235bbb 100644 --- a/tests/functions/json.rs +++ b/tests/functions/json.rs @@ -19,7 +19,7 @@ fn json_load() -> Result<()> { r#" shape: (1, 1) n - i64 + u32 --- 4 --- diff --git a/tests/functions/main.rs b/tests/functions/main.rs index 53055d5..667acbe 100644 --- a/tests/functions/main.rs +++ b/tests/functions/main.rs @@ -26,7 +26,7 @@ macro_rules! assert_interpreter { Ok(output) => { if output != $expected { panic!( - "Interpreter error expected output:\n{}\n===\nGenerated output:\n{}\n===", + "Interpreter error:\nexpected:\n\n{}\nfound:\n\n{}", $expected, output ); } diff --git a/tests/functions/mutate.rs b/tests/functions/mutate.rs index 7a9a206..cbb82bb 100644 --- a/tests/functions/mutate.rs +++ b/tests/functions/mutate.rs @@ -16,7 +16,7 @@ fn mutate_arith() -> Result<()> { mutate( travel_time = tpep_dropoff_datetime - tpep_pickup_datetime, trip_distance_km = trip_distance_mi * 1.60934, - avg_speed_km_h = trip_distance_km / (secs(travel_time) / 3600) + avg_speed_km_h = trip_distance_km / (travel_time / 3.6e12) ) | relocate(trip_distance_km, after = trip_distance_mi) | head(10) @@ -28,18 +28,18 @@ fn mutate_arith() -> Result<()> { r#" shape: (10, 6) tpep_pickup_datetime|tpep_dropoff_datetime|trip_distance_mi|trip_distance_km|travel_time|avg_speed_km_h - datetime[μs]|datetime[μs]|f64|f64|duration[μs]|f64 + datetime[ns]|datetime[ns]|f64|f64|duration[ns]|f64 --- - 2022-11-22T19:27:01|2022-11-22T19:45:53|3.14|5.053328|18m 52s|16.070653 - 2022-11-27T16:43:26|2022-11-27T16:50:06|1.06|1.7059|6m 40s|15.353104 - 2022-11-12T16:58:37|2022-11-12T17:12:31|2.36|3.798042|13m 54s|16.394428 - 2022-11-30T22:24:08|2022-11-30T22:39:16|5.2|8.368568|15m 8s|33.179344 - 2022-11-26T23:03:41|2022-11-26T23:23:48|0.0|0.0|20m 7s|0.0 - 2022-11-30T14:46:43|2022-11-30T15:17:39|2.39|3.846323|30m 56s|7.46054 - 2022-11-22T14:36:34|2022-11-22T14:46:38|1.52|2.446197|10m 4s|14.579981 - 2022-11-28T09:54:14|2022-11-28T10:02:07|0.51|0.820763|7m 53s|6.246825 - 2022-11-09T17:39:58|2022-11-09T17:58:30|0.98|1.577153|18m 32s|5.105892 - 2022-11-20T00:33:58|2022-11-20T00:42:35|2.14|3.443988|8m 37s|23.981345 + 2022-11-22 19:27:01|2022-11-22 19:45:53|3.14|5.0533276|18m 52s|16.070653 + 2022-11-27 16:43:26|2022-11-27 16:50:06|1.06|1.7059004|6m 40s|15.353104 + 2022-11-12 16:58:37|2022-11-12 17:12:31|2.36|3.7980424|13m 54s|16.394428 + 2022-11-30 22:24:08|2022-11-30 22:39:16|5.2|8.368568|15m 8s|33.179344 + 2022-11-26 23:03:41|2022-11-26 23:23:48|0.0|0.0|20m 7s|0.0 + 2022-11-30 14:46:43|2022-11-30 15:17:39|2.39|3.8463226|30m 56s|7.46054 + 2022-11-22 14:36:34|2022-11-22 14:46:38|1.52|2.4461968|10m 4s|14.579981 + 2022-11-28 09:54:14|2022-11-28 10:02:07|0.51|0.8207634|7m 53s|6.246825 + 2022-11-09 17:39:58|2022-11-09 17:58:30|0.98|1.5771532|18m 32s|5.105892 + 2022-11-20 00:33:58|2022-11-20 00:42:35|2.14|3.443988|8m 37s|23.981345 --- "# ) @@ -226,7 +226,7 @@ fn mutate_dt() -> Result<()> { parquet("tests/data/nyctaxi.parquet") | select(trip_distance, tpep_pickup_datetime) | mutate( - date_string = "2022-11-27T16:43:26", + date_string = "2022-11-27 16:43:26", date_datetime = ymd_hms(date_string) ) | head(2) @@ -238,10 +238,10 @@ fn mutate_dt() -> Result<()> { r#" shape: (2, 4) trip_distance|tpep_pickup_datetime|date_string|date_datetime - f64|datetime[μs]|str|datetime[ms] + f64|datetime[ns]|str|datetime[ns] --- - 3.14|2022-11-22T19:27:01|2022-11-27T16:43:26|2022-11-27T16:43:26 - 1.06|2022-11-27T16:43:26|2022-11-27T16:43:26|2022-11-27T16:43:26 + 3.14|2022-11-22 19:27:01|2022-11-27 16:43:26|2022-11-27 16:43:26 + 1.06|2022-11-27 16:43:26|2022-11-27 16:43:26|2022-11-27 16:43:26 --- "# ) @@ -269,17 +269,17 @@ fn mutate_len() -> Result<()> { r#" shape: (10, 3) ints_len|floats_len|tags_len - i32|i32|i32 + u32|u32|u32 --- 3|4|4 1|3|1 - null|4|1 + 0|4|1 2|4|1 - null|4|3 + 0|4|3 1|1|3 4|1|4 - null|2|null - 4|null|null + 0|2|0 + 4|0|0 1|4|4 --- "# @@ -301,7 +301,7 @@ fn mutate_len() -> Result<()> { r#" shape: (4, 3) rate_code|n|rate_len - str|i64|i32 + str|u32|u32 --- JFK|11|3 Negotiated|2|10 @@ -405,7 +405,7 @@ fn mutate_field() -> Result<()> { r#" shape: (4, 3) rate_code|n|rate_len - str|i64|i32 + str|u32|u32 --- JFK|11|3 Negotiated|2|10 @@ -441,7 +441,7 @@ fn mutate_durations() -> Result<()> { r#" shape: (5, 5) travel_time|travel_time_secs|travel_time_millis|travel_time_micros|travel_time_nanos - duration[μs]|i64|i64|i64|i64 + duration[ns]|i64|i64|i64|i64 --- 18m 52s|1132|1132000|1132000000|1132000000000 6m 40s|400|400000|400000000|400000000000 @@ -479,7 +479,7 @@ fn mutate_durations() -> Result<()> { r#" shape: (5, 4) travel_time|dtravel_time_millis|dtravel_time_micros|dtravel_time_nanos - duration[μs]|duration[ms]|duration[μs]|duration[ns] + duration[ns]|duration[μs]|duration[μs]|duration[μs] --- 18m 52s|18m 52s|18m 52s|18m 52s 6m 40s|6m 40s|6m 40s|6m 40s diff --git a/tests/functions/relocate.rs b/tests/functions/relocate.rs index 5a4876c..14cb7d0 100644 --- a/tests/functions/relocate.rs +++ b/tests/functions/relocate.rs @@ -19,9 +19,9 @@ fn relocate_default() -> Result<()> { r#" shape: (1, 19) payment_type|passenger_count|VendorID|tpep_pickup_datetime|tpep_dropoff_datetime|trip_distance|rate_code|store_and_fwd_flag|PULocationID|DOLocationID|fare_amount|extra|mta_tax|tip_amount|tolls_amount|improvement_surcharge|total_amount|congestion_surcharge|airport_fee - str|i64|i64|datetime[μs]|datetime[μs]|f64|str|str|i64|i64|f64|f64|f64|f64|f64|f64|f64|f64|f64 + str|i64|i64|datetime[ns]|datetime[ns]|f64|str|str|i64|i64|f64|f64|f64|f64|f64|f64|f64|f64|f64 --- - Credit card|1|2|2022-11-22T19:27:01|2022-11-22T19:45:53|3.14|Standard|N|234|141|14.5|1.0|0.5|3.76|0.0|0.3|22.56|2.5|0.0 + Credit card|1|2|2022-11-22 19:27:01|2022-11-22 19:45:53|3.14|Standard|N|234|141|14.5|1.0|0.5|3.76|0.0|0.3|22.56|2.5|0.0 --- "# ) @@ -44,9 +44,9 @@ fn relocate_before_first() -> Result<()> { r#" shape: (1, 19) payment_type|passenger_count|VendorID|tpep_pickup_datetime|tpep_dropoff_datetime|trip_distance|rate_code|store_and_fwd_flag|PULocationID|DOLocationID|fare_amount|extra|mta_tax|tip_amount|tolls_amount|improvement_surcharge|total_amount|congestion_surcharge|airport_fee - str|i64|i64|datetime[μs]|datetime[μs]|f64|str|str|i64|i64|f64|f64|f64|f64|f64|f64|f64|f64|f64 + str|i64|i64|datetime[ns]|datetime[ns]|f64|str|str|i64|i64|f64|f64|f64|f64|f64|f64|f64|f64|f64 --- - Credit card|1|2|2022-11-22T19:27:01|2022-11-22T19:45:53|3.14|Standard|N|234|141|14.5|1.0|0.5|3.76|0.0|0.3|22.56|2.5|0.0 + Credit card|1|2|2022-11-22 19:27:01|2022-11-22 19:45:53|3.14|Standard|N|234|141|14.5|1.0|0.5|3.76|0.0|0.3|22.56|2.5|0.0 --- "# ) @@ -69,9 +69,9 @@ fn relocate_before() -> Result<()> { r#" shape: (1, 19) VendorID|tpep_pickup_datetime|tpep_dropoff_datetime|trip_distance|rate_code|store_and_fwd_flag|PULocationID|DOLocationID|payment_type|passenger_count|fare_amount|extra|mta_tax|tip_amount|tolls_amount|improvement_surcharge|total_amount|congestion_surcharge|airport_fee - i64|datetime[μs]|datetime[μs]|f64|str|str|i64|i64|str|i64|f64|f64|f64|f64|f64|f64|f64|f64|f64 + i64|datetime[ns]|datetime[ns]|f64|str|str|i64|i64|str|i64|f64|f64|f64|f64|f64|f64|f64|f64|f64 --- - 2|2022-11-22T19:27:01|2022-11-22T19:45:53|3.14|Standard|N|234|141|Credit card|1|14.5|1.0|0.5|3.76|0.0|0.3|22.56|2.5|0.0 + 2|2022-11-22 19:27:01|2022-11-22 19:45:53|3.14|Standard|N|234|141|Credit card|1|14.5|1.0|0.5|3.76|0.0|0.3|22.56|2.5|0.0 --- "# ) @@ -94,9 +94,9 @@ fn relocate_after() -> Result<()> { r#" shape: (1, 19) VendorID|tpep_pickup_datetime|tpep_dropoff_datetime|trip_distance|rate_code|store_and_fwd_flag|PULocationID|DOLocationID|fare_amount|payment_type|passenger_count|extra|mta_tax|tip_amount|tolls_amount|improvement_surcharge|total_amount|congestion_surcharge|airport_fee - i64|datetime[μs]|datetime[μs]|f64|str|str|i64|i64|f64|str|i64|f64|f64|f64|f64|f64|f64|f64|f64 + i64|datetime[ns]|datetime[ns]|f64|str|str|i64|i64|f64|str|i64|f64|f64|f64|f64|f64|f64|f64|f64 --- - 2|2022-11-22T19:27:01|2022-11-22T19:45:53|3.14|Standard|N|234|141|14.5|Credit card|1|1.0|0.5|3.76|0.0|0.3|22.56|2.5|0.0 + 2|2022-11-22 19:27:01|2022-11-22 19:45:53|3.14|Standard|N|234|141|14.5|Credit card|1|1.0|0.5|3.76|0.0|0.3|22.56|2.5|0.0 --- "# ) @@ -119,9 +119,9 @@ fn relocate_after_last() -> Result<()> { r#" shape: (1, 19) VendorID|tpep_pickup_datetime|tpep_dropoff_datetime|trip_distance|rate_code|store_and_fwd_flag|PULocationID|DOLocationID|fare_amount|extra|mta_tax|tip_amount|tolls_amount|improvement_surcharge|total_amount|congestion_surcharge|airport_fee|payment_type|passenger_count - i64|datetime[μs]|datetime[μs]|f64|str|str|i64|i64|f64|f64|f64|f64|f64|f64|f64|f64|f64|str|i64 + i64|datetime[ns]|datetime[ns]|f64|str|str|i64|i64|f64|f64|f64|f64|f64|f64|f64|f64|f64|str|i64 --- - 2|2022-11-22T19:27:01|2022-11-22T19:45:53|3.14|Standard|N|234|141|14.5|1.0|0.5|3.76|0.0|0.3|22.56|2.5|0.0|Credit card|1 + 2|2022-11-22 19:27:01|2022-11-22 19:45:53|3.14|Standard|N|234|141|14.5|1.0|0.5|3.76|0.0|0.3|22.56|2.5|0.0|Credit card|1 --- "# ) @@ -144,9 +144,9 @@ fn relocate_same_col() -> Result<()> { r#" shape: (1, 19) VendorID|tpep_pickup_datetime|tpep_dropoff_datetime|passenger_count|payment_type|trip_distance|rate_code|store_and_fwd_flag|PULocationID|DOLocationID|fare_amount|extra|mta_tax|tip_amount|tolls_amount|improvement_surcharge|total_amount|congestion_surcharge|airport_fee - i64|datetime[μs]|datetime[μs]|i64|str|f64|str|str|i64|i64|f64|f64|f64|f64|f64|f64|f64|f64|f64 + i64|datetime[ns]|datetime[ns]|i64|str|f64|str|str|i64|i64|f64|f64|f64|f64|f64|f64|f64|f64|f64 --- - 2|2022-11-22T19:27:01|2022-11-22T19:45:53|1|Credit card|3.14|Standard|N|234|141|14.5|1.0|0.5|3.76|0.0|0.3|22.56|2.5|0.0 + 2|2022-11-22 19:27:01|2022-11-22 19:45:53|1|Credit card|3.14|Standard|N|234|141|14.5|1.0|0.5|3.76|0.0|0.3|22.56|2.5|0.0 --- "# ) diff --git a/tests/functions/rename.rs b/tests/functions/rename.rs index c3f0b3d..39d6f8d 100644 --- a/tests/functions/rename.rs +++ b/tests/functions/rename.rs @@ -31,9 +31,9 @@ fn rename() -> Result<()> { r#" shape: (1, 6) vendor_id|pickup_datetime|dropoff_datetime|pu_location_id|do_location_id|total_amount - i64|datetime[μs]|datetime[μs]|i64|i64|f64 + i64|datetime[ns]|datetime[ns]|i64|i64|f64 --- - 2|2022-11-22T19:27:01|2022-11-22T19:45:53|234|141|22.56 + 2|2022-11-22 19:27:01|2022-11-22 19:45:53|234|141|22.56 --- "# ) diff --git a/tests/functions/select.rs b/tests/functions/select.rs index 074229c..e79252f 100644 --- a/tests/functions/select.rs +++ b/tests/functions/select.rs @@ -25,11 +25,11 @@ fn select_columns() -> Result<()> { r#" shape: (3, 5) rate_code|tip_amount|tpep_pickup_datetime|airport_fee|tpep_dropoff_datetime - str|f64|datetime[μs]|f64|datetime[μs] + str|f64|datetime[ns]|f64|datetime[ns] --- - Standard|3.76|2022-11-22T19:27:01|0.0|2022-11-22T19:45:53 - Standard|0.0|2022-11-27T16:43:26|0.0|2022-11-27T16:50:06 - Standard|2.96|2022-11-12T16:58:37|0.0|2022-11-12T17:12:31 + Standard|3.76|2022-11-22 19:27:01|0.0|2022-11-22 19:45:53 + Standard|0.0|2022-11-27 16:43:26|0.0|2022-11-27 16:50:06 + Standard|2.96|2022-11-12 16:58:37|0.0|2022-11-12 17:12:31 --- "# ) @@ -57,11 +57,11 @@ fn select_rename() -> Result<()> { r#" shape: (3, 4) pickup_datetime|dropoff_datetime|vendor_id|pu_location_id - datetime[μs]|datetime[μs]|i64|i64 + datetime[ns]|datetime[ns]|i64|i64 --- - 2022-11-22T19:27:01|2022-11-22T19:45:53|2|234 - 2022-11-27T16:43:26|2022-11-27T16:50:06|2|48 - 2022-11-12T16:58:37|2022-11-12T17:12:31|2|142 + 2022-11-22 19:27:01|2022-11-22 19:45:53|2|234 + 2022-11-27 16:43:26|2022-11-27 16:50:06|2|48 + 2022-11-12 16:58:37|2022-11-12 17:12:31|2|142 --- "# ) @@ -84,11 +84,11 @@ fn select_starts_with() -> Result<()> { r#" shape: (3, 2) tpep_pickup_datetime|tpep_dropoff_datetime - datetime[μs]|datetime[μs] + datetime[ns]|datetime[ns] --- - 2022-11-22T19:27:01|2022-11-22T19:45:53 - 2022-11-27T16:43:26|2022-11-27T16:50:06 - 2022-11-12T16:58:37|2022-11-12T17:12:31 + 2022-11-22 19:27:01|2022-11-22 19:45:53 + 2022-11-27 16:43:26|2022-11-27 16:50:06 + 2022-11-12 16:58:37|2022-11-12 17:12:31 --- "# ) @@ -146,11 +146,11 @@ fn select_ends_with() -> Result<()> { r#" shape: (3, 2) tpep_pickup_datetime|tpep_dropoff_datetime - datetime[μs]|datetime[μs] + datetime[ns]|datetime[ns] --- - 2022-11-22T19:27:01|2022-11-22T19:45:53 - 2022-11-27T16:43:26|2022-11-27T16:50:06 - 2022-11-12T16:58:37|2022-11-12T17:12:31 + 2022-11-22 19:27:01|2022-11-22 19:45:53 + 2022-11-27 16:43:26|2022-11-27 16:50:06 + 2022-11-12 16:58:37|2022-11-12 17:12:31 --- "# ) @@ -206,11 +206,11 @@ fn select_contains() -> Result<()> { r#" shape: (3, 2) tpep_pickup_datetime|tpep_dropoff_datetime - datetime[μs]|datetime[μs] + datetime[ns]|datetime[ns] --- - 2022-11-22T19:27:01|2022-11-22T19:45:53 - 2022-11-27T16:43:26|2022-11-27T16:50:06 - 2022-11-12T16:58:37|2022-11-12T17:12:31 + 2022-11-22 19:27:01|2022-11-22 19:45:53 + 2022-11-27 16:43:26|2022-11-27 16:50:06 + 2022-11-12 16:58:37|2022-11-12 17:12:31 --- "# ) diff --git a/tests/functions/show.rs b/tests/functions/show.rs index 1e2785e..472ab15 100644 --- a/tests/functions/show.rs +++ b/tests/functions/show.rs @@ -106,9 +106,9 @@ fn show_timestamps() -> Result<()> { secs|micros|nanos datetime[ms]|datetime[μs]|datetime[ns] --- - 2023-08-02T23:09:42|2023-08-02T23:09:42.456642|2023-08-02T23:09:42.456642001 - 2023-08-02T23:09:46|2023-08-02T23:09:46.159043|2023-08-02T23:09:46.159043342 - 2023-08-02T23:09:54|2023-08-02T23:09:54.041828|2023-08-02T23:09:54.041828923 + 2023-08-02 23:09:42|2023-08-02 23:09:42.456642|2023-08-02 23:09:42.456642001 + 2023-08-02 23:09:46|2023-08-02 23:09:46.159043|2023-08-02 23:09:46.159043342 + 2023-08-02 23:09:54|2023-08-02 23:09:54.041828|2023-08-02 23:09:54.041828923 --- "# ) diff --git a/tests/functions/unnest.rs b/tests/functions/unnest.rs index 81418f8..b6f2f25 100644 --- a/tests/functions/unnest.rs +++ b/tests/functions/unnest.rs @@ -22,16 +22,16 @@ fn unnest_ints() -> Result<()> { r#" shape: (10, 3) shape_id|ints_len|ints - u32|i32|u32 + u32|u32|u32 --- 1|3|3 1|3|88 1|3|94 2|1|73 - 3|null|null + 3|0|null 4|2|43 4|2|97 - 5|null|null + 5|0|null 6|1|65 7|4|1 --- @@ -59,7 +59,7 @@ fn unnest_str() -> Result<()> { r#" shape: (10, 3) shape_id|tags_len|tags - u32|i32|str + u32|u32|str --- 1|4|tag2 1|4|tag5 @@ -96,7 +96,7 @@ fn unnest_floats() -> Result<()> { r#" shape: (12, 3) shape_id|floats_len|floats - u32|i32|f64 + u32|u32|f64 --- 1|4|2.5 1|4|3.5 @@ -119,7 +119,6 @@ fn unnest_floats() -> Result<()> { } #[test] -#[ignore = "need unnest structs"] fn unnest_structs() -> Result<()> { let input = indoc! {r#" parquet("tests/data/structs.parquet") |