Skip to content

Commit

Permalink
refactor: update polars based drivers to use <scheme>://<file> format
Browse files Browse the repository at this point in the history
  • Loading branch information
brianheineman committed Dec 15, 2024
1 parent 9c18a62 commit 438142f
Show file tree
Hide file tree
Showing 10 changed files with 37 additions and 67 deletions.
16 changes: 8 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,26 +69,26 @@ rsql --url "<url>" -- "<query>"

| Driver | URL |
|--------------------|---------------------------------------------------------------------------------------------------------------------------|
| arrow (polars) | `arrow://?file=<file>` |
| avro (polars) | `avro://?file=<file>` |
| arrow (polars) | `arrow://<file>` |
| avro (polars) | `avro://<file>` |
| cockroachdb (sqlx) | `cockroachdb://<user>[:<password>]@<host>[:<port>]/<database>` |
| csv (polars) | `csv://?file=<file>[&has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |
| delimited (polars) | `delimited://?file=<file>[&separator=<char>][&has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |
| csv (polars) | `csv://<file>[?has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |
| delimited (polars) | `delimited://<file>[?separator=<char>][&has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |
| duckdb | `duckdb://?<memory=true>[&file=<database_file>]` |
| json (polars) | `json://?file=<file>` |
| jsonl (polars) | `jsonl://?file=<file>` |
| json (polars) | `json://<file>` |
| jsonl (polars) | `jsonl://<file>` |
| libsql¹ | `libsql://<host>?[<memory=true>][&file=<database_file>][&auth_token=<token>]` |
| mariadb (sqlx) | `mariadb://<user>[:<password>]@<host>[:<port>]/<database>` |
| mysql (sqlx) | `mysql://<user>[:<password>]@<host>[:<port>]/<database>` |
| parquet (polars) | `parquet://?file=<file>` |
| parquet (polars) | `parquet://<file>` |
| postgres | `postgres://<user>[:<password>]@<host>[:<port>]/<database>?<embedded=true>` |
| postgresql (sqlx) | `postgresql://<user>[:<password>]@<host>[:<port>]/<database>?<embedded=true>` |
| redshift (sqlx) | `redshift://<user>[:<password>]@<host>[:<port>]/<database>` |
| rusqlite | `rusqlite://?<memory=true>[&file=<database_file>]` |
| snowflake | `snowflake://<user>[:<token>]@<account>.snowflakecomputing.com/[?private_key_file=pkey_file&public_key_file=pubkey_file]` |
| sqlite (sqlx) | `sqlite://?<memory=true>[&file=<database_file>]` |
| sqlserver | `sqlserver://<user>[:<password>]@<host>[:<port>]/<database>` |
| tsv (polars) | `tsv://?file=<file>[&has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |
| tsv (polars) | `tsv://<file>[?has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |

¹ `libsql` needs to be enabled with the `libsql` feature flag; it is disabled by default as it conflicts
with `rusqlite`.
Expand Down
16 changes: 8 additions & 8 deletions rsql_cli/docs/src/chapter2/drivers/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,26 @@ The drivers command displays the available database drivers.

| Driver | Description | URL |
|---------------|--------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------|
| `arrow` | Arrow IPC file driver provided by [Polars](https://github.com/pola-rs/polars) | `arrow://?file=<file>` |
| `avro` | Avro file driver provided by [Polars](https://github.com/pola-rs/polars) | `avro://?file=<file>` |
| `arrow` | Arrow IPC file driver provided by [Polars](https://github.com/pola-rs/polars) | `arrow://<file>` |
| `avro` | Avro file driver provided by [Polars](https://github.com/pola-rs/polars) | `avro://<file>` |
| `cockroachdb` | CockroachDB driver provided by [SQLx](https://github.com/launchbadge/sqlx) | `cockroachdb://<user>[:<password>]@<host>[:<port>]/<database>` |
| `csv` | Comma Separated Value (CSV) file driver provided by [Polars](https://github.com/pola-rs/polars) | `csv://?file=<file>[&has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |
| `delimited` | Delimited file driver provided by [Polars](https://github.com/pola-rs/polars) | `delimited://?file=<file>[&separator=<char>][&has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |
| `csv` | Comma Separated Value (CSV) file driver provided by [Polars](https://github.com/pola-rs/polars) | `csv://<file>[?has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |
| `delimited` | Delimited file driver provided by [Polars](https://github.com/pola-rs/polars) | `delimited://<file>[?separator=<char>][&has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |
| `duckdb` | DuckDB provided by [DuckDB](https://duckdb.org/) | `duckdb://?<memory=true>[&file=<database_file>]` |
| `json` | JSON file driver provided by [Polars](https://github.com/pola-rs/polars) | `json://?file=<file>` |
| `jsonl` | JSONL file driver provided by [Polars](https://github.com/pola-rs/polars) | `jsonl://?file=<file>` |
| `json` | JSON file driver provided by [Polars](https://github.com/pola-rs/polars) | `json://<file>` |
| `jsonl` | JSONL file driver provided by [Polars](https://github.com/pola-rs/polars) | `jsonl://<file>` |
| `libsql` | LibSQL provided by [Turso](https://github.com/tursodatabase/libsql) | `libsql://<host>?[<memory=true>][&file=<database_file>][&auth_token=<token>]` |
| `mariadb` | MariaDB provided by [SQLx](https://github.com/launchbadge/sqlx) | `mariadb://<user>[:<password>]@<host>[:<port>]/<database>` |
| `mysql` | MySQL provided by [SQLx](https://github.com/launchbadge/sqlx) | `mysql://<user>[:<password>]@<host>[:<port>]/<database>` |
| `parquet` | Parquet file driver provided by [Polars](https://github.com/pola-rs/polars) | `parquet://?file=<file>` |
| `parquet` | Parquet file driver provided by [Polars](https://github.com/pola-rs/polars) | `parquet://<file>` |
| `postgres` | PostgreSQL driver provided by [rust-postgres](https://github.com/sfackler/rust-postgres) | `postgres://<user>[:<password>]@<host>[:<port>]/<database>?<embedded=true>` |
| `postgresql` | PostgreSQL driver provided by [SQLx](https://github.com/launchbadge/sqlx) | `postgresql://<user>[:<password>]@<host>[:<port>]/<database>?<embedded=true>` |
| `redshift` | Redshift driver provided by [SQLx](https://github.com/launchbadge/sqlx) | `redshift://<user>[:<password>]@<host>[:<port>]/<database>` |
| `rusqlite` | SQLite provided by [Rusqlite](https://github.com/rusqlite/rusqlite?tab=readme-ov-file#rusqlite) | `rusqlite://?<memory=true>[&file=<database_file>]` |
| `snowflake` | Snowflake provided by [Snowflake SQL API](https://docs.snowflake.com/en/developer-guide/sql-api/index) | `snowflake://<user>[:<token>]@<account>.snowflakecomputing.com/[?private_key_file=pkey_file&public_key_file=pubkey_file]` |
| `sqlite` | SQLite provided by [SQLx](https://github.com/launchbadge/sqlx) | `sqlite://?<memory=true>[&file=<database_file>]` |
| `sqlserver` | SQL Server provided by [Tiberius](https://github.com/prisma/tiberius) | `sqlserver://<user>[:<password>]@<host>[:<port>]/<database>` |
| `tsv` | Tab Separated Value (TSV) file driver provided by [Polars](https://github.com/pola-rs/polars) | `tsv://?file=<file>[&has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |
| `tsv` | Tab Separated Value (TSV) file driver provided by [Polars](https://github.com/pola-rs/polars) | `tsv://<file>[?has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |

### Examples

Expand Down
12 changes: 2 additions & 10 deletions rsql_drivers/src/arrow/driver.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
use crate::error::Result;
use crate::polars::Connection;
use crate::Error::InvalidUrl;
use async_trait::async_trait;
use polars::io::SerReader;
use polars::prelude::{IntoLazy, IpcReader};
use polars_sql::SQLContext;
use std::collections::HashMap;
use std::fs::File;
use url::Url;

Expand All @@ -24,13 +22,7 @@ impl crate::Driver for Driver {
_password: Option<String>,
) -> Result<Box<dyn crate::Connection>> {
let parsed_url = Url::parse(url.as_str())?;
let query_parameters: HashMap<String, String> =
parsed_url.query_pairs().into_owned().collect();

// Read Options
let file_name = query_parameters
.get("file")
.ok_or(InvalidUrl("Missing file parameter".to_string()))?;
let file_name = parsed_url.path();
let file = File::open(file_name)?;

let data_frame = IpcReader::new(file).set_rechunk(true).finish()?;
Expand All @@ -51,7 +43,7 @@ mod test {
const CRATE_DIRECTORY: &str = env!("CARGO_MANIFEST_DIR");

fn database_url() -> String {
format!("arrow://?file={CRATE_DIRECTORY}/../datasets/users.arrow")
format!("arrow://{CRATE_DIRECTORY}/../datasets/users.arrow")
}

#[tokio::test]
Expand Down
12 changes: 2 additions & 10 deletions rsql_drivers/src/avro/driver.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
use crate::error::Result;
use crate::polars::Connection;
use crate::Error::InvalidUrl;
use async_trait::async_trait;
use polars::io::avro::AvroReader;
use polars::io::SerReader;
use polars::prelude::IntoLazy;
use polars_sql::SQLContext;
use std::collections::HashMap;
use std::fs::File;
use url::Url;

Expand All @@ -25,13 +23,7 @@ impl crate::Driver for Driver {
_password: Option<String>,
) -> Result<Box<dyn crate::Connection>> {
let parsed_url = Url::parse(url.as_str())?;
let query_parameters: HashMap<String, String> =
parsed_url.query_pairs().into_owned().collect();

// Read Options
let file_name = query_parameters
.get("file")
.ok_or(InvalidUrl("Missing file parameter".to_string()))?;
let file_name = parsed_url.path();
let file = File::open(file_name)?;

let data_frame = AvroReader::new(file).set_rechunk(true).finish()?;
Expand All @@ -52,7 +44,7 @@ mod test {
const CRATE_DIRECTORY: &str = env!("CARGO_MANIFEST_DIR");

fn database_url() -> String {
format!("avro://?file={CRATE_DIRECTORY}/../datasets/users.avro")
format!("avro://{CRATE_DIRECTORY}/../datasets/users.avro")
}

#[tokio::test]
Expand Down
6 changes: 3 additions & 3 deletions rsql_drivers/src/csv/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ impl crate::Driver for Driver {
url: String,
password: Option<String>,
) -> Result<Box<dyn crate::Connection>> {
let url = format!("{url}&separator=,");
let url = format!("{url}?separator=,");
DelimitedDriver.connect(url, password).await
}
}
Expand All @@ -28,15 +28,15 @@ mod test {
const CRATE_DIRECTORY: &str = env!("CARGO_MANIFEST_DIR");

fn database_url() -> String {
format!("csv://?file={CRATE_DIRECTORY}/../datasets/users.csv")
format!("csv://{CRATE_DIRECTORY}/../datasets/users.csv")
}

#[tokio::test]
async fn test_driver_connect() -> anyhow::Result<()> {
let database_url = database_url();
let driver_manager = DriverManager::default();
let mut connection = driver_manager.connect(&database_url).await?;
let expected_url = format!("{database_url}&separator=,");
let expected_url = format!("{database_url}?separator=,");
assert_eq!(&expected_url, connection.url());
connection.close().await?;
Ok(())
Expand Down
8 changes: 3 additions & 5 deletions rsql_drivers/src/delimited/driver.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::error::Result;
use crate::polars::Connection;
use crate::Error::{ConversionError, InvalidUrl};
use crate::Error::ConversionError;
use async_trait::async_trait;
use polars::io::SerReader;
use polars::prelude::{CsvParseOptions, CsvReadOptions, IntoLazy};
Expand Down Expand Up @@ -29,9 +29,7 @@ impl crate::Driver for Driver {
parsed_url.query_pairs().into_owned().collect();

// Read Options
let file_name = query_parameters
.get("file")
.ok_or(InvalidUrl("Missing file parameter".to_string()))?;
let file_name = parsed_url.path();
let file = File::open(file_name)?;
let has_header = query_parameters
.get("has_header")
Expand Down Expand Up @@ -123,7 +121,7 @@ mod test {
const CRATE_DIRECTORY: &str = env!("CARGO_MANIFEST_DIR");

fn database_url() -> String {
format!("delimited://?file={CRATE_DIRECTORY}/../datasets/users.pipe&separator=|")
format!("delimited://{CRATE_DIRECTORY}/../datasets/users.pipe?separator=|")
}

#[tokio::test]
Expand Down
8 changes: 3 additions & 5 deletions rsql_drivers/src/json/driver.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::error::Result;
use crate::polars::Connection;
use crate::Error::{ConversionError, InvalidUrl};
use crate::Error::ConversionError;
use async_trait::async_trait;
use polars::io::SerReader;
use polars::prelude::{IntoLazy, JsonReader};
Expand Down Expand Up @@ -29,9 +29,7 @@ impl crate::Driver for Driver {
parsed_url.query_pairs().into_owned().collect();

// Read Options
let file_name = query_parameters
.get("file")
.ok_or(InvalidUrl("Missing file parameter".to_string()))?;
let file_name = parsed_url.path();
let file = File::open(file_name)?;
let ignore_errors = query_parameters
.get("ignore_errors")
Expand Down Expand Up @@ -72,7 +70,7 @@ mod test {
const CRATE_DIRECTORY: &str = env!("CARGO_MANIFEST_DIR");

fn database_url() -> String {
format!("json://?file={CRATE_DIRECTORY}/../datasets/users.json")
format!("json://{CRATE_DIRECTORY}/../datasets/users.json")
}

#[tokio::test]
Expand Down
8 changes: 3 additions & 5 deletions rsql_drivers/src/jsonl/driver.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::error::Result;
use crate::polars::Connection;
use crate::Error::{ConversionError, InvalidUrl};
use crate::Error::ConversionError;
use async_trait::async_trait;
use polars::io::SerReader;
use polars::prelude::{IntoLazy, JsonLineReader};
Expand Down Expand Up @@ -29,9 +29,7 @@ impl crate::Driver for Driver {
parsed_url.query_pairs().into_owned().collect();

// Read Options
let file_name = query_parameters
.get("file")
.ok_or(InvalidUrl("Missing file parameter".to_string()))?;
let file_name = parsed_url.path();
let file = File::open(file_name)?;
let ignore_errors = query_parameters
.get("ignore_errors")
Expand Down Expand Up @@ -72,7 +70,7 @@ mod test {
const CRATE_DIRECTORY: &str = env!("CARGO_MANIFEST_DIR");

fn database_url() -> String {
format!("jsonl://?file={CRATE_DIRECTORY}/../datasets/users.jsonl")
format!("jsonl://{CRATE_DIRECTORY}/../datasets/users.jsonl")
}

#[tokio::test]
Expand Down
12 changes: 2 additions & 10 deletions rsql_drivers/src/parquet/driver.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
use crate::error::Result;
use crate::polars::Connection;
use crate::Error::InvalidUrl;
use async_trait::async_trait;
use polars::io::SerReader;
use polars::prelude::{IntoLazy, ParquetReader};
use polars_sql::SQLContext;
use std::collections::HashMap;
use std::fs::File;
use url::Url;

Expand All @@ -24,13 +22,7 @@ impl crate::Driver for Driver {
_password: Option<String>,
) -> Result<Box<dyn crate::Connection>> {
let parsed_url = Url::parse(url.as_str())?;
let query_parameters: HashMap<String, String> =
parsed_url.query_pairs().into_owned().collect();

// Read Options
let file_name = query_parameters
.get("file")
.ok_or(InvalidUrl("Missing file parameter".to_string()))?;
let file_name = parsed_url.path();
let file = File::open(file_name)?;

let data_frame = ParquetReader::new(file).set_rechunk(true).finish()?;
Expand All @@ -51,7 +43,7 @@ mod test {
const CRATE_DIRECTORY: &str = env!("CARGO_MANIFEST_DIR");

fn database_url() -> String {
format!("parquet://?file={CRATE_DIRECTORY}/../datasets/users.parquet")
format!("parquet://{CRATE_DIRECTORY}/../datasets/users.parquet")
}

#[tokio::test]
Expand Down
Loading

0 comments on commit 438142f

Please sign in to comment.