Commit 071d5d1

SamuelMarks authored and pacman82 committed
[README.md] Use Dockerfile syntax highlighting ; [*] typo / spelling fixes
1 parent c685891 · commit 071d5d1

22 files changed (+74 −78 lines)

.github/workflows/release.yml

+1 −1
@@ -115,7 +115,7 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v2

-      - name: Install latests rust toolchain
+      - name: Install latest rust toolchain
         uses: actions-rs/toolchain@v1
         with:
           toolchain: stable

.github/workflows/test.yml

+2 −2
@@ -31,7 +31,7 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v2

-      - name: Install latests rust toolchain
+      - name: Install latest rust toolchain
         uses: actions-rs/toolchain@v1
         with:
           toolchain: stable
@@ -66,7 +66,7 @@ jobs:
     runs-on: ubuntu-latest
     if: ${{ github.actor == 'dependabot[bot]' && github.event_name == 'pull_request'}}
     steps:
-      - name: Merge Depenabot Pull Request
+      - name: Merge Dependabot Pull Request
         run: gh pr merge --auto --merge "$PR_URL"
         env:
           PR_URL: ${{github.event.pull_request.html_url}}

README.md

+3 −3
@@ -93,13 +93,13 @@ If you have a rust tool chain installed, you can install this tool via cargo.
 cargo install odbc2parquet
 ```

-### Build in docker from stracth
+### Build in docker `from scratch`

-```
+```dockerfile
 FROM rust:alpine AS builder

 RUN apk add --no-cache musl-dev unixodbc-static
-# In addation to unixodbc you also want to install the database drivers you need.
+# In addition to unixodbc you also want to install the database drivers you need and `COPY` them over to the `runner`

 WORKDIR /src/odbc2parquet
 COPY . .
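
The hunk above shows only the opening lines of the README's multi-stage `from scratch` build. Purely for orientation, a minimal sketch of how such a Dockerfile typically continues follows; the `cargo build` invocation, the binary path, and the contents of the final `runner` stage are assumptions for illustration, not the README's actual text.

```dockerfile
# Sketch only: assumed continuation of the builder stage shown in the hunk above.
FROM rust:alpine AS builder
RUN apk add --no-cache musl-dev unixodbc-static
# Also install the ODBC drivers for the databases you plan to query (packages vary by database).
WORKDIR /src/odbc2parquet
COPY . .
RUN cargo build --release

# Final image assembled "from scratch": only the statically linked binary (and
# any drivers you choose to COPY over) end up in the runner stage.
FROM scratch AS runner
COPY --from=builder /src/odbc2parquet/target/release/odbc2parquet /odbc2parquet
ENTRYPOINT ["/odbc2parquet"]
```

Keeping the Rust toolchain in the `builder` stage and copying only the resulting binary into the final stage is what keeps the `from scratch` image small.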

docker-compose.yml

+1 −1
@@ -26,5 +26,5 @@ services:
     # Overrides default command so things don't shut down after the process ends.
     command: sleep infinity

-    # Allows accessing dbs through the port forwardings. dev container behaves like host for networking
+    # Allows accessing dbs through port forwarding. dev container behaves like host for networking
     network_mode: host

src/enum_args.rs

+1 −1
@@ -78,7 +78,7 @@ pub fn encoding_from_str(source: &str) -> Result<Encoding, Error> {
         "delta-byte-array" => Encoding::DELTA_BYTE_ARRAY,
         "delta-length-byte-array" => Encoding::DELTA_LENGTH_BYTE_ARRAY,
         "rle" => Encoding::RLE,
-        // ommitted, not a valid fallback encoding
+        // omitted, not a valid fallback encoding
         //"rle-dictionary" => Encoding::RLE_DICTIONARY,
         _ => bail!(
             "Sorry, I do not know a column encoding called '{}'.",

src/insert.rs

+2 −2
@@ -94,7 +94,7 @@ pub fn insert(odbc_env: &Environment, insert_opt: &InsertOpt) -> Result<(), Erro
             batch_size = num_rows;
             let descs = column_buf_desc.iter().map(|(desc, _)| *desc);
             // An inefficiency here: Currently `odbc-api`s interface forces us to prepare the
-            // statetement again, in case we need to allocate more row groups.
+            // statement again, in case we need to allocate more row groups.
             odbc_buffer = odbc_conn
                 .prepare(&insert_statement)?
                 .into_column_inserter(batch_size, descs)?;
@@ -573,7 +573,7 @@ fn parquet_type_to_odbc_buffer_desc(
                 BufferDesc::WText { max_str_len },
                 ByteArrayType::map_to_wtext(
                     move |text, index, odbc_buf| {
-                        // This allocation is not strictly neccessary, we could just as
+                        // This allocation is not strictly necessary, we could just as
                         // write directly into the buffer or at least preallocate the
                         // U16String.
                         let value = U16String::from_str(

src/main.rs

+17 −17
@@ -23,7 +23,7 @@ use clap_complete::{generate, Shell};
 #[derive(Parser)]
 #[clap(version)]
 struct Cli {
-    /// Only print errors to standard error stream. Supresses warnings and all other log levels
+    /// Only print errors to standard error stream. Suppresses warnings and all other log levels
     /// independent of the verbose mode.
     #[arg(short = 'q', long)]
     quiet: bool,
@@ -133,10 +133,10 @@ pub struct QueryOpt {
     row_groups_per_file: u32,
     /// Then the size of the currently written parquet files goes beyond this threshold the current
     /// row group will be finished and then the file will be closed. So the file will be somewhat
-    /// larger than the threshold. All furthrer row groups will be written into new files to which
+    /// larger than the threshold. All further row groups will be written into new files to which
     /// the threshold size limit is applied as well. If this option is not set, no size threshold is
     /// applied. If the threshold is applied the first file name will have the suffix `_01`, the
-    /// second the suffix `_2` and so on. Therfore the first resulting file will be called e.g.
+    /// second the suffix `_2` and so on. Therefore, the first resulting file will be called e.g.
     /// `out_1.par`, if `out.par` has been specified as the output argument.
     /// Also note that this option will not act as an upper bound. It will act as a lower bound for
     /// all but the last file, all others however will not be larger than this threshold by more
@@ -152,16 +152,16 @@ pub struct QueryOpt {
     ///
     /// This is useful in situations there ODBC would require us to allocate a ridiculous amount of
     /// memory for a single element of a row. Usually this is the case because the Database schema
-    /// has been ill defined (like choosing `TEXT` for a user name, although a users name is
+    /// has been ill-defined (like choosing `TEXT` for a username, although a users name is
     /// unlikely to be several GB long). Another situation is that the ODBC driver is not good at
-    /// reporting the maximum length and therfore reports a really large value. The third option is
+    /// reporting the maximum length and therefore reports a really large value. The third option is
     /// of course that your values are actually large. In this case you just need a ton of memory.
     /// You can use the batch size limit though to retrieve less at once. For binary columns this is
-    /// a maximum element length in bytes. For text columns it depends wether UTF-8 or UTF-16
-    /// encoding is used. See documentation of the `encondig` option. In case of UTF-8 this is the
+    /// a maximum element length in bytes. For text columns it depends on whether UTF-8 or UTF-16
+    /// encoding is used. See documentation of the `encoding` option. In case of UTF-8 this is the
     /// maximum length in bytes for an element. In case of UTF-16 the binary length is multiplied by
     /// two. This allows domain experts to configure limits (roughly) in the domain of how many
-    /// letters do I expect in this column, rather than to care about wether the command is executed
+    /// letters do I expect in this column, rather than to care about whether the command is executed
     /// on Linux or Windows. The encoding of the column on the Database does not matter for this
     /// setting or determining buffer sizes.
     #[arg(long)]
@@ -195,7 +195,7 @@ pub struct QueryOpt {
     /// been introduced in an effort to increase the compatibility of the output with Apache Spark.
     #[clap(long)]
     prefer_varbinary: bool,
-    /// Specify the fallback encoding of the parquet output column. You can parse mutliple values
+    /// Specify the fallback encoding of the parquet output column. You can parse multiple values
     /// in format `COLUMN:ENCODING`. `ENCODING` must be one of: `plain`, `delta-binary-packed`,
     /// `delta-byte-array`, `delta-length-byte-array` or `rle`.
     #[arg(
@@ -204,18 +204,18 @@ pub struct QueryOpt {
         action = ArgAction::Append
     )]
     parquet_column_encoding: Vec<(String, Encoding)>,
-    /// Tells the odbc2parquet, that the ODBC driver does not support binding 64 Bit integers (aka
+    /// Tells the odbc2parquet, that the ODBC driver does not support binding 64-Bit integers (aka
     /// S_C_BIGINT in ODBC speak). This will cause the odbc2parquet to query large integers as text
-    /// instead and convert them to 64 Bit integers itself. Setting this flag will not affect the
-    /// output, but may incurr a performance penality. In case you are using an Oracle Database it
-    /// can make queries work which did not before, because Oracle does not support 64 Bit integers.
+    /// instead and convert them to 64-Bit integers itself. Setting this flag will not affect the
+    /// output, but may incur a performance penalty. In case you are using an Oracle Database it
+    /// can make queries work which did not before, because Oracle does not support 64-Bit integers.
     #[clap(long)]
     driver_does_not_support_64bit_integers: bool,
     /// The IBM DB2 Linux ODBC drivers have been reported to return memory garbage instead of
     /// indicators for the string length. Setting this flag will cause `odbc2parquet` to rely on
     /// terminating zeroes, instead of indicators. This prevents `odbc2parquet` from disambiguating
     /// between empty strings and `NULL``. As a side effect of this workaround empty might be mapped
-    /// to NULL. Currently this workaround is only active if UTF-8 is used. This should be the case
+    /// to NULL. Currently, this workaround is only active if UTF-8 is used. This should be the case
     /// on non-window platforms by default, or if the `System` encoding is active.
     #[clap(long)]
     avoid_decimal: bool,
@@ -236,7 +236,7 @@ pub struct QueryOpt {
     output: IoArg,
     /// Query executed against the ODBC data source. Question marks (`?`) can be used as
     /// placeholders for positional parameters. E.g. "SELECT Name FROM Employees WHERE salary > ?;".
-    /// Instead of passing a query verbatum, you may pass a plain dash (`-`), to indicate that the
+    /// Instead of passing a query verbatim, you may pass a plain dash (`-`), to indicate that the
     /// query should be read from standard input. In this case the entire input until EOF will be
     /// considered the query.
     query: String,
@@ -251,7 +251,7 @@ pub struct InsertOpt {
     connect_opts: ConnectOpts,
     /// Encoding used for transferring character data to the database.
     ///
-    /// `Utf16`: Use 16Bit characters to send text text to the database, which implies the using
+    /// `Utf16`: Use 16Bit characters to send text to the database, which implies the using
     /// UTF-16 encoding. This should work well independent of the system configuration, but requires
     /// additional work since text is always stored as UTF-8 in parquet.
     ///
@@ -409,7 +409,7 @@ fn open_connection<'e>(
         DriverCompleteOption::NoPrompt
     };

-    // We are not interessted in the completed connection string, beyond creating a connection, so
+    // We are not interested in the completed connection string, beyond creating a connection, so
    // we pass an empty buffer.
     let mut completed_connection_string = OutputStringBuffer::empty();

src/parquet_buffer.rs

+8 −8
@@ -62,15 +62,15 @@ impl ParquetBuffer {
         source: impl Iterator<Item = Option<i128>>,
         length_in_bytes: usize,
     ) -> Result<(), Error> {
-        self.write_optional_any_falliable(cw, source.map(Ok), |num| {
+        self.write_optional_any_fallible(cw, source.map(Ok), |num| {
             let out = num.to_be_bytes()[(16 - length_in_bytes)..].to_owned();
             // Vec<u8> -> ByteArray -> FixedLenByteArray
             let out: ByteArray = out.into();
             out.into()
         })
     }

-    fn write_optional_any_falliable<T, S>(
+    fn write_optional_any_fallible<T, S>(
         &mut self,
         cw: &mut ColumnWriterImpl<T>,
         source: impl Iterator<Item = Result<Option<S>, Error>>,
@@ -98,7 +98,7 @@ impl ParquetBuffer {
     /// Write to a parquet buffer using an iterator over optional source items. A default
     /// transformation, defined via the `IntoPhysical` trait is used to transform the items into
     /// buffer elements.
-    pub fn write_optional_falliable<T>(
+    pub fn write_optional_fallible<T>(
         &mut self,
         cw: &mut ColumnWriterImpl<T>,
         source: impl Iterator<Item = Result<Option<T::T>, Error>>,
@@ -107,7 +107,7 @@ impl ParquetBuffer {
         T: DataType,
         T::T: BufferedDataType,
     {
-        self.write_optional_any_falliable(cw, source, |s| s)
+        self.write_optional_any_fallible(cw, source, |s| s)
     }

     /// Write to a parquet buffer using an iterator over optional source items. A default
@@ -122,7 +122,7 @@ impl ParquetBuffer {
         T: DataType,
         T::T: BufferedDataType,
     {
-        self.write_optional_any_falliable(cw, source.map(Ok), |s| s)
+        self.write_optional_any_fallible(cw, source.map(Ok), |s| s)
     }

     /// Iterate over the elements of a column reader over an optional column.
@@ -144,7 +144,7 @@ impl ParquetBuffer {
         let (_complete_rec, _num_val, _num_lvl) =
             cr.read_records(batch_size, Some(def_levels), None, values)?;
         // Strip mutability form the element of values, so we can use it in scan, there we only want
-        // to mutate which part of values we see, not the elements of values themselfes.
+        // to mutate which part of values we see, not the elements of values themselves.
         let values = values.as_slice();
         let it = def_levels.iter().scan(values, |values, def| match def {
             0 => Some(None),
@@ -161,7 +161,7 @@ impl ParquetBuffer {

     /// The elements of a column reader over a required column. Contrary to its counterpart
     /// [`Self::read_optional`] this does not return an iterator but a slice. This allows for a
-    /// memcopy into the ODBC buffer, if no transformation is required. Also since there are no
+    /// memcpy into the ODBC buffer, if no transformation is required. Also since there are no
     /// NULL values, one does not need to now the def_levels in order to make sense of the values.
     pub fn read_required<T>(
         &mut self,
@@ -182,7 +182,7 @@ impl ParquetBuffer {
 }

 pub trait BufferedDataType: Sized {
-    /// The tuple returned is (Values, Definiton levels)
+    /// The tuple returned is (Values, Definition levels)
     fn mut_buf(buffer: &mut ParquetBuffer) -> (&mut Vec<Self>, &mut Vec<i16>);
 }

src/query.rs

+1 −1
@@ -64,7 +64,7 @@ pub fn query(environment: &Environment, opt: QueryOpt) -> Result<(), Error> {

     let odbc_conn = open_connection(environment, &connect_opts)?;
     let db_name = odbc_conn.database_management_system_name()?;
-    info!("Database Managment System Name: {db_name}");
+    info!("Database Management System Name: {db_name}");

     let parquet_format_options = ParquetWriterOptions {
         column_compression_default: column_compression_default

src/query/batch_size_limit.rs

+1 −1
@@ -54,7 +54,7 @@ impl FileSizeLimit {
     }
 }

-/// Batches can be limitied by either number of rows or the total size of the rows in the batch in
+/// Batches can be limited by either number of rows or the total size of the rows in the batch in
 /// bytes.
 pub enum BatchSizeLimit {
     Rows(usize),

src/query/boolean.rs

+2 −4
@@ -21,10 +21,8 @@ pub struct Boolean {
 }

 impl Boolean {
-    pub fn new(repetetion: Repetition) -> Self {
-        Self {
-            repetition: repetetion,
-        }
+    pub fn new(repetition: Repetition) -> Self {
+        Self { repetition }
     }
 }

src/query/column_strategy.rs

+9 −9
@@ -22,7 +22,7 @@ use crate::{
     binary::Binary,
     boolean::Boolean,
     date::Date,
-    decimal::decmial_fetch_strategy,
+    decimal::decimal_fetch_strategy,
     identical::{fetch_identical, fetch_identical_with_logical_type},
     text::text_strategy,
     time::time_from_text,
@@ -33,7 +33,7 @@ use crate::{

 /// Decisions on how to handle a particular column of the ODBC result set. What buffer to bind to it
 /// for fetching, into what parquet type it is going to be translated and how to translate it from
-/// the odbc buffer elements to afformentioned parquet type.
+/// the odbc buffer elements to aforementioned parquet type.
 pub trait ColumnStrategy {
     /// Parquet column type used in parquet schema
     fn parquet_type(&self, name: &str) -> Type;
@@ -59,17 +59,17 @@ pub struct MappingOptions<'a> {
     pub column_length_limit: Option<usize>,
 }

-/// Fetch strategies based on column description and enviroment arguments `MappingOptions`.
+/// Fetch strategies based on column description and environment arguments `MappingOptions`.
 ///
 /// * `cd`: Description of the column for which we need to pick a fetch strategy
 /// * `name`: Name of the column which we fetch
 /// * `mapping_options`: Options describing the environment and desired outcome which are also
 ///   influencing the decision of what to pick.
-/// * `cursor`: Used to query additional inforamtion about the columns, not contained in the initial
+/// * `cursor`: Used to query additional information about the columns, not contained in the initial
 ///   column description. Passing them here, allows us to query these only lazily then needed. ODBC
 ///   calls can be quite costly, although an argument could be made, that these times do not matter
 ///   within the runtime of the odbc2parquet command line tool.
-/// * `index`: One based column index. Usefull if additional metainformation needs to be acquired
+/// * `index`: One based column index. Useful if additional meta-information needs to be acquired
 ///   using `cursor`
 pub fn strategy_from_column_description(
     cd: &ColumnDescription,
@@ -87,7 +87,7 @@ pub fn strategy_from_column_description(
         column_length_limit,
     } = mapping_options;

-    // Convert ODBC nullability to Parquet repetition. If the ODBC driver can not tell wether a
+    // Convert ODBC nullability to Parquet repetition. If the ODBC driver can not tell whether a
     // given column in the result may contain NULLs we assume it does.
     let repetition = match cd.nullability {
         Nullability::Nullable | Nullability::Unknown => Repetition::OPTIONAL,
@@ -138,7 +138,7 @@ pub fn strategy_from_column_description(
         ),
         DataType::Date => Box::new(Date::new(repetition)),
         DataType::Numeric { scale, precision } | DataType::Decimal { scale, precision } => {
-            decmial_fetch_strategy(
+            decimal_fetch_strategy(
                 is_optional,
                 scale as i32,
                 precision.try_into().unwrap(),
@@ -208,9 +208,9 @@ pub fn strategy_from_column_description(
         } => {
             if db_name == "Microsoft SQL Server" {
                 // -155 is an indication for "Timestamp with timezone" on Microsoft SQL Server. We
-                // give it special treatment so users can sort by time instead lexographically.
+                // give it special treatment so users can sort by time instead lexicographically.
                 info!(
-                    "Detected Timestamp type with time zone. Appyling instant semantics for \
+                    "Detected Timestamp type with time zone. Applying instant semantics for \
                     column {}.",
                     cd.name_to_string()?
                 );

src/query/current_file.rs

+2 −2
@@ -15,9 +15,9 @@ pub struct CurrentFile {
     writer: SerializedFileWriter<Box<dyn Write + Send>>,
     /// Path to the file currently being written to.
     path: TempPath,
-    /// Keep track of curret file size so we can split it, should it get too large.
+    /// Keep track of current file size so we can split it, should it get too large.
     file_size: ByteSize,
-    /// Keep track of the total number of rows writte into the file so far.
+    /// Keep track of the total number of rows written into the file so far.
     total_num_rows: u64,
 }

src/query/date.rs

+2 −4
@@ -20,10 +20,8 @@ pub struct Date {
 }

 impl Date {
-    pub fn new(repetetion: Repetition) -> Self {
-        Self {
-            repetition: repetetion,
-        }
+    pub fn new(repetition: Repetition) -> Self {
+        Self { repetition }
     }
 }
