Commit 071d5d1

SamuelMarks authored and pacman82 committed
[README.md] Use Dockerfile syntax highlighting ; [*] typo / spelling fixes
1 parent c685891 · commit 071d5d1

22 files changed (+74 −78 lines)

.github/workflows/release.yml

+1 −1
@@ -115,7 +115,7 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v2

-      - name: Install latests rust toolchain
+      - name: Install latest rust toolchain
         uses: actions-rs/toolchain@v1
         with:
           toolchain: stable

.github/workflows/test.yml

+2 −2
@@ -31,7 +31,7 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v2

-      - name: Install latests rust toolchain
+      - name: Install latest rust toolchain
         uses: actions-rs/toolchain@v1
         with:
           toolchain: stable
@@ -66,7 +66,7 @@ jobs:
     runs-on: ubuntu-latest
     if: ${{ github.actor == 'dependabot[bot]' && github.event_name == 'pull_request'}}
     steps:
-      - name: Merge Depenabot Pull Request
+      - name: Merge Dependabot Pull Request
         run: gh pr merge --auto --merge "$PR_URL"
         env:
           PR_URL: ${{github.event.pull_request.html_url}}

README.md

+3 −3
@@ -93,13 +93,13 @@ If you have a rust tool chain installed, you can install this tool via cargo.
 cargo install odbc2parquet
 ```

-### Build in docker from stracth
+### Build in docker `from scratch`

-```
+```dockerfile
 FROM rust:alpine AS builder

 RUN apk add --no-cache musl-dev unixodbc-static
-# In addation to unixodbc you also want to install the database drivers you need.
+# In addition to unixodbc you also want to install the database drivers you need and `COPY` them over to the `runner`

 WORKDIR /src/odbc2parquet
 COPY . .
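
The hunk above shows only the opening lines of the README's multi-stage `from scratch` build. Purely for orientation, a minimal sketch of how such a Dockerfile typically continues follows; the `cargo build` invocation, the binary path, and the contents of the final `runner` stage are assumptions for illustration, not the README's actual text.

```dockerfile
# Sketch only: assumed continuation of the builder stage shown in the hunk above.
FROM rust:alpine AS builder
RUN apk add --no-cache musl-dev unixodbc-static
# Also install the ODBC drivers for the databases you plan to query (packages vary by database).
WORKDIR /src/odbc2parquet
COPY . .
RUN cargo build --release

# Final image assembled "from scratch": only the statically linked binary (and
# any drivers you choose to COPY over) end up in the runner stage.
FROM scratch AS runner
COPY --from=builder /src/odbc2parquet/target/release/odbc2parquet /odbc2parquet
ENTRYPOINT ["/odbc2parquet"]
```

Keeping the Rust toolchain in the `builder` stage and copying only the resulting binary into the final stage is what keeps the `from scratch` image small.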

docker-compose.yml

+1 −1
@@ -26,5 +26,5 @@ services:
     # Overrides default command so things don't shut down after the process ends.
     command: sleep infinity

-    # Allows accessing dbs through the port forwardings. dev container behaves like host for networking
+    # Allows accessing dbs through port forwarding. dev container behaves like host for networking
     network_mode: host

src/enum_args.rs

+1 −1
@@ -78,7 +78,7 @@ pub fn encoding_from_str(source: &str) -> Result<Encoding, Error> {
         "delta-byte-array" => Encoding::DELTA_BYTE_ARRAY,
         "delta-length-byte-array" => Encoding::DELTA_LENGTH_BYTE_ARRAY,
         "rle" => Encoding::RLE,
-        // ommitted, not a valid fallback encoding
+        // omitted, not a valid fallback encoding
         //"rle-dictionary" => Encoding::RLE_DICTIONARY,
         _ => bail!(
             "Sorry, I do not know a column encoding called '{}'.",

src/insert.rs

+2 −2
@@ -94,7 +94,7 @@ pub fn insert(odbc_env: &Environment, insert_opt: &InsertOpt) -> Result<(), Erro
             batch_size = num_rows;
             let descs = column_buf_desc.iter().map(|(desc, _)| *desc);
             // An inefficiency here: Currently `odbc-api`s interface forces us to prepare the
-            // statetement again, in case we need to allocate more row groups.
+            // statement again, in case we need to allocate more row groups.
             odbc_buffer = odbc_conn
                 .prepare(&insert_statement)?
                 .into_column_inserter(batch_size, descs)?;
@@ -573,7 +573,7 @@ fn parquet_type_to_odbc_buffer_desc(
                 BufferDesc::WText { max_str_len },
                 ByteArrayType::map_to_wtext(
                     move |text, index, odbc_buf| {
-                        // This allocation is not strictly neccessary, we could just as
+                        // This allocation is not strictly necessary, we could just as
                         // write directly into the buffer or at least preallocate the
                         // U16String.
                         let value = U16String::from_str(

src/main.rs

+17 −17
@@ -23,7 +23,7 @@ use clap_complete::{generate, Shell};
 #[derive(Parser)]
 #[clap(version)]
 struct Cli {
-    /// Only print errors to standard error stream. Supresses warnings and all other log levels
+    /// Only print errors to standard error stream. Suppresses warnings and all other log levels
     /// independent of the verbose mode.
     #[arg(short = 'q', long)]
     quiet: bool,
@@ -133,10 +133,10 @@ pub struct QueryOpt {
     row_groups_per_file: u32,
     /// Then the size of the currently written parquet files goes beyond this threshold the current
     /// row group will be finished and then the file will be closed. So the file will be somewhat
-    /// larger than the threshold. All furthrer row groups will be written into new files to which
+    /// larger than the threshold. All further row groups will be written into new files to which
     /// the threshold size limit is applied as well. If this option is not set, no size threshold is
     /// applied. If the threshold is applied the first file name will have the suffix `_01`, the
-    /// second the suffix `_2` and so on. Therfore the first resulting file will be called e.g.
+    /// second the suffix `_2` and so on. Therefore, the first resulting file will be called e.g.
     /// `out_1.par`, if `out.par` has been specified as the output argument.
     /// Also note that this option will not act as an upper bound. It will act as a lower bound for
     /// all but the last file, all others however will not be larger than this threshold by more
@@ -152,16 +152,16 @@ pub struct QueryOpt {
     ///
     /// This is useful in situations there ODBC would require us to allocate a ridiculous amount of
     /// memory for a single element of a row. Usually this is the case because the Database schema
-    /// has been ill defined (like choosing `TEXT` for a user name, although a users name is
+    /// has been ill-defined (like choosing `TEXT` for a username, although a users name is
     /// unlikely to be several GB long). Another situation is that the ODBC driver is not good at
-    /// reporting the maximum length and therfore reports a really large value. The third option is
+    /// reporting the maximum length and therefore reports a really large value. The third option is
     /// of course that your values are actually large. In this case you just need a ton of memory.
     /// You can use the batch size limit though to retrieve less at once. For binary columns this is
-    /// a maximum element length in bytes. For text columns it depends wether UTF-8 or UTF-16
-    /// encoding is used. See documentation of the `encondig` option. In case of UTF-8 this is the
+    /// a maximum element length in bytes. For text columns it depends on whether UTF-8 or UTF-16
+    /// encoding is used. See documentation of the `encoding` option. In case of UTF-8 this is the
     /// maximum length in bytes for an element. In case of UTF-16 the binary length is multiplied by
     /// two. This allows domain experts to configure limits (roughly) in the domain of how many
-    /// letters do I expect in this column, rather than to care about wether the command is executed
+    /// letters do I expect in this column, rather than to care about whether the command is executed
     /// on Linux or Windows. The encoding of the column on the Database does not matter for this
     /// setting or determining buffer sizes.
     #[arg(long)]
@@ -195,7 +195,7 @@ pub struct QueryOpt {
     /// been introduced in an effort to increase the compatibility of the output with Apache Spark.
     #[clap(long)]
     prefer_varbinary: bool,
-    /// Specify the fallback encoding of the parquet output column. You can parse mutliple values
+    /// Specify the fallback encoding of the parquet output column. You can parse multiple values
     /// in format `COLUMN:ENCODING`. `ENCODING` must be one of: `plain`, `delta-binary-packed`,
     /// `delta-byte-array`, `delta-length-byte-array` or `rle`.
     #[arg(
@@ -204,18 +204,18 @@ pub struct QueryOpt {
         action = ArgAction::Append
     )]
     parquet_column_encoding: Vec<(String, Encoding)>,
-    /// Tells the odbc2parquet, that the ODBC driver does not support binding 64 Bit integers (aka
+    /// Tells the odbc2parquet, that the ODBC driver does not support binding 64-Bit integers (aka
     /// S_C_BIGINT in ODBC speak). This will cause the odbc2parquet to query large integers as text
-    /// instead and convert them to 64 Bit integers itself. Setting this flag will not affect the
-    /// output, but may incurr a performance penality. In case you are using an Oracle Database it
-    /// can make queries work which did not before, because Oracle does not support 64 Bit integers.
+    /// instead and convert them to 64-Bit integers itself. Setting this flag will not affect the
+    /// output, but may incur a performance penalty. In case you are using an Oracle Database it
+    /// can make queries work which did not before, because Oracle does not support 64-Bit integers.
     #[clap(long)]
     driver_does_not_support_64bit_integers: bool,
     /// The IBM DB2 Linux ODBC drivers have been reported to return memory garbage instead of
     /// indicators for the string length. Setting this flag will cause `odbc2parquet` to rely on
     /// terminating zeroes, instead of indicators. This prevents `odbc2parquet` from disambiguating
     /// between empty strings and `NULL``. As a side effect of this workaround empty might be mapped
-    /// to NULL. Currently this workaround is only active if UTF-8 is used. This should be the case
+    /// to NULL. Currently, this workaround is only active if UTF-8 is used. This should be the case
     /// on non-window platforms by default, or if the `System` encoding is active.
     #[clap(long)]
     avoid_decimal: bool,
@@ -236,7 +236,7 @@ pub struct QueryOpt {
     output: IoArg,
     /// Query executed against the ODBC data source. Question marks (`?`) can be used as
     /// placeholders for positional parameters. E.g. "SELECT Name FROM Employees WHERE salary > ?;".
-    /// Instead of passing a query verbatum, you may pass a plain dash (`-`), to indicate that the
+    /// Instead of passing a query verbatim, you may pass a plain dash (`-`), to indicate that the
     /// query should be read from standard input. In this case the entire input until EOF will be
     /// considered the query.
     query: String,
@@ -251,7 +251,7 @@ pub struct InsertOpt {
     connect_opts: ConnectOpts,
     /// Encoding used for transferring character data to the database.
     ///
-    /// `Utf16`: Use 16Bit characters to send text text to the database, which implies the using
+    /// `Utf16`: Use 16Bit characters to send text to the database, which implies the using
     /// UTF-16 encoding. This should work well independent of the system configuration, but requires
     /// additional work since text is always stored as UTF-8 in parquet.
     ///
@@ -409,7 +409,7 @@ fn open_connection<'e>(
         DriverCompleteOption::NoPrompt
     };

-    // We are not interessted in the completed connection string, beyond creating a connection, so
+    // We are not interested in the completed connection string, beyond creating a connection, so
    // we pass an empty buffer.
     let mut completed_connection_string = OutputStringBuffer::empty();

src/parquet_buffer.rs

+8 −8
@@ -62,15 +62,15 @@ impl ParquetBuffer {
         source: impl Iterator<Item = Option<i128>>,
         length_in_bytes: usize,
     ) -> Result<(), Error> {
-        self.write_optional_any_falliable(cw, source.map(Ok), |num| {
+        self.write_optional_any_fallible(cw, source.map(Ok), |num| {
             let out = num.to_be_bytes()[(16 - length_in_bytes)..].to_owned();
             // Vec<u8> -> ByteArray -> FixedLenByteArray
             let out: ByteArray = out.into();
             out.into()
         })
     }

-    fn write_optional_any_falliable<T, S>(
+    fn write_optional_any_fallible<T, S>(
         &mut self,
         cw: &mut ColumnWriterImpl<T>,
         source: impl Iterator<Item = Result<Option<S>, Error>>,
@@ -98,7 +98,7 @@ impl ParquetBuffer {
     /// Write to a parquet buffer using an iterator over optional source items. A default
     /// transformation, defined via the `IntoPhysical` trait is used to transform the items into
     /// buffer elements.
-    pub fn write_optional_falliable<T>(
+    pub fn write_optional_fallible<T>(
         &mut self,
         cw: &mut ColumnWriterImpl<T>,
         source: impl Iterator<Item = Result<Option<T::T>, Error>>,
@@ -107,7 +107,7 @@ impl ParquetBuffer {
         T: DataType,
         T::T: BufferedDataType,
     {
-        self.write_optional_any_falliable(cw, source, |s| s)
+        self.write_optional_any_fallible(cw, source, |s| s)
     }

     /// Write to a parquet buffer using an iterator over optional source items. A default
@@ -122,7 +122,7 @@ impl ParquetBuffer {
         T: DataType,
         T::T: BufferedDataType,
     {
-        self.write_optional_any_falliable(cw, source.map(Ok), |s| s)
+        self.write_optional_any_fallible(cw, source.map(Ok), |s| s)
     }

     /// Iterate over the elements of a column reader over an optional column.
@@ -144,7 +144,7 @@ impl ParquetBuffer {
         let (_complete_rec, _num_val, _num_lvl) =
             cr.read_records(batch_size, Some(def_levels), None, values)?;
         // Strip mutability form the element of values, so we can use it in scan, there we only want
-        // to mutate which part of values we see, not the elements of values themselfes.
+        // to mutate which part of values we see, not the elements of values themselves.
         let values = values.as_slice();
         let it = def_levels.iter().scan(values, |values, def| match def {
             0 => Some(None),
@@ -161,7 +161,7 @@ impl ParquetBuffer {

     /// The elements of a column reader over a required column. Contrary to its counterpart
     /// [`Self::read_optional`] this does not return an iterator but a slice. This allows for a
-    /// memcopy into the ODBC buffer, if no transformation is required. Also since there are no
+    /// memcpy into the ODBC buffer, if no transformation is required. Also since there are no
     /// NULL values, one does not need to now the def_levels in order to make sense of the values.
     pub fn read_required<T>(
         &mut self,
@@ -182,7 +182,7 @@ impl ParquetBuffer {
 }

 pub trait BufferedDataType: Sized {
-    /// The tuple returned is (Values, Definiton levels)
+    /// The tuple returned is (Values, Definition levels)
     fn mut_buf(buffer: &mut ParquetBuffer) -> (&mut Vec<Self>, &mut Vec<i16>);
 }

src/query.rs

+1 −1
@@ -64,7 +64,7 @@ pub fn query(environment: &Environment, opt: QueryOpt) -> Result<(), Error> {

     let odbc_conn = open_connection(environment, &connect_opts)?;
     let db_name = odbc_conn.database_management_system_name()?;
-    info!("Database Managment System Name: {db_name}");
+    info!("Database Management System Name: {db_name}");

     let parquet_format_options = ParquetWriterOptions {
         column_compression_default: column_compression_default

src/query/batch_size_limit.rs

+1 −1
@@ -54,7 +54,7 @@ impl FileSizeLimit {
     }
 }

-/// Batches can be limitied by either number of rows or the total size of the rows in the batch in
+/// Batches can be limited by either number of rows or the total size of the rows in the batch in
 /// bytes.
 pub enum BatchSizeLimit {
     Rows(usize),

src/query/boolean.rs

+2 −4
@@ -21,10 +21,8 @@ pub struct Boolean {
 }

 impl Boolean {
-    pub fn new(repetetion: Repetition) -> Self {
-        Self {
-            repetition: repetetion,
-        }
+    pub fn new(repetition: Repetition) -> Self {
+        Self { repetition }
     }
 }

src/query/column_strategy.rs

+9 −9
@@ -22,7 +22,7 @@ use crate::{
     binary::Binary,
     boolean::Boolean,
     date::Date,
-    decimal::decmial_fetch_strategy,
+    decimal::decimal_fetch_strategy,
     identical::{fetch_identical, fetch_identical_with_logical_type},
     text::text_strategy,
     time::time_from_text,
@@ -33,7 +33,7 @@ use crate::{

 /// Decisions on how to handle a particular column of the ODBC result set. What buffer to bind to it
 /// for fetching, into what parquet type it is going to be translated and how to translate it from
-/// the odbc buffer elements to afformentioned parquet type.
+/// the odbc buffer elements to aforementioned parquet type.
 pub trait ColumnStrategy {
     /// Parquet column type used in parquet schema
     fn parquet_type(&self, name: &str) -> Type;
@@ -59,17 +59,17 @@ pub struct MappingOptions<'a> {
     pub column_length_limit: Option<usize>,
 }

-/// Fetch strategies based on column description and enviroment arguments `MappingOptions`.
+/// Fetch strategies based on column description and environment arguments `MappingOptions`.
 ///
 /// * `cd`: Description of the column for which we need to pick a fetch strategy
 /// * `name`: Name of the column which we fetch
 /// * `mapping_options`: Options describing the environment and desired outcome which are also
 ///   influencing the decision of what to pick.
-/// * `cursor`: Used to query additional inforamtion about the columns, not contained in the initial
+/// * `cursor`: Used to query additional information about the columns, not contained in the initial
 ///   column description. Passing them here, allows us to query these only lazily then needed. ODBC
 ///   calls can be quite costly, although an argument could be made, that these times do not matter
 ///   within the runtime of the odbc2parquet command line tool.
-/// * `index`: One based column index. Usefull if additional metainformation needs to be acquired
+/// * `index`: One based column index. Useful if additional meta-information needs to be acquired
 ///   using `cursor`
 pub fn strategy_from_column_description(
     cd: &ColumnDescription,
@@ -87,7 +87,7 @@ pub fn strategy_from_column_description(
         column_length_limit,
     } = mapping_options;

-    // Convert ODBC nullability to Parquet repetition. If the ODBC driver can not tell wether a
+    // Convert ODBC nullability to Parquet repetition. If the ODBC driver can not tell whether a
     // given column in the result may contain NULLs we assume it does.
     let repetition = match cd.nullability {
         Nullability::Nullable | Nullability::Unknown => Repetition::OPTIONAL,
@@ -138,7 +138,7 @@ pub fn strategy_from_column_description(
         ),
         DataType::Date => Box::new(Date::new(repetition)),
         DataType::Numeric { scale, precision } | DataType::Decimal { scale, precision } => {
-            decmial_fetch_strategy(
+            decimal_fetch_strategy(
                 is_optional,
                 scale as i32,
                 precision.try_into().unwrap(),
@@ -208,9 +208,9 @@ pub fn strategy_from_column_description(
         } => {
             if db_name == "Microsoft SQL Server" {
                 // -155 is an indication for "Timestamp with timezone" on Microsoft SQL Server. We
-                // give it special treatment so users can sort by time instead lexographically.
+                // give it special treatment so users can sort by time instead lexicographically.
                 info!(
-                    "Detected Timestamp type with time zone. Appyling instant semantics for \
+                    "Detected Timestamp type with time zone. Applying instant semantics for \
                     column {}.",
                     cd.name_to_string()?
                 );

src/query/current_file.rs

+2 −2
@@ -15,9 +15,9 @@ pub struct CurrentFile {
     writer: SerializedFileWriter<Box<dyn Write + Send>>,
     /// Path to the file currently being written to.
     path: TempPath,
-    /// Keep track of curret file size so we can split it, should it get too large.
+    /// Keep track of current file size so we can split it, should it get too large.
     file_size: ByteSize,
-    /// Keep track of the total number of rows writte into the file so far.
+    /// Keep track of the total number of rows written into the file so far.
     total_num_rows: u64,
 }

src/query/date.rs

+2 −4
@@ -20,10 +20,8 @@ pub struct Date {
 }

 impl Date {
-    pub fn new(repetetion: Repetition) -> Self {
-        Self {
-            repetition: repetetion,
-        }
+    pub fn new(repetition: Repetition) -> Self {
+        Self { repetition }
     }
 }
