Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: lancedb/lance
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: v0.17.0-beta.12
Choose a base ref
...
head repository: lancedb/lance
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: v0.17.0-beta.13
Choose a head ref
  • 2 commits
  • 3 files changed
  • 2 contributors

Commits on Sep 4, 2024

  1. chore: don't sort the data when optimizing the FTS index (#2818)

    The FTS index doesn't require the data be sorted, so just don't do it to
    save resources
    
    Signed-off-by: BubbleCal <bubble-cal@outlook.com>
    BubbleCal authored Sep 4, 2024

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature.
    Copy the full SHA
    59885e3 View commit details
  2. feat: make default I/O buffer size configurable via env var (#2826)

    westonpace authored Sep 4, 2024

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature.
    Copy the full SHA
    2a9f8b5 View commit details
Showing with 14 additions and 9 deletions.
  1. +5 −2 rust/lance/src/dataset/scanner.rs
  2. +5 −3 rust/lance/src/index/append.rs
  3. +4 −4 rust/lance/src/io/exec/take.rs
7 changes: 5 additions & 2 deletions rust/lance/src/dataset/scanner.rs
Original file line number Diff line number Diff line change
@@ -78,7 +78,10 @@ lazy_static::lazy_static! {
// We want to support ~256 concurrent reads to maximize throughput on cloud storage systems
// Our typical page size is 8MiB (though not all reads are this large yet due to offset buffers, validity buffers, etc.)
// So we want to support 256 * 8MiB ~= 2GiB of queued reads
pub const DEFAULT_IO_BUFFER_SIZE: u64 = 2 * 1024 * 1024 * 1024;
lazy_static::lazy_static! {
pub static ref DEFAULT_IO_BUFFER_SIZE: u64 = std::env::var("LANCE_DEFAULT_IO_BUFFER_SIZE")
.map(|val| val.parse().unwrap()).unwrap_or(2 * 1024 * 1024 * 1024);
}

/// Defines an ordering for a single column
///
@@ -1470,7 +1473,7 @@ impl Scanner {
}

fn get_io_buffer_size(&self) -> u64 {
self.io_buffer_size.unwrap_or(DEFAULT_IO_BUFFER_SIZE)
self.io_buffer_size.unwrap_or(*DEFAULT_IO_BUFFER_SIZE)
}

/// Create an Execution plan with a scan node
8 changes: 5 additions & 3 deletions rust/lance/src/index/append.rs
Original file line number Diff line number Diff line change
@@ -85,12 +85,14 @@ pub async fn merge_indices<'a>(
.await?;

let mut scanner = dataset.scan();
let orodering = match index.index_type() {
IndexType::Inverted => None,
_ => Some(vec![ColumnOrdering::asc_nulls_first(column.name.clone())]),
};
scanner
.with_fragments(unindexed)
.with_row_id()
.order_by(Some(vec![ColumnOrdering::asc_nulls_first(
column.name.clone(),
)]))?
.order_by(orodering)?
.project(&[&column.name])?;
let new_data_stream = scanner.try_into_stream().await?;

8 changes: 4 additions & 4 deletions rust/lance/src/io/exec/take.rs
Original file line number Diff line number Diff line change
@@ -390,7 +390,7 @@ mod tests {
10,
10,
Some(4),
DEFAULT_IO_BUFFER_SIZE,
*DEFAULT_IO_BUFFER_SIZE,
true,
false,
false,
@@ -426,7 +426,7 @@ mod tests {
10,
10,
Some(4),
DEFAULT_IO_BUFFER_SIZE,
*DEFAULT_IO_BUFFER_SIZE,
true,
false,
false,
@@ -462,7 +462,7 @@ mod tests {
10,
10,
Some(4),
DEFAULT_IO_BUFFER_SIZE,
*DEFAULT_IO_BUFFER_SIZE,
false,
false,
false,
@@ -483,7 +483,7 @@ mod tests {
10,
10,
Some(4),
DEFAULT_IO_BUFFER_SIZE,
*DEFAULT_IO_BUFFER_SIZE,
true,
false,
false,