Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: lancedb/lance
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 99b0064f33ae5d447af37ed4d5d4cd99cff73608
Choose a base ref
..
head repository: lancedb/lance
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: b2f7ebc4336a008e4cef38da2e0c468852bd6068
Choose a head ref
Showing with 4,177 additions and 1,422 deletions.
  1. +5 −4 .github/workflows/cargo-publish.yml
  2. +1 −1 .github/workflows/ci-benchmarks.yml
  3. +2 −2 .github/workflows/java-publish.yml
  4. +24 −0 .github/workflows/java.yml
  5. +3 −2 .github/workflows/python.yml
  6. +38 −14 .github/workflows/rust.yml
  7. +283 −328 Cargo.lock
  8. +31 −29 Cargo.toml
  9. +251 −0 deny.toml
  10. +46 −45 java/core/lance-jni/src/blocking_dataset.rs
  11. +254 −12 java/core/lance-jni/src/fragment.rs
  12. +80 −1 java/core/lance-jni/src/traits.rs
  13. +1 −6 java/core/pom.xml
  14. +14 −7 java/core/src/main/java/com/lancedb/lance/Dataset.java
  15. +0 −94 java/core/src/main/java/com/lancedb/lance/DatasetFragment.java
  16. +96 −23 java/core/src/main/java/com/lancedb/lance/Fragment.java
  17. +47 −87 java/core/src/main/java/com/lancedb/lance/FragmentMetadata.java
  18. +2 −11 java/core/src/main/java/com/lancedb/lance/FragmentOperation.java
  19. +67 −0 java/core/src/main/java/com/lancedb/lance/fragment/DataFile.java
  20. +60 −0 java/core/src/main/java/com/lancedb/lance/fragment/DeletionFile.java
  21. +19 −0 java/core/src/main/java/com/lancedb/lance/fragment/DeletionFileType.java
  22. +37 −0 java/core/src/main/java/com/lancedb/lance/fragment/RowIdMeta.java
  23. +9 −8 java/core/src/main/java/com/lancedb/lance/schema/SqlExpressions.java
  24. +3 −3 java/core/src/test/java/com/lancedb/lance/FragmentTest.java
  25. +5 −5 java/core/src/test/java/com/lancedb/lance/ScannerTest.java
  26. +2 −2 java/core/src/test/java/com/lancedb/lance/TestUtils.java
  27. +2 −7 java/pom.xml
  28. +2 −2 java/spark/pom.xml
  29. +1 −3 java/spark/src/main/java/com/lancedb/lance/spark/internal/LanceDatasetAdapter.java
  30. +4 −4 java/spark/src/main/java/com/lancedb/lance/spark/internal/LanceFragmentScanner.java
  31. +3 −1 protos/transaction.proto
  32. +193 −215 python/Cargo.lock
  33. +1 −1 python/Cargo.toml
  34. +139 −26 python/python/lance/dataset.py
  35. +68 −12 python/python/lance/fragment.py
  36. +17 −2 python/python/lance/lance/__init__.pyi
  37. +37 −18 python/python/lance/ray/sink.py
  38. +26 −0 python/python/tests/test_balanced.py
  39. +25 −0 python/python/tests/test_dataset.py
  40. +3 −3 python/python/tests/test_fragment.py
  41. +23 −0 python/python/tests/test_ray.py
  42. +32 −0 python/python/tests/test_scalar_index.py
  43. +140 −0 python/python/tests/test_vector_index.py
  44. +104 −18 python/src/dataset.rs
  45. +32 −13 python/src/fragment.rs
  46. +2 −1 python/src/lib.rs
  47. +46 −0 python/src/transaction.rs
  48. +6 −5 rust/lance-arrow/src/floats.rs
  49. +78 −0 rust/lance-core/src/datatypes/schema.rs
  50. +91 −1 rust/lance-core/src/utils/deletion.rs
  51. +2 −2 rust/lance-datafusion/Cargo.toml
  52. +25 −11 rust/lance-datafusion/src/exec.rs
  53. +4 −3 rust/lance-datafusion/src/planner.rs
  54. +2 −1 rust/lance-datafusion/src/sql.rs
  55. +9 −5 rust/lance-datafusion/src/substrait.rs
  56. +8 −0 rust/lance-encoding-datafusion/Cargo.toml
  57. +4 −0 rust/lance-encoding-datafusion/build.rs
  58. +8 −0 rust/lance-encoding/Cargo.toml
  59. +4 −0 rust/lance-encoding/build.rs
  60. +8 −0 rust/lance-file/Cargo.toml
  61. +4 −0 rust/lance-file/build.rs
  62. +6 −0 rust/lance-index/Cargo.toml
  63. +4 −0 rust/lance-index/build.rs
  64. +5 −2 rust/lance-index/src/scalar/btree.rs
  65. +1 −2 rust/lance-index/src/scalar/inverted/builder.rs
  66. +14 −4 rust/lance-index/src/scalar/inverted/index.rs
  67. +9 −0 rust/lance-index/src/traits.rs
  68. +35 −18 rust/lance-index/src/vector/flat.rs
  69. +10 −9 rust/lance-index/src/vector/hnsw/builder.rs
  70. +6 −3 rust/lance-index/src/vector/ivf.rs
  71. +1 −0 rust/lance-index/src/vector/ivf/transform.rs
  72. +12 −2 rust/lance-index/src/vector/sq.rs
  73. +19 −19 rust/lance-index/src/vector/sq/storage.rs
  74. +82 −21 rust/lance-index/src/vector/transform.rs
  75. +0 −3 rust/lance-io/Cargo.toml
  76. +10 −0 rust/lance-io/src/lib.rs
  77. +104 −2 rust/lance-linalg/src/distance.rs
  78. +6 −0 rust/lance-table/Cargo.toml
  79. +4 −0 rust/lance-table/build.rs
  80. +6 −3 rust/lance/Cargo.toml
  81. +66 −1 rust/lance/src/arrow/json.rs
  82. +9 −0 rust/lance/src/datafusion/dataframe.rs
  83. +65 −2 rust/lance/src/dataset.rs
  84. +66 −6 rust/lance/src/dataset/fragment.rs
  85. +2 −1 rust/lance/src/dataset/optimize.rs
  86. +185 −80 rust/lance/src/dataset/scanner.rs
  87. +168 −0 rust/lance/src/dataset/schema_evolution.rs
  88. +69 −3 rust/lance/src/dataset/take.rs
  89. +1 −2 rust/lance/src/dataset/transaction.rs
  90. +104 −56 rust/lance/src/dataset/write/merge_insert.rs
  91. +225 −15 rust/lance/src/index.rs
  92. +13 −3 rust/lance/src/index/vector.rs
  93. +1 −1 rust/lance/src/index/vector/builder.rs
  94. +11 −47 rust/lance/src/index/vector/ivf.rs
  95. +184 −6 rust/lance/src/index/vector/ivf/v2.rs
  96. +1 −1 rust/lance/src/index/vector/pq.rs
  97. +100 −24 rust/lance/src/index/vector/utils.rs
  98. +6 −5 rust/lance/src/io/exec/fts.rs
  99. +12 −32 rust/lance/src/io/exec/knn.rs
  100. +2 −0 rust/lance/src/io/exec/optimizer.rs
  101. +4 −2 rust/lance/src/io/exec/pushdown_scan.rs
  102. +9 −5 rust/lance/src/io/exec/scalar_index.rs
  103. +3 −1 rust/lance/src/io/exec/scan.rs
  104. +4 −3 rust/lance/src/io/exec/testing.rs
9 changes: 5 additions & 4 deletions .github/workflows/cargo-publish.yml
Original file line number Diff line number Diff line change
@@ -8,7 +8,7 @@ on:
workflow_dispatch:
inputs:
tag:
description: 'Tag to publish (e.g., v1.0.0)'
description: "Tag to publish (e.g., v1.0.0)"
required: true
type: string

@@ -19,12 +19,13 @@ env:

jobs:
build:
runs-on: ubuntu-24.04
# Needs additional disk space for the full build.
runs-on: ubuntu-2404-4x-x64
timeout-minutes: 60
env:
# Need up-to-date compilers for kernels
CC: clang-18
CXX: clang-18
CXX: clang++-18
defaults:
run:
working-directory: .
@@ -53,5 +54,5 @@ jobs:
- uses: albertlockett/publish-crates@v2.2
with:
registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }}
args: '--all-features'
args: "--all-features"
path: .
2 changes: 1 addition & 1 deletion .github/workflows/ci-benchmarks.yml
Original file line number Diff line number Diff line change
@@ -13,7 +13,7 @@ jobs:
env:
# Need up-to-date compilers for kernels
CC: clang-18
CXX: clang-18
CXX: clang++-18
defaults:
run:
shell: bash
4 changes: 2 additions & 2 deletions .github/workflows/java-publish.yml
Original file line number Diff line number Diff line change
@@ -12,7 +12,7 @@ jobs:
macos-arm64:
name: Build on MacOS Arm64
runs-on: macos-14
timeout-minutes: 30
timeout-minutes: 60
defaults:
run:
working-directory: ./java
@@ -66,7 +66,7 @@ jobs:
if-no-files-found: error
linux-x86:
runs-on: ubuntu-24.04
timeout-minutes: 45
timeout-minutes: 60
needs: [macos-arm64, linux-arm64]
defaults:
run:
24 changes: 24 additions & 0 deletions .github/workflows/java.yml
Original file line number Diff line number Diff line change
@@ -70,6 +70,30 @@ jobs:
distribution: temurin
java-version: ${{ matrix.java-version }}
cache: "maven"
- name: Running code style check with Java ${{ matrix.java-version }}
run: |
if [ "${{ matrix.java-version }}" == "17" ]; then
export JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS \
-XX:+IgnoreUnrecognizedVMOptions \
--add-opens=java.base/java.lang=ALL-UNNAMED \
--add-opens=java.base/java.lang.invoke=ALL-UNNAMED \
--add-opens=java.base/java.lang.reflect=ALL-UNNAMED \
--add-opens=java.base/java.io=ALL-UNNAMED \
--add-opens=java.base/java.net=ALL-UNNAMED \
--add-opens=java.base/java.nio=ALL-UNNAMED \
--add-opens=java.base/java.util=ALL-UNNAMED \
--add-opens=java.base/java.util.concurrent=ALL-UNNAMED \
--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \
--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED \
--add-opens=java.base/sun.nio.cs=ALL-UNNAMED \
--add-opens=java.base/sun.security.action=ALL-UNNAMED \
--add-opens=java.base/sun.util.calendar=ALL-UNNAMED \
--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED \
-Djdk.reflect.useDirectMethodHandle=false \
-Dio.netty.tryReflectionSetAccessible=true"
fi
mvn spotless:check
- name: Running tests with Java ${{ matrix.java-version }}
run: |
if [ "${{ matrix.java-version }}" == "17" ]; then
5 changes: 3 additions & 2 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
@@ -39,7 +39,7 @@ jobs:
env:
# Need up-to-date compilers for kernels
CC: clang-18
CXX: clang-18
CXX: clang++-18
steps:
- uses: actions/checkout@v4
with:
@@ -67,8 +67,9 @@ jobs:
sudo apt install -y protobuf-compiler libssl-dev
- name: Lint Rust
run: |
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo fmt --all -- --check
cargo clippy --locked --all-features --tests -- -D warnings
cargo clippy --locked --features ${ALL_FEATURES} --tests -- -D warnings
- name: Build
run: |
python -m venv venv
52 changes: 38 additions & 14 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
@@ -46,8 +46,18 @@ jobs:
sudo apt install -y protobuf-compiler libssl-dev
- name: Run clippy
run: |
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo clippy --version
cargo clippy --locked --all-features --tests --benches -- -D warnings
cargo clippy --locked --features ${ALL_FEATURES} --tests --benches -- -D warnings
cargo-deny:
name: Check Rust dependencies (cargo-deny)
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v4
- uses: EmbarkStudios/cargo-deny-action@v2
with:
log-level: warn
command: check
linux-build:
runs-on: "ubuntu-24.04"
timeout-minutes: 45
@@ -59,7 +69,7 @@ jobs:
env:
# Need up-to-date compilers for kernels
CC: clang
CXX: clang
CXX: clang++
steps:
- uses: actions/checkout@v4
# pin the toolchain version to avoid surprises
@@ -81,13 +91,18 @@ jobs:
- name: Run tests
if: ${{ matrix.toolchain == 'stable' }}
run: |
cargo llvm-cov --locked --workspace --codecov --output-path coverage.codecov --all-features
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo llvm-cov --locked --workspace --codecov --output-path coverage.codecov --features ${ALL_FEATURES}
- name: Build tests (nightly)
run: cargo test --locked --all-features --workspace --no-run
if: ${{ matrix.toolchain != 'stable' }}
run: |
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo test --locked --features ${ALL_FEATURES} --workspace --no-run
- name: Run tests (nightly)
if: ${{ matrix.toolchain != 'stable' }}
run: |
cargo test --all-features --workspace
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo test --features ${ALL_FEATURES} --workspace
- name: Upload coverage to Codecov
if: ${{ matrix.toolchain == 'stable' }}
uses: codecov/codecov-action@v4
@@ -113,20 +128,22 @@ jobs:
sudo apt install -y protobuf-compiler libssl-dev pkg-config
- name: Build tests
run: |
cargo test --locked --all-features --no-run
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo test --locked --features ${ALL_FEATURES} --no-run
- name: Start DynamoDB local for tests
run: |
docker run -d -e AWS_ACCESS_KEY_ID=DUMMYKEY -e AWS_SECRET_ACCESS_KEY=DUMMYKEY -p 8000:8000 amazon/dynamodb-local
- name: Run tests
run: |
cargo test --locked --all-features
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo test --locked --features ${ALL_FEATURES}
build-no-lock:
runs-on: ubuntu-24.04
timeout-minutes: 30
env:
# Need up-to-date compilers for kernels
CC: clang
CXX: clang
CXX: clang++
steps:
- uses: actions/checkout@v4
# Remove Cargo.lock to force a fresh build
@@ -139,7 +156,9 @@ jobs:
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Build all
run: cargo build --benches --all-features --tests
run: |
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo build --benches --features ${ALL_FEATURES} --tests
mac-build:
runs-on: "macos-14"
timeout-minutes: 45
@@ -165,11 +184,14 @@ jobs:
run: |
rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }}
- name: Build tests
run: cargo test --locked --all-features --no-run
run: |
cargo test --locked --features fp16kernels,cli,tensorflow,dynamodb,substrait --no-run
- name: Run tests
run: cargo test --all-features
run: |
cargo test --features fp16kernels,cli,tensorflow,dynamodb,substrait
- name: Check benchmarks
run: cargo check --benches --all-features
run: |
cargo check --benches --features fp16kernels,cli,tensorflow,dynamodb,substrait
windows-build:
runs-on: windows-latest
defaults:
@@ -203,7 +225,7 @@ jobs:
env:
# Need up-to-date compilers for kernels
CC: clang
CXX: clang
CXX: clang++
steps:
- uses: actions/checkout@v4
with:
@@ -218,4 +240,6 @@ jobs:
with:
toolchain: ${{ matrix.msrv }}
- name: cargo +${{ matrix.msrv }} check
run: cargo check --workspace --tests --benches --all-features
run: |
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo check --workspace --tests --benches --features ${ALL_FEATURES}
Loading