@@ -1742,14 +1742,15 @@ mod tests {
1742
1742
1743
1743
use arrow_array:: types:: UInt64Type ;
1744
1744
use arrow_array:: {
1745
- make_array, Float32Array , RecordBatchIterator , RecordBatchReader , UInt64Array ,
1745
+ make_array, FixedSizeListArray , Float32Array , RecordBatch , RecordBatchIterator ,
1746
+ RecordBatchReader , UInt64Array ,
1746
1747
} ;
1747
1748
use arrow_buffer:: { BooleanBuffer , NullBuffer } ;
1748
- use arrow_schema:: Field ;
1749
+ use arrow_schema:: { DataType , Field , Schema } ;
1749
1750
use itertools:: Itertools ;
1750
1751
use lance_core:: utils:: address:: RowAddress ;
1751
1752
use lance_core:: ROW_ID ;
1752
- use lance_datagen:: { array, gen, Dimension , RowCount } ;
1753
+ use lance_datagen:: { array, gen, ArrayGeneratorExt , Dimension , RowCount } ;
1753
1754
use lance_index:: vector:: sq:: builder:: SQBuildParams ;
1754
1755
use lance_linalg:: distance:: l2_distance_batch;
1755
1756
use lance_testing:: datagen:: {
@@ -1760,7 +1761,7 @@ mod tests {
1760
1761
use rstest:: rstest;
1761
1762
use tempfile:: tempdir;
1762
1763
1763
- use crate :: dataset:: InsertBuilder ;
1764
+ use crate :: dataset:: { InsertBuilder , WriteMode , WriteParams } ;
1764
1765
use crate :: index:: prefilter:: DatasetPreFilter ;
1765
1766
use crate :: index:: vector:: IndexFileVersion ;
1766
1767
use crate :: index:: vector_index_details;
@@ -2300,6 +2301,75 @@ mod tests {
2300
2301
assert_eq ! ( results[ "vec" ] . logical_null_count( ) , 0 ) ;
2301
2302
}
2302
2303
2304
+ #[ tokio:: test]
2305
+ async fn test_index_lifecycle_nulls ( ) {
2306
+ // Generate random data with nulls
2307
+ let nrows = 2_000 ;
2308
+ let dims = 32 ;
2309
+ let data = gen ( )
2310
+ . col (
2311
+ "vec" ,
2312
+ array:: rand_vec :: < Float32Type > ( Dimension :: from ( dims as u32 ) ) . with_random_nulls ( 0.5 ) ,
2313
+ )
2314
+ . into_batch_rows ( RowCount :: from ( nrows) )
2315
+ . unwrap ( ) ;
2316
+ let num_non_null = data[ "vec" ] . len ( ) - data[ "vec" ] . logical_null_count ( ) ;
2317
+
2318
+ let mut dataset = InsertBuilder :: new ( "memory://" )
2319
+ . execute ( vec ! [ data] )
2320
+ . await
2321
+ . unwrap ( ) ;
2322
+
2323
+ // Create index
2324
+ let index_params = VectorIndexParams :: with_ivf_pq_params (
2325
+ MetricType :: L2 ,
2326
+ IvfBuildParams :: new ( 2 ) ,
2327
+ PQBuildParams :: new ( 2 , 8 ) ,
2328
+ ) ;
2329
+ dataset
2330
+ . create_index ( & [ "vec" ] , IndexType :: Vector , None , & index_params, false )
2331
+ . await
2332
+ . unwrap ( ) ;
2333
+
2334
+ // Check that the index is working
2335
+ async fn check_index ( dataset : & Dataset , num_non_null : usize , dims : usize ) {
2336
+ let query = vec ! [ 0.0 ; dims] . into_iter ( ) . collect :: < Float32Array > ( ) ;
2337
+ let results = dataset
2338
+ . scan ( )
2339
+ . nearest ( "vec" , & query, 2_000 )
2340
+ . unwrap ( )
2341
+ . nprobs ( 2 )
2342
+ . try_into_batch ( )
2343
+ . await
2344
+ . unwrap ( ) ;
2345
+ assert_eq ! ( results. num_rows( ) , num_non_null) ;
2346
+ }
2347
+ check_index ( & dataset, num_non_null, dims) . await ;
2348
+
2349
+ // Append more data
2350
+ let data = gen ( )
2351
+ . col (
2352
+ "vec" ,
2353
+ array:: rand_vec :: < Float32Type > ( Dimension :: from ( dims as u32 ) ) . with_random_nulls ( 0.5 ) ,
2354
+ )
2355
+ . into_batch_rows ( RowCount :: from ( 500 ) )
2356
+ . unwrap ( ) ;
2357
+ let num_non_null = data[ "vec" ] . len ( ) - data[ "vec" ] . logical_null_count ( ) + num_non_null;
2358
+ let mut dataset = InsertBuilder :: new ( Arc :: new ( dataset) )
2359
+ . with_params ( & WriteParams {
2360
+ mode : WriteMode :: Append ,
2361
+ ..Default :: default ( )
2362
+ } )
2363
+ . execute ( vec ! [ data] )
2364
+ . await
2365
+ . unwrap ( ) ;
2366
+ check_index ( & dataset, num_non_null, dims) . await ;
2367
+
2368
+ // Optimize the index
2369
+ dataset. optimize_indices ( & Default :: default ( ) ) . await . unwrap ( ) ;
2370
+ check_index ( & dataset, num_non_null, dims) . await ;
2371
+ }
2372
+
2303
2373
#[ tokio:: test]
2304
2374
async fn test_create_ivf_pq_cosine ( ) {
2305
2375
let test_dir = tempdir ( ) . unwrap ( ) ;
0 commit comments