@@ -1515,11 +1515,25 @@ mod tests {
1515
1515
. await
1516
1516
. unwrap ( ) ;
1517
1517
1518
+ async fn assert_indexed_rows ( dataset : & Dataset , expected_indexed_rows : usize ) {
1519
+ let stats = dataset. index_statistics ( "text_idx" ) . await . unwrap ( ) ;
1520
+ let stats: serde_json:: Value = serde_json:: from_str ( & stats) . unwrap ( ) ;
1521
+ let indexed_rows = stats[ "num_indexed_rows" ] . as_u64 ( ) . unwrap ( ) as usize ;
1522
+ let unindexed_rows = stats[ "num_unindexed_rows" ] . as_u64 ( ) . unwrap ( ) as usize ;
1523
+ let num_rows = dataset. count_all_rows ( ) . await . unwrap ( ) ;
1524
+ assert_eq ! ( indexed_rows, expected_indexed_rows) ;
1525
+ assert_eq ! ( unindexed_rows, num_rows - expected_indexed_rows) ;
1526
+ }
1527
+
1528
+ let num_rows = dataset. count_all_rows ( ) . await . unwrap ( ) ;
1529
+ assert_indexed_rows ( & dataset, num_rows) . await ;
1530
+
1518
1531
let new_words = [ "elephant" , "fig" , "grape" , "honeydew" ] ;
1519
1532
let new_data = StringArray :: from_iter_values ( new_words. iter ( ) . map ( |s| s. to_string ( ) ) ) ;
1520
1533
let batch = RecordBatch :: try_new ( schema. clone ( ) , vec ! [ Arc :: new( new_data) ] ) . unwrap ( ) ;
1521
1534
let batch_iter = RecordBatchIterator :: new ( vec ! [ Ok ( batch) ] , schema. clone ( ) ) ;
1522
1535
dataset. append ( batch_iter, None ) . await . unwrap ( ) ;
1536
+ assert_indexed_rows ( & dataset, num_rows) . await ;
1523
1537
1524
1538
dataset
1525
1539
. optimize_indices ( & OptimizeOptions {
@@ -1528,6 +1542,8 @@ mod tests {
1528
1542
} )
1529
1543
. await
1530
1544
. unwrap ( ) ;
1545
+ let num_rows = dataset. count_all_rows ( ) . await . unwrap ( ) ;
1546
+ assert_indexed_rows ( & dataset, num_rows) . await ;
1531
1547
1532
1548
for & word in words. iter ( ) . chain ( new_words. iter ( ) ) {
1533
1549
let query_result = dataset
@@ -1584,6 +1600,7 @@ mod tests {
1584
1600
let batch = RecordBatch :: try_new ( schema. clone ( ) , vec ! [ Arc :: new( new_data) ] ) . unwrap ( ) ;
1585
1601
let batch_iter = RecordBatchIterator :: new ( vec ! [ Ok ( batch) ] , schema. clone ( ) ) ;
1586
1602
dataset. append ( batch_iter, None ) . await . unwrap ( ) ;
1603
+ assert_indexed_rows ( & dataset, num_rows) . await ;
1587
1604
1588
1605
// we should be able to query the new words
1589
1606
for & word in uppercase_words. iter ( ) {
@@ -1619,6 +1636,8 @@ mod tests {
1619
1636
} )
1620
1637
. await
1621
1638
. unwrap ( ) ;
1639
+ let num_rows = dataset. count_all_rows ( ) . await . unwrap ( ) ;
1640
+ assert_indexed_rows ( & dataset, num_rows) . await ;
1622
1641
1623
1642
// we should be able to query the new words after optimization
1624
1643
for & word in uppercase_words. iter ( ) {
@@ -1671,6 +1690,7 @@ mod tests {
1671
1690
assert_eq ! ( texts. len( ) , 1 , "query: {}, texts: {:?}" , word, texts) ;
1672
1691
assert_eq ! ( texts[ 0 ] , word, "query: {}, texts: {:?}" , word, texts) ;
1673
1692
}
1693
+ assert_indexed_rows ( & dataset, num_rows) . await ;
1674
1694
}
1675
1695
}
1676
1696
0 commit comments