Skip to content

Commit 8a61b69

Browse files
authored
test: assert the indexed/unindexed rows for optimizing tests (#3436)
1 parent fbea65b commit 8a61b69

File tree

1 file changed

+20
-0
lines changed

1 file changed

+20
-0
lines changed

rust/lance/src/index.rs

+20
Original file line numberDiff line numberDiff line change
@@ -1515,11 +1515,25 @@ mod tests {
15151515
.await
15161516
.unwrap();
15171517

1518+
async fn assert_indexed_rows(dataset: &Dataset, expected_indexed_rows: usize) {
1519+
let stats = dataset.index_statistics("text_idx").await.unwrap();
1520+
let stats: serde_json::Value = serde_json::from_str(&stats).unwrap();
1521+
let indexed_rows = stats["num_indexed_rows"].as_u64().unwrap() as usize;
1522+
let unindexed_rows = stats["num_unindexed_rows"].as_u64().unwrap() as usize;
1523+
let num_rows = dataset.count_all_rows().await.unwrap();
1524+
assert_eq!(indexed_rows, expected_indexed_rows);
1525+
assert_eq!(unindexed_rows, num_rows - expected_indexed_rows);
1526+
}
1527+
1528+
let num_rows = dataset.count_all_rows().await.unwrap();
1529+
assert_indexed_rows(&dataset, num_rows).await;
1530+
15181531
let new_words = ["elephant", "fig", "grape", "honeydew"];
15191532
let new_data = StringArray::from_iter_values(new_words.iter().map(|s| s.to_string()));
15201533
let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(new_data)]).unwrap();
15211534
let batch_iter = RecordBatchIterator::new(vec![Ok(batch)], schema.clone());
15221535
dataset.append(batch_iter, None).await.unwrap();
1536+
assert_indexed_rows(&dataset, num_rows).await;
15231537

15241538
dataset
15251539
.optimize_indices(&OptimizeOptions {
@@ -1528,6 +1542,8 @@ mod tests {
15281542
})
15291543
.await
15301544
.unwrap();
1545+
let num_rows = dataset.count_all_rows().await.unwrap();
1546+
assert_indexed_rows(&dataset, num_rows).await;
15311547

15321548
for &word in words.iter().chain(new_words.iter()) {
15331549
let query_result = dataset
@@ -1584,6 +1600,7 @@ mod tests {
15841600
let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(new_data)]).unwrap();
15851601
let batch_iter = RecordBatchIterator::new(vec![Ok(batch)], schema.clone());
15861602
dataset.append(batch_iter, None).await.unwrap();
1603+
assert_indexed_rows(&dataset, num_rows).await;
15871604

15881605
// we should be able to query the new words
15891606
for &word in uppercase_words.iter() {
@@ -1619,6 +1636,8 @@ mod tests {
16191636
})
16201637
.await
16211638
.unwrap();
1639+
let num_rows = dataset.count_all_rows().await.unwrap();
1640+
assert_indexed_rows(&dataset, num_rows).await;
16221641

16231642
// we should be able to query the new words after optimization
16241643
for &word in uppercase_words.iter() {
@@ -1671,6 +1690,7 @@ mod tests {
16711690
assert_eq!(texts.len(), 1, "query: {}, texts: {:?}", word, texts);
16721691
assert_eq!(texts[0], word, "query: {}, texts: {:?}", word, texts);
16731692
}
1693+
assert_indexed_rows(&dataset, num_rows).await;
16741694
}
16751695
}
16761696

0 commit comments

Comments (0)