Skip to content

Commit 62a2256

Browse files
authored
fix: full text search index may be corrupted after remapping (#3388)
The posting lists must be written in their original order.

---------

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
1 parent 4149457 commit 62a2256

File tree

2 files changed

+28
-2
lines changed

2 files changed

+28
-2
lines changed

rust/lance-index/src/scalar/inverted/builder.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -285,8 +285,7 @@ impl InvertedIndexBuilder {
285285
Result::Ok((batch, max_score))
286286
}
287287
});
288-
let mut stream =
289-
stream::iter(batches).buffer_unordered(get_num_compute_intensive_cpus());
288+
let mut stream = stream::iter(batches).buffered(get_num_compute_intensive_cpus());
290289
let mut offsets = Vec::new();
291290
let mut max_scores = Vec::new();
292291
let mut num_rows = 0;

rust/lance/src/index.rs

+27
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,7 @@ impl DatasetIndexInternalExt for Dataset {
945945
#[cfg(test)]
946946
mod tests {
947947
use crate::dataset::builder::DatasetBuilder;
948+
use crate::dataset::optimize::{compact_files, CompactionOptions};
948949
use crate::utils::test::{DatagenExt, FragmentCount, FragmentRowCount};
949950

950951
use super::*;
@@ -1556,6 +1557,32 @@ mod tests {
15561557

15571558
assert_eq!(texts.len(), 1, "query: {}, texts: {:?}", word, texts);
15581559
assert_eq!(texts[0], word, "query: {}, texts: {:?}", word, texts);
1560+
1561+
// we should be able to query the new words after compaction
1562+
compact_files(&mut dataset, CompactionOptions::default(), None)
1563+
.await
1564+
.unwrap();
1565+
for &word in uppercase_words.iter() {
1566+
let query_result = dataset
1567+
.scan()
1568+
.project(&["text"])
1569+
.unwrap()
1570+
.full_text_search(FullTextSearchQuery::new(word.to_string()))
1571+
.unwrap()
1572+
.try_into_batch()
1573+
.await
1574+
.unwrap();
1575+
let texts = query_result["text"]
1576+
.as_string::<i32>()
1577+
.iter()
1578+
.map(|v| match v {
1579+
None => "".to_string(),
1580+
Some(v) => v.to_string(),
1581+
})
1582+
.collect::<Vec<String>>();
1583+
assert_eq!(texts.len(), 1, "query: {}, texts: {:?}", word, texts);
1584+
assert_eq!(texts[0], word, "query: {}, texts: {:?}", word, texts);
1585+
}
15591586
}
15601587
}
15611588

0 commit comments

Comments
 (0)