Skip to content

Commit

Permalink
cache store readers to utilize lru cache (4x performance)
Browse files Browse the repository at this point in the history
Cache store readers to utilize the LRU cache (4x faster performance, due to fewer decompress calls on the block)
  • Loading branch information
PSeitz committed May 12, 2021
1 parent b6a0f42 commit 0a4f433
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 1 deletion.
7 changes: 6 additions & 1 deletion src/indexer/merger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -915,9 +915,14 @@ impl IndexMerger {
store_writer: &mut StoreWriter,
doc_id_mapping: &Option<Vec<(DocId, SegmentReaderWithOrdinal)>>,
) -> crate::Result<()> {
let store_readers: Vec<_> = self
.readers
.iter()
.map(|reader| reader.get_store_reader())
.collect::<Result<_, _>>()?;
if let Some(doc_id_mapping) = doc_id_mapping {
for (old_doc_id, reader_with_ordinal) in doc_id_mapping {
let store_reader = reader_with_ordinal.reader.get_store_reader()?;
let store_reader = &store_readers[reader_with_ordinal.ordinal as usize];
let raw_doc = store_reader.get_raw(*old_doc_id)?;
store_writer.store_bytes(raw_doc.get_bytes())?;
}
Expand Down
3 changes: 3 additions & 0 deletions src/indexer/merger_sorted_index_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ mod tests {
let mut schema_builder = schema::Schema::builder();
let int_options = IntOptions::default()
.set_fast(Cardinality::SingleValue)
.set_stored()
.set_indexed();
let int_field = schema_builder.add_u64_field("intval", int_options);

Expand Down Expand Up @@ -201,6 +202,8 @@ mod tests {
doc.get_first(my_text_field).unwrap().text(),
Some("blubber")
);
let doc = searcher.doc(DocAddress::new(0, 0)).unwrap();
assert_eq!(doc.get_first(int_field).unwrap().u64_value(), Some(1000));
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/store/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ impl StoreReader {
/// Calling `.get(doc)` is relatively costly as it requires
/// decompressing a compressed block. The store utilizes an LRU cache,
/// so accessing docs from the same compressed block should be faster.
/// For that reason a store reader should be kept and reused.
///
/// It should not be called to score documents,
/// for instance.
Expand All @@ -103,6 +104,7 @@ impl StoreReader {
/// Calling `.get(doc)` is relatively costly as it requires
/// decompressing a compressed block. The store utilizes an LRU cache,
/// so accessing docs from the same compressed block should be faster.
/// For that reason a store reader should be kept and reused.
///
pub fn get_raw(&self, doc_id: DocId) -> crate::Result<RawDocument> {
let checkpoint = self.block_checkpoint(doc_id).ok_or_else(|| {
Expand Down

0 comments on commit 0a4f433

Please sign in to comment.