Skip to content

Commit

Permalink
forward delete bitsets on merge, add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz committed Sep 6, 2021
1 parent c50c5ed commit c02100e
Show file tree
Hide file tree
Showing 6 changed files with 255 additions and 43 deletions.
12 changes: 12 additions & 0 deletions src/core/segment_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::core::SegmentId;
use crate::directory::CompositeFile;
use crate::directory::FileSlice;
use crate::error::DataCorruption;
use crate::fastfield::merge_delete_bitset;
use crate::fastfield::DeleteBitSet;
use crate::fastfield::FacetReader;
use crate::fastfield::FastFieldReaders;
Expand Down Expand Up @@ -69,6 +70,17 @@ impl SegmentReader {
&self.schema
}

/// Applies an additional delete bitset to this segment reader.
///
/// When the reader already holds a delete bitset, the incoming one is
/// combined with it through `merge_delete_bitset`; otherwise the incoming
/// bitset is stored as-is. `num_docs` is then recomputed as
/// `max_doc - num_deleted_docs()` so it reflects the bitset now in place.
pub fn apply_delete_bitset(&mut self, delete_bitset: DeleteBitSet) {
    // Work out the bitset to store first, then assign it exactly once.
    let combined = match self.delete_bitset_opt.as_ref() {
        Some(current) => merge_delete_bitset(&delete_bitset, current),
        None => delete_bitset,
    };
    self.delete_bitset_opt = Some(combined);
    // Refresh the live-document count against the updated bitset.
    self.num_docs = self.max_doc - self.num_deleted_docs();
}

/// Return the number of documents that have been
/// deleted in the segment.
pub fn num_deleted_docs(&self) -> DocId {
Expand Down
2 changes: 1 addition & 1 deletion src/fastfield/delete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ pub fn merge_delete_bitset(left: &DeleteBitSet, right: &DeleteBitSet) -> DeleteB
/// Set of deleted `DocId`s.
#[derive(Clone)]
pub struct DeleteBitSet {
pub(crate) data: OwnedBytes,
data: OwnedBytes,
num_deleted: usize,
}

Expand Down
12 changes: 3 additions & 9 deletions src/indexer/merger.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use crate::error::DataCorruption;
use crate::fastfield::merge_delete_bitset;
use crate::fastfield::CompositeFastFieldSerializer;
use crate::fastfield::DeleteBitSet;
use crate::fastfield::DynamicFastFieldReader;
Expand Down Expand Up @@ -93,7 +92,7 @@ impl<'a> From<(usize, &'a SegmentReader)> for SegmentReaderWithOrdinal<'a> {
pub struct IndexMerger {
index_settings: IndexSettings,
schema: Schema,
readers: Vec<SegmentReader>,
pub(crate) readers: Vec<SegmentReader>,
max_doc: u32,
}

Expand Down Expand Up @@ -208,20 +207,15 @@ impl IndexMerger {
for segment in segments {
if segment.meta().num_docs() > 0 {
let reader = SegmentReader::open(segment)?;
max_doc += reader.num_docs();
readers.push(reader);
}
}
for (reader, new_delete_bitset_opt) in readers.iter_mut().zip(delete_bitset_opt.into_iter())
{
if let Some(new_delete_bitset) = new_delete_bitset_opt {
if let Some(existing_bitset) = reader.delete_bitset_opt.as_mut() {
let merged_bitset = merge_delete_bitset(&new_delete_bitset, existing_bitset);
reader.delete_bitset_opt = Some(merged_bitset);
} else {
reader.delete_bitset_opt = Some(new_delete_bitset);
}
reader.apply_delete_bitset(new_delete_bitset);
}
max_doc += reader.num_docs();
}
if let Some(sort_by_field) = index_settings.sort_by_field.as_ref() {
readers = Self::sort_readers_by_min_sort_field(readers, sort_by_field)?;
Expand Down
2 changes: 1 addition & 1 deletion src/indexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ pub use self::prepared_commit::PreparedCommit;
pub use self::segment_entry::SegmentEntry;
pub use self::segment_manager::SegmentManager;
pub use self::segment_serializer::SegmentSerializer;
pub use self::segment_updater::merge_segments;
pub use self::segment_updater::merge_indices;
pub use self::segment_writer::SegmentWriter;

/// Alias for the default merge policy, which is the `LogMergePolicy`.
Expand Down
Loading

0 comments on commit c02100e

Please sign in to comment.