-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement reset iterator, small improvements
- Loading branch information
Julian büttner
committed
Nov 14, 2023
1 parent
3369988
commit 4daedb6
Showing
10 changed files
with
318 additions
and
171 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
use swapvec::{SwapVec, SwapVecConfig}; | ||
|
||
const DATA_MB: u64 = 20; | ||
|
||
fn main() { | ||
let element_count = DATA_MB / 8; | ||
let big_iterator = 0..element_count * 1024 * 1024; | ||
|
||
let config = swapvec::SwapVecConfig { | ||
batch_size: 8 * 1024, | ||
..SwapVecConfig::default() | ||
}; | ||
let mut swapvec: SwapVec<_> = SwapVec::with_config(config); | ||
swapvec.consume(big_iterator.into_iter()).unwrap(); | ||
|
||
println!("Data size: {}MB", DATA_MB); | ||
println!("Done. Batches written: {}", swapvec.batches_written()); | ||
println!( | ||
"Filesize: {}MB", | ||
swapvec | ||
.file_size() | ||
.map(|x| x as f32 / 1024. / 1024.) | ||
.unwrap_or(0.) | ||
); | ||
println!("Read back"); | ||
|
||
let read_back: Vec<_> = swapvec.into_iter().map(|x| x.unwrap()).collect(); | ||
|
||
println!("Elements read back: {}", read_back.len()); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
use std::{ | ||
hash::{DefaultHasher, Hash, Hasher}, | ||
io::{self, BufReader, BufWriter, Error, Read, Seek, Write}, | ||
}; | ||
|
||
use crate::SwapVecError; | ||
|
||
/// Bookkeeping record for a single batch written through a `BatchWriter`.
#[derive(Debug)]
pub struct BatchInfo {
    /// Hash of the batch's bytes, as computed when the batch was written.
    pub hash: u64,
    /// Length of the batch in bytes.
    pub bytes: usize,
}
|
||
pub(crate) struct BatchWriter<T: Write> { | ||
inner: BufWriter<T>, | ||
batch_infos: Vec<BatchInfo>, | ||
} | ||
|
||
pub(crate) struct BatchReader<T: Read> { | ||
inner: BufReader<T>, | ||
batch_infos: Vec<BatchInfo>, | ||
batch_index: usize, | ||
buffer: Vec<u8>, | ||
} | ||
|
||
/// Computes the checksum of `bytes` by feeding the slice through a freshly
/// created `DefaultHasher`.
fn hash_bytes(bytes: &[u8]) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(bytes, &mut state);
    state.finish()
}
|
||
impl<T: Write> BatchWriter<T> { | ||
pub fn new(writer: T) -> Self { | ||
Self { | ||
batch_infos: Vec::new(), | ||
inner: BufWriter::new(writer), | ||
} | ||
} | ||
pub fn write_batch(&mut self, buffer: &[u8]) -> Result<(), io::Error> { | ||
self.inner.write_all(buffer)?; | ||
self.batch_infos.push(BatchInfo { | ||
hash: hash_bytes(buffer), | ||
bytes: buffer.len(), | ||
}); | ||
self.inner.flush() | ||
} | ||
pub fn bytes_written(&self) -> usize { | ||
self.batch_infos.iter().map(|b| b.bytes).sum() | ||
} | ||
pub fn batch_count(&self) -> usize { | ||
self.batch_infos.len() | ||
} | ||
} | ||
|
||
impl<T: Read + Seek> BatchReader<T> { | ||
pub fn reset(&mut self) -> Result<(), Error> { | ||
self.inner.seek(io::SeekFrom::Start(0))?; | ||
self.batch_index = 0; | ||
self.buffer.clear(); | ||
Ok(()) | ||
} | ||
} | ||
|
||
impl<T: Read> BatchReader<T> { | ||
pub fn read_batch(&mut self) -> Result<Option<&[u8]>, SwapVecError> { | ||
let batch_info = self.batch_infos.get(self.batch_index); | ||
self.batch_index += 1; | ||
if batch_info.is_none() { | ||
return Ok(None); | ||
} | ||
let batch_info = batch_info.unwrap(); | ||
self.buffer.resize(batch_info.bytes, 0); | ||
self.inner.read_exact(self.buffer.as_mut_slice())?; | ||
if hash_bytes(self.buffer.as_slice()) != batch_info.hash { | ||
// return Err(SwapVecError::WrongChecksum); | ||
} | ||
Ok(Some(self.buffer.as_slice())) | ||
} | ||
} | ||
|
||
impl<T: Read + Write + Seek> TryFrom<BatchWriter<T>> for BatchReader<T> { | ||
type Error = std::io::Error; | ||
|
||
fn try_from(value: BatchWriter<T>) -> Result<Self, Self::Error> { | ||
let mut inner = value | ||
.inner | ||
.into_inner() | ||
.map_err(|inner_error| inner_error.into_error())?; | ||
inner.seek(io::SeekFrom::Start(0))?; | ||
Ok(Self { | ||
inner: BufReader::new(inner), | ||
batch_infos: value.batch_infos, | ||
batch_index: 0, | ||
buffer: Vec::new(), | ||
}) | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod test {
    use std::io::Cursor;

    use super::*;

    /// Reads the next batch and returns a copy of it, panicking when reading
    /// fails or when the reader is already exhausted.
    fn expect_batch<T: std::io::Read>(reader: &mut BatchReader<T>) -> Vec<u8> {
        reader
            .read_batch()
            .expect("Could not read batch")
            .expect("Batch was unexpectedly empty")
            .to_vec()
    }

    /// Writes two batches, converts the writer into a reader, and checks
    /// that batches come back in order, that `reset` restarts from the first
    /// batch, and that the reader reports the end of the data.
    #[test]
    fn read_write_checked_io() {
        let buffer = Cursor::new(vec![0; 128]);
        let mut batch_writer = BatchWriter::new(buffer);
        batch_writer
            .write_batch(&[1, 2, 3])
            .expect("Could not write to IO buffer");
        batch_writer
            .write_batch(&[44, 55])
            .expect("Could not write to IO buffer");

        let mut reader: BatchReader<_> = batch_writer
            .try_into()
            .expect("Could not flush into IO buffer");
        assert_eq!(expect_batch(&mut reader), [1, 2, 3]);

        // After a reset the first batch must be delivered again.
        reader.reset().expect("Could not reset");
        assert_eq!(expect_batch(&mut reader), [1, 2, 3]);
        assert_eq!(expect_batch(&mut reader), [44, 55]);

        // Both batches are consumed; the reader must now signal the end.
        assert!(reader
            .read_batch()
            .expect("Could not read batch")
            .is_none());
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.