From 33699880dc1bbad90d06ac3f9900e632f7d20e36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20B=C3=BCttner?= Date: Mon, 15 May 2023 12:00:33 +0200 Subject: [PATCH] Allow providing own implementation of compression algorithm. --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/compression.rs | 43 +++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/swapvec.rs | 28 ++++++++++++++++++------ tests/compression.rs | 4 ++-- tests/custom_compression.rs | 35 ++++++++++++++++++++++++++++++ 7 files changed, 104 insertions(+), 11 deletions(-) create mode 100644 tests/custom_compression.rs diff --git a/Cargo.lock b/Cargo.lock index ca6739a..b2a8939 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -158,7 +158,7 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "swapvec" -version = "0.2.0" +version = "0.3.0" dependencies = [ "bincode", "lz4_flex", diff --git a/Cargo.toml b/Cargo.toml index 3be089b..06fe9b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "swapvec" -version = "0.2.0" +version = "0.3.0" edition = "2021" authors = ["Julian Büttner "] license = "MIT" diff --git a/src/compression.rs b/src/compression.rs index cf2e683..beea0c3 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -2,8 +2,41 @@ use lz4_flex::{compress_prepend_size, decompress_size_prepended}; use crate::{swapvec::CompressionLevel, Compression}; +/// Provide your own compression algorithm by +/// creating an empty struct implementing `compress` +/// and `decompress`. +/// +/// Your compression algorithm is allowed to fail, +/// but _must_ always decompress into the same +/// bytes. Undefined behaviour otherwise. +/// +/// Note: You must always also implement +/// CompressBoxedClone, to allow cloning +/// and debugging of the configuration. +/// +/// ```rust +/// use swapvec::Compress; +/// struct DummyCompression; +/// impl Compress for DummyCompression { +/// fn compress(&self, block: Vec) -> Vec { +/// block +/// } +/// fn decompress(&self, block: Vec) -> Result, ()> { +/// Ok(block) +/// } +/// } +/// +/// let bytes = vec![1, 2, 3]; +/// let compression = DummyCompression; +/// assert_eq!(bytes, compression.decompress(compression.compress(bytes.clone())).unwrap()); +/// ``` pub trait Compress { + /// Compress bytes blockwise. The compressed block + /// will be put into `self.decompress()` later. fn compress(&self, block: Vec) -> Vec; + /// Receive block which was earlier `compress()`ed. + /// If the result is `Ok`, the same bytes which were + /// `compress()`es earlier are expected. fn decompress(&self, block: Vec) -> Result, ()>; } @@ -19,6 +52,7 @@ impl Compress for Option { }; miniz_oxide::deflate::compress_to_vec(&block, compression_level) } + Some(Compression::Custom(algo)) => algo.compress(block), None => block, } } @@ -28,11 +62,20 @@ impl Compress for Option { Some(Compression::Deflate(_)) => { miniz_oxide::inflate::decompress_to_vec(&block).map_err(|_| ()) } + Some(Compression::Custom(algo)) => algo.decompress(block), None => Ok(block), } } } +/// Your custom compression algorithm struct must be debugable +/// and clonable. Implement this trait to keep the main +/// configuration debugable and clonable. +pub trait CompressBoxedClone: Compress + std::fmt::Debug { + /// Clone your empty struct and return it as a new Box. + fn boxed_clone(&self) -> Box; +} + #[cfg(test)] mod test { use super::*; diff --git a/src/lib.rs b/src/lib.rs index 590faa0..c818abc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,5 +7,6 @@ mod swapvec; mod swapveciter; pub use self::swapvec::{Compression, CompressionLevel, SwapVec, SwapVecConfig}; +pub use compression::{Compress, CompressBoxedClone}; pub use error::SwapVecError; pub use swapveciter::SwapVecIter; diff --git a/src/swapvec.rs b/src/swapvec.rs index 1ed3cb5..e40f112 100644 --- a/src/swapvec.rs +++ b/src/swapvec.rs @@ -11,7 +11,11 @@ use std::os::unix::io::AsRawFd; use serde::{Deserialize, Serialize}; -use crate::{compression::Compress, error::SwapVecError, swapveciter::SwapVecIter}; +use crate::{ + compression::{Compress, CompressBoxedClone}, + error::SwapVecError, + swapveciter::SwapVecIter, +}; /// Set compression level of the compression /// algorithm. This maps to different values @@ -32,7 +36,7 @@ pub enum CompressionLevel { /// Configure compression for the temporary /// file into which your data might be swapped out. -#[derive(Debug, Clone, Copy)] +#[derive(Debug)] #[non_exhaustive] pub enum Compression { /// Read more about LZ4 here: [LZ4] @@ -40,6 +44,19 @@ pub enum Compression { Lz4, /// Deflate, mostly known from gzip. Deflate(CompressionLevel), + /// Provide your own compression algortihm by implementing + /// `Compress`. + Custom(Box), +} + +impl Clone for Compression { + fn clone(&self) -> Self { + match &self { + Self::Lz4 => Self::Lz4, + Self::Deflate(n) => Self::Deflate(*n), + Self::Custom(x) => Self::Custom(x.boxed_clone()), + } + } } /// Configure when and how the vector should swap. @@ -50,7 +67,7 @@ pub enum Compression { /// Keep in mind, that if the temporary file exists, /// after ever batch_size elements, at least one write (syscall) /// will happen. -#[derive(Debug, Clone, Copy)] +#[derive(Debug)] pub struct SwapVecConfig { /// The vector will create a temporary file and starting to /// swap after so many elements. @@ -208,10 +225,7 @@ where /// Get the file size in bytes of the temporary file. /// Might do IO and therefore could return some Result. pub fn file_size(&self) -> Option { - match self.tempfile.as_ref() { - None => None, - Some(f) => Some(f.file_size()), - } + self.tempfile.as_ref().map(|f| f.file_size()) } /// Basically int(elements pushed / batch size) diff --git a/tests/compression.rs b/tests/compression.rs index 060697b..c1d48fc 100644 --- a/tests/compression.rs +++ b/tests/compression.rs @@ -14,12 +14,12 @@ fn write_and_read_back_with_compression() { for compression in compression_configs { let config = SwapVecConfig { - compression, + compression: compression.clone(), swap_after: 16, batch_size: 8, }; let mut v = SwapVec::with_config(config); - v.consume(data.iter().map(|x| *x)).unwrap(); + v.consume(data.iter().copied()).unwrap(); let read_back: Vec = v .into_iter() .map(|x| { diff --git a/tests/custom_compression.rs b/tests/custom_compression.rs new file mode 100644 index 0000000..0cc62eb --- /dev/null +++ b/tests/custom_compression.rs @@ -0,0 +1,35 @@ +use swapvec::{Compress, CompressBoxedClone, Compression, SwapVec, SwapVecConfig}; + +#[derive(Debug)] +struct MyCompression; + +impl Compress for MyCompression { + fn compress(&self, block: Vec) -> Vec { + block + } + fn decompress(&self, block: Vec) -> Result, ()> { + Ok(block) + } +} + +impl CompressBoxedClone for MyCompression { + fn boxed_clone(&self) -> Box { + Box::new(MyCompression) + } +} + +#[test] +fn custom_compression() { + let config = SwapVecConfig { + compression: Some(Compression::Custom(Box::new(MyCompression))), + swap_after: 16, + batch_size: 5, + }; + + let vector: Vec = (0..999).collect(); + let mut v = SwapVec::with_config(config); + v.consume(vector.clone().into_iter()).unwrap(); + assert!(v.written_to_file()); + let vector_read_back: Vec = v.into_iter().map(|x| x.unwrap()).collect(); + assert_eq!(vector, vector_read_back); +}