Skip to content

Commit

Permalink
Allow providing own implementation of compression algorithm.
Browse files Browse the repository at this point in the history
  • Loading branch information
Julian Büttner committed May 15, 2023
1 parent b8a76ac commit 3369988
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 11 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "swapvec"
version = "0.2.0"
version = "0.3.0"
edition = "2021"
authors = ["Julian Büttner <git@julianbuettner.dev>"]
license = "MIT"
Expand Down
43 changes: 43 additions & 0 deletions src/compression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,41 @@ use lz4_flex::{compress_prepend_size, decompress_size_prepended};

use crate::{swapvec::CompressionLevel, Compression};

/// Provide your own compression algorithm by
/// creating an empty struct implementing `compress`
/// and `decompress`.
///
/// Your compression algorithm is allowed to fail,
/// but _must_ always decompress into the same
/// bytes. Undefined behaviour otherwise.
///
/// Note: You must always also implement
/// CompressBoxedClone, to allow cloning
/// and debugging of the configuration.
///
/// ```rust
/// use swapvec::Compress;
/// struct DummyCompression;
/// impl Compress for DummyCompression {
/// fn compress(&self, block: Vec<u8>) -> Vec<u8> {
/// block
/// }
/// fn decompress(&self, block: Vec<u8>) -> Result<Vec<u8>, ()> {
/// Ok(block)
/// }
/// }
///
/// let bytes = vec![1, 2, 3];
/// let compression = DummyCompression;
/// assert_eq!(bytes, compression.decompress(compression.compress(bytes.clone())).unwrap());
/// ```
pub trait Compress {
/// Compress bytes blockwise. The compressed block
/// will be put into `self.decompress()` later.
fn compress(&self, block: Vec<u8>) -> Vec<u8>;
/// Receive block which was earlier `compress()`ed.
/// If the result is `Ok`, the same bytes which were
/// `compress()`es earlier are expected.
fn decompress(&self, block: Vec<u8>) -> Result<Vec<u8>, ()>;
}

Expand All @@ -19,6 +52,7 @@ impl Compress for Option<Compression> {
};
miniz_oxide::deflate::compress_to_vec(&block, compression_level)
}
Some(Compression::Custom(algo)) => algo.compress(block),
None => block,
}
}
Expand All @@ -28,11 +62,20 @@ impl Compress for Option<Compression> {
Some(Compression::Deflate(_)) => {
miniz_oxide::inflate::decompress_to_vec(&block).map_err(|_| ())
}
Some(Compression::Custom(algo)) => algo.decompress(block),
None => Ok(block),
}
}
}

/// Your custom compression algorithm struct must be debugable
/// and clonable. Implement this trait to keep the main
/// configuration debugable and clonable.
pub trait CompressBoxedClone: Compress + std::fmt::Debug {
/// Clone your empty struct and return it as a new Box.
fn boxed_clone(&self) -> Box<dyn CompressBoxedClone>;
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ mod swapvec;
mod swapveciter;

pub use self::swapvec::{Compression, CompressionLevel, SwapVec, SwapVecConfig};
pub use compression::{Compress, CompressBoxedClone};
pub use error::SwapVecError;
pub use swapveciter::SwapVecIter;
28 changes: 21 additions & 7 deletions src/swapvec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ use std::os::unix::io::AsRawFd;

use serde::{Deserialize, Serialize};

use crate::{compression::Compress, error::SwapVecError, swapveciter::SwapVecIter};
use crate::{
compression::{Compress, CompressBoxedClone},
error::SwapVecError,
swapveciter::SwapVecIter,
};

/// Set compression level of the compression
/// algorithm. This maps to different values
Expand All @@ -32,14 +36,27 @@ pub enum CompressionLevel {

/// Configure compression for the temporary
/// file into which your data might be swapped out.
#[derive(Debug, Clone, Copy)]
#[derive(Debug)]
#[non_exhaustive]
pub enum Compression {
/// Read more about LZ4 here: [LZ4]
/// [LZ4]: https://github.com/lz4/lz4
Lz4,
/// Deflate, mostly known from gzip.
Deflate(CompressionLevel),
/// Provide your own compression algortihm by implementing
/// `Compress`.
Custom(Box<dyn CompressBoxedClone>),
}

impl Clone for Compression {
fn clone(&self) -> Self {
match &self {
Self::Lz4 => Self::Lz4,
Self::Deflate(n) => Self::Deflate(*n),
Self::Custom(x) => Self::Custom(x.boxed_clone()),
}
}
}

/// Configure when and how the vector should swap.
Expand All @@ -50,7 +67,7 @@ pub enum Compression {
/// Keep in mind, that if the temporary file exists,
/// after ever batch_size elements, at least one write (syscall)
/// will happen.
#[derive(Debug, Clone, Copy)]
#[derive(Debug)]
pub struct SwapVecConfig {
/// The vector will create a temporary file and starting to
/// swap after so many elements.
Expand Down Expand Up @@ -208,10 +225,7 @@ where
/// Get the file size in bytes of the temporary file.
/// Might do IO and therefore could return some Result.
pub fn file_size(&self) -> Option<u64> {
match self.tempfile.as_ref() {
None => None,
Some(f) => Some(f.file_size()),
}
self.tempfile.as_ref().map(|f| f.file_size())
}

/// Basically int(elements pushed / batch size)
Expand Down
4 changes: 2 additions & 2 deletions tests/compression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ fn write_and_read_back_with_compression() {

for compression in compression_configs {
let config = SwapVecConfig {
compression,
compression: compression.clone(),
swap_after: 16,
batch_size: 8,
};
let mut v = SwapVec::with_config(config);
v.consume(data.iter().map(|x| *x)).unwrap();
v.consume(data.iter().copied()).unwrap();
let read_back: Vec<i32> = v
.into_iter()
.map(|x| {
Expand Down
35 changes: 35 additions & 0 deletions tests/custom_compression.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
use swapvec::{Compress, CompressBoxedClone, Compression, SwapVec, SwapVecConfig};

#[derive(Debug)]
struct MyCompression;

impl Compress for MyCompression {
fn compress(&self, block: Vec<u8>) -> Vec<u8> {
block
}
fn decompress(&self, block: Vec<u8>) -> Result<Vec<u8>, ()> {
Ok(block)
}
}

impl CompressBoxedClone for MyCompression {
fn boxed_clone(&self) -> Box<dyn CompressBoxedClone> {
Box::new(MyCompression)
}
}

#[test]
fn custom_compression() {
let config = SwapVecConfig {
compression: Some(Compression::Custom(Box::new(MyCompression))),
swap_after: 16,
batch_size: 5,
};

let vector: Vec<u64> = (0..999).collect();
let mut v = SwapVec::with_config(config);
v.consume(vector.clone().into_iter()).unwrap();
assert!(v.written_to_file());
let vector_read_back: Vec<u64> = v.into_iter().map(|x| x.unwrap()).collect();
assert_eq!(vector, vector_read_back);
}

0 comments on commit 3369988

Please sign in to comment.