diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml
index 01a8a03734..8468956b6b 100644
--- a/src/core/Cargo.toml
+++ b/src/core/Cargo.toml
@@ -27,7 +27,7 @@ default = []
 
 [dependencies]
 az = "1.0.0"
-byteorder = "1.5.0"
+byteorder = "1.4.3"
 camino = { version = "1.1.6", features = ["serde1"] }
 cfg-if = "1.0"
 counter = "0.5.7"
@@ -45,7 +45,7 @@ murmurhash3 = "0.0.5"
 niffler = { version = "2.3.1", default-features = false, features = [ "gz" ] }
 nohash-hasher = "0.2.0"
 num-iter = "0.1.43"
-once_cell = "1.19.0"
+once_cell = "1.18.0"
 ouroboros = "0.18.3"
 piz = "0.5.0"
 primal-check = "0.3.1"
@@ -88,7 +88,7 @@ skip_feature_sets = [
 ## Wasm section. Crates only used for WASM, as well as specific configurations
 
 [target.'cfg(all(target_arch = "wasm32", target_os="unknown"))'.dependencies.wasm-bindgen]
-version = "0.2.90"
+version = "0.2.89"
 features = ["serde-serialize"]
 
 [target.'cfg(all(target_arch = "wasm32", target_os="unknown"))'.dependencies.web-sys]
diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs
index f5cb9a2b4e..a75eb6c3f8 100644
--- a/src/core/src/signature.rs
+++ b/src/core/src/signature.rs
@@ -771,8 +771,14 @@ impl Select for Signature {
             } else {
                 valid
             };
-            // TODO: execute downsample if needed
-
+            // keep a MinHash only if its scaled is <= the requested scaled; other sketches pass
+            if let Some(sel_scaled) = selection.scaled() {
+                valid = if let Sketch::MinHash(mh) = s {
+                    valid && mh.scaled() <= sel_scaled as u64
+                } else {
+                    valid
+                };
+            }
             /*
             valid = if let Some(abund) = selection.abund() {
                 valid && *s.with_abundance() == abund
@@ -785,8 +791,20 @@ impl Select for Signature {
                 valid
             };
             */
+
             valid
         });
+
+        // downsample retained MinHash sketches whose scaled is below the requested one.
+        if let Some(sel_scaled) = selection.scaled() {
+            for sketch in self.signatures.iter_mut() {
+                if let Sketch::MinHash(mh) = sketch {
+                    if mh.scaled() < sel_scaled as u64 {
+                        *sketch = Sketch::MinHash(mh.downsample_scaled(sel_scaled as u64)?);
+                    }
+                }
+            }
+        }
         Ok(self)
     }
 }
@@ -841,6 +859,10 @@ mod test {
 
     use super::Signature;
 
+    use crate::prelude::Select;
+    use crate::selection::Selection;
+    use crate::sketch::Sketch;
+
     #[test]
     fn load_sig() {
         let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
@@ -979,4 +1001,47 @@ mod test {
             assert_eq!(sk.size(), 500);
         }
     }
+
+    #[test]
+    fn selection_with_downsample() {
+        let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+        filename.push("../../tests/test-data/47+63-multisig.sig");
+
+        let file = File::open(filename).unwrap();
+        let reader = BufReader::new(file);
+        let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
+
+        // create Selection object
+        let mut selection = Selection::default();
+        selection.set_scaled(2000);
+        // iterate and check scaled
+        for sig in &sigs {
+            let modified_sig = sig.clone().select(&selection).unwrap();
+            for sketch in modified_sig.sketches() {
+                if let Sketch::MinHash(mh) = sketch {
+                    // each MinHash kept by select() must now report the requested scaled
+                    assert_eq!(mh.scaled(), 2000);
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn selection_scaled_too_low() {
+        let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+        filename.push("../../tests/test-data/47+63-multisig.sig");
+
+        let file = File::open(filename).unwrap();
+        let reader = BufReader::new(file);
+        let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
+
+        // create Selection object
+        let mut selection = Selection::default();
+        selection.set_scaled(100);
+        // iterate and check no sigs are returned (original scaled is 1000)
+        for sig in &sigs {
+            let modified_sig = sig.clone().select(&selection).unwrap();
+            assert_eq!(modified_sig.size(), 0);
+        }
+    }
 }