diff --git a/Cargo.lock b/Cargo.lock
index 2db96670de..0795b19d9b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1607,7 +1607,7 @@ checksum = "9f1341053f34bb13b5e9590afb7d94b48b48d4b87467ec28e3c238693bb553de"
 
 [[package]]
 name = "sourmash"
-version = "0.13.0"
+version = "0.13.1"
 dependencies = [
  "az",
  "byteorder",
@@ -1624,6 +1624,7 @@ dependencies = [
  "getset",
  "histogram",
  "itertools 0.12.1",
+ "js-sys",
  "log",
  "md5",
  "memmap2",
diff --git a/Makefile b/Makefile
index 9b26d91331..891b710732 100644
--- a/Makefile
+++ b/Makefile
@@ -56,6 +56,9 @@ last-tag:
 wasm:
 	wasm-pack build src/core -d ../../pkg
 
+wasm-test:
+	wasm-pack test --node src/core
+
 wasi:
 	cargo wasi build
 
diff --git a/flake.nix b/flake.nix
index 8d4fae898e..57213ac6aa 100644
--- a/flake.nix
+++ b/flake.nix
@@ -128,6 +128,7 @@
             cargo-outdated
             cargo-udeps
             cargo-deny
+            cargo-wasi
             #cargo-semver-checks
             nixpkgs-fmt
           ];
diff --git a/src/core/CHANGELOG.md b/src/core/CHANGELOG.md
index 67a3134144..ac4d169e80 100644
--- a/src/core/CHANGELOG.md
+++ b/src/core/CHANGELOG.md
@@ -5,6 +5,29 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [unreleased]
+
+## [0.13.1] - 2024-03-23
+
+MSRV: 1.65
+
+Changes/additions:
+
+* Implement file parsing for webassembly (#3047)
+* fix `calculate_gather_stats` `threshold=0` bug (#3052)
+* fix clippy beta issues (#3088)
+
+Updates:
+
+* Bump wasm-bindgen-test from 0.3.41 to 0.3.42 (#3063)
+* Bump web-sys from 0.3.68 to 0.3.69 (#3061)
+* Bump log from 0.4.20 to 0.4.21 (#3062)
+* Bump rayon from 1.8.1 to 1.9.0 (#3058)
+* Bump tempfile from 3.10.0 to 3.10.1 (#3059)
+* Bump serde_json from 1.0.113 to 1.0.114 (#3044)
+* Bump serde from 1.0.196 to 1.0.197 (#3045)
+* Bump itertools from 0.12.0 to 0.12.1 (#3043)
+
 ## [0.13.0] - 2024-02-23
 
 MSRV: 1.65
@@ -17,6 +40,7 @@ Changes/additions:
 * make core Manifest booleans python compatible (core) (#3007)
 
 Updates:
+
 * Bump roaring from 0.10.2 to 0.10.3 (#3014)
 * Bump histogram from 0.9.0 to 0.9.1 (#3002)
 * Bump chrono from 0.4.33 to 0.4.34 (#3000)
@@ -287,7 +311,11 @@ Fixed:
 - Fix mem leak in get_mins (#807)
 - Fixes for WASI and WASM compilation (#771) (#723)
 
-[unreleased]: https://github.com/sourmash-bio/sourmash/compare/r0.11.0...HEAD
+[unreleased]: https://github.com/sourmash-bio/sourmash/compare/r0.13.1...HEAD
+[0.13.1]: https://github.com/sourmash-bio/sourmash/compare/r0.13.0...r0.13.1
+[0.13.0]: https://github.com/sourmash-bio/sourmash/compare/r0.12.1...r0.13.0
+[0.12.1]: https://github.com/sourmash-bio/sourmash/compare/r0.12.0...r0.12.1
+[0.12.0]: https://github.com/sourmash-bio/sourmash/compare/r0.11.0...r0.12.0
 [0.11.0]: https://github.com/sourmash-bio/sourmash/compare/r0.10.0...r0.11.0
 [0.10.0]: https://github.com/sourmash-bio/sourmash/compare/r0.9.0...r0.10.0
 [0.9.0]: https://github.com/sourmash-bio/sourmash/compare/r0.9.0...r0.10.0
diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml
index 0f292db6d6..2b4ae08b59 100644
--- a/src/core/Cargo.toml
+++ b/src/core/Cargo.toml
@@ -1,8 +1,8 @@
 [package]
 name = "sourmash"
-version = "0.13.0"
-authors = ["Luiz Irber "]
-description = "MinHash sketches for genomic data"
+version = "0.13.1"
+authors = ["Luiz Irber ", "N. Tessa Pierce-Ward "]
+description = "tools for comparing biological sequences with k-mer sketches"
 repository = "https://github.com/sourmash-bio/sourmash"
 keywords = ["minhash", "bioinformatics"]
 categories = ["science", "algorithms", "data-structures"]
@@ -43,6 +43,7 @@ log = "0.4.21"
 md5 = "0.7.0"
 memmap2 = "0.9.4"
 murmurhash3 = "0.0.5"
+needletail = { version = "0.5.1", default-features = false }
 niffler = { version = "2.3.1", default-features = false, features = [ "gz" ] }
 nohash-hasher = "0.2.0"
 num-iter = "0.1.44"
@@ -64,8 +65,6 @@ typed-builder = "0.18.0"
 vec-collections = "0.4.3"
 
 [dev-dependencies]
-criterion = "0.5.1"
-needletail = { version = "0.5.1", default-features = false }
 proptest = { version = "1.4.0", default-features = false, features = ["std"]}
 rand = "0.8.2"
 tempfile = "3.10.1"
@@ -95,17 +94,13 @@ skip_feature_sets = [
 
 ## Wasm section. Crates only used for WASM, as well as specific configurations
 
-[target.'cfg(all(target_arch = "wasm32", target_os="unknown"))'.dependencies.wasm-bindgen]
-version = "0.2.89"
-features = ["serde-serialize"]
+[target.'cfg(all(target_arch = "wasm32", target_os="unknown"))'.dependencies]
+js-sys = "0.3.68"
+web-sys = { version = "0.3.69", features = ["console", "File", "FileReaderSync"] }
+wasm-bindgen = { version = "0.2.89", features = ["serde-serialize"] }
 
-[target.'cfg(all(target_arch = "wasm32", target_os="unknown"))'.dependencies.web-sys]
-version = "0.3.69"
-features = ["console", "File"]
-
-[target.'cfg(all(target_arch = "wasm32"))'.dependencies.chrono]
-version = "0.4.32"
-features = ["wasmbind"]
+[target.'cfg(all(target_arch = "wasm32"))'.dependencies]
+chrono = { version = "0.4.32", features = ["wasmbind"] }
 
 [target.'cfg(all(target_arch = "wasm32", target_os="unknown"))'.dev-dependencies]
 wasm-bindgen-test = "0.3.42"
@@ -113,3 +108,6 @@ wasm-bindgen-test = "0.3.42"
 ### These crates don't compile on wasm
 [target.'cfg(not(target_arch = "wasm32"))'.dependencies]
 rocksdb = { version = "0.21.0", optional = true }
+
+[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
+criterion = "0.5.1"
diff --git a/src/core/src/wasm.rs b/src/core/src/wasm.rs
index c2a0eb6c30..cd9efec091 100644
--- a/src/core/src/wasm.rs
+++ b/src/core/src/wasm.rs
@@ -4,6 +4,7 @@
 #[global_allocator]
 static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT;
 
+use needletail::parse_fastx_reader;
 use wasm_bindgen::prelude::*;
 
 use crate::cmd::ComputeParameters as _ComputeParameters;
@@ -57,15 +58,15 @@ impl KmerMinHash {
     }
 
     #[wasm_bindgen]
-    pub fn add_sequence_js(&mut self, buf: &str) {
-        self.0
-            .add_sequence(buf.as_bytes(), true)
-            .expect("Error adding sequence");
+    pub fn add_sequence_js(&mut self, buf: &str) -> Result<(), JsErrors> {
+        self.0.add_sequence(buf.as_bytes(), true)?;
+        Ok(())
     }
 
     #[wasm_bindgen]
-    pub fn to_json(&mut self) -> String {
-        serde_json::to_string(&self.0).unwrap()
+    pub fn to_json(&mut self) -> Result<String, JsErrors> {
+        let json = serde_json::to_string(&self.0)?;
+        Ok(json)
     }
 }
 
@@ -81,6 +82,41 @@ impl ComputeParameters {
     pub fn set_ksizes(&mut self, ksizes: Vec<u32>) {
         self.0.set_ksizes(ksizes);
     }
+
+    #[wasm_bindgen]
+    pub fn set_scaled(&mut self, scaled: u32) {
+        self.0.set_scaled(u64::from(scaled));
+    }
+
+    #[wasm_bindgen]
+    pub fn set_num(&mut self, num: u32) {
+        self.0.set_num_hashes(num);
+    }
+
+    #[wasm_bindgen]
+    pub fn set_protein(&mut self, is_protein: bool) {
+        self.0.set_protein(is_protein);
+    }
+
+    #[wasm_bindgen]
+    pub fn set_dayhoff(&mut self, dayhoff: bool) {
+        self.0.set_dayhoff(dayhoff);
+    }
+
+    #[wasm_bindgen]
+    pub fn set_hp(&mut self, hp: bool) {
+        self.0.set_hp(hp);
+    }
+
+    #[wasm_bindgen]
+    pub fn set_track_abundance(&mut self, track: bool) {
+        self.0.set_track_abundance(track);
+    }
+
+    #[wasm_bindgen]
+    pub fn set_seed(&mut self, seed: u32) {
+        self.0.set_seed(seed.into());
+    }
 }
 
 #[wasm_bindgen]
@@ -93,20 +129,43 @@ impl Signature {
     }
 
     #[wasm_bindgen]
-    pub fn add_sequence_js(&mut self, buf: &str) {
-        self.0
-            .add_sequence(buf.as_bytes(), true)
-            .expect("Error adding sequence");
+    pub fn add_sequence_js(&mut self, buf: &str) -> Result<(), JsErrors> {
+        self.0.add_sequence(buf.as_bytes(), true)?;
+
+        Ok(())
     }
 
     #[wasm_bindgen]
-    pub fn add_from_file(&mut self, fp: web_sys::File) {
-        unimplemented!()
+    /// Parses the FASTA/FASTQ records in `fp` and adds every sequence to this signature.
+    ///
+    /// `callback`, when given, is called after each read with the fraction of the file
+    /// consumed so far. The file is read through `FileReaderSync` (see `SyncFile`).
+    pub fn add_from_file(
+        &mut self,
+        fp: web_sys::File,
+        callback: Option<js_sys::Function>,
+    ) -> Result<(), JsErrors> {
+        let wf = SyncFile::new(fp, callback);
+
+        let (rdr, _format) = niffler::send::get_reader(Box::new(wf))?;
+
+        let mut parser = parse_fastx_reader(std::io::BufReader::with_capacity(
+            1024 << 14, // 16 MiB
+            rdr,
+        ))?;
+
+        while let Some(record) = parser.next() {
+            let record = record?;
+            self.0.add_sequence(&record.seq(), true)?;
+        }
+
+        Ok(())
     }
 
     #[wasm_bindgen]
-    pub fn to_json(&mut self) -> String {
-        serde_json::to_string(&self.0).unwrap()
+    pub fn to_json(&mut self) -> Result<String, JsErrors> {
+        let json = serde_json::to_string(&self.0)?;
+        Ok(json)
     }
 
     pub fn size(&self) -> usize {
@@ -114,6 +173,32 @@ impl Signature {
     }
 }
 
+/// Errors surfaced to JavaScript by the wasm bindings.
+///
+/// Every variant transparently wraps the underlying error; JavaScript receives a
+/// `js_sys::Error` carrying that error's message.
+#[derive(thiserror::Error, Debug)]
+pub enum JsErrors {
+    #[error(transparent)]
+    SourmashError(#[from] crate::Error),
+
+    #[error(transparent)]
+    SerdeError(#[from] serde_json::error::Error),
+
+    #[error(transparent)]
+    NifflerError(#[from] niffler::Error),
+
+    #[error(transparent)]
+    NeedletailError(#[from] needletail::errors::ParseError),
+}
+
+impl From<JsErrors> for JsValue {
+    fn from(err: JsErrors) -> Self {
+        let error = js_sys::Error::new(&err.to_string());
+        error.into()
+    }
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
@@ -127,3 +212,118 @@ mod test {
         assert_eq!(sig.size(), 3);
     }
 }
+
+// ==============================
+
+use js_sys::Number;
+use js_sys::Uint8Array;
+use web_sys::FileReaderSync;
+
+thread_local! {
+    // `thread_local!` already initializes lazily, on first access in each thread.
+    static FILE_READER_SYNC: FileReaderSync = FileReaderSync::new()
+        .expect("Failed to create FileReaderSync. Is it running in a web worker context?");
+}
+
+/// Builds an `io::Error` of kind `Other` from a static message.
+///
+/// `std::io::Error::other` is only available from Rust 1.74, above this crate's MSRV (1.65).
+fn io_error(msg: &'static str) -> std::io::Error {
+    std::io::Error::new(std::io::ErrorKind::Other, msg)
+}
+
+/// Wrapper around a `web_sys::File` that implements `Read`.
+///
+/// Bytes are fetched synchronously through `FileReaderSync`; after every read the
+/// optional callback receives the fraction of the file consumed so far.
+pub struct SyncFile {
+    file: web_sys::File,
+    pos: u64,
+    cb: Option<js_sys::Function>,
+}
+
+// SAFETY (assumed, not proven): a `SyncFile` is created and consumed inside a single Web
+// Worker, and the JS handles it wraps cannot be sent to or accessed from another worker.
+// `Send` is needed because `niffler::send::get_reader` only accepts `Send` readers.
+unsafe impl Send for SyncFile {}
+
+impl SyncFile {
+    /// Wraps `file`, starting at offset 0; `cb` is the optional progress callback.
+    pub fn new(file: web_sys::File, cb: Option<js_sys::Function>) -> Self {
+        Self { file, pos: 0, cb }
+    }
+
+    /// File size in bytes.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the size reported by the browser exceeds `Number::MAX_SAFE_INTEGER`.
+    pub fn size(&self) -> u64 {
+        let size = self.file.size();
+        assert!(
+            size <= Number::MAX_SAFE_INTEGER,
+            "size is not safe to convert to integer from float"
+        );
+        size as u64
+    }
+
+    /// Records the new read offset and reports progress to the callback, if any.
+    fn set_pos(&mut self, pos: u64) {
+        self.pos = pos;
+        if let Some(f) = self.cb.as_ref() {
+            let arr = js_sys::Array::new_with_length(1);
+            arr.set(0, self.progress().into());
+            f.apply(&JsValue::null(), &arr)
+                .expect("Error calling progress callback");
+        }
+    }
+
+    /// Current progress on the file, as a fraction of its total size.
+    ///
+    /// An empty file is reported as fully read (`1.0`) instead of producing `NaN` (0 / 0).
+    pub fn progress(&self) -> f64 {
+        let total = self.file.size();
+        if total > 0.0 {
+            self.pos as f64 / total
+        } else {
+            1.0
+        }
+    }
+}
+
+impl std::io::Read for SyncFile {
+    /// Reads up to `buf.len()` bytes starting at the current offset, then advances it.
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        let current_offset = self.pos;
+        let requested = u64::try_from(buf.len()).map_err(|_| io_error("Can't convert to u64"))?;
+        // `slice_with_f64_and_f64` takes its byte offsets as `f64`.
+        let start = current_offset as f64;
+        let end = current_offset.saturating_add(requested) as f64;
+
+        let blob = self
+            .file
+            .slice_with_f64_and_f64(start, end)
+            .map_err(|_| io_error("failed to slice file"))?;
+        let array_buffer = FILE_READER_SYNC
+            .with(|frs| frs.read_as_array_buffer(&blob))
+            .map_err(|_| io_error("failed to read as array buffer"))?;
+
+        let array = Uint8Array::new(&array_buffer);
+        let read_bytes = usize::try_from(array.byte_length())
+            .map_err(|_| io_error("read too many bytes at once"))?;
+
+        // Copy to output buffer; `copy_to` needs a destination of exactly `read_bytes` bytes.
+        let dest = buf
+            .get_mut(..read_bytes)
+            .ok_or_else(|| io_error("read more bytes than requested"))?;
+        array.copy_to(dest);
+
+        // Update position
+        let new_pos = current_offset
+            .checked_add(read_bytes as u64)
+            .ok_or_else(|| io_error("new position too large"))?;
+        self.set_pos(new_pos);
+
+        Ok(read_bytes)
+    }
+}
diff --git a/src/core/tests/dedicated_worker.rs b/src/core/tests/dedicated_worker.rs
new file mode 100644
index 0000000000..f7186a003f
--- /dev/null
+++ b/src/core/tests/dedicated_worker.rs
@@ -0,0 +1,5 @@
+#![cfg(all(target_arch = "wasm32", target_os = "unknown"))]
+
+use wasm_bindgen_test::wasm_bindgen_test_configure;
+
+wasm_bindgen_test_configure!(run_in_dedicated_worker);
diff --git a/src/core/tests/node.rs b/src/core/tests/node.rs
new file mode 100644
index 0000000000..f846433061
--- /dev/null
+++ b/src/core/tests/node.rs
@@ -0,0 +1,8 @@
+#![cfg(all(target_arch = "wasm32", target_os = "unknown"))]
+
+use wasm_bindgen_test::*;
+
+#[wasm_bindgen_test]
+fn pass() {
+    assert_eq!(1, 1);
+}
diff --git a/src/core/tests/service_worker.rs b/src/core/tests/service_worker.rs
new file mode 100644
index 0000000000..dae9341d9e
--- /dev/null
+++ b/src/core/tests/service_worker.rs
@@ -0,0 +1,5 @@
+#![cfg(all(target_arch = "wasm32", target_os = "unknown"))]
+
+use wasm_bindgen_test::wasm_bindgen_test_configure;
+
+wasm_bindgen_test_configure!(run_in_service_worker);
diff --git a/src/core/tests/shared_worker.rs b/src/core/tests/shared_worker.rs
new file mode 100644
index 0000000000..8d8bfc7a4f
--- /dev/null
+++ b/src/core/tests/shared_worker.rs
@@ -0,0 +1,5 @@
+#![cfg(all(target_arch = "wasm32", target_os = "unknown"))]
+
+use wasm_bindgen_test::wasm_bindgen_test_configure;
+
+wasm_bindgen_test_configure!(run_in_shared_worker);
diff --git a/src/core/tests/web.rs b/src/core/tests/web.rs
new file mode 100644
index 0000000000..3bbc3dad61
--- /dev/null
+++ b/src/core/tests/web.rs
@@ -0,0 +1,5 @@
+#![cfg(all(target_arch = "wasm32", target_os = "unknown"))]
+
+use wasm_bindgen_test::wasm_bindgen_test_configure;
+
+wasm_bindgen_test_configure!(run_in_browser);